From b2f646d9f99dd272f3b3a9d045b5039e6fc1dc50 Mon Sep 17 00:00:00 2001
From: Peter Fors
Date: Sat, 25 Oct 2025 23:07:35 +0200
Subject: Refactor benchmarking to self-contained C implementation

- Add mknes_bench.c with direct PMC access via perf_event_open()
- Remove dependency on external perf/awk for statistics
- Add RT priority and CPU affinity control in C code
- Use static BSS allocation (page-aligned) instead of malloc
- Add stalled-cycles-backend counter (gracefully handles AMD unavailability)
- Add throughput metrics (MIPS, Mcycles/sec)
- Optimize Bench.sh to only regenerate profile data when needed
- Add -n and -f flags for configurable runs and frames
- Suppress mapper messages during benchmark
- ~6x faster benchmark workflow (20s first run, 16s subsequent)
---
 Bench.sh | 50 +++++++++-----------------------------------------
 1 file changed, 9 insertions(+), 41 deletions(-)

(limited to 'Bench.sh')

diff --git a/Bench.sh b/Bench.sh
index 157ad57..8be485a 100755
--- a/Bench.sh
+++ b/Bench.sh
@@ -1,45 +1,13 @@
 #!/usr/bin/env bash
 
-./build.sh clean
-./build.sh profile
-./mknes
-./build.sh profile_release
-
-runs=10
-frames=4096
-events="cycles,instructions,task-clock"
-tmp=$(mktemp)
-
-taskset -c 1 ./mknes
-
-> "$tmp"
-for i in $(seq 1 $runs); do
-	taskset -c 1 chrt -f 99 perf stat -x, -e $events -- ./mknes 2>>"$tmp"
-done
-
-awk -F, -v F="$frames" '
-	$3=="cycles" { c[++nc]=$1/F }
-	$3=="instructions" { i[++ni]=$1/F }
-# $3=="task-clock" { t[++nt]=$1 } # milliseconds NOTE(peter): changed to nanoseconds...
-	$3=="task-clock" { t[++nt]=$1/1000000 }
+# Only rebuild profile data if it doesn't exist
+if [ ! -f mknes.gcda ]; then
+	./build.sh clean
+	./build.sh profile
+	./mknes -n 1 -f 1024 # Quick single run for profile generation
+fi
 
-	END {
-		for(k=1;k<=nc;k++) sumc+=c[k]; mc=sumc/nc
-		for(k=1;k<=ni;k++) sumi+=i[k]; mi=sumi/ni
-		for(k=1;k<=nt;k++) sumt+=t[k]; mt=sumt/nt
-
-		for(k=1;k<=nc;k++) sdc+=(c[k]-mc)^2; sdc=sqrt(sdc/(nc-1))
-		for(k=1;k<=ni;k++) sdi+=(i[k]-mi)^2; sdi=sqrt(sdi/(ni-1))
-		for(k=1;k<=nt;k++) sdt+=(t[k]-mt)^2; sdt=sqrt(sdt/(nt-1))
-
-		ms_per_frame = mt / F
-		fps = F / (mt / 1000)
-
-		printf "IPC (insn/cycle) = %.3f\n", mi/mc
-		printf "cycles/frame mean=%.0f sd=%.0f relSD=%.3f%% n=%d\n", mc, sdc, 100*sdc/mc, nc
-		printf "insn/frame mean=%.0f sd=%.0f relSD=%.3f%% n=%d\n", mi, sdi, 100*sdi/mi, ni
-		printf "time (ms) mean=%.3f sd=%.3f relSD=%.3f%% n=%d\n", mt, sdt, 100*sdt/mt, nt
-		printf "FPS (frames/second) = %.2f\n", fps
-		printf "ms/frame = %.6f\n", ms_per_frame
-	}' "$tmp"
+./build.sh profile_release
 
+# Run full benchmark
+./mknes
-- 
cgit v1.2.3