summary | refs | log | tree | commit | diff
path: root/Bench.sh
diff options
context:
space:
mode:
author: Peter Fors <peter.fors@mindkiller.com> 2025-10-25 23:07:35 +0200
committer: Peter Fors <peter.fors@mindkiller.com> 2025-10-25 23:28:22 +0200
commit: b2f646d9f99dd272f3b3a9d045b5039e6fc1dc50 (patch)
tree: 9ea8977531306f414d94ceca7dcfa6f17c204687 /Bench.sh
parent: 54ca8318923fcf11e1cf507bd516b210ba7cf221 (diff)
Refactor benchmarking to self-contained C implementation
- Add mknes_bench.c with direct PMC access via perf_event_open()
- Remove dependency on external perf/awk for statistics
- Add RT priority and CPU affinity control in C code
- Use static BSS allocation (page-aligned) instead of malloc
- Add stalled-cycles-backend counter (gracefully handles AMD unavailability)
- Add throughput metrics (MIPS, Mcycles/sec)
- Optimize Bench.sh to only regenerate profile data when needed
- Add -n and -f flags for configurable runs and frames
- Suppress mapper messages during benchmark
- ~6x faster benchmark workflow (20s first run, 16s subsequent)
Diffstat (limited to 'Bench.sh')
-rwxr-xr-x  Bench.sh  50
1 files changed, 9 insertions, 41 deletions
diff --git a/Bench.sh b/Bench.sh
index 157ad57..8be485a 100755
--- a/Bench.sh
+++ b/Bench.sh
@@ -1,45 +1,13 @@
#!/usr/bin/env bash
-./build.sh clean
-./build.sh profile
-./mknes
-./build.sh profile_release
-
-runs=10
-frames=4096
-events="cycles,instructions,task-clock"
-tmp=$(mktemp)
-
-taskset -c 1 ./mknes
-
-> "$tmp"
-for i in $(seq 1 $runs); do
- taskset -c 1 chrt -f 99 perf stat -x, -e $events -- ./mknes 2>>"$tmp"
-done
-
-awk -F, -v F="$frames" '
- $3=="cycles" { c[++nc]=$1/F }
- $3=="instructions" { i[++ni]=$1/F }
-# $3=="task-clock" { t[++nt]=$1 } # milliseconds NOTE(peter): changed to nanoseconds...
- $3=="task-clock" { t[++nt]=$1/1000000 }
+# Only rebuild profile data if it doesn't exist
+if [ ! -f mknes.gcda ]; then
+ ./build.sh clean
+ ./build.sh profile
+ ./mknes -n 1 -f 1024 # Quick single run for profile generation
+fi
- END {
- for(k=1;k<=nc;k++) sumc+=c[k]; mc=sumc/nc
- for(k=1;k<=ni;k++) sumi+=i[k]; mi=sumi/ni
- for(k=1;k<=nt;k++) sumt+=t[k]; mt=sumt/nt
-
- for(k=1;k<=nc;k++) sdc+=(c[k]-mc)^2; sdc=sqrt(sdc/(nc-1))
- for(k=1;k<=ni;k++) sdi+=(i[k]-mi)^2; sdi=sqrt(sdi/(ni-1))
- for(k=1;k<=nt;k++) sdt+=(t[k]-mt)^2; sdt=sqrt(sdt/(nt-1))
-
- ms_per_frame = mt / F
- fps = F / (mt / 1000)
-
- printf "IPC (insn/cycle) = %.3f\n", mi/mc
- printf "cycles/frame mean=%.0f sd=%.0f relSD=%.3f%% n=%d\n", mc, sdc, 100*sdc/mc, nc
- printf "insn/frame mean=%.0f sd=%.0f relSD=%.3f%% n=%d\n", mi, sdi, 100*sdi/mi, ni
- printf "time (ms) mean=%.3f sd=%.3f relSD=%.3f%% n=%d\n", mt, sdt, 100*sdt/mt, nt
- printf "FPS (frames/second) = %.2f\n", fps
- printf "ms/frame = %.6f\n", ms_per_frame
- }' "$tmp"
+./build.sh profile_release
+# Run full benchmark
+./mknes