Commit b233b7d

feat(bench): allow running benchmarks via glob (#41)
- allow specifying a glob of benchmarks to run instead of a directory
- record results for plain Ruby execution, native tracer and pure Ruby tracer
- compare traces structurally via JSON and print consolidated table
- update just bench recipe to pass a glob
1 parent 798f1a0 commit b233b7d
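
The structural comparison mentioned in the commit message is one of the behavioral changes over the old byte-for-byte check: two trace files that serialize the same events with different whitespace or key order now compare equal. A minimal sketch of the idea (the JSON strings below are made-up examples, not real trace output):

    require 'json'

    # Byte comparison fails on formatting differences; structural comparison does not.
    a = '{"events":[{"name":"foo","line":1}]}'
    b = "{\n  \"events\": [{\"line\": 1, \"name\": \"foo\"}]\n}"

    puts a == b                          # false: the raw bytes differ
    puts JSON.parse(a) == JSON.parse(b)  # true: the parsed structures are equal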

2 files changed: +71 -45 lines


Justfile

Lines changed: 2 additions & 2 deletions
@@ -3,8 +3,8 @@ alias t := test
 test:
     ruby -Itest test/test_tracer.rb
 
-bench name="heavy_work" write_report="console":
-    ruby test/benchmarks/run_benchmarks.rb test/benchmarks/programs --write-report={{write_report}}
+bench pattern="*" write_report="console":
+    ruby test/benchmarks/run_benchmarks.rb '{{pattern}}' --write-report={{write_report}}
 
 build-extension:
     cargo build --release --manifest-path gems/native-tracer/ext/native_tracer/Cargo.toml
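
The pattern is single-quoted in the recipe so the shell passes it to the script verbatim instead of expanding it; the script then matches it against benchmark basenames with File.fnmatch?, as the diff below shows. A rough sketch of that selection step, using hypothetical program names:

    # Hypothetical basenames, standing in for test/benchmarks/programs/*.rb
    all_programs = ['heavy_work', 'light_work', 'string_ops']

    pattern = 'heavy_*'  # e.g. from: just bench 'heavy_*'
    selected = all_programs.select { |name| File.fnmatch?(pattern, name) }
    puts selected.inspect  # => ["heavy_work"]

The default pattern of "*" matches every basename, which keeps the old run-everything behavior.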

test/benchmarks/run_benchmarks.rb

Lines changed: 69 additions & 43 deletions
@@ -3,7 +3,6 @@
 
 require 'json'
 require 'fileutils'
-require 'digest'
 require 'benchmark'
 require 'optparse'
 
@@ -14,82 +13,108 @@
 
 options = { write_report: WRITE_REPORT_DEFAULT }
 OptionParser.new do |opts|
-  opts.banner = 'Usage: ruby run_benchmarks.rb BENCHMARK_DIR [options]'
+  opts.banner = 'Usage: ruby run_benchmarks.rb GLOB [options]'
   opts.on('--write-report=DEST', 'console or path to .json/.svg report') do |dest|
     options[:write_report] = dest
   end
 end.parse!
+pattern = ARGV.shift || abort('Usage: ruby run_benchmarks.rb GLOB [options]')
 
-benchmark_dir = ARGV.shift || abort('Usage: ruby run_benchmarks.rb BENCHMARK_DIR [options]')
-unless Dir.exist?(benchmark_dir)
-  abort("Benchmark directory not found: #{benchmark_dir}")
-end
-
-# Collect benchmark names (file basenames without extension)
-benchmarks = Dir.glob(File.join(benchmark_dir, '*.rb')).map { |f| File.basename(f, '.rb') }
+# Collect benchmark names and match against the provided glob
+all_programs = Dir.glob(File.join(PROGRAMS_DIR, '*.rb')).map { |f| File.basename(f, '.rb') }
+benchmarks = all_programs.select { |name| File.fnmatch?(pattern, name) }
 if benchmarks.empty?
-  abort("No benchmark files (*.rb) found in directory: #{benchmark_dir}")
+  abort("No benchmarks match pattern: #{pattern}")
 end
 
-# Compare two files for identical content
-def files_identical?(a, b)
-  cmp_result = system('cmp', '-s', a, b)
-  return $?.success? if !cmp_result.nil?
-  File.binread(a) == File.binread(b)
+# Compare two trace files structurally
+def traces_equal?(a, b)
+  JSON.parse(File.read(a)) == JSON.parse(File.read(b))
 end
 
 # Run a single benchmark by name
-def run_benchmark(name, benchmark_dir)
-  program = File.expand_path(File.join(benchmark_dir, "#{name}.rb"))
-  fixture = File.join(FIXTURES_DIR, "#{name}_trace.json")
-  output_dir = File.join(TMP_DIR, name)
-
-  FileUtils.mkdir_p(output_dir)
+def run_benchmark(name)
+  program = File.join(PROGRAMS_DIR, "#{name}.rb")
+  fixture = File.join(FIXTURES_DIR, "#{name}_trace.json")
   raise 'Reference trace unavailable' unless File.exist?(fixture)
 
+  base_dir = File.join(TMP_DIR, name)
+  FileUtils.rm_rf(base_dir)
+
+  results = { name: name }
+
+  elapsed = Benchmark.realtime do
+    system('ruby', program)
+    raise 'Program failed' unless $?.success?
+  end
+  results[:ruby_ms] = (elapsed * 1000).round
+
+  native_dir = File.join(TMP_DIR, name, 'native')
+  FileUtils.mkdir_p(native_dir)
+  elapsed = Benchmark.realtime do
+    system('ruby', File.expand_path('../../gems/native-tracer/lib/native_trace.rb', __dir__),
+           '--out-dir', native_dir, program)
+    raise 'Native trace failed' unless $?.success?
+  end
+  results[:native_ms] = (elapsed * 1000).round
+  native_trace = File.join(native_dir, 'trace.json')
+  results[:native_ok] = traces_equal?(fixture, native_trace)
+
+  pure_dir = File.join(TMP_DIR, name, 'pure')
+  FileUtils.mkdir_p(pure_dir)
   elapsed = Benchmark.realtime do
     system('ruby', File.expand_path('../../gems/pure-ruby-tracer/lib/trace.rb', __dir__),
-           '--out-dir', output_dir,
-           program)
-    raise 'Trace failed' unless $?.success?
+           '--out-dir', pure_dir, program)
+    raise 'Pure trace failed' unless $?.success?
  end
-  runtime_ms = (elapsed * 1000).round
-  output_trace = File.join(output_dir, 'trace.json')
-  success = files_identical?(fixture, output_trace)
-  size_bytes = File.size(output_trace)
+  results[:pure_ms] = (elapsed * 1000).round
+  pure_trace = File.join(pure_dir, 'trace.json')
+  results[:pure_ok] = traces_equal?(fixture, pure_trace)
 
-  { name: name, runtime_ms: runtime_ms, trace_size: size_bytes, success: success }
+  results
 end
 
 # Execute all benchmarks
-results = benchmarks.map { |b| run_benchmark(b, benchmark_dir) }
+results = benchmarks.map { |b| run_benchmark(b) }
 
 # Reporting
 if options[:write_report] == 'console'
   # Determine column widths
-  name_w = [ 'Benchmark'.length, *results.map { |r| r[:name].length } ].max
-  rt_w = [ 'Runtime'.length, *results.map { |r| r[:runtime_ms].to_s.length } ].max
-  ts_w = [ 'Trace Size'.length, *results.map { |r| r[:trace_size].to_s.length } ].max
+  name_w = ['Benchmark'.length, *results.map { |r| r[:name].length }].max
+  ruby_w = ['Ruby'.length, *results.map { |r| "#{r[:ruby_ms]}ms".length }].max
+  native_w = ['Native'.length, *results.map { |r| "#{r[:native_ok] ? 'OK' : 'FAIL'} #{r[:native_ms]}ms".length }].max
+  pure_w = ['Pure'.length, *results.map { |r| "#{r[:pure_ok] ? 'OK' : 'FAIL'} #{r[:pure_ms]}ms".length }].max
 
   # Header
-  printf "%-#{name_w}s %#{rt_w}s %#{ts_w}s %s\n", 'Benchmark', 'Runtime', 'Trace Size', 'Status'
-  puts '-' * (name_w + rt_w + ts_w + 10)
+  printf "%-#{name_w}s %-#{ruby_w}s %-#{native_w}s %-#{pure_w}s\n", 'Benchmark', 'Ruby', 'Native', 'Pure'
+  puts '-' * (name_w + ruby_w + native_w + pure_w + 6)
 
   # Rows
   results.each do |r|
-    status = r[:success] ? 'OK' : 'FAIL'
-    printf "%-#{name_w}s %#{rt_w}d ms %#{ts_w}d %s\n", r[:name], r[:runtime_ms], r[:trace_size], status
+    ruby_s = "#{r[:ruby_ms]}ms"
+    native_s = "#{r[:native_ok] ? 'OK' : 'FAIL'} #{r[:native_ms]}ms"
+    pure_s = "#{r[:pure_ok] ? 'OK' : 'FAIL'} #{r[:pure_ms]}ms"
+    printf "%-#{name_w}s %-#{ruby_w}s %-#{native_w}s %-#{pure_w}s\n", r[:name], ruby_s, native_s, pure_s
   end
 
   # Exit with non-zero if any failed
-  exit 1 unless results.all? { |r| r[:success] }
+  exit 1 unless results.all? { |r| r[:native_ok] && r[:pure_ok] }
 else
   dest = options[:write_report]
   FileUtils.mkdir_p(File.dirname(dest))
 
   case File.extname(dest)
   when '.json'
-    data = results.map { |r| { benchmark: r[:name], runtime_ms: r[:runtime_ms], trace_bytes: r[:trace_size] } }
+    data = results.map do |r|
+      {
+        benchmark: r[:name],
+        ruby_ms: r[:ruby_ms],
+        native_ms: r[:native_ms],
+        native_ok: r[:native_ok],
+        pure_ms: r[:pure_ms],
+        pure_ok: r[:pure_ok]
+      }
+    end
     File.write(dest, JSON.pretty_generate(data))
   when '.svg'
     row_height = 25
@@ -98,11 +123,12 @@ def run_benchmark(name, benchmark_dir)
     svg << " <foreignObject width='100%' height='100%'>\n"
     svg << " <style>table{border-collapse:collapse;font-family:sans-serif;}td,th{border:1px solid #999;padding:4px;}</style>\n"
     svg << " <table>\n"
-    svg << " <thead><tr><th>Benchmark</th><th>Runtime (ms)</th><th>Trace size (bytes)</th><th>Status</th></tr></thead>\n"
+    svg << " <thead><tr><th>Benchmark</th><th>Ruby (ms)</th><th>Native</th><th>Pure</th></tr></thead>\n"
     svg << " <tbody>\n"
     results.each do |r|
-      status = r[:success] ? 'OK' : 'FAIL'
-      svg << " <tr><td>#{r[:name]}</td><td>#{r[:runtime_ms]}</td><td>#{r[:trace_size]}</td><td>#{status}</td></tr>\n"
+      native_s = r[:native_ok] ? 'OK' : 'FAIL'
+      pure_s = r[:pure_ok] ? 'OK' : 'FAIL'
+      svg << " <tr><td>#{r[:name]}</td><td>#{r[:ruby_ms]}</td><td>#{native_s} #{r[:native_ms]}</td><td>#{pure_s} #{r[:pure_ms]}</td></tr>\n"
     end
     svg << " </tbody>\n"
     svg << " </table>\n"
@@ -114,7 +140,7 @@ def run_benchmark(name, benchmark_dir)
 end
 
 # Warn and exit if any failures
-unless results.all? { |r| r[:success] }
+unless results.all? { |r| r[:native_ok] && r[:pure_ok] }
   warn 'One or more traces differ from reference!'
   exit 1
 end
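
Each of the three timed runs in run_benchmark follows the same measure-then-verify shape: time a child ruby process with Benchmark.realtime and bail out if it did not exit cleanly. A stripped-down sketch of that pattern (the -e one-liner is a placeholder workload, not a real benchmark program):

    require 'benchmark'

    # Time a child process; $? holds its exit status after system returns.
    elapsed = Benchmark.realtime do
      system('ruby', '-e', 'sleep 0.05')  # placeholder for a benchmark program
      raise 'Program failed' unless $?.success?
    end

    puts "#{(elapsed * 1000).round}ms"    # rounded to whole milliseconds, as in the script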
