Regression enhancements (#526)

MarkWolters · web-flow · commit 19f60feef022 · 2025-09-19T16:06:55.000-05:00
* fixed ability to filter datasets
* fix for illegal char in branch name
* fixed missing space for default arg
* fixed missing space for default arg
* reordering to prevent overwriting test results
* moving default diagnostic logging behavior to NONE and adding option to set diagnostic level
* fixed erroneous change
* fixed commented out files
diff --git a/.github/workflows/run-bench.yml b/.github/workflows/run-bench.yml
@@ -158,21 +158,35 @@ jobs:
           
           # Run the benchmark
           echo "Running benchmark for branch ${{ matrix.branch }}"
+
+          # Determine optional benchmark config argument from workflow input
+          BENCH_ARG="${{ github.event.inputs.benchmark_config }}"
+          if [[ -z "$BENCH_ARG" ]]; then
+            echo "No benchmark_config provided; running with default dataset selection."
+            BENCH_SUFFIX=""
+          else
+            echo "Using benchmark_config: '$BENCH_ARG'"
+            BENCH_SUFFIX=" $BENCH_ARG"
+          fi
+
+          # Sanitize branch name for filenames: replace any non-alphanumeric, dash or underscore with underscore
+          SAFE_BRANCH=$(echo "${{ matrix.branch }}" | sed 's/[^A-Za-z0-9_-]/_/g')
+
           if [[ "${{ github.event_name }}" == "pull_request" ]]; then
             java ${{ matrix.jdk >= 20 && '--enable-native-access=ALL-UNNAMED --add-modules=jdk.incubator.vector' || '' }} \
               ${{ matrix.jdk >= 22 && '-Djvector.experimental.enable_native_vectorization=true' || '' }} \
               -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=/tmp/heap_dump/ -Xmx${HALF_MEM_GB}g \
-              -cp jvector-examples/target/jvector-examples-*-jar-with-dependencies.jar io.github.jbellis.jvector.example.AutoBenchYAML --output ${{ matrix.branch }}-bench-results dpr-1M
+              -cp jvector-examples/target/jvector-examples-*-jar-with-dependencies.jar io.github.jbellis.jvector.example.AutoBenchYAML --output ${SAFE_BRANCH}-bench-results dpr-1M
           else
             java ${{ matrix.jdk >= 20 && '--enable-native-access=ALL-UNNAMED --add-modules=jdk.incubator.vector' || '' }} \
               ${{ matrix.jdk >= 22 && '-Djvector.experimental.enable_native_vectorization=true' || '' }} \
               -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=/tmp/heap_dump/ -Xmx${HALF_MEM_GB}g \
-              -cp jvector-examples/target/jvector-examples-*-jar-with-dependencies.jar io.github.jbellis.jvector.example.AutoBenchYAML --output ${{ matrix.branch }}-bench-results
+              -cp jvector-examples/target/jvector-examples-*-jar-with-dependencies.jar io.github.jbellis.jvector.example.AutoBenchYAML --output ${SAFE_BRANCH}-bench-results${BENCH_SUFFIX:+ }${BENCH_ARG}
           fi
 
           # Move the results to the benchmark_results directory
-          mv ${{ matrix.branch }}-bench-results.csv benchmark_results/ || true
-          mv ${{ matrix.branch }}-bench-results.json benchmark_results/ || true
+          mv ${SAFE_BRANCH}-bench-results.csv benchmark_results/ || true
+          mv ${SAFE_BRANCH}-bench-results.json benchmark_results/ || true
 
           echo "Completed benchmarks for branch: ${{ matrix.branch }}"
 
@@ -190,16 +204,16 @@ jobs:
     needs: test-avx512
     runs-on: ubuntu-latest
     steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
       - name: Download all benchmark results
         uses: actions/download-artifact@v4
         with:
           pattern: benchmark-results-*
           path: all-benchmark-results
           merge-multiple: true
 
-      - name: Checkout repository
-        uses: actions/checkout@v4
-
       - name: Set up Python
         uses: actions/setup-python@v4
         with:
@@ -214,10 +228,12 @@ jobs:
         run: |
           # Discover all downloaded CSV benchmark result files
           shopt -s globstar nullglob
-          files=(all-benchmark-results/**/*-bench-results.csv)
+          echo "Listing downloaded artifact directory structure:"
+          ls -R all-benchmark-results || true
+          files=(all-benchmark-results/**/*.csv)
           if [ ${#files[@]} -eq 0 ]; then
-            echo "No benchmark results found in all-benchmark-results. Searching repo as fallback..."
-            files=(**/*-bench-results.csv)
+            echo "No CSVs found under all-benchmark-results. Searching repo as fallback..."
+            files=(**/*.csv)
           fi
           echo "Found ${#files[@]} CSV files"
           for f in "${files[@]}"; do echo "  - $f"; done
diff --git a/jvector-examples/src/main/java/io/github/jbellis/jvector/example/AutoBenchYAML.java b/jvector-examples/src/main/java/io/github/jbellis/jvector/example/AutoBenchYAML.java
@@ -91,8 +91,13 @@ public static void main(String[] args) throws IOException {
         // Filter out --output, --config and their arguments from the args
         String finalOutputPath = outputPath;
         String configPath = null;
+        int diagnostic_level = 0;
         for (int i = 0; i < args.length - 1; i++) {
             if (args[i].equals("--config")) configPath = args[i+1];
+            if (args[i].equals("--diag")) diagnostic_level = Integer.parseInt(args[i+1]);
+        }
+        if (diagnostic_level > 0) {
+            Grid.setDiagnosticLevel(diagnostic_level);
         }
         String finalConfigPath = configPath;
         String[] filteredArgs = Arrays.stream(args)
diff --git a/jvector-examples/src/main/java/io/github/jbellis/jvector/example/Grid.java b/jvector-examples/src/main/java/io/github/jbellis/jvector/example/Grid.java
@@ -25,6 +25,7 @@
 import io.github.jbellis.jvector.example.benchmarks.QueryTester;
 import io.github.jbellis.jvector.example.benchmarks.ThroughputBenchmark;
 import io.github.jbellis.jvector.example.benchmarks.*;
+import io.github.jbellis.jvector.example.benchmarks.diagnostics.DiagnosticLevel;
 import io.github.jbellis.jvector.example.util.CompressorParameters;
 import io.github.jbellis.jvector.example.util.DataSet;
 import io.github.jbellis.jvector.example.util.FilteredForkJoinPool;
@@ -86,6 +87,8 @@ public class Grid {
 
     private static final Map<String,Double> indexBuildTimes = new HashMap<>();
 
+    private static int diagnostic_level;
+
     static void runAll(DataSet ds,
                        List<Integer> mGrid,
                        List<Integer> efConstructionGrid,
@@ -326,6 +329,25 @@ private static BuilderWithSuppliers builderWithSuppliers(Set<FeatureId> features
         return new BuilderWithSuppliers(builder, suppliers);
     }
 
+    public static void setDiagnosticLevel(int diagLevel) { // stores the numeric level in the static diagnostic_level field
+        diagnostic_level = diagLevel;
+    }
+
+    private static DiagnosticLevel getDiagnosticLevel() { // maps the stored numeric diagnostic_level onto the DiagnosticLevel enum
+        switch (diagnostic_level) {
+            case 0:
+                return DiagnosticLevel.NONE;
+            case 1:
+                return DiagnosticLevel.BASIC;
+            case 2:
+                return DiagnosticLevel.DETAILED;
+            case 3:
+                return DiagnosticLevel.VERBOSE;
+            default:
+                return DiagnosticLevel.NONE; // any value outside 0-3 (negatives included) is treated as NONE
+        }
+    }
+
     private static class BuilderWithSuppliers {
         public final OnDiskGraphIndexWriter.Builder builder;
         public final Map<FeatureId, IntFunction<Feature.State>> suppliers;
@@ -543,7 +565,9 @@ public static List<BenchResult> runAllAndCollectResults(
                                         try (ConfiguredSystem cs = new ConfiguredSystem(ds, index, cvArg, features)) {
                                             int queryRuns = 2;
                                             List<QueryBenchmark> benchmarks = List.of(
-                                                    ThroughputBenchmark.createDefault(),
+                                                    (diagnostic_level > 0 ?
+                                                            ThroughputBenchmark.createDefault().withDiagnostics(getDiagnosticLevel()) :
+                                                            ThroughputBenchmark.createDefault()),
                                                     LatencyBenchmark.createDefault(),
                                                     CountBenchmark.createDefault(),
                                                     AccuracyBenchmark.createDefault()
diff --git a/jvector-examples/src/main/java/io/github/jbellis/jvector/example/benchmarks/ThroughputBenchmark.java b/jvector-examples/src/main/java/io/github/jbellis/jvector/example/benchmarks/ThroughputBenchmark.java
@@ -57,7 +57,7 @@ public static ThroughputBenchmark createDefault() {
         return new ThroughputBenchmark(3, 3,
                 true, false, false,
                 DEFAULT_FORMAT, DEFAULT_FORMAT, DEFAULT_FORMAT,
-                DiagnosticLevel.BASIC);
+                DiagnosticLevel.NONE);
     }
 
     public static ThroughputBenchmark createEmpty(int numWarmupRuns, int numTestRuns) {
@@ -168,20 +168,20 @@ public List<Metric> runBenchmark(
                 return totalQueries / 1.0; // Return QPS placeholder
             });
             
-            System.out.printf("Warmup Run %d: %.1f QPS%n", warmupRun, warmupQps[warmupRun]);
+            diagnostics.console(String.format("Warmup Run %d: %.1f QPS%n", warmupRun, warmupQps[warmupRun]));
         }
 
         // Analyze warmup effectiveness
         if (numWarmupRuns > 1) {
             double warmupVariance = StatUtils.variance(warmupQps);
             double warmupMean = StatUtils.mean(warmupQps);
             double warmupCV = Math.sqrt(warmupVariance) / warmupMean * 100;
-            System.out.printf("Warmup Analysis: Mean=%.1f QPS, CV=%.1f%%", warmupMean, warmupCV);
+            diagnostics.console(String.format("Warmup Analysis: Mean=%.1f QPS, CV=%.1f%%", warmupMean, warmupCV));
             
             if (warmupCV > 15.0) {
-                System.out.printf(" ⚠️  High warmup variance - consider more warmup runs%n");
+                diagnostics.console(" ⚠️  High warmup variance - consider more warmup runs\n");
             } else {
-                System.out.printf(" ✓ Warmup appears stable%n");
+                diagnostics.console(" ✓ Warmup appears stable\n");
             }
         }
 
@@ -224,7 +224,7 @@ public List<Metric> runBenchmark(
                 return totalQueries / elapsedSec;
             });
 
-            System.out.printf("Test Run %d: %.1f QPS%n", testRun, qpsSamples[testRun]);
+            diagnostics.console(String.format("Test Run %d: %.1f QPS%n", testRun, qpsSamples[testRun]));
         }
 
         // Performance variance analysis
@@ -236,11 +236,10 @@ public List<Metric> runBenchmark(
         double minQps = StatUtils.min(qpsSamples);
         double coefficientOfVariation = (stdDevQps / avgQps) * 100;
 
-        System.out.printf("QPS Variance Analysis: CV=%.1f%%, Range=[%.1f - %.1f]%n", 
-            coefficientOfVariation, minQps, maxQps);
+        diagnostics.console(String.format("QPS Variance Analysis: CV=%.1f%%, Range=[%.1f - %.1f]%n", coefficientOfVariation, minQps, maxQps));
             
         if (coefficientOfVariation > 10.0) {
-            System.out.printf("⚠️  High performance variance detected (CV > 10%%)%n");
+            diagnostics.console("⚠️  High performance variance detected (CV > 10%)\n");
         }
 
         // Compare test runs for performance regression detection
diff --git a/jvector-examples/src/main/java/io/github/jbellis/jvector/example/benchmarks/diagnostics/BenchmarkDiagnostics.java b/jvector-examples/src/main/java/io/github/jbellis/jvector/example/benchmarks/diagnostics/BenchmarkDiagnostics.java
@@ -156,6 +156,12 @@ public <T> T monitorPhaseWithQueryTiming(String phase, QueryTimingBenchmark<T> b
         return result;
     }
 
+    public void console(String s) { // writes s verbatim to stdout unless the level is NONE; no formatting, no newline appended
+        if (level != DiagnosticLevel.NONE) {
+            System.out.print(s); // print, not println: callers end messages with their own line break and may split one line over two calls
+        }
+    }
+
     /**
      * Compares performance between different phases
      */