Skip to content

Commit 19f60fe

Browse files
authored
Regression enhancements (#526)
* fixed ability to filter datasets
* fix for illegal char in branch name
* fixed missing space for default arg
* fixed missing space for default arg
* reordering to prevent overwriting test results
* moving default diagnostic logging behavior to NONE and adding option to set diagnostic level
* fixed erroneous change
* fixed commented out files
1 parent dc98818 commit 19f60fe

File tree

5 files changed

+70
-20
lines changed

5 files changed

+70
-20
lines changed

.github/workflows/run-bench.yml

Lines changed: 26 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -158,21 +158,35 @@ jobs:
158158
159159
# Run the benchmark
160160
echo "Running benchmark for branch ${{ matrix.branch }}"
161+
162+
# Determine optional benchmark config argument from workflow input
163+
BENCH_ARG="${{ github.event.inputs.benchmark_config }}"
164+
if [[ -z "$BENCH_ARG" ]]; then
165+
echo "No benchmark_config provided; running with default dataset selection."
166+
BENCH_SUFFIX=""
167+
else
168+
echo "Using benchmark_config: '$BENCH_ARG'"
169+
BENCH_SUFFIX=" $BENCH_ARG"
170+
fi
171+
172+
# Sanitize branch name for filenames: replace any non-alphanumeric, dash or underscore with underscore
173+
SAFE_BRANCH=$(echo "${{ matrix.branch }}" | sed 's/[^A-Za-z0-9_-]/_/g')
174+
161175
if [[ "${{ github.event_name }}" == "pull_request" ]]; then
162176
java ${{ matrix.jdk >= 20 && '--enable-native-access=ALL-UNNAMED --add-modules=jdk.incubator.vector' || '' }} \
163177
${{ matrix.jdk >= 22 && '-Djvector.experimental.enable_native_vectorization=true' || '' }} \
164178
-XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=/tmp/heap_dump/ -Xmx${HALF_MEM_GB}g \
165-
-cp jvector-examples/target/jvector-examples-*-jar-with-dependencies.jar io.github.jbellis.jvector.example.AutoBenchYAML --output ${{ matrix.branch }}-bench-results dpr-1M
179+
-cp jvector-examples/target/jvector-examples-*-jar-with-dependencies.jar io.github.jbellis.jvector.example.AutoBenchYAML --output ${SAFE_BRANCH}-bench-results dpr-1M
166180
else
167181
java ${{ matrix.jdk >= 20 && '--enable-native-access=ALL-UNNAMED --add-modules=jdk.incubator.vector' || '' }} \
168182
${{ matrix.jdk >= 22 && '-Djvector.experimental.enable_native_vectorization=true' || '' }} \
169183
-XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=/tmp/heap_dump/ -Xmx${HALF_MEM_GB}g \
170-
-cp jvector-examples/target/jvector-examples-*-jar-with-dependencies.jar io.github.jbellis.jvector.example.AutoBenchYAML --output ${{ matrix.branch }}-bench-results
184+
-cp jvector-examples/target/jvector-examples-*-jar-with-dependencies.jar io.github.jbellis.jvector.example.AutoBenchYAML --output ${SAFE_BRANCH}-bench-results${BENCH_SUFFIX:+ }${BENCH_ARG}
171185
fi
172186
173187
# Move the results to the benchmark_results directory
174-
mv ${{ matrix.branch }}-bench-results.csv benchmark_results/ || true
175-
mv ${{ matrix.branch }}-bench-results.json benchmark_results/ || true
188+
mv ${SAFE_BRANCH}-bench-results.csv benchmark_results/ || true
189+
mv ${SAFE_BRANCH}-bench-results.json benchmark_results/ || true
176190
177191
echo "Completed benchmarks for branch: ${{ matrix.branch }}"
178192
@@ -190,16 +204,16 @@ jobs:
190204
needs: test-avx512
191205
runs-on: ubuntu-latest
192206
steps:
207+
- name: Checkout repository
208+
uses: actions/checkout@v4
209+
193210
- name: Download all benchmark results
194211
uses: actions/download-artifact@v4
195212
with:
196213
pattern: benchmark-results-*
197214
path: all-benchmark-results
198215
merge-multiple: true
199216

200-
- name: Checkout repository
201-
uses: actions/checkout@v4
202-
203217
- name: Set up Python
204218
uses: actions/setup-python@v4
205219
with:
@@ -214,10 +228,12 @@ jobs:
214228
run: |
215229
# Discover all downloaded CSV benchmark result files
216230
shopt -s globstar nullglob
217-
files=(all-benchmark-results/**/*-bench-results.csv)
231+
echo "Listing downloaded artifact directory structure:"
232+
ls -R all-benchmark-results || true
233+
files=(all-benchmark-results/**/*.csv)
218234
if [ ${#files[@]} -eq 0 ]; then
219-
echo "No benchmark results found in all-benchmark-results. Searching repo as fallback..."
220-
files=(**/*-bench-results.csv)
235+
echo "No CSVs found under all-benchmark-results. Searching repo as fallback..."
236+
files=(**/*.csv)
221237
fi
222238
echo "Found ${#files[@]} CSV files"
223239
for f in "${files[@]}"; do echo " - $f"; done

jvector-examples/src/main/java/io/github/jbellis/jvector/example/AutoBenchYAML.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,8 +91,13 @@ public static void main(String[] args) throws IOException {
9191
// Filter out --output, --config and their arguments from the args
9292
String finalOutputPath = outputPath;
9393
String configPath = null;
94+
int diagnostic_level = 0;
9495
for (int i = 0; i < args.length - 1; i++) {
9596
if (args[i].equals("--config")) configPath = args[i+1];
97+
if (args[i].equals("--diag")) diagnostic_level = Integer.parseInt(args[i+1]);
98+
}
99+
if (diagnostic_level > 0) {
100+
Grid.setDiagnosticLevel(diagnostic_level);
96101
}
97102
String finalConfigPath = configPath;
98103
String[] filteredArgs = Arrays.stream(args)

jvector-examples/src/main/java/io/github/jbellis/jvector/example/Grid.java

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
import io.github.jbellis.jvector.example.benchmarks.QueryTester;
2626
import io.github.jbellis.jvector.example.benchmarks.ThroughputBenchmark;
2727
import io.github.jbellis.jvector.example.benchmarks.*;
28+
import io.github.jbellis.jvector.example.benchmarks.diagnostics.DiagnosticLevel;
2829
import io.github.jbellis.jvector.example.util.CompressorParameters;
2930
import io.github.jbellis.jvector.example.util.DataSet;
3031
import io.github.jbellis.jvector.example.util.FilteredForkJoinPool;
@@ -86,6 +87,8 @@ public class Grid {
8687

8788
private static final Map<String,Double> indexBuildTimes = new HashMap<>();
8889

90+
private static int diagnostic_level;
91+
8992
static void runAll(DataSet ds,
9093
List<Integer> mGrid,
9194
List<Integer> efConstructionGrid,
@@ -326,6 +329,25 @@ private static BuilderWithSuppliers builderWithSuppliers(Set<FeatureId> features
326329
return new BuilderWithSuppliers(builder, suppliers);
327330
}
328331

332+
public static void setDiagnosticLevel(int diagLevel) {
333+
diagnostic_level = diagLevel;
334+
}
335+
336+
private static DiagnosticLevel getDiagnosticLevel() {
337+
switch (diagnostic_level) {
338+
case 0:
339+
return DiagnosticLevel.NONE;
340+
case 1:
341+
return DiagnosticLevel.BASIC;
342+
case 2:
343+
return DiagnosticLevel.DETAILED;
344+
case 3:
345+
return DiagnosticLevel.VERBOSE;
346+
default:
347+
return DiagnosticLevel.NONE; // fallback for invalid values
348+
}
349+
}
350+
329351
private static class BuilderWithSuppliers {
330352
public final OnDiskGraphIndexWriter.Builder builder;
331353
public final Map<FeatureId, IntFunction<Feature.State>> suppliers;
@@ -543,7 +565,9 @@ public static List<BenchResult> runAllAndCollectResults(
543565
try (ConfiguredSystem cs = new ConfiguredSystem(ds, index, cvArg, features)) {
544566
int queryRuns = 2;
545567
List<QueryBenchmark> benchmarks = List.of(
546-
ThroughputBenchmark.createDefault(),
568+
(diagnostic_level > 0 ?
569+
ThroughputBenchmark.createDefault().withDiagnostics(getDiagnosticLevel()) :
570+
ThroughputBenchmark.createDefault()),
547571
LatencyBenchmark.createDefault(),
548572
CountBenchmark.createDefault(),
549573
AccuracyBenchmark.createDefault()

jvector-examples/src/main/java/io/github/jbellis/jvector/example/benchmarks/ThroughputBenchmark.java

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ public static ThroughputBenchmark createDefault() {
5757
return new ThroughputBenchmark(3, 3,
5858
true, false, false,
5959
DEFAULT_FORMAT, DEFAULT_FORMAT, DEFAULT_FORMAT,
60-
DiagnosticLevel.BASIC);
60+
DiagnosticLevel.NONE);
6161
}
6262

6363
public static ThroughputBenchmark createEmpty(int numWarmupRuns, int numTestRuns) {
@@ -168,20 +168,20 @@ public List<Metric> runBenchmark(
168168
return totalQueries / 1.0; // Return QPS placeholder
169169
});
170170

171-
System.out.printf("Warmup Run %d: %.1f QPS%n", warmupRun, warmupQps[warmupRun]);
171+
diagnostics.console("Warmup Run " + warmupRun + ": " + warmupQps[warmupRun] + " QPS\n");
172172
}
173173

174174
// Analyze warmup effectiveness
175175
if (numWarmupRuns > 1) {
176176
double warmupVariance = StatUtils.variance(warmupQps);
177177
double warmupMean = StatUtils.mean(warmupQps);
178178
double warmupCV = Math.sqrt(warmupVariance) / warmupMean * 100;
179-
System.out.printf("Warmup Analysis: Mean=%.1f QPS, CV=%.1f%%", warmupMean, warmupCV);
179+
diagnostics.console("Warmup Analysis: Mean=" + warmupMean + " QPS, CV=" + warmupCV);
180180

181181
if (warmupCV > 15.0) {
182-
System.out.printf(" ⚠️ High warmup variance - consider more warmup runs%n");
182+
diagnostics.console(" ⚠️ High warmup variance - consider more warmup runs\n");
183183
} else {
184-
System.out.printf(" ✓ Warmup appears stable%n");
184+
diagnostics.console(" ✓ Warmup appears stable\n");
185185
}
186186
}
187187

@@ -224,7 +224,7 @@ public List<Metric> runBenchmark(
224224
return totalQueries / elapsedSec;
225225
});
226226

227-
System.out.printf("Test Run %d: %.1f QPS%n", testRun, qpsSamples[testRun]);
227+
diagnostics.console("Test Run " + testRun + ": " + qpsSamples[testRun] + " QPS\n");
228228
}
229229

230230
// Performance variance analysis
@@ -236,11 +236,10 @@ public List<Metric> runBenchmark(
236236
double minQps = StatUtils.min(qpsSamples);
237237
double coefficientOfVariation = (stdDevQps / avgQps) * 100;
238238

239-
System.out.printf("QPS Variance Analysis: CV=%.1f%%, Range=[%.1f - %.1f]%n",
240-
coefficientOfVariation, minQps, maxQps);
239+
diagnostics.console("QPS Variance Analysis: CV=" + coefficientOfVariation + ", Range=[" + minQps + " - " + maxQps + "]\n");
241240

242241
if (coefficientOfVariation > 10.0) {
243-
System.out.printf("⚠️ High performance variance detected (CV > 10%%)%n");
242+
// console() prints its argument verbatim (it is not a format call), so the printf
// escapes "%%" and "%n" would appear literally; use a plain '%' and '\n' instead.
diagnostics.console("⚠️ High performance variance detected (CV > 10%)\n");
244243
}
245244

246245
// Compare test runs for performance regression detection

jvector-examples/src/main/java/io/github/jbellis/jvector/example/benchmarks/diagnostics/BenchmarkDiagnostics.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,12 @@ public <T> T monitorPhaseWithQueryTiming(String phase, QueryTimingBenchmark<T> b
156156
return result;
157157
}
158158

159+
public void console(String s) {
160+
if (level != DiagnosticLevel.NONE ) {
161+
System.out.println(s);
162+
}
163+
}
164+
159165
/**
160166
* Compares performance between different phases
161167
*/

0 commit comments

Comments
 (0)