Skip to content

Commit e89725c

Browse files
committed
moving gha to its own class
1 parent 10da16e commit e89725c

File tree

2 files changed

+128
-1
lines changed

2 files changed

+128
-1
lines changed

.github/workflows/run-bench.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ jobs:
6464
# Use the jar-with-dependencies which includes all required dependencies
6565
java ${{ matrix.jdk >= 20 && '--enable-native-access=ALL-UNNAMED --add-modules=jdk.incubator.vector' || '' }} \
6666
${{ matrix.jdk >= 22 && '-Djvector.experimental.enable_native_vectorization=true' || '' }} \
67-
-jar jvector-examples/target/jvector-examples-*-jar-with-dependencies.jar --output bench-results.json
67+
-cp jvector-examples/target/jvector-examples-*-jar-with-dependencies.jar io.github.jbellis.jvector.example.AutoBenchYAML --output bench-results.json
6868
6969
- name: Upload Bench Results
7070
uses: actions/upload-artifact@v4
Lines changed: 127 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,127 @@
1+
/*
2+
* Copyright DataStax, Inc.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package io.github.jbellis.jvector.example;
18+
19+
import com.fasterxml.jackson.databind.ObjectMapper;
20+
import io.github.jbellis.jvector.example.util.BenchmarkSummarizer;
21+
import io.github.jbellis.jvector.example.util.BenchmarkSummarizer.SummaryStats;
22+
import io.github.jbellis.jvector.example.util.DataSet;
23+
import io.github.jbellis.jvector.example.util.DataSetLoader;
24+
import io.github.jbellis.jvector.example.yaml.DatasetCollection;
25+
import io.github.jbellis.jvector.example.yaml.MultiConfig;
26+
import io.github.jbellis.jvector.graph.disk.feature.FeatureId;
27+
28+
import java.io.File;
29+
import java.io.IOException;
30+
import java.util.ArrayList;
31+
import java.util.Arrays;
32+
import java.util.List;
33+
import java.util.regex.Pattern;
34+
import java.util.stream.Collectors;
35+
36+
/**
37+
* Automated benchmark runner for GitHub Actions workflow.
38+
* This class is specifically designed to handle the --output argument
39+
* for regression testing in the run-bench.yml workflow.
40+
*/
41+
public class AutoBenchYAML {
42+
public static void main(String[] args) throws IOException {
43+
// Check for --output argument (required for this class)
44+
String outputPath = null;
45+
for (int i = 0; i < args.length - 1; i++) {
46+
if (args[i].equals("--output")) outputPath = args[i+1];
47+
}
48+
49+
if (outputPath == null) {
50+
System.err.println("Error: --output argument is required for AutoBenchYAML");
51+
System.exit(1);
52+
}
53+
54+
System.out.println("Heap space available is " + Runtime.getRuntime().maxMemory());
55+
56+
// Filter out --output and its argument from the args
57+
String finalOutputPath = outputPath;
58+
String[] filteredArgs = Arrays.stream(args)
59+
.filter(arg -> !arg.equals("--output") && !arg.equals(finalOutputPath))
60+
.toArray(String[]::new);
61+
62+
// generate a regex that matches any regex in filteredArgs, or if filteredArgs is empty/null, match everything
63+
var regex = filteredArgs.length == 0 ? ".*" : Arrays.stream(filteredArgs).flatMap(s -> Arrays.stream(s.split("\\s"))).map(s -> "(?:" + s + ")").collect(Collectors.joining("|"));
64+
// compile regex and do substring matching using find
65+
var pattern = Pattern.compile(regex);
66+
67+
var datasetCollection = DatasetCollection.load();
68+
var datasetNames = datasetCollection.getAll().stream().filter(dn -> pattern.matcher(dn).find()).collect(Collectors.toList());
69+
70+
System.out.println("Executing the following datasets: " + datasetNames);
71+
List<BenchResult> results = new ArrayList<>();
72+
73+
// Process datasets from regex patterns
74+
if (!datasetNames.isEmpty()) {
75+
for (var datasetName : datasetNames) {
76+
DataSet ds = DataSetLoader.loadDataSet(datasetName);
77+
78+
if (datasetName.endsWith(".hdf5")) {
79+
datasetName = datasetName.substring(0, datasetName.length() - ".hdf5".length());
80+
}
81+
MultiConfig config = MultiConfig.getDefaultConfig(datasetName);
82+
83+
results.addAll(Grid.runAllAndCollectResults(ds,
84+
config.construction.outDegree,
85+
config.construction.efConstruction,
86+
config.construction.neighborOverflow,
87+
config.construction.addHierarchy,
88+
config.construction.getFeatureSets(),
89+
config.construction.getCompressorParameters(),
90+
config.search.getCompressorParameters(),
91+
config.search.topKOverquery,
92+
config.search.useSearchPruning));
93+
}
94+
}
95+
96+
// Process YAML configuration files
97+
List<String> configNames = Arrays.stream(filteredArgs).filter(s -> s.endsWith(".yml")).collect(Collectors.toList());
98+
if (!configNames.isEmpty()) {
99+
for (var configName : configNames) {
100+
MultiConfig config = MultiConfig.getConfig(configName);
101+
String datasetName = config.dataset;
102+
103+
DataSet ds = DataSetLoader.loadDataSet(datasetName);
104+
105+
results.addAll(Grid.runAllAndCollectResults(ds,
106+
config.construction.outDegree,
107+
config.construction.efConstruction,
108+
config.construction.neighborOverflow,
109+
config.construction.addHierarchy,
110+
config.construction.getFeatureSets(),
111+
config.construction.getCompressorParameters(),
112+
config.search.getCompressorParameters(),
113+
config.search.topKOverquery,
114+
config.search.useSearchPruning));
115+
}
116+
}
117+
118+
// Calculate summary statistics
119+
SummaryStats stats = BenchmarkSummarizer.summarize(results);
120+
System.out.println(stats.toString());
121+
122+
// Write results to JSON file
123+
ObjectMapper mapper = new ObjectMapper();
124+
mapper.writerWithDefaultPrettyPrinter().writeValue(new File(outputPath), results);
125+
System.out.println("Benchmark results written to " + outputPath);
126+
}
127+
}

0 commit comments

Comments
 (0)