1+ /*
2+ * Copyright DataStax, Inc.
3+ *
4+ * Licensed under the Apache License, Version 2.0 (the "License");
5+ * you may not use this file except in compliance with the License.
6+ * You may obtain a copy of the License at
7+ *
8+ * http://www.apache.org/licenses/LICENSE-2.0
9+ *
10+ * Unless required by applicable law or agreed to in writing, software
11+ * distributed under the License is distributed on an "AS IS" BASIS,
12+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+ * See the License for the specific language governing permissions and
14+ * limitations under the License.
15+ */
16+
17+ package io .github .jbellis .jvector .example ;
18+
19+ import com .fasterxml .jackson .databind .ObjectMapper ;
20+ import io .github .jbellis .jvector .example .util .BenchmarkSummarizer ;
21+ import io .github .jbellis .jvector .example .util .BenchmarkSummarizer .SummaryStats ;
22+ import io .github .jbellis .jvector .example .util .DataSet ;
23+ import io .github .jbellis .jvector .example .util .DataSetLoader ;
24+ import io .github .jbellis .jvector .example .yaml .DatasetCollection ;
25+ import io .github .jbellis .jvector .example .yaml .MultiConfig ;
26+ import io .github .jbellis .jvector .graph .disk .feature .FeatureId ;
27+
28+ import java .io .File ;
29+ import java .io .IOException ;
30+ import java .util .ArrayList ;
31+ import java .util .Arrays ;
32+ import java .util .List ;
33+ import java .util .regex .Pattern ;
34+ import java .util .stream .Collectors ;
35+
36+ /**
37+ * Automated benchmark runner for GitHub Actions workflow.
38+ * This class is specifically designed to handle the --output argument
39+ * for regression testing in the run-bench.yml workflow.
40+ */
41+ public class AutoBenchYAML {
42+ public static void main (String [] args ) throws IOException {
43+ // Check for --output argument (required for this class)
44+ String outputPath = null ;
45+ for (int i = 0 ; i < args .length - 1 ; i ++) {
46+ if (args [i ].equals ("--output" )) outputPath = args [i +1 ];
47+ }
48+
49+ if (outputPath == null ) {
50+ System .err .println ("Error: --output argument is required for AutoBenchYAML" );
51+ System .exit (1 );
52+ }
53+
54+ System .out .println ("Heap space available is " + Runtime .getRuntime ().maxMemory ());
55+
56+ // Filter out --output and its argument from the args
57+ String finalOutputPath = outputPath ;
58+ String [] filteredArgs = Arrays .stream (args )
59+ .filter (arg -> !arg .equals ("--output" ) && !arg .equals (finalOutputPath ))
60+ .toArray (String []::new );
61+
62+ // generate a regex that matches any regex in filteredArgs, or if filteredArgs is empty/null, match everything
63+ var regex = filteredArgs .length == 0 ? ".*" : Arrays .stream (filteredArgs ).flatMap (s -> Arrays .stream (s .split ("\\ s" ))).map (s -> "(?:" + s + ")" ).collect (Collectors .joining ("|" ));
64+ // compile regex and do substring matching using find
65+ var pattern = Pattern .compile (regex );
66+
67+ var datasetCollection = DatasetCollection .load ();
68+ var datasetNames = datasetCollection .getAll ().stream ().filter (dn -> pattern .matcher (dn ).find ()).collect (Collectors .toList ());
69+
70+ System .out .println ("Executing the following datasets: " + datasetNames );
71+ List <BenchResult > results = new ArrayList <>();
72+
73+ // Process datasets from regex patterns
74+ if (!datasetNames .isEmpty ()) {
75+ for (var datasetName : datasetNames ) {
76+ DataSet ds = DataSetLoader .loadDataSet (datasetName );
77+
78+ if (datasetName .endsWith (".hdf5" )) {
79+ datasetName = datasetName .substring (0 , datasetName .length () - ".hdf5" .length ());
80+ }
81+ MultiConfig config = MultiConfig .getDefaultConfig (datasetName );
82+
83+ results .addAll (Grid .runAllAndCollectResults (ds ,
84+ config .construction .outDegree ,
85+ config .construction .efConstruction ,
86+ config .construction .neighborOverflow ,
87+ config .construction .addHierarchy ,
88+ config .construction .getFeatureSets (),
89+ config .construction .getCompressorParameters (),
90+ config .search .getCompressorParameters (),
91+ config .search .topKOverquery ,
92+ config .search .useSearchPruning ));
93+ }
94+ }
95+
96+ // Process YAML configuration files
97+ List <String > configNames = Arrays .stream (filteredArgs ).filter (s -> s .endsWith (".yml" )).collect (Collectors .toList ());
98+ if (!configNames .isEmpty ()) {
99+ for (var configName : configNames ) {
100+ MultiConfig config = MultiConfig .getConfig (configName );
101+ String datasetName = config .dataset ;
102+
103+ DataSet ds = DataSetLoader .loadDataSet (datasetName );
104+
105+ results .addAll (Grid .runAllAndCollectResults (ds ,
106+ config .construction .outDegree ,
107+ config .construction .efConstruction ,
108+ config .construction .neighborOverflow ,
109+ config .construction .addHierarchy ,
110+ config .construction .getFeatureSets (),
111+ config .construction .getCompressorParameters (),
112+ config .search .getCompressorParameters (),
113+ config .search .topKOverquery ,
114+ config .search .useSearchPruning ));
115+ }
116+ }
117+
118+ // Calculate summary statistics
119+ SummaryStats stats = BenchmarkSummarizer .summarize (results );
120+ System .out .println (stats .toString ());
121+
122+ // Write results to JSON file
123+ ObjectMapper mapper = new ObjectMapper ();
124+ mapper .writerWithDefaultPrettyPrinter ().writeValue (new File (outputPath ), results );
125+ System .out .println ("Benchmark results written to " + outputPath );
126+ }
127+ }
0 commit comments