2121import io .github .jbellis .jvector .example .util .BenchmarkSummarizer .SummaryStats ;
2222import io .github .jbellis .jvector .example .util .DataSet ;
2323import io .github .jbellis .jvector .example .util .DataSetLoader ;
24+ import io .github .jbellis .jvector .example .yaml .ConstructionParameters ;
2425import io .github .jbellis .jvector .example .yaml .MultiConfig ;
26+ import io .github .jbellis .jvector .example .yaml .SearchParameters ;
2527import io .github .jbellis .jvector .graph .disk .feature .FeatureId ;
2628
2729import org .slf4j .Logger ;
2830import org .slf4j .LoggerFactory ;
2931
3032import java .io .File ;
33+ import java .io .FileWriter ;
3134import java .io .IOException ;
3235import java .util .ArrayList ;
3336import java .util .Arrays ;
3437import java .util .List ;
38+ import java .util .Map ;
3539import java .util .regex .Pattern ;
3640import java .util .stream .Collectors ;
3741
@@ -53,7 +57,7 @@ private static List<String> getAllDatasetNames() {
5357 // neighborhood-watch-100k datasets
5458// allDatasets.add("ada002-100k");
5559 allDatasets .add ("cohere-english-v3-100k" );
56- // allDatasets.add("openai-v3-small-100k");
60+ allDatasets .add ("openai-v3-small-100k" );
5761// allDatasets.add("gecko-100k");
5862// allDatasets.add("openai-v3-large-3072-100k");
5963// allDatasets.add("openai-v3-large-1536-100k");
@@ -122,7 +126,8 @@ public static void main(String[] args) throws IOException {
122126 datasetName = datasetName .substring (0 , datasetName .length () - ".hdf5" .length ());
123127 }
124128
125- MultiConfig config = MultiConfig .getDefaultConfig (datasetName );
129+ MultiConfig config = MultiConfig .getDefaultConfig ("autoDefault" );
130+ config .dataset = datasetName ;
126131 logger .info ("Using configuration: {}" , config );
127132
128133 results .addAll (Grid .runAllAndCollectResults (ds ,
@@ -143,50 +148,39 @@ public static void main(String[] args) throws IOException {
143148 }
144149 }
145150
146- // Process YAML configuration files
147- List <String > configNames = Arrays .stream (filteredArgs ).filter (s -> s .endsWith (".yml" )).collect (Collectors .toList ());
148- if (!configNames .isEmpty ()) {
149- for (var configName : configNames ) {
150- logger .info ("Processing configuration file: {}" , configName );
151-
152- try {
153- MultiConfig config = MultiConfig .getConfig (configName );
154- String datasetName = config .dataset ;
155- logger .info ("Configuration specifies dataset: {}" , datasetName );
156-
157- logger .info ("Loading dataset: {}" , datasetName );
158- DataSet ds = DataSetLoader .loadDataSet (datasetName );
159- logger .info ("Dataset loaded: {} with {} vectors" , datasetName , ds .baseVectors .size ());
160-
161- results .addAll (Grid .runAllAndCollectResults (ds ,
162- config .construction .outDegree ,
163- config .construction .efConstruction ,
164- config .construction .neighborOverflow ,
165- config .construction .addHierarchy ,
166- config .construction .getFeatureSets (),
167- config .construction .getCompressorParameters (),
168- config .search .getCompressorParameters (),
169- config .search .topKOverquery ,
170- config .search .useSearchPruning ));
171-
172- logger .info ("Benchmark completed for YAML config: {}" , configName );
173- } catch (Exception e ) {
174- logger .error ("Exception while processing YAML config {}" , configName , e );
175- }
176- }
177- }
178-
179151 // Calculate summary statistics
180152 try {
181153 SummaryStats stats = BenchmarkSummarizer .summarize (results );
182154 logger .info ("Benchmark summary: {}" , stats .toString ());
183155
184- // Write results to JSON file
156+ // Write results to csv file and details to json
157+ File detailsFile = new File (outputPath + ".json" );
185158 ObjectMapper mapper = new ObjectMapper ();
186- File outputFile = new File ( outputPath );
187- mapper . writerWithDefaultPrettyPrinter (). writeValue ( outputFile , results );
188- logger . info ( "Benchmark results written to {} (file exists: {})" , outputPath , outputFile . exists () );
159+ mapper . writerWithDefaultPrettyPrinter (). writeValue ( detailsFile , results );
160+
161+ File outputFile = new File ( outputPath + ".csv" );
189162
163+ // Get summary statistics by dataset
164+ Map <String , SummaryStats > statsByDataset = BenchmarkSummarizer .summarizeByDataset (results );
165+
166+ // Write CSV data
167+ try (FileWriter writer = new FileWriter (outputFile )) {
168+ // Write CSV header
169+ writer .write ("dataset,QPS,Mean Latency,Recall@10\n " );
170+
171+ // Write one row per dataset with average metrics
172+ for (Map .Entry <String , SummaryStats > entry : statsByDataset .entrySet ()) {
173+ String dataset = entry .getKey ();
174+ SummaryStats datasetStats = entry .getValue ();
175+
176+ writer .write (dataset + "," );
177+ writer .write (datasetStats .getAvgQps () + "," );
178+ writer .write (datasetStats .getAvgLatency () + "," );
179+ writer .write (datasetStats .getAvgRecall () + "\n " );
180+ }
181+ }
182+
183+ logger .info ("Benchmark results written to {} (file exists: {})" , outputPath , outputFile .exists ());
190184 // Double check that the file was created and log its size
191185 if (outputFile .exists ()) {
192186 logger .info ("Output file size: {} bytes" , outputFile .length ());
@@ -197,4 +191,5 @@ public static void main(String[] args) throws IOException {
197191 logger .error ("Exception during final processing" , e );
198192 }
199193 }
194+
200195}
0 commit comments