2121import io .github .jbellis .jvector .example .util .BenchmarkSummarizer .SummaryStats ;
2222import io .github .jbellis .jvector .example .util .DataSet ;
2323import io .github .jbellis .jvector .example .util .DataSetLoader ;
24- import io .github .jbellis .jvector .example .yaml .DatasetCollection ;
2524import io .github .jbellis .jvector .example .yaml .MultiConfig ;
2625import io .github .jbellis .jvector .graph .disk .feature .FeatureId ;
2726
3938 * for regression testing in the run-bench.yml workflow.
4039 */
4140public class AutoBenchYAML {
41+ /**
42+ * Returns a list of all dataset names.
43+ * This replaces the need to load datasets.yml which may not be available in all environments.
44+ */
45+ private static List <String > getAllDatasetNames () {
46+ List <String > allDatasets = new ArrayList <>();
47+
48+ // neighborhood-watch-100k datasets
49+ // allDatasets.add("ada002-100k");
50+ // allDatasets.add("cohere-english-v3-100k");
51+ // allDatasets.add("openai-v3-small-100k");
52+ // allDatasets.add("gecko-100k");
53+ // allDatasets.add("openai-v3-large-3072-100k");
54+ // allDatasets.add("openai-v3-large-1536-100k");
55+ // allDatasets.add("e5-small-v2-100k");
56+ // allDatasets.add("e5-base-v2-100k");
57+ // allDatasets.add("e5-large-v2-100k");
58+ //
59+ // // neighborhood-watch-1M datasets
60+ // allDatasets.add("ada002-1M");
61+ // allDatasets.add("colbert-1M");
62+
63+ // ann-benchmarks datasets
64+ allDatasets .add ("glove-25-angular.hdf5" );
65+ allDatasets .add ("glove-50-angular.hdf5" );
66+ allDatasets .add ("lastfm-64-dot.hdf5" );
67+ allDatasets .add ("glove-100-angular.hdf5" );
68+ allDatasets .add ("glove-200-angular.hdf5" );
69+ allDatasets .add ("nytimes-256-angular.hdf5" );
70+ allDatasets .add ("sift-128-euclidean.hdf5" );
71+ // Large files not yet supported:
72+ // allDatasets.add("deep-image-96-angular.hdf5");
73+ // allDatasets.add("gist-960-euclidean.hdf5");
74+
75+ return allDatasets ;
76+ }
77+
4278 public static void main (String [] args ) throws IOException {
4379 // Check for --output argument (required for this class)
4480 String outputPath = null ;
@@ -64,8 +100,7 @@ public static void main(String[] args) throws IOException {
64100 // compile regex and do substring matching using find
65101 var pattern = Pattern .compile (regex );
66102
67- var datasetCollection = DatasetCollection .load ();
68- var datasetNames = datasetCollection .getAll ().stream ().filter (dn -> pattern .matcher (dn ).find ()).collect (Collectors .toList ());
103+ var datasetNames = getAllDatasetNames ().stream ().filter (dn -> pattern .matcher (dn ).find ()).collect (Collectors .toList ());
69104
70105 System .out .println ("Executing the following datasets: " + datasetNames );
71106 List <BenchResult > results = new ArrayList <>();
0 commit comments