Skip to content

Commit 849fe70

Browse files
committed
update to create csv and script to create viz
1 parent 6b0197c commit 849fe70

File tree

5 files changed

+447
-77
lines changed

5 files changed

+447
-77
lines changed

.github/workflows/run-bench.yml

Lines changed: 34 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,7 @@ on:
44
workflow_dispatch:
55
inputs:
66
benchmark_config:
7-
description: 'Benchmark configuration file (leave empty for default)'
8-
required: false
9-
default: ''
10-
jdk_version:
11-
description: 'Override JDK version (leave empty to use matrix)'
7+
description: 'Benchmark dataset regex (leave empty for all)'
128
required: false
139
default: ''
1410
push:
@@ -64,7 +60,7 @@ jobs:
6460
# Use the jar-with-dependencies which includes all required dependencies
6561
java ${{ matrix.jdk >= 20 && '--enable-native-access=ALL-UNNAMED --add-modules=jdk.incubator.vector' || '' }} \
6662
${{ matrix.jdk >= 22 && '-Djvector.experimental.enable_native_vectorization=true' || '' }} \
67-
-cp jvector-examples/target/jvector-examples-*-jar-with-dependencies.jar io.github.jbellis.jvector.example.AutoBenchYAML --output bench-results.json ${{ inputs.benchmark_config != '' && inputs.benchmark_config || 'jvector-examples/yaml-configs/default.yml' }}
63+
-cp jvector-examples/target/jvector-examples-*-jar-with-dependencies.jar io.github.jbellis.jvector.example.AutoBenchYAML --output bench-results
6864
6965
# List files in current directory to help with debugging
7066
echo "Files in current directory:"
@@ -76,37 +72,37 @@ jobs:
7672
name: bench-results-${{ matrix.isa }}-jdk${{ matrix.jdk }}
7773
path: |
7874
bench-results.json
79-
bench-results.log
75+
bench-results.csv
8076
if-no-files-found: warn
8177

82-
- name: Download Previous Benchmark Results
83-
uses: dawidd6/action-download-artifact@v2
84-
continue-on-error: true
85-
with:
86-
workflow: run-bench.yml
87-
name: bench-results-${{ matrix.isa }}-jdk${{ matrix.jdk }}
88-
path: previous-results
89-
skip_unpack: false
90-
if_no_artifact_found: warn
91-
92-
- name: Set up Python
93-
uses: actions/setup-python@v4
94-
with:
95-
python-version: '3.x'
96-
97-
- name: Install Python Dependencies
98-
run: |
99-
python -m pip install --upgrade pip
100-
pip install argparse
101-
102-
- name: Compare Benchmark Results
103-
if: success() && hashFiles('previous-results/bench-results.json') != ''
104-
run: |
105-
python compare_benchmarks.py bench-results.json previous-results/bench-results.json --output benchmark-comparison.md
106-
107-
- name: Upload Comparison Report
108-
if: success() && hashFiles('benchmark-comparison.md') != ''
109-
uses: actions/upload-artifact@v4
110-
with:
111-
name: benchmark-comparison-${{ matrix.isa }}-jdk${{ matrix.jdk }}
112-
path: benchmark-comparison.md
78+
# - name: Download Previous Benchmark Results
79+
# uses: dawidd6/action-download-artifact@v2
80+
# continue-on-error: true
81+
# with:
82+
# workflow: run-bench.yml
83+
# name: bench-results-${{ matrix.isa }}-jdk${{ matrix.jdk }}
84+
# path: previous-results
85+
# skip_unpack: false
86+
# if_no_artifact_found: warn
87+
#
88+
# - name: Set up Python
89+
# uses: actions/setup-python@v4
90+
# with:
91+
# python-version: '3.x'
92+
#
93+
# - name: Install Python Dependencies
94+
# run: |
95+
# python -m pip install --upgrade pip
96+
# pip install argparse
97+
#
98+
# - name: Compare Benchmark Results
99+
# if: success() && hashFiles('previous-results/bench-results.json') != ''
100+
# run: |
101+
# python compare_benchmarks.py bench-results.json previous-results/bench-results.json --output benchmark-comparison.md
102+
#
103+
# - name: Upload Comparison Report
104+
# if: success() && hashFiles('benchmark-comparison.md') != ''
105+
# uses: actions/upload-artifact@v4
106+
# with:
107+
# name: benchmark-comparison-${{ matrix.isa }}-jdk${{ matrix.jdk }}
108+
# path: benchmark-comparison.md

jvector-examples/src/main/java/io/github/jbellis/jvector/example/AutoBenchYAML.java

Lines changed: 34 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -21,17 +21,21 @@
2121
import io.github.jbellis.jvector.example.util.BenchmarkSummarizer.SummaryStats;
2222
import io.github.jbellis.jvector.example.util.DataSet;
2323
import io.github.jbellis.jvector.example.util.DataSetLoader;
24+
import io.github.jbellis.jvector.example.yaml.ConstructionParameters;
2425
import io.github.jbellis.jvector.example.yaml.MultiConfig;
26+
import io.github.jbellis.jvector.example.yaml.SearchParameters;
2527
import io.github.jbellis.jvector.graph.disk.feature.FeatureId;
2628

2729
import org.slf4j.Logger;
2830
import org.slf4j.LoggerFactory;
2931

3032
import java.io.File;
33+
import java.io.FileWriter;
3134
import java.io.IOException;
3235
import java.util.ArrayList;
3336
import java.util.Arrays;
3437
import java.util.List;
38+
import java.util.Map;
3539
import java.util.regex.Pattern;
3640
import java.util.stream.Collectors;
3741

@@ -53,7 +57,7 @@ private static List<String> getAllDatasetNames() {
5357
// neighborhood-watch-100k datasets
5458
// allDatasets.add("ada002-100k");
5559
allDatasets.add("cohere-english-v3-100k");
56-
// allDatasets.add("openai-v3-small-100k");
60+
allDatasets.add("openai-v3-small-100k");
5761
// allDatasets.add("gecko-100k");
5862
// allDatasets.add("openai-v3-large-3072-100k");
5963
// allDatasets.add("openai-v3-large-1536-100k");
@@ -122,7 +126,8 @@ public static void main(String[] args) throws IOException {
122126
datasetName = datasetName.substring(0, datasetName.length() - ".hdf5".length());
123127
}
124128

125-
MultiConfig config = MultiConfig.getDefaultConfig(datasetName);
129+
MultiConfig config = MultiConfig.getDefaultConfig("autoDefault");
130+
config.dataset = datasetName;
126131
logger.info("Using configuration: {}", config);
127132

128133
results.addAll(Grid.runAllAndCollectResults(ds,
@@ -143,50 +148,39 @@ public static void main(String[] args) throws IOException {
143148
}
144149
}
145150

146-
// Process YAML configuration files
147-
List<String> configNames = Arrays.stream(filteredArgs).filter(s -> s.endsWith(".yml")).collect(Collectors.toList());
148-
if (!configNames.isEmpty()) {
149-
for (var configName : configNames) {
150-
logger.info("Processing configuration file: {}", configName);
151-
152-
try {
153-
MultiConfig config = MultiConfig.getConfig(configName);
154-
String datasetName = config.dataset;
155-
logger.info("Configuration specifies dataset: {}", datasetName);
156-
157-
logger.info("Loading dataset: {}", datasetName);
158-
DataSet ds = DataSetLoader.loadDataSet(datasetName);
159-
logger.info("Dataset loaded: {} with {} vectors", datasetName, ds.baseVectors.size());
160-
161-
results.addAll(Grid.runAllAndCollectResults(ds,
162-
config.construction.outDegree,
163-
config.construction.efConstruction,
164-
config.construction.neighborOverflow,
165-
config.construction.addHierarchy,
166-
config.construction.getFeatureSets(),
167-
config.construction.getCompressorParameters(),
168-
config.search.getCompressorParameters(),
169-
config.search.topKOverquery,
170-
config.search.useSearchPruning));
171-
172-
logger.info("Benchmark completed for YAML config: {}", configName);
173-
} catch (Exception e) {
174-
logger.error("Exception while processing YAML config {}", configName, e);
175-
}
176-
}
177-
}
178-
179151
// Calculate summary statistics
180152
try {
181153
SummaryStats stats = BenchmarkSummarizer.summarize(results);
182154
logger.info("Benchmark summary: {}", stats.toString());
183155

184-
// Write results to JSON file
156+
// Write the per-dataset summary to a CSV file and the detailed results to a JSON file
157+
File detailsFile = new File(outputPath + ".json");
185158
ObjectMapper mapper = new ObjectMapper();
186-
File outputFile = new File(outputPath);
187-
mapper.writerWithDefaultPrettyPrinter().writeValue(outputFile, results);
188-
logger.info("Benchmark results written to {} (file exists: {})", outputPath, outputFile.exists());
159+
mapper.writerWithDefaultPrettyPrinter().writeValue(detailsFile, results);
160+
161+
File outputFile = new File(outputPath + ".csv");
189162

163+
// Get summary statistics by dataset
164+
Map<String, SummaryStats> statsByDataset = BenchmarkSummarizer.summarizeByDataset(results);
165+
166+
// Write CSV data
167+
try (FileWriter writer = new FileWriter(outputFile)) {
168+
// Write CSV header
169+
writer.write("dataset,QPS,Mean Latency,Recall@10\n");
170+
171+
// Write one row per dataset with average metrics
172+
for (Map.Entry<String, SummaryStats> entry : statsByDataset.entrySet()) {
173+
String dataset = entry.getKey();
174+
SummaryStats datasetStats = entry.getValue();
175+
176+
writer.write(dataset + ",");
177+
writer.write(datasetStats.getAvgQps() + ",");
178+
writer.write(datasetStats.getAvgLatency() + ",");
179+
writer.write(datasetStats.getAvgRecall() + "\n");
180+
}
181+
}
182+
183+
logger.info("Benchmark results written to {} (file exists: {})", outputPath, outputFile.exists());
190184
// Double check that the file was created and log its size
191185
if (outputFile.exists()) {
192186
logger.info("Output file size: {} bytes", outputFile.length());
@@ -197,4 +191,5 @@ public static void main(String[] args) throws IOException {
197191
logger.error("Exception during final processing", e);
198192
}
199193
}
194+
200195
}

jvector-examples/src/main/java/io/github/jbellis/jvector/example/util/BenchmarkSummarizer.java

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -191,4 +191,35 @@ private static Double convertToDouble(Object value) {
191191
}
192192
return null;
193193
}
194+
195+
/**
196+
* Calculate summary statistics grouped by dataset from a list of benchmark results
197+
* @param results List of benchmark results to summarize
198+
* @return Map of dataset names to their summary statistics
199+
*/
200+
public static Map<String, SummaryStats> summarizeByDataset(List<BenchResult> results) {
201+
if (results == null || results.isEmpty()) {
202+
return Map.of();
203+
}
204+
205+
// Group results by dataset
206+
Map<String, List<BenchResult>> resultsByDataset = new java.util.HashMap<>();
207+
for (BenchResult result : results) {
208+
if (result.dataset == null) continue;
209+
210+
resultsByDataset.computeIfAbsent(result.dataset, k -> new java.util.ArrayList<>()).add(result);
211+
}
212+
213+
// Calculate summary stats for each dataset
214+
Map<String, SummaryStats> statsByDataset = new java.util.HashMap<>();
215+
for (Map.Entry<String, List<BenchResult>> entry : resultsByDataset.entrySet()) {
216+
String dataset = entry.getKey();
217+
List<BenchResult> datasetResults = entry.getValue();
218+
219+
SummaryStats stats = summarize(datasetResults);
220+
statsByDataset.put(dataset, stats);
221+
}
222+
223+
return statsByDataset;
224+
}
194225
}
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
version: 5
2+
3+
dataset: cohere-english-v3-100k
4+
5+
construction:
6+
outDegree: [32]
7+
efConstruction: [100]
8+
neighborOverflow: [1.2f]
9+
addHierarchy: [Yes]
10+
refineFinalGraph: [Yes]
11+
compression:
12+
- type: PQ
13+
parameters:
14+
m: 192 # specify either the integer m or the integer mFactor; when mFactor is given instead, m is set to the data dimensionality divided by mFactor
15+
# mFactor: 8
16+
# k: 256 # optional parameter. By default, k=256
17+
centerData: No
18+
anisotropicThreshold: -1.0 # optional parameter. By default, anisotropicThreshold=-1 (i.e., no anisotropy)
19+
reranking:
20+
- NVQ
21+
useSavedIndexIfExists: Yes
22+
23+
search:
24+
topKOverquery:
25+
10: [1.0]
26+
useSearchPruning: [Yes]
27+
compression:
28+
- type: PQ
29+
parameters:
30+
m: 192
31+
# k: 256 # optional parameter. By default, k=256
32+
centerData: No
33+
anisotropicThreshold: -1.0 # optional parameter. By default, anisotropicThreshold=-1 (i.e., no anisotropy)

0 commit comments

Comments
 (0)