Skip to content

Commit ae03ef7

Browse files
committed
adding memory and disk usage stats to bench tests
1 parent 55f902f commit ae03ef7

File tree

5 files changed

+368
-18
lines changed

5 files changed

+368
-18
lines changed

jvector-examples/src/main/java/io/github/jbellis/jvector/example/Grid.java

Lines changed: 61 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,13 @@ public class Grid {
8787

8888
private static int diagnostic_level;
8989

90+
/**
91+
* Returns the index build time recorded for the given dataset.
* <p>
* This is a plain lookup in {@code indexBuildTimes}; nothing is computed or recorded here.
*
* @param datasetName the dataset name used as the lookup key
* @return the value stored under {@code datasetName}; callers null-check the result,
*         so an absent entry is presumably reported as {@code null} (confirm against the
*         declared type of {@code indexBuildTimes})
92+
*/
93+
public static Double getIndexBuildTime(String datasetName) {
94+
return indexBuildTimes.get(datasetName);
95+
}
96+
9097
static void runAll(DataSet ds,
9198
List<Integer> mGrid,
9299
List<Integer> efConstructionGrid,
@@ -158,13 +165,21 @@ static void runOneGraph(List<? extends Set<FeatureId>> featureSets,
158165
DataSet ds,
159166
Path testDirectory) throws IOException
160167
{
168+
// Capture initial memory and disk state
169+
var diagnostics = new io.github.jbellis.jvector.example.benchmarks.diagnostics.BenchmarkDiagnostics(getDiagnosticLevel());
170+
diagnostics.setMonitoredDirectory(testDirectory);
171+
diagnostics.capturePrePhaseSnapshot("Graph Build");
172+
161173
Map<Set<FeatureId>, ImmutableGraphIndex> indexes;
162174
if (buildCompressor == null) {
163175
indexes = buildInMemory(featureSets, M, efConstruction, neighborOverflow, addHierarchy, refineFinalGraph, ds, testDirectory);
164176
} else {
165177
indexes = buildOnDisk(featureSets, M, efConstruction, neighborOverflow, addHierarchy, refineFinalGraph, ds, testDirectory, buildCompressor);
166178
}
167179

180+
// Capture post-build memory and disk state
181+
diagnostics.capturePostPhaseSnapshot("Graph Build");
182+
168183
try {
169184
for (var cpSupplier : compressionGrid) {
170185
indexes.forEach((features, index) -> {
@@ -188,7 +203,7 @@ static void runOneGraph(List<? extends Set<FeatureId>> featureSets,
188203
}
189204

190205
try (var cs = new ConfiguredSystem(ds, index, cv, featureSetForIndex)) {
191-
testConfiguration(cs, topKGrid, usePruningGrid, M, efConstruction, neighborOverflow, addHierarchy, benchmarks);
206+
testConfiguration(cs, topKGrid, usePruningGrid, M, efConstruction, neighborOverflow, addHierarchy, benchmarks, testDirectory);
192207
} catch (Exception e) {
193208
throw new RuntimeException(e);
194209
}
@@ -197,6 +212,11 @@ static void runOneGraph(List<? extends Set<FeatureId>> featureSets,
197212
for (var index : indexes.values()) {
198213
index.close();
199214
}
215+
216+
// Log final diagnostics summary
217+
if (diagnostic_level > 0) {
218+
diagnostics.logSummary();
219+
}
200220
} finally {
201221
for (int n = 0; n < featureSets.size(); n++) {
202222
Files.deleteIfExists(testDirectory.resolve("graph" + n));
@@ -432,13 +452,14 @@ private static void testConfiguration(ConfiguredSystem cs,
432452
int efConstruction,
433453
float neighborOverflow,
434454
boolean addHierarchy,
435-
Map<String, List<String>> benchmarkSpec) {
455+
Map<String, List<String>> benchmarkSpec,
456+
Path testDirectory) {
436457
int queryRuns = 2;
437458
System.out.format("Using %s:%n", cs.index);
438459
// 1) Select benchmarks to run. Use .createDefault or .createEmpty (for other options)
439460

440461
var benchmarks = setupBenchmarks(benchmarkSpec);
441-
QueryTester tester = new QueryTester(benchmarks);
462+
QueryTester tester = new QueryTester(benchmarks, testDirectory, cs.ds.name);
442463

443464
// 2) Setup benchmark table for printing
444465
for (var topK : topKGrid.keySet()) {
@@ -563,11 +584,22 @@ public static List<BenchResult> runAllAndCollectResults(
563584
for (Function<DataSet, CompressorParameters> searchCompressor : compressionGrid) {
564585
Path testDirectory = Files.createTempDirectory("bench");
565586
try {
587+
// Capture initial state
588+
var diagnostics = new io.github.jbellis.jvector.example.benchmarks.diagnostics.BenchmarkDiagnostics(getDiagnosticLevel());
589+
diagnostics.setMonitoredDirectory(testDirectory);
590+
diagnostics.capturePrePhaseSnapshot("Build");
591+
566592
var compressor = getCompressor(buildCompressor, ds);
567593
var searchCompressorObj = getCompressor(searchCompressor, ds);
568594
CompressedVectors cvArg = (searchCompressorObj instanceof CompressedVectors) ? (CompressedVectors) searchCompressorObj : null;
569595
var indexes = buildOnDisk(List.of(features), m, ef, neighborOverflow, addHierarchy, false, ds, testDirectory, compressor);
570596
ImmutableGraphIndex index = indexes.get(features);
597+
598+
// Capture post-build state
599+
diagnostics.capturePostPhaseSnapshot("Build");
600+
var buildSnapshot = diagnostics.getLatestSystemSnapshot();
601+
var buildDiskSnapshot = diagnostics.getLatestDiskSnapshot();
602+
571603
try (ConfiguredSystem cs = new ConfiguredSystem(ds, index, cvArg, features)) {
572604
int queryRuns = 2;
573605
List<QueryBenchmark> benchmarks = List.of(
@@ -578,7 +610,7 @@ public static List<BenchResult> runAllAndCollectResults(
578610
CountBenchmark.createDefault(),
579611
AccuracyBenchmark.createDefault()
580612
);
581-
QueryTester tester = new QueryTester(benchmarks);
613+
QueryTester tester = new QueryTester(benchmarks, testDirectory, ds.name);
582614
for (int topK : topKGrid.keySet()) {
583615
for (boolean usePruning : usePruningGrid) {
584616
for (double overquery : topKGrid.get(topK)) {
@@ -596,11 +628,33 @@ public static List<BenchResult> runAllAndCollectResults(
596628
"overquery", overquery,
597629
"usePruning", usePruning
598630
);
631+
// Collect all metrics including memory and disk usage
632+
Map<String, Object> allMetrics = new HashMap<>();
599633
for (Metric metric : metricsList) {
600-
Map<String, Object> metrics = java.util.Map.of(metric.getHeader(), metric.getValue());
601-
results.add(new BenchResult(ds.name, params, metrics));
634+
allMetrics.put(metric.getHeader(), metric.getValue());
635+
}
636+
637+
// Add build time if available
638+
if (indexBuildTimes.containsKey(ds.name)) {
639+
allMetrics.put("Index Build Time", indexBuildTimes.get(ds.name));
640+
}
641+
642+
// Add memory metrics if available
643+
if (buildSnapshot != null) {
644+
allMetrics.put("Heap Memory Used (MB)", buildSnapshot.memoryStats.heapUsed / 1024.0 / 1024.0);
645+
allMetrics.put("Heap Memory Max (MB)", buildSnapshot.memoryStats.heapMax / 1024.0 / 1024.0);
646+
allMetrics.put("Off-Heap Direct (MB)", buildSnapshot.memoryStats.directBufferMemory / 1024.0 / 1024.0);
647+
allMetrics.put("Off-Heap Mapped (MB)", buildSnapshot.memoryStats.mappedBufferMemory / 1024.0 / 1024.0);
648+
allMetrics.put("Total Off-Heap (MB)", buildSnapshot.memoryStats.getTotalOffHeapMemory() / 1024.0 / 1024.0);
649+
}
650+
651+
// Add disk metrics if available
652+
if (buildDiskSnapshot != null) {
653+
allMetrics.put("Disk Usage (MB)", buildDiskSnapshot.totalBytes / 1024.0 / 1024.0);
654+
allMetrics.put("File Count", buildDiskSnapshot.fileCount);
602655
}
603-
results.add(new BenchResult(ds.name, params, Map.of("Index Build Time", indexBuildTimes.get(ds.name))));
656+
657+
results.add(new BenchResult(ds.name, params, allMetrics));
604658
}
605659
}
606660
}

jvector-examples/src/main/java/io/github/jbellis/jvector/example/benchmarks/QueryTester.java

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,25 +16,49 @@
1616

1717
package io.github.jbellis.jvector.example.benchmarks;
1818

19+
import java.nio.file.Path;
1920
import java.util.ArrayList;
2021
import java.util.LinkedHashMap;
2122
import java.util.List;
2223
import java.util.Map;
2324

25+
import io.github.jbellis.jvector.example.Grid;
2426
import io.github.jbellis.jvector.example.Grid.ConfiguredSystem;
27+
import io.github.jbellis.jvector.example.benchmarks.diagnostics.BenchmarkDiagnostics;
2528

2629
/**
2730
* Orchestrates running a set of QueryBenchmark instances
2831
* and collects their summary results.
2932
*/
3033
public class QueryTester {
3134
private final List<QueryBenchmark> benchmarks;
35+
private final Path monitoredDirectory;
36+
private final String datasetName;
3237

3338
/**
3439
* Creates a tester with no monitored directory and no dataset name.
* Delegates to the three-argument constructor, passing {@code null} for both.
*
* @param benchmarks the benchmarks to run, in the order provided
3540
*/
3641
public QueryTester(List<QueryBenchmark> benchmarks) {
42+
this(benchmarks, null, null);
43+
}
44+
45+
/**
46+
* Creates a tester with a monitored directory but no dataset name.
* Delegates to the three-argument constructor, passing {@code null} as the dataset name.
*
* @param benchmarks the benchmarks to run, in the order provided
47+
* @param monitoredDirectory optional directory to monitor for disk usage; may be {@code null}
48+
*/
49+
public QueryTester(List<QueryBenchmark> benchmarks, Path monitoredDirectory) {
50+
this(benchmarks, monitoredDirectory, null);
51+
}
52+
53+
/**
54+
* Creates a tester and stores the three arguments as given; no copying or validation is done.
*
* @param benchmarks the benchmarks to run, in the order provided
55+
* @param monitoredDirectory optional directory to monitor for disk usage; when {@code null},
*                           {@code run} does not set a monitored directory on its diagnostics
56+
* @param datasetName optional dataset name for retrieving build time; when {@code null},
*                    {@code run} does not add the "Index build time" metric
57+
*/
58+
public QueryTester(List<QueryBenchmark> benchmarks, Path monitoredDirectory, String datasetName) {
3759
this.benchmarks = benchmarks;
60+
this.monitoredDirectory = monitoredDirectory;
61+
this.datasetName = datasetName;
3862
}
3963

4064
/**
@@ -56,11 +80,52 @@ public List<Metric> run(
5680

5781
List<Metric> results = new ArrayList<>();
5882

83+
// Capture memory and disk usage before running queries
84+
// Use NONE level to suppress logging output that would break the table
85+
var diagnostics = new BenchmarkDiagnostics(io.github.jbellis.jvector.example.benchmarks.diagnostics.DiagnosticLevel.NONE);
86+
if (monitoredDirectory != null) {
87+
diagnostics.setMonitoredDirectory(monitoredDirectory);
88+
}
89+
diagnostics.capturePrePhaseSnapshot("Query");
90+
5991
for (var benchmark : benchmarks) {
6092
var metrics = benchmark.runBenchmark(cs, topK, rerankK, usePruning, queryRuns);
6193
results.addAll(metrics);
6294
}
6395

96+
// Capture memory and disk usage after running queries
97+
diagnostics.capturePostPhaseSnapshot("Query");
98+
99+
// Add memory and disk metrics to results
100+
var systemSnapshot = diagnostics.getLatestSystemSnapshot();
101+
var diskSnapshot = diagnostics.getLatestDiskSnapshot();
102+
103+
if (systemSnapshot != null) {
104+
// Max heap usage in MB
105+
results.add(Metric.of("Max heap usage", ".1f",
106+
systemSnapshot.memoryStats.heapUsed / (1024.0 * 1024.0)));
107+
108+
// Max off-heap usage (direct + mapped) in MB
109+
results.add(Metric.of("Max offheap usage", ".1f",
110+
systemSnapshot.memoryStats.getTotalOffHeapMemory() / (1024.0 * 1024.0)));
111+
}
112+
113+
if (diskSnapshot != null) {
114+
// Total file size in MB
115+
results.add(Metric.of("Total file size", ".1f",
116+
diskSnapshot.totalBytes / (1024.0 * 1024.0)));
117+
118+
// Number of files
119+
results.add(Metric.of("Number of files", ".0f",
120+
(double) diskSnapshot.fileCount));
121+
}
122+
123+
// Add index build time if available
124+
if (datasetName != null && Grid.getIndexBuildTime(datasetName) != null) {
125+
results.add(Metric.of("Index build time", ".2f",
126+
Grid.getIndexBuildTime(datasetName)));
127+
}
128+
64129
return results;
65130
}
66131
}

0 commit comments

Comments
 (0)