@@ -96,6 +96,9 @@ var flagDBConnStr = flag.String("db", "postgresql://root@localhost:26257",
96
96
// laion-1m-test-ip (1M vectors, 768 dims)
97
97
// coco-t2i-512-angular (113K vectors, 512 dims)
98
98
// coco-i2i-512-angular (113K vectors, 512 dims)
99
+ // wiki-cohere-768-100k-angular (100K vectors, 768 dims)
100
+ // wiki-cohere-768-1m-angular (1M vectors, 768 dims)
101
+ // wiki-cohere-768-10m-angular (10M vectors, 768 dims)
99
102
//
100
103
// After download, the datasets are cached in a local temp directory and a
101
104
// vector index is created. The built vector index is also cached in the temp
@@ -244,21 +247,29 @@ func (vb *vectorBench) SearchIndex() {
244
247
panic (err )
245
248
}
246
249
247
- start := timeutil .Now ()
250
+ // Create percentile estimator for search latencies.
251
+ latencyEstimator := NewPercentileEstimator (1000 )
248
252
249
253
// Search for test vectors.
250
- var sumRecall , sumVectors , sumLeafVectors , sumFullVectors , sumPartitions float64
254
+ var sumRecall , sumVectors , sumLeafVectors , sumFullVectors , sumPartitions , sumElapsed float64
251
255
count := vb .data .Test .Count
252
256
for i := range count {
253
257
// Calculate truth set for the vector.
254
258
queryVector := vb .data .Test .At (i )
255
259
260
+ // Time individual search.
261
+ start := timeutil .Now ()
256
262
var stats cspann.SearchStats
257
263
prediction , err := vb .provider .Search (vb .ctx , state , queryVector , & stats )
258
264
if err != nil {
259
265
panic (err )
260
266
}
261
267
268
+ // Record latency in seconds.
269
+ elapsed := timeutil .Since (start ).Seconds ()
270
+ sumElapsed += elapsed
271
+ latencyEstimator .Add (elapsed )
272
+
262
273
primaryKeys := make ([]byte , maxResults * 4 )
263
274
truth := make ([]cspann.KeyBytes , maxResults )
264
275
for neighbor := range maxResults {
@@ -274,12 +285,18 @@ func (vb *vectorBench) SearchIndex() {
274
285
sumPartitions += float64 (stats .PartitionCount )
275
286
}
276
287
277
- elapsed := timeutil .Since (start )
278
- fmt .Printf ("%d\t %0.2f%%\t %0.0f\t %0.0f\t %0.2f\t %0.2f\t %0.2f\n " ,
288
+ qps := float64 (count ) / sumElapsed
289
+
290
+ // Calculate percentile latencies.
291
+ p50Latency := latencyEstimator .Estimate (0.50 ) * 1000
292
+ p95Latency := latencyEstimator .Estimate (0.95 ) * 1000
293
+ p99Latency := latencyEstimator .Estimate (0.99 ) * 1000
294
+
295
+ fmt .Printf ("%d\t %0.2f%%\t %0.0f\t %0.0f\t %0.2f\t %0.2f\t %0.2f\t %0.2f\t %0.2f\t %0.2f\n " ,
279
296
beamSize , sumRecall / float64 (count )* 100 ,
280
297
sumLeafVectors / float64 (count ), sumVectors / float64 (count ),
281
298
sumFullVectors / float64 (count ), sumPartitions / float64 (count ),
282
- float64 ( count ) / elapsed . Seconds () )
299
+ qps , p50Latency , p95Latency , p99Latency )
283
300
}
284
301
285
302
fmt .Println ()
@@ -290,7 +307,7 @@ func (vb *vectorBench) SearchIndex() {
290
307
minPartitionSize , maxPartitionSize , * flagBeamSize )
291
308
fmt .Println (vb .provider .FormatStats ())
292
309
293
- fmt .Printf ("beam\t recall\t leaf\t all\t full\t partns\t qps\n " )
310
+ fmt .Printf ("beam\t recall\t leaf\t all\t full\t partns\t qps\t p50(ms) \t p95(ms) \t p99(ms) \ n " )
294
311
295
312
// Search multiple times with different search beam sizes.
296
313
beamSizeStrs := strings .Split (* flagSearchBeamSizes , "," )
0 commit comments