Skip to content

Commit c9ffd9a

Browse files
craig[bot] and andy-kimball committed
Merge #152983
152983: vecbench: show p50/p95/p99 vector search latencies r=drewkimball a=andy-kimball

Calculate the p50/p95/p99 latencies for vector search in the vecbench tool. Print the latencies as part of the output.

Epic: CRDB-42943

Release note: None

Co-authored-by: Andrew Kimball <[email protected]>
2 parents 28ae229 + 56250be commit c9ffd9a

File tree

2 files changed

+24
-6
lines changed

2 files changed

+24
-6
lines changed

pkg/cmd/dev/build.go

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -106,6 +106,7 @@ var buildTargetMapping = map[string]string{
106106
"sql-bootstrap-data": "//pkg/cmd/sql-bootstrap-data:sql-bootstrap-data",
107107
"staticcheck": "@co_honnef_go_tools//cmd/staticcheck:staticcheck",
108108
"tests": "//pkg:all_tests",
109+
"vecbench": "//pkg/cmd/vecbench:vecbench",
109110
"whoownsit": "//pkg/cmd/whoownsit:whoownsit",
110111
"workload": "//pkg/cmd/workload:workload",
111112
}

pkg/cmd/vecbench/main.go

Lines changed: 23 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -96,6 +96,9 @@ var flagDBConnStr = flag.String("db", "postgresql://root@localhost:26257",
9696
// laion-1m-test-ip (1M vectors, 768 dims)
9797
// coco-t2i-512-angular (113K vectors, 512 dims)
9898
// coco-i2i-512-angular (113K vectors, 512 dims)
99+
// wiki-cohere-768-100k-angular (100K vectors, 768 dims)
100+
// wiki-cohere-768-1m-angular (1M vectors, 768 dims)
101+
// wiki-cohere-768-10m-angular (10M vectors, 768 dims)
99102
//
100103
// After download, the datasets are cached in a local temp directory and a
101104
// vector index is created. The built vector index is also cached in the temp
@@ -244,21 +247,29 @@ func (vb *vectorBench) SearchIndex() {
244247
panic(err)
245248
}
246249

247-
start := timeutil.Now()
250+
// Create percentile estimator for search latencies.
251+
latencyEstimator := NewPercentileEstimator(1000)
248252

249253
// Search for test vectors.
250-
var sumRecall, sumVectors, sumLeafVectors, sumFullVectors, sumPartitions float64
254+
var sumRecall, sumVectors, sumLeafVectors, sumFullVectors, sumPartitions, sumElapsed float64
251255
count := vb.data.Test.Count
252256
for i := range count {
253257
// Calculate truth set for the vector.
254258
queryVector := vb.data.Test.At(i)
255259

260+
// Time individual search.
261+
start := timeutil.Now()
256262
var stats cspann.SearchStats
257263
prediction, err := vb.provider.Search(vb.ctx, state, queryVector, &stats)
258264
if err != nil {
259265
panic(err)
260266
}
261267

268+
// Record latency in seconds.
269+
elapsed := timeutil.Since(start).Seconds()
270+
sumElapsed += elapsed
271+
latencyEstimator.Add(elapsed)
272+
262273
primaryKeys := make([]byte, maxResults*4)
263274
truth := make([]cspann.KeyBytes, maxResults)
264275
for neighbor := range maxResults {
@@ -274,12 +285,18 @@ func (vb *vectorBench) SearchIndex() {
274285
sumPartitions += float64(stats.PartitionCount)
275286
}
276287

277-
elapsed := timeutil.Since(start)
278-
fmt.Printf("%d\t%0.2f%%\t%0.0f\t%0.0f\t%0.2f\t%0.2f\t%0.2f\n",
288+
qps := float64(count) / sumElapsed
289+
290+
// Calculate percentile latencies.
291+
p50Latency := latencyEstimator.Estimate(0.50) * 1000
292+
p95Latency := latencyEstimator.Estimate(0.95) * 1000
293+
p99Latency := latencyEstimator.Estimate(0.99) * 1000
294+
295+
fmt.Printf("%d\t%0.2f%%\t%0.0f\t%0.0f\t%0.2f\t%0.2f\t%0.2f\t%0.2f\t%0.2f\t%0.2f\n",
279296
beamSize, sumRecall/float64(count)*100,
280297
sumLeafVectors/float64(count), sumVectors/float64(count),
281298
sumFullVectors/float64(count), sumPartitions/float64(count),
282-
float64(count)/elapsed.Seconds())
299+
qps, p50Latency, p95Latency, p99Latency)
283300
}
284301

285302
fmt.Println()
@@ -290,7 +307,7 @@ func (vb *vectorBench) SearchIndex() {
290307
minPartitionSize, maxPartitionSize, *flagBeamSize)
291308
fmt.Println(vb.provider.FormatStats())
292309

293-
fmt.Printf("beam\trecall\tleaf\tall\tfull\tpartns\tqps\n")
310+
fmt.Printf("beam\trecall\tleaf\tall\tfull\tpartns\tqps\tp50(ms)\tp95(ms)\tp99(ms)\n")
294311

295312
// Search multiple times with different search beam sizes.
296313
beamSizeStrs := strings.Split(*flagSearchBeamSizes, ",")

0 commit comments

Comments
 (0)