Skip to content

Commit 0d56d39

Browse files
committed
vecann: add new approximate nearest neighbor vector workload
vecann is a new workload that tests vector indexes, both inserting vectors into them and searching them for approximate nearest neighbors. We can use this for load and scale testing.

The scenario is very simple: insert vectors from one of the datasets we've stored in a GCP cloud bucket and search over them. Similar to the KV workload, the percentage of operations that are searches can be specified.

The schema is also simple: a single table containing a group id and an embedded vector. The group column is the prefix column for the vector index, so that a separate K-means tree is created for each unique group of vectors. This simulates a common case where companies want to insert and search embedding vectors for each of their customers in isolation.

Epic: CRDB-42943

Release note: None
1 parent c98cf90 commit 0d56d39

23 files changed

+1085
-363
lines changed

pkg/BUILD.bazel

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -824,6 +824,7 @@ ALL_TESTS = [
824824
"//pkg/workload/sqlstats:sqlstats_test",
825825
"//pkg/workload/tpcc:tpcc_test",
826826
"//pkg/workload/tpch:tpch_test",
827+
"//pkg/workload/vecann:vecann_test",
827828
"//pkg/workload/workloadimpl:workloadimpl_test",
828829
"//pkg/workload/workloadsql:workloadsql_test",
829830
"//pkg/workload/ycsb:ycsb_test",
@@ -2808,6 +2809,8 @@ GO_TARGETS = [
28082809
"//pkg/workload/tpch:tpch_test",
28092810
"//pkg/workload/ttlbench:ttlbench",
28102811
"//pkg/workload/ttllogger:ttllogger",
2812+
"//pkg/workload/vecann:vecann",
2813+
"//pkg/workload/vecann:vecann_test",
28112814
"//pkg/workload/workloadimpl:workloadimpl",
28122815
"//pkg/workload/workloadimpl:workloadimpl_test",
28132816
"//pkg/workload/workloadsql:workloadsql",

pkg/ccl/workloadccl/allccl/BUILD.bazel

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ go_library(
3131
"//pkg/workload/tpch",
3232
"//pkg/workload/ttlbench",
3333
"//pkg/workload/ttllogger",
34+
"//pkg/workload/vecann",
3435
"//pkg/workload/ycsb",
3536
],
3637
)

pkg/ccl/workloadccl/allccl/all.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,5 +35,6 @@ import (
3535
_ "github.com/cockroachdb/cockroach/pkg/workload/tpch"
3636
_ "github.com/cockroachdb/cockroach/pkg/workload/ttlbench"
3737
_ "github.com/cockroachdb/cockroach/pkg/workload/ttllogger"
38+
_ "github.com/cockroachdb/cockroach/pkg/workload/vecann"
3839
_ "github.com/cockroachdb/cockroach/pkg/workload/ycsb"
3940
)

pkg/ccl/workloadccl/allccl/all_test.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -139,10 +139,14 @@ func TestAllRegisteredSetup(t *testing.T) {
139139
t.Fatal(err)
140140
}
141141
case `interleavedpartitioned`:
142-
// This require a specific node locality setup
142+
// This require a specific node locality setup.
143143
continue
144144
case `ttlbench`:
145145
continue
146+
case `vecann`:
147+
// This requires downloading from a GCP bucket and storing in the
148+
// machine's ~/.cache directory.
149+
continue
146150
}
147151

148152
t.Run(meta.Name, func(t *testing.T) {

pkg/cmd/vecbench/BUILD.bazel

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@ go_library(
77
"main.go",
88
"mem_provider.go",
99
"percentile_estimator.go",
10-
"progress_writer.go",
1110
"sql_provider.go",
1211
"vector_provider.go",
1312
],
@@ -18,18 +17,18 @@ go_library(
1817
"//pkg/sql/vecindex/cspann/memstore",
1918
"//pkg/sql/vecindex/cspann/quantize",
2019
"//pkg/util/httputil",
20+
"//pkg/util/humanizeutil",
2121
"//pkg/util/stop",
2222
"//pkg/util/syncutil",
2323
"//pkg/util/timeutil",
2424
"//pkg/util/vector",
25+
"//pkg/workload/vecann",
2526
"@com_github_cockroachdb_crlib//crtime",
2627
"@com_github_cockroachdb_errors//:errors",
2728
"@com_github_cockroachdb_errors//oserror",
2829
"@com_github_guptarohit_asciigraph//:asciigraph",
29-
"@com_github_jackc_pgx_v5//:pgx",
3030
"@com_github_jackc_pgx_v5//pgconn",
3131
"@com_github_jackc_pgx_v5//pgxpool",
32-
"@com_google_cloud_go_storage//:storage",
3332
"@org_golang_x_term//:term",
3433
],
3534
)

0 commit comments

Comments
 (0)