Skip to content

Commit 7c9cfa3

Browse files
craig[bot]andy-kimball
andcommitted
Merge #150573
150573: vecindex: fix and enhance vecindex testing r=drewkimball a=andy-kimball #### vecindex: add datadriven tests for vecindex Refactor the C-SPANN datadriven tests so they can be shared by other Store implementations. Add new datadriven test for vecindex that tests recall for several interesting datasets. #### cspann: fix bug in CalculateTruth Fix bug in CalculateTruth, which was calling vecpb.MeasureDistance with un-normalized vectors for Cosine distance. This returns incorrect results. #### vecindex: fix determinism violations Fix a couple cases where the vector index is not behaving deterministically even when configured to do so. In one case, partition keys were not generated deterministically. In the other case, the seed used in crdb_test builds was different than the seed used otherwise. #### builtins: add process_vector_index_fixups builtin Add a new builtin function that waits until all pending fixups have been processed for a given index. This is used for testing. Previously, we were processing fixups in deterministic tests by calling ProcessFixups just before inserting vectors or searching for them. However, that was causing deadlocks, because fixups were being processed in the same transaction that was updating the index. The vector_index test is now updated to call the new builtin when it needs to force fixup processing. #### cspann: use brackets to format vectors Postgres uses brackets to format vectors, e.g. [1, 2]. However, our test code currently uses parentheses, e.g. (1, 2). Update our tests to use brackets instead, so that we're more consistent. Co-authored-by: Andrew Kimball <[email protected]>
2 parents 65c856a + c6603d3 commit 7c9cfa3

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

44 files changed

+3860
-3093
lines changed

pkg/cmd/vecbench/mem_provider.go

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -177,6 +177,9 @@ func (m *MemProvider) Search(
177177

178178
// Get result keys.
179179
results := searchSet.PopResults()
180+
if len(results) > memState.maxResults {
181+
results = results[:memState.maxResults]
182+
}
180183
keys = make([]cspann.KeyBytes, len(results))
181184
for i, res := range results {
182185
keys[i] = []byte(res.ChildKey.KeyBytes)
@@ -196,7 +199,9 @@ func (m *MemProvider) Save(ctx context.Context) error {
196199
}
197200

198201
// Wait for any remaining background fixups to be processed.
199-
m.index.ProcessFixups()
202+
if err := m.index.ProcessFixups(ctx); err != nil {
203+
return err
204+
}
200205

201206
startTime := timeutil.Now()
202207

pkg/sql/faketreeeval/evalctx.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -577,6 +577,13 @@ func (ep *DummyEvalPlanner) RetryCounter() int {
577577
return 0
578578
}
579579

580+
// ProcessVectorIndexFixups is part of the eval.Planner interface.
581+
func (ep *DummyEvalPlanner) ProcessVectorIndexFixups(
582+
ctx context.Context, tableID descpb.ID, indexID descpb.IndexID,
583+
) error {
584+
return nil
585+
}
586+
580587
// DummyPrivilegedAccessor implements the tree.PrivilegedAccessor interface by returning errors.
581588
type DummyPrivilegedAccessor struct{}
582589

pkg/sql/logictest/testdata/logic_test/vector_index

Lines changed: 40 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -539,12 +539,37 @@ INSERT INTO exec_test (a, b, vec1) VALUES
539539
(2, 1, '[4, 5, 6]'),
540540
(3, 2, '[7, 8, 9]'),
541541
(4, 2, '[10, 11, 12]'),
542-
(5, 2, '[13, 14, 15]'),
542+
(5, 2, '[13, 14, 15]');
543+
544+
statement ok
545+
INSERT INTO exec_test (a, b, vec1) VALUES
543546
(6, NULL, '[16, 17, 18]'),
544547
(7, NULL, '[1, 1, 1]'),
545548
(8, NULL, NULL),
546549
(9, 3, NULL);
547550

551+
# Define helper to force fixup processing for the given index.
552+
statement ok
553+
CREATE PROCEDURE process_vector_index_fixups_for(desc_name STRING, idx_name STRING)
554+
LANGUAGE SQL
555+
AS $$
556+
SELECT crdb_internal.process_vector_index_fixups(
557+
(SELECT id FROM system.namespace WHERE name = desc_name),
558+
(
559+
SELECT index_id
560+
FROM crdb_internal.table_indexes
561+
WHERE descriptor_name = desc_name AND index_name = idx_name
562+
)
563+
)
564+
$$;
565+
566+
# Process split fixups for the two indexes.
567+
statement ok
568+
CALL process_vector_index_fixups_for('exec_test', 'idx1');
569+
570+
statement ok
571+
CALL process_vector_index_fixups_for('exec_test', 'idx2');
572+
548573
statement error pgcode 22000 pq: expected 3 dimensions, not 1
549574
INSERT INTO exec_test (a, b, vec1) VALUES (10, 1, '[1]');
550575

@@ -765,9 +790,22 @@ INSERT INTO distance_metrics (a, v) VALUES
765790
(1, '[0, 0]'),
766791
(2, '[-2, -2]'),
767792
(3, '[2, 2]'),
768-
(4, '[4, 4]'),
793+
(4, '[4, 4]');
794+
795+
statement ok
796+
INSERT INTO distance_metrics (a, v) VALUES
769797
(5, '[-2, 2]');
770798

799+
# Process split fixups for the three indexes.
800+
statement ok
801+
CALL process_vector_index_fixups_for('distance_metrics', 'idx1');
802+
803+
statement ok
804+
CALL process_vector_index_fixups_for('distance_metrics', 'idx2');
805+
806+
statement ok
807+
CALL process_vector_index_fixups_for('distance_metrics', 'idx3');
808+
771809
# Results using L2 distance.
772810
query ITF rowsort
773811
SELECT a, v, round(v <-> '[2, 2]', 2) dist FROM distance_metrics ORDER BY v <-> '[2, 2]' LIMIT 5;

pkg/sql/planner.go

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1077,3 +1077,14 @@ func (p *planner) ExtendHistoryRetention(ctx context.Context, jobID jobspb.JobID
10771077
func (p *planner) RetryCounter() int {
10781078
return p.autoRetryCounter + p.autoRetryStmtCounter
10791079
}
1080+
1081+
// ProcessVectorIndexFixups is part of the eval.Planner interface.
1082+
func (p *planner) ProcessVectorIndexFixups(
1083+
ctx context.Context, tableID descpb.ID, indexID descpb.IndexID,
1084+
) error {
1085+
vi, err := p.execCfg.VecIndexManager.Get(ctx, tableID, indexID)
1086+
if err != nil {
1087+
return err
1088+
}
1089+
return vi.ProcessFixups(ctx)
1090+
}

pkg/sql/sem/builtins/builtins.go

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9357,6 +9357,30 @@ WHERE object_id = table_descriptor_id
93579357
},
93589358
},
93599359
),
9360+
"crdb_internal.process_vector_index_fixups": makeBuiltin(
9361+
tree.FunctionProperties{
9362+
Category: builtinconstants.CategoryTesting,
9363+
Undocumented: true,
9364+
},
9365+
tree.Overload{
9366+
Types: tree.ParamTypes{
9367+
{Name: "table_id", Typ: types.Int},
9368+
{Name: "index_id", Typ: types.Int},
9369+
},
9370+
ReturnType: tree.FixedReturnType(types.Void),
9371+
Fn: func(ctx context.Context, evalCtx *eval.Context, args tree.Datums) (tree.Datum, error) {
9372+
tableID := descpb.ID(tree.MustBeDInt(args[0]))
9373+
indexID := descpb.IndexID(tree.MustBeDInt(args[1]))
9374+
err := evalCtx.Planner.ProcessVectorIndexFixups(ctx, tableID, indexID)
9375+
if err != nil {
9376+
return nil, err
9377+
}
9378+
return tree.DVoidDatum, nil
9379+
},
9380+
Info: "Waits until all outstanding fixups for the vector index with the given ID have been processed.",
9381+
Volatility: volatility.Volatile,
9382+
},
9383+
),
93609384
}
93619385

93629386
var lengthImpls = func(incBitOverload bool) builtinDefinition {

pkg/sql/sem/builtins/fixed_oids.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2795,6 +2795,7 @@ var builtinOidsArray = []string{
27952795
2840: `lead(val: ltree, n: int) -> ltree`,
27962796
2841: `lead(val: ltree, n: int, default: ltree) -> ltree`,
27972797
2842: `last_value(val: ltree) -> ltree`,
2798+
2843: `crdb_internal.process_vector_index_fixups(table_id: int, index_id: int) -> void`,
27982799
}
27992800

28002801
var builtinOidsBySignature map[string]oid.Oid

pkg/sql/sem/eval/deps.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -455,6 +455,10 @@ type Planner interface {
455455

456456
// RetryCounter is the number of times this statement has been retried.
457457
RetryCounter() int
458+
459+
// ProcessVectorIndexFixups waits until all outstanding fixups for the vector
460+
// index with the given ID have been processed.
461+
ProcessVectorIndexFixups(ctx context.Context, tableID descpb.ID, indexID descpb.IndexID) error
458462
}
459463

460464
// InternalRows is an iterator interface that's exposed by the internal

pkg/sql/vecindex/BUILD.bazel

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,6 @@ go_library(
3232
"//pkg/sql/vecindex/vecpb",
3333
"//pkg/sql/vecindex/vecstore",
3434
"//pkg/sql/vecindex/vecstore/vecstorepb",
35-
"//pkg/util/buildutil",
3635
"//pkg/util/errorutil/unimplemented",
3736
"//pkg/util/log",
3837
"//pkg/util/metric",
@@ -51,7 +50,7 @@ go_test(
5150
"searcher_test.go",
5251
"vecindex_test.go",
5352
],
54-
data = ["//pkg/sql/vecindex/cspann:datasets"],
53+
data = ["//pkg/sql/vecindex/cspann:datasets"] + glob(["testdata/**"]),
5554
embed = [":vecindex"],
5655
exec_properties = select({
5756
"//build/toolchains:is_heavy": {"test.Pool": "large"},
@@ -83,6 +82,7 @@ go_test(
8382
"//pkg/sql/sem/tree",
8483
"//pkg/sql/types",
8584
"//pkg/sql/vecindex/cspann",
85+
"//pkg/sql/vecindex/cspann/commontest",
8686
"//pkg/sql/vecindex/cspann/quantize",
8787
"//pkg/sql/vecindex/cspann/testutils",
8888
"//pkg/sql/vecindex/vecencoding",
@@ -98,6 +98,7 @@ go_test(
9898
"//pkg/util/log",
9999
"//pkg/util/randutil",
100100
"//pkg/util/vector",
101+
"@com_github_cockroachdb_datadriven//:datadriven",
101102
"@com_github_stretchr_testify//require",
102103
],
103104
)

pkg/sql/vecindex/cspann/commontest/BUILD.bazel

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ load("@io_bazel_rules_go//go:def.bzl", "go_library")
33
go_library(
44
name = "commontest",
55
srcs = [
6+
"indextests.go",
67
"storetests.go",
78
"utils.go",
89
],
@@ -16,6 +17,7 @@ go_library(
1617
"//pkg/sql/vecindex/vecpb",
1718
"//pkg/util/encoding",
1819
"//pkg/util/vector",
20+
"@com_github_cockroachdb_datadriven//:datadriven",
1921
"@com_github_cockroachdb_errors//:errors",
2022
"@com_github_stretchr_testify//require",
2123
"@com_github_stretchr_testify//suite",

0 commit comments

Comments
 (0)