Skip to content

Commit 759625f

Browse files
committed
rowenc: handle encoding of NULL or missing vectors
Previously if the mutationSearch operator failed to find a partition for a vector because it was NULL or missing (e.g. being moved by fixup), rowenc would panic, causing the mutation to fail. This prevented the insertion of NULL vectors and would sometimes cause deletes to erroneously fail. This patch allows mutationSearch to signal "partition not found, but it's okay" by sending a NULL partition instead of setting the partition value to nil. The 'nil' value now indicates a bad failure that should never happen whereas a NULL partition value means 'skip this row' ala a partial index. Fixes: #144621 Release note (bug fix): NULL vectors can now be inserted into tables with vector indexes.
1 parent 61868f0 commit 759625f

File tree

6 files changed

+125
-12
lines changed

6 files changed

+125
-12
lines changed

pkg/sql/logictest/testdata/logic_test/vector_index

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -548,3 +548,25 @@ statement error vector_search_beam_size cannot be less than 1 or greater than 51
548548
SET vector_search_beam_size=513
549549

550550
subtest end
551+
552+
subtest test_144621
553+
554+
statement ok
555+
CREATE TABLE test_144621 (a INT PRIMARY KEY, v VECTOR(3))
556+
557+
statement ok
558+
CREATE VECTOR INDEX vec_idx ON test_144621 (v)
559+
560+
statement ok
561+
INSERT INTO test_144621 VALUES (1, NULL)
562+
563+
query I
564+
SELECT a FROM test_144621;
565+
----
566+
1
567+
568+
query I
569+
SELECT a FROM test_144621@vec_idx ORDER BY v <-> '[1, 2, 3]' LIMIT 1;
570+
----
571+
572+
subtest end

pkg/sql/row/vector_index.go

Lines changed: 3 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -82,29 +82,23 @@ func (vm *VectorIndexUpdateHelper) initImpl(
8282
if vm.PutPartitionKeys == nil {
8383
vm.PutPartitionKeys = make(map[descpb.IndexID]tree.Datum)
8484
}
85-
if putPartitionKeys[valIdx] != tree.DNull {
86-
vm.PutPartitionKeys[idx.GetID()] = putPartitionKeys[valIdx]
87-
}
85+
vm.PutPartitionKeys[idx.GetID()] = putPartitionKeys[valIdx]
8886
}
8987

9088
// Retrieve the quantized vector, if it exists.
9189
if valIdx < len(putQuantizedVecs) {
9290
if vm.PutQuantizedVecs == nil {
9391
vm.PutQuantizedVecs = make(map[descpb.IndexID]tree.Datum)
9492
}
95-
if putQuantizedVecs[valIdx] != tree.DNull {
96-
vm.PutQuantizedVecs[idx.GetID()] = putQuantizedVecs[valIdx]
97-
}
93+
vm.PutQuantizedVecs[idx.GetID()] = putQuantizedVecs[valIdx]
9894
}
9995

10096
// Retrieve the partition key value for delete, if it exists.
10197
if valIdx < len(delPartitionKeys) {
10298
if vm.DelPartitionKeys == nil {
10399
vm.DelPartitionKeys = make(map[descpb.IndexID]tree.Datum)
104100
}
105-
if delPartitionKeys[valIdx] != tree.DNull {
106-
vm.DelPartitionKeys[idx.GetID()] = delPartitionKeys[valIdx]
107-
}
101+
vm.DelPartitionKeys[idx.GetID()] = delPartitionKeys[valIdx]
108102
}
109103

110104
valIdx++

pkg/sql/rowenc/index_encoding.go

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1382,7 +1382,14 @@ func encodeSecondaryIndexKeyWithKeyPrefix(
13821382
secondaryIndexKey, err = encodeVectorIndexKey(
13831383
secondaryIndex, colMap, values, keyPrefix, vh,
13841384
)
1385-
secondaryKeys = [][]byte{secondaryIndexKey}
1385+
// This can happen if the search returns a NULL partition, which means the
1386+
// partition was not found in the DELETE case or the vector being inserted
1387+
// is NULL.
1388+
if secondaryIndexKey == nil {
1389+
secondaryKeys = [][]byte{}
1390+
} else {
1391+
secondaryKeys = [][]byte{secondaryIndexKey}
1392+
}
13861393
}
13871394
return secondaryKeys, containsNull, err
13881395
}

pkg/sql/rowexec/vector_search.go

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -322,8 +322,13 @@ func (v *vectorMutationSearchProcessor) Next() (rowenc.EncDatumRow, *execinfrapb
322322
break
323323
}
324324
// It is possible for the search not to find the target index entry, in
325-
// which case the result is nil.
326-
partitionKey = v.searcher.PartitionKey()
325+
// which case the result is nil. In this case, we leave the partition key
326+
// as NULL to signal to rowenc that we didn't find a partition key, but that
327+
// it's okay since it's expected that this can happen due to fixups moving
328+
// vector index entries around.
329+
if v.searcher.PartitionKey() != nil {
330+
partitionKey = v.searcher.PartitionKey()
331+
}
327332
}
328333
}
329334
row = append(row, rowenc.DatumToEncDatum(types.Int, partitionKey))

pkg/sql/vecindex/BUILD.bazel

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ go_test(
5252
deps = [
5353
"//pkg/base",
5454
"//pkg/keys",
55+
"//pkg/roachpb",
5556
"//pkg/security/securityassets",
5657
"//pkg/security/securitytest",
5758
"//pkg/security/username",
@@ -65,6 +66,7 @@ go_test(
6566
"//pkg/sql/catalog/descs",
6667
"//pkg/sql/catalog/desctestutils",
6768
"//pkg/sql/catalog/tabledesc",
69+
"//pkg/sql/rowenc",
6870
"//pkg/sql/sem/catid",
6971
"//pkg/sql/sem/idxtype",
7072
"//pkg/sql/sem/tree",

pkg/sql/vecindex/vecindex_test.go

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,14 @@ import (
1616
"time"
1717

1818
"github.com/cockroachdb/cockroach/pkg/base"
19+
"github.com/cockroachdb/cockroach/pkg/roachpb"
1920
"github.com/cockroachdb/cockroach/pkg/sql"
21+
"github.com/cockroachdb/cockroach/pkg/sql/catalog/descpb"
22+
"github.com/cockroachdb/cockroach/pkg/sql/rowenc"
2023
"github.com/cockroachdb/cockroach/pkg/sql/vecindex"
24+
"github.com/cockroachdb/cockroach/pkg/sql/vecindex/cspann"
2125
"github.com/cockroachdb/cockroach/pkg/sql/vecindex/cspann/testutils"
26+
"github.com/cockroachdb/cockroach/pkg/sql/vecindex/vecencoding"
2227
"github.com/cockroachdb/cockroach/pkg/testutils/serverutils"
2328
"github.com/cockroachdb/cockroach/pkg/testutils/sqlutils"
2429
"github.com/cockroachdb/cockroach/pkg/util/leaktest"
@@ -117,3 +122,81 @@ func buildIndex(
117122

118123
wait.Wait()
119124
}
125+
126+
// TestVecindexDeletion tests that rows can be properly deleted from a vector index.
127+
func TestVecindexDeletion(t *testing.T) {
128+
defer leaktest.AfterTest(t)()
129+
defer log.Scope(t).Close(t)
130+
131+
ctx := context.Background()
132+
srv, sqlDB, _ := serverutils.StartServer(t, base.TestServerArgs{})
133+
runner := sqlutils.MakeSQLRunner(sqlDB)
134+
defer srv.Stopper().Stop(ctx)
135+
136+
// Enable vector indexes.
137+
runner.Exec(t, `SET CLUSTER SETTING feature.vector_index.enabled = true`)
138+
139+
// Construct the table.
140+
runner.Exec(t, "CREATE TABLE t (id INT PRIMARY KEY, v VECTOR(512), VECTOR INDEX (v))")
141+
142+
// Load a small set of vectors for testing.
143+
vectors := testutils.LoadFeatures(t, 10)
144+
145+
// Insert the vectors.
146+
for i := 0; i < vectors.Count; i++ {
147+
runner.Exec(t, "INSERT INTO t (id, v) VALUES ($1, $2)", i, vectors.At(i).String())
148+
}
149+
150+
// Verify the vectors were inserted.
151+
var count int
152+
runner.QueryRow(t, "SELECT count(*) FROM t").Scan(&count)
153+
if count != vectors.Count {
154+
t.Errorf("expected %d rows, got %d", vectors.Count, count)
155+
}
156+
157+
// Get the table descriptor to find the vector index ID.
158+
var tableID uint32
159+
runner.QueryRow(t, "SELECT id FROM system.namespace WHERE name = 't'").Scan(&tableID)
160+
161+
// Get the index ID from crdb_internal.table_indexes
162+
var indexID uint32
163+
runner.QueryRow(t, "SELECT index_id FROM crdb_internal.table_indexes WHERE descriptor_id = $1 AND index_name = 't_v_idx'", tableID).Scan(&indexID)
164+
if indexID == 0 {
165+
t.Fatal("vector index not found")
166+
}
167+
168+
// Start a KV transaction to manually delete the vector index keys.
169+
db := srv.DB()
170+
txn := db.NewTxn(ctx, "delete-vector-index-keys")
171+
172+
// Delete all vector index keys for the table.
173+
codec := srv.ApplicationLayer().Codec()
174+
prefix := rowenc.MakeIndexKeyPrefix(codec, descpb.ID(tableID), descpb.IndexID(indexID))
175+
prefix = vecencoding.EncodePartitionKey(prefix, cspann.RootKey)
176+
prefix = vecencoding.EncodePartitionLevel(prefix, cspann.LeafLevel)
177+
key := roachpb.Key(prefix)
178+
if _, err := txn.DelRange(ctx, key.Next(), key.PrefixEnd(), false); err != nil {
179+
t.Fatal(err)
180+
}
181+
if err := txn.Commit(ctx); err != nil {
182+
t.Fatal(err)
183+
}
184+
185+
// Delete rows one at a time and verify the count after each deletion.
186+
for i := 0; i < vectors.Count; i++ {
187+
runner.Exec(t, "DELETE FROM t WHERE id = $1", i)
188+
189+
// Verify the count after each deletion
190+
runner.QueryRow(t, "SELECT count(*) FROM t").Scan(&count)
191+
expectedCount := vectors.Count - (i + 1)
192+
if count != expectedCount {
193+
t.Errorf("after deleting id %d: expected %d rows, got %d", i, expectedCount, count)
194+
}
195+
}
196+
197+
// Final verification that table is empty
198+
runner.QueryRow(t, "SELECT count(*) FROM t").Scan(&count)
199+
if count != 0 {
200+
t.Errorf("expected 0 rows after all deletions, got %d", count)
201+
}
202+
}

0 commit comments

Comments
 (0)