Skip to content

Commit 3b6e988

Browse files
committed
vecstore: correctly handle composite pk columns
Previously, the vector store would blindly use primary key column bytes encoded in a vector index key to rebuild the primary key when retrieving full vector values. SpanBuilder will attempt to reused already encoded datums when building a span, but it can't do so if the encoded datum was encoded in a different direction than required for the span being built. For non-composite columns this was not an issue because they have all the data needed to re-encode in the key value itself. For composite column, the data encoded in the key is a key analogue and can't be used to reconstruct the original column value, which is stored as a suffix column. For vector search, the search processor has access to the ValueBytes array as well as the key, so it's able to rebuild column values as needed. However, the fixup processor only has access to the key bytes, which requires a bit of trickery. In this patch, we disallow assigning directionality to vector index key columns. The mechanics of similarity search preclude scannign prefixes, so this was always a nonsensical thing to do. We now disallow it entirely and take the further step to mechanically switch the directionality of vector index prefix columns to match the primary key, ensuring that the encoded value of these shared columns is always directly usable. We also move the logic for decoding and rebuilding the primary key into the vector store so that it can be shared between fixups and the vector search processor. Note that there is an additional composite key decoder available for the vector search processor because it does indeed need to decode the primary key bytes back to column values. Along the way, we: * Remove ASC/DESC from vector index columns. * Introduce a new index type method to describe indexes with non-directional prefixes. * Alter SHOW commands to no longer show vector index column directionality. * Introduce a bunch of test-gated code to ensure everything I said above is in fact true and remains so in the future. Informs: #146046 Release note (sql change): It is no longer permitted to assign directionality to any vector index column. Prefix columns (those columns before the vector column in a compound vector index key) are not scannable in a vector index, so directionality isn't relevant to them.
1 parent 972e20f commit 3b6e988

File tree

17 files changed

+241
-119
lines changed

17 files changed

+241
-119
lines changed

pkg/ccl/logictestccl/testdata/logic_test/partitioning_index

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -211,11 +211,11 @@ SHOW CREATE TABLE vector
211211
b INT8 NULL,
212212
v VECTOR(3) NULL,
213213
CONSTRAINT vector_pkey PRIMARY KEY (a ASC),
214-
VECTOR INDEX vector_b_v_idx (b ASC, v vector_l2_ops) PARTITION BY LIST (b) (
214+
VECTOR INDEX vector_b_v_idx (b, v vector_l2_ops) PARTITION BY LIST (b) (
215215
PARTITION p1 VALUES IN ((1)),
216216
PARTITION pu VALUES IN ((NULL))
217217
),
218-
VECTOR INDEX vector_idx (a ASC, b ASC, v vector_l2_ops) PARTITION BY LIST (a, b) (
218+
VECTOR INDEX vector_idx (a, b, v vector_l2_ops) PARTITION BY LIST (a, b) (
219219
PARTITION p1 VALUES IN ((1, 1), (2, 2))
220220
)
221221
) WITH (schema_locked = true)

pkg/sql/alter_primary_key.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -851,6 +851,11 @@ func setKeySuffixAndStoredColumnIDsFromPrimary(
851851
"indexed vector column cannot be part of the primary key")
852852
}
853853
}
854+
855+
if vecIdx {
856+
tabledesc.UpdateVectorIndexPrefixColDirections(toAdd, primary)
857+
}
858+
854859
// Finally, add all the stored columns if it is not already a key or key suffix column.
855860
toAddOldStoredColumnIDs := toAdd.StoreColumnIDs
856861
toAddOldStoredColumnNames := toAdd.StoreColumnNames

pkg/sql/catalog/catformat/index.go

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -274,10 +274,14 @@ func FormatIndexElements(
274274
}
275275
}
276276
}
277-
// The last column of an inverted or vector index cannot have a DESC
278-
// direction because it does not have a linear ordering. Since the default
279-
// direction is ASC, we omit the direction entirely for inverted/vector
280-
// index columns.
277+
// Vector indexes do not support ASC/DESC modifiers.
278+
if !index.Type.HasScannablePrefix() {
279+
continue
280+
}
281+
// The last column of an inverted index cannot have a DESC direction
282+
// because it does not have a linear ordering. Since the default
283+
// direction is ASC, we omit the direction entirely for inverted index
284+
// columns.
281285
if i < n-1 || index.Type.HasLinearOrdering() {
282286
f.WriteByte(' ')
283287
f.WriteString(index.KeyColumnDirections[i].String())

pkg/sql/catalog/descpb/index.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@ import (
99
"fmt"
1010

1111
"github.com/cockroachdb/cockroach/pkg/sql/catalog/catenumpb"
12+
"github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgcode"
13+
"github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgerror"
1214
"github.com/cockroachdb/cockroach/pkg/sql/sem/idxtype"
1315
"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
1416
"github.com/cockroachdb/cockroach/pkg/sql/types"
@@ -47,6 +49,11 @@ func (desc *IndexDescriptor) FillColumns(elems tree.IndexElemList) error {
4749
if c.Expr != nil {
4850
return errors.AssertionFailedf("index elem expression should have been replaced with a column")
4951
}
52+
// Vector index prefix columns don't have a direction, nor do the vector columns themselves.
53+
if !desc.Type.HasScannablePrefix() && c.Direction != tree.DefaultDirection {
54+
return pgerror.Newf(pgcode.FeatureNotSupported,
55+
"%s does not support the %s option", idxtype.ErrorText(desc.Type), c.Direction)
56+
}
5057
desc.KeyColumnNames = append(desc.KeyColumnNames, string(c.Column))
5158
switch c.Direction {
5259
case tree.Ascending, tree.DefaultDirection:

pkg/sql/catalog/tabledesc/structured.go

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2797,3 +2797,17 @@ func (desc *Mutable) BumpExternalAsOf(timestamp hlc.Timestamp) error {
27972797
func (desc *Mutable) ForceModificationTime(modificationTime hlc.Timestamp) {
27982798
desc.ModificationTime = modificationTime
27992799
}
2800+
2801+
func UpdateVectorIndexPrefixColDirections(
2802+
vecIndexDesc *descpb.IndexDescriptor, primaryIndexDesc *descpb.IndexDescriptor,
2803+
) {
2804+
numPrefixCols := len(vecIndexDesc.KeyColumnIDs) - 1
2805+
for i := 0; i < numPrefixCols; i++ {
2806+
for j, pkColID := range primaryIndexDesc.KeyColumnIDs {
2807+
if vecIndexDesc.KeyColumnIDs[i] == pkColID {
2808+
vecIndexDesc.KeyColumnDirections[i] = primaryIndexDesc.KeyColumnDirections[j]
2809+
break
2810+
}
2811+
}
2812+
}
2813+
}

pkg/sql/create_index.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -267,6 +267,8 @@ func makeIndexDescriptor(
267267
if err != nil {
268268
return nil, err
269269
}
270+
271+
tabledesc.UpdateVectorIndexPrefixColDirections(&indexDesc, tableDesc.GetPrimaryIndex().IndexDesc())
270272
}
271273

272274
if n.Sharded != nil {

pkg/sql/create_table.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2849,6 +2849,9 @@ func replaceLikeTableOpts(n *tree.CreateTable, params runParams) (tree.TableDefs
28492849
Column: tree.Name(name),
28502850
Direction: tree.Ascending,
28512851
}
2852+
if !indexDef.Type.HasScannablePrefix() {
2853+
elem.Direction = tree.DefaultDirection
2854+
}
28522855
col, err := catalog.MustFindColumnByID(td, idx.GetKeyColumnID(j))
28532856
if err != nil {
28542857
return nil, err

pkg/sql/logictest/testdata/logic_test/expression_index

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ CREATE TABLE t (
1010
INDEX t_a_plus_b_idx ((a + b)),
1111
INDEX t_lower_c_a_plus_b_idx (lower(c), (a + b)),
1212
INVERTED INDEX t_b_j_a_asc (b DESC, (j->'a') ASC),
13-
VECTOR INDEX t_vec (a DESC, (v::VECTOR(3))),
13+
VECTOR INDEX t_vec (a, (v::VECTOR(3))),
1414
FAMILY (k, a, b, c, j, v)
1515
)
1616

@@ -30,7 +30,7 @@ CREATE TABLE public.t (
3030
INDEX t_a_plus_b_idx ((a + b) ASC),
3131
INDEX t_lower_c_a_plus_b_idx (lower(c) ASC, (a + b) ASC),
3232
INVERTED INDEX t_b_j_a_asc (b DESC, (j->'a':::STRING)),
33-
VECTOR INDEX t_vec (a DESC, (v::VECTOR(3)) vector_l2_ops),
33+
VECTOR INDEX t_vec (a, (v::VECTOR(3)) vector_l2_ops),
3434
FAMILY fam_0_k_a_b_c_j_v (k, a, b, c, j, v)
3535
);
3636

@@ -50,7 +50,7 @@ CREATE TABLE public.t (
5050
INDEX t_a_plus_b_idx ((a + b) ASC),
5151
INDEX t_lower_c_a_plus_b_idx (lower(c) ASC, (a + b) ASC),
5252
INVERTED INDEX t_b_j_a_asc (b DESC, (j->'a':::STRING)),
53-
VECTOR INDEX t_vec (a DESC, (v::VECTOR(3)) vector_l2_ops),
53+
VECTOR INDEX t_vec (a, (v::VECTOR(3)) vector_l2_ops),
5454
FAMILY fam_0_k_a_b_c_j_v (k, a, b, c, j, v)
5555
) WITH (schema_locked = true);
5656

@@ -70,7 +70,7 @@ CREATE TABLE public.t (
7070
INDEX t_a_plus_b_idx ((a + b) ASC),
7171
INDEX t_lower_c_a_plus_b_idx (lower(c) ASC, (a + b) ASC),
7272
INVERTED INDEX t_b_j_a_asc (b DESC, (j->‹×›:::STRING)),
73-
VECTOR INDEX t_vec (a DESC, (v::VECTOR(3)) vector_l2_ops),
73+
VECTOR INDEX t_vec (a, (v::VECTOR(3)) vector_l2_ops),
7474
FAMILY fam_0_k_a_b_c_j_v (k, a, b, c, j, v)
7575
);
7676

@@ -90,7 +90,7 @@ CREATE TABLE public.t (
9090
INDEX t_a_plus_b_idx ((a + b) ASC),
9191
INDEX t_lower_c_a_plus_b_idx (lower(c) ASC, (a + b) ASC),
9292
INVERTED INDEX t_b_j_a_asc (b DESC, (j->‹×›:::STRING)),
93-
VECTOR INDEX t_vec (a DESC, (v::VECTOR(3)) vector_l2_ops),
93+
VECTOR INDEX t_vec (a, (v::VECTOR(3)) vector_l2_ops),
9494
FAMILY fam_0_k_a_b_c_j_v (k, a, b, c, j, v)
9595
) WITH (schema_locked = true);
9696

@@ -178,7 +178,7 @@ CREATE TABLE err (a INT, b INT, VECTOR INDEX (a, (a + b)))
178178
statement error column \(vec1::VECTOR\(3\)\) has type vector, which is only allowed as the last column in a vector index
179179
CREATE TABLE err (a INT, vec1 VECTOR, VECTOR INDEX ((vec1::VECTOR(3)), a))
180180

181-
statement error the last column in a vector index cannot have the DESC option
181+
statement error pq: a vector index does not support the DESC option
182182
CREATE TABLE err (a INT, vec1 VECTOR, VECTOR INDEX (a, (vec1::VECTOR(3)) DESC))
183183

184184
statement error vector column \(vec1::VECTOR\) does not have a fixed number of dimensions, so it cannot be indexed\nDETAIL: specify the number of dimensions in the type, like VECTOR\(128\) for 128 dimensions
@@ -476,7 +476,7 @@ CREATE VECTOR INDEX err ON t (a, (a + b))
476476
statement error column \(v::VECTOR\(3\)\) has type vector, which is only allowed as the last column in a vector index
477477
CREATE VECTOR INDEX err ON t ((v::VECTOR(3)), a)
478478

479-
statement error the last column in a vector index cannot have the DESC option
479+
statement error pq: a vector index does not support the DESC option
480480
CREATE VECTOR INDEX err ON t (a, (v::VECTOR(3)) DESC)
481481

482482
statement error vector column \(v::VECTOR\) does not have a fixed number of dimensions, so it cannot be indexed\nDETAIL: specify the number of dimensions in the type, like VECTOR\(128\) for 128 dimensions
@@ -640,7 +640,7 @@ CREATE TABLE public.copy_indexes (
640640
INDEX named_idx ((a + 1:::INT8) ASC),
641641
UNIQUE INDEX src_expr_key ((a + 10:::INT8) ASC),
642642
INVERTED INDEX src_expr_expr1_idx ((a + b) ASC, (j->'a':::STRING)),
643-
VECTOR INDEX src_b_expr_idx (b ASC, (v::VECTOR(3)) vector_l2_ops)
643+
VECTOR INDEX src_b_expr_idx (b, (v::VECTOR(3)) vector_l2_ops)
644644
);
645645

646646
skipif config schema-locked-disabled
@@ -659,7 +659,7 @@ CREATE TABLE public.copy_indexes (
659659
INDEX named_idx ((a + 1:::INT8) ASC),
660660
UNIQUE INDEX src_expr_key ((a + 10:::INT8) ASC),
661661
INVERTED INDEX src_expr_expr1_idx ((a + b) ASC, (j->'a':::STRING)),
662-
VECTOR INDEX src_b_expr_idx (b ASC, (v::VECTOR(3)) vector_l2_ops)
662+
VECTOR INDEX src_b_expr_idx (b, (v::VECTOR(3)) vector_l2_ops)
663663
) WITH (schema_locked = true);
664664

665665
# Inaccessible expression index columns should be copied if the indexes are
@@ -689,7 +689,7 @@ CREATE TABLE public.copy_all (
689689
INDEX named_idx ((a + 1:::INT8) ASC),
690690
UNIQUE INDEX src_expr_key ((a + 10:::INT8) ASC),
691691
INVERTED INDEX src_expr_expr1_idx ((a + b) ASC, (j->'a':::STRING)),
692-
VECTOR INDEX src_b_expr_idx (b ASC, (v::VECTOR(3)) vector_l2_ops)
692+
VECTOR INDEX src_b_expr_idx (b, (v::VECTOR(3)) vector_l2_ops)
693693
);
694694

695695
skipif config schema-locked-disabled
@@ -708,7 +708,7 @@ CREATE TABLE public.copy_all (
708708
INDEX named_idx ((a + 1:::INT8) ASC),
709709
UNIQUE INDEX src_expr_key ((a + 10:::INT8) ASC),
710710
INVERTED INDEX src_expr_expr1_idx ((a + b) ASC, (j->'a':::STRING)),
711-
VECTOR INDEX src_b_expr_idx (b ASC, (v::VECTOR(3)) vector_l2_ops)
711+
VECTOR INDEX src_b_expr_idx (b, (v::VECTOR(3)) vector_l2_ops)
712712
) WITH (schema_locked = true);
713713

714714
# Inaccessible expression index columns should be copied if the indexes are

0 commit comments

Comments
 (0)