Skip to content

Commit 54966d5

Browse files
craig[bot] and DrewKimball committed
Merge #143628
143628: sql: add support for prefix columns to vector search r=michae2,andy-kimball a=DrewKimball

#### opt: use lookup-join instead of index-join for vector search

Index joins expect all input rows to find a match in the primary index, excluding locked rows when in skip locked mode. Vector indexes can have "dangling" vectors after a delete operation fails to locate a vector in the index. Accordingly, this commit changes the vector-search optimizer rule to use a lookup-join instead of an index-join to avoid violating assumptions made elsewhere in the optimizer.

Epic: CRDB-42943

Release note: None

#### opt: do not plan vector search for a locking scan

This commit disallows vector-search when the original scan is performing locking. Even if we did support this, it is unclear which rows should be locked, so it seems best to disallow it entirely.

Epic: CRDB-42943

Release note: None

#### sql: add execution support for vector search with multiple prefix keys

This commit changes the vector-search operator to propagate a constraint set rather than a single list of values to be used in constraining index prefix columns. Execution has been modified as well to handle multiple prefixes. This change will allow the vector-search optimizer rule to handle cases where prefix columns can take on multiple possible values. Note that modifying the vector search rule to take advantage of this change is left for a following commit.

Epic: CRDB-42943

Release note: None

#### opt: add support for prefix columns to vector search rule

This commit adds support for prefix columns to the `GenerateVectorSearch` optimizer rule. Select filters over the matched scan as well as check constraint filters can be used if they constrain every prefix column to single-key spans, like the following examples:

```
prefix1 = 1 AND prefix2 = 2
(prefix1, prefix2) IN ((1, 2), (3, 4))
```

Currently, additional filters beyond those used to constrain the prefix are not yet supported. This will have to wait until the vector index is able to stream results in approximate order of distance.

Fixes #143206

Release note: None

Co-authored-by: Drew Kimball <[email protected]>
2 parents 8c55935 + 497e52d commit 54966d5

27 files changed

+969
-290
lines changed

pkg/sql/distsql_physical_planner.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4569,7 +4569,7 @@ func (dsp *DistSQLPlanner) planVectorSearch(
45694569

45704570
colTypes := getTypesFromResultColumns(planInfo.columns)
45714571
spec := &execinfrapb.VectorSearchSpec{
4572-
PrefixKey: planInfo.prefixKey,
4572+
PrefixKeys: planInfo.prefixKeys,
45734573
QueryVector: queryVector,
45744574
TargetNeighborCount: planInfo.targetNeighborCount,
45754575
}

pkg/sql/distsql_spec_exec_factory.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1452,7 +1452,7 @@ func (e *distSQLSpecExecFactory) ConstructVectorSearch(
14521452
table cat.Table,
14531453
index cat.Index,
14541454
outCols exec.TableColumnOrdinalSet,
1455-
prefixKey constraint.Key,
1455+
prefixConstraint *constraint.Constraint,
14561456
queryVector tree.TypedExpr,
14571457
targetNeighborCount uint64,
14581458
) (exec.Node, error) {
@@ -1461,17 +1461,17 @@ func (e *distSQLSpecExecFactory) ConstructVectorSearch(
14611461
cols := makeColList(table, outCols)
14621462
resultCols := colinfo.ResultColumnsFromColumns(tabDesc.GetID(), cols)
14631463

1464-
// Encode the prefix values as a roachpb.Key.
1464+
// Encode the prefix constraint as a list of roachpb.Keys.
14651465
var sb span.Builder
14661466
sb.Init(e.planner.EvalContext(), e.planner.ExecCfg().Codec, tabDesc, indexDesc)
1467-
encPrefixKey, _, err := sb.EncodeConstraintKey(prefixKey)
1467+
prefixKeys, err := sb.KeysFromVectorPrefixConstraint(e.ctx, prefixConstraint)
14681468
if err != nil {
14691469
return nil, err
14701470
}
14711471
planInfo := &vectorSearchPlanningInfo{
14721472
table: tabDesc,
14731473
index: indexDesc,
1474-
prefixKey: encPrefixKey,
1474+
prefixKeys: prefixKeys,
14751475
queryVector: queryVector,
14761476
targetNeighborCount: targetNeighborCount,
14771477
cols: cols,

pkg/sql/execinfrapb/flow_diagram.go

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -103,9 +103,15 @@ func (v *VectorSearchSpec) summary() (string, []string) {
103103
fmt.Sprintf("Nearest Neighbor Target Count: %d", v.TargetNeighborCount),
104104
fmt.Sprintf("Query Vector: %s", vector.T(v.QueryVector).String()),
105105
}
106-
if len(v.PrefixKey) > 0 {
107-
vals, _ := encoding.PrettyPrintValuesWithTypes(nil /* valDirs */, v.PrefixKey)
108-
details = append(details, fmt.Sprintf("Prefix Vals: %s", strings.Join(vals, "/")))
106+
if len(v.PrefixKeys) > 0 {
107+
// Only show the first prefix key.
108+
var spanStr strings.Builder
109+
vals, _ := encoding.PrettyPrintValuesWithTypes(nil /* valDirs */, v.PrefixKeys[0])
110+
spanStr.WriteString(fmt.Sprintf("Prefix Vals: %s", strings.Join(vals, "/")))
111+
if len(v.PrefixKeys) > 1 {
112+
spanStr.WriteString(fmt.Sprintf(" and %d more", len(v.PrefixKeys)-1))
113+
}
114+
details = append(details, spanStr.String())
109115
}
110116
return "VectorSearch", details
111117
}

pkg/sql/execinfrapb/processors_sql.proto

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1102,9 +1102,9 @@ message InsertSpec {
11021102
message VectorSearchSpec {
11031103
optional sqlbase.IndexFetchSpec fetch_spec = 1 [(gogoproto.nullable) = false];
11041104

1105-
// PrefixKey constrains the prefix index columns to a single value. It is
1106-
// empty for an index without prefix columns.
1107-
optional bytes prefix_key = 2 [(gogoproto.casttype) = "github.com/cockroachdb/cockroach/pkg/roachpb.Key"];
1105+
// PrefixKeys, if set, contains keys that each constrain every index prefix
1106+
// column to a single value. It is set IFF the index has prefix columns.
1107+
repeated bytes prefix_keys = 2 [(gogoproto.casttype) = "github.com/cockroachdb/cockroach/pkg/roachpb.Key"];
11081108

11091109
// QueryVector is the vector to search for.
11101110
repeated float query_vector = 3;

pkg/sql/opt/exec/execbuilder/relational.go

Lines changed: 1 addition & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -3949,25 +3949,6 @@ func (b *Builder) buildVectorSearch(
39493949
"vector search output column %d is not a primary key column", col)
39503950
}
39513951
}
3952-
// Evaluate the prefix expressions.
3953-
var prefixKey constraint.Key
3954-
if len(search.PrefixVals) > 0 {
3955-
values := make([]tree.Datum, len(search.PrefixVals))
3956-
for i, expr := range search.PrefixVals {
3957-
// The expression is either a placeholder or a constant.
3958-
if p, ok := expr.(*memo.PlaceholderExpr); ok {
3959-
val, err := eval.Expr(b.ctx, b.evalCtx, p.Value)
3960-
if err != nil {
3961-
return execPlan{}, colOrdMap{}, err
3962-
}
3963-
values[i] = val
3964-
} else {
3965-
values[i] = memo.ExtractConstDatum(expr)
3966-
}
3967-
}
3968-
prefixKey = constraint.MakeCompositeKey(values...)
3969-
}
3970-
39713952
outColOrds, outColMap := b.getColumns(search.Cols, search.Table)
39723953
ctx := buildScalarCtx{}
39733954
queryVector, err := b.buildScalar(&ctx, search.QueryVector)
@@ -3978,7 +3959,7 @@ func (b *Builder) buildVectorSearch(
39783959

39793960
var res execPlan
39803961
res.root, err = b.factory.ConstructVectorSearch(
3981-
table, index, outColOrds, prefixKey, queryVector, targetNeighborCount,
3962+
table, index, outColOrds, search.PrefixConstraint, queryVector, targetNeighborCount,
39823963
)
39833964
if err != nil {
39843965
return execPlan{}, colOrdMap{}, err

pkg/sql/opt/exec/execbuilder/testdata/not_visible_index

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -576,8 +576,10 @@ vectorized: false
576576
577577
└── • render
578578
579-
└── • index join
579+
└── • lookup join
580580
│ table: t1@t1_pkey
581+
│ equality: (rowid) = (rowid)
582+
│ equality cols are key
581583
582584
└── • vector search
583585
table: t1@idx_vec_visible

pkg/sql/opt/exec/execbuilder/testdata/vector_search

Lines changed: 62 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,10 @@ vectorized: false
3232
3333
└── • render
3434
35-
└── • index join
35+
└── • lookup join
3636
│ table: t@t_pkey
37+
│ equality: (k) = (k)
38+
│ equality cols are key
3739
3840
└── • vector search
3941
table: t@t_v_idx
@@ -60,11 +62,12 @@ vectorized: false
6062
│ render k: k
6163
│ render v: v
6264
63-
└── • index join
65+
└── • lookup join (inner)
6466
│ columns: (k, v)
6567
│ estimated row count: 2 (missing stats)
6668
│ table: t@t_pkey
67-
│ key columns: k
69+
│ equality: (k) = (k)
70+
│ equality cols are key
6871
6972
└── • vector search
7073
columns: (k)
@@ -94,11 +97,12 @@ vectorized: false
9497
│ render k: k
9598
│ render v: v
9699
97-
└── • index join
100+
└── • lookup join (inner)
98101
│ columns: (k, v)
99102
│ estimated row count: 10 (missing stats)
100103
│ table: t@t_pkey
101-
│ key columns: k
104+
│ equality: (k) = (k)
105+
│ equality cols are key
102106
103107
└── • vector search
104108
columns: (k)
@@ -107,12 +111,11 @@ vectorized: false
107111
target count: 5
108112
query vector: '[1,2,3]'
109113

110-
# NOTE: the vector-search rule doesn't yet handle prefix columns.
111114
query T
112115
EXPLAIN (VERBOSE) SELECT * FROM t_multi WHERE a = 1 AND b = 2 ORDER BY v <-> '[1, 2, 3]' LIMIT 1;
113116
----
114117
distribution: local
115-
vectorized: true
118+
vectorized: false
116119
·
117120
• project
118121
│ columns: (x, y, a, b, c, v)
@@ -133,16 +136,60 @@ vectorized: true
133136
│ render c: c
134137
│ render v: v
135138
136-
└── • filter
139+
└── • lookup join (inner)
137140
│ columns: (x, y, a, b, c, v)
138-
│ estimated row count: 1 (missing stats)
139-
│ filter: (a = 1) AND (b = 2)
141+
│ estimated row count: 2 (missing stats)
142+
│ table: t_multi@t_multi_pkey
143+
│ equality: (x, y) = (x, y)
144+
│ equality cols are key
140145
141-
└── • scan
142-
columns: (x, y, a, b, c, v)
143-
estimated row count: 1,000 (missing stats)
144-
table: t_multi@t_multi_pkey
145-
spans: FULL SCAN
146+
└── • vector search
147+
columns: (x, y)
148+
estimated row count: 2 (missing stats)
149+
table: t_multi@t_multi_a_b_v_idx
150+
target count: 1
151+
prefix spans: /1/2-/1/3
152+
query vector: '[1,2,3]'
153+
154+
query T
155+
EXPLAIN (VERBOSE) SELECT * FROM t_multi WHERE (a, b) IN ((1, 2), (3, 4), (5, 6)) ORDER BY v <-> '[1, 2, 3]' LIMIT 1;
156+
----
157+
distribution: local
158+
vectorized: false
159+
·
160+
• project
161+
│ columns: (x, y, a, b, c, v)
162+
163+
└── • top-k
164+
│ columns: (column11, x, y, a, b, c, v)
165+
│ estimated row count: 1 (missing stats)
166+
│ order: +column11
167+
│ k: 1
168+
169+
└── • render
170+
│ columns: (column11, x, y, a, b, c, v)
171+
│ render column11: v <-> '[1,2,3]'
172+
│ render x: x
173+
│ render y: y
174+
│ render a: a
175+
│ render b: b
176+
│ render c: c
177+
│ render v: v
178+
179+
└── • lookup join (inner)
180+
│ columns: (x, y, a, b, c, v)
181+
│ estimated row count: 2 (missing stats)
182+
│ table: t_multi@t_multi_pkey
183+
│ equality: (x, y) = (x, y)
184+
│ equality cols are key
185+
186+
└── • vector search
187+
columns: (x, y)
188+
estimated row count: 2 (missing stats)
189+
table: t_multi@t_multi_a_b_v_idx
190+
target count: 1
191+
prefix spans: /1/2-/1/3 /3/4-/3/5 /5/6-/5/7
192+
query vector: '[1,2,3]'
146193

147194
# ==============================================================================
148195
# Vector Mutation Search Tests

pkg/sql/opt/exec/explain/emit.go

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -944,10 +944,15 @@ func (e *emitter) emitNodeAttributes(ctx context.Context, evalCtx *eval.Context,
944944
a := n.args.(*vectorSearchArgs)
945945
e.emitTableAndIndex("table", a.Table, a.Index, "" /* suffix */)
946946
ob.Attr("target count", a.TargetNeighborCount)
947-
if ob.flags.Verbose {
948-
if !a.PrefixKey.IsEmpty() {
949-
ob.Attr("prefix key", a.PrefixKey)
947+
if a.PrefixConstraint != nil {
948+
params := exec.ScanParams{
949+
NeededCols: a.OutCols,
950+
IndexConstraint: a.PrefixConstraint,
950951
}
952+
e.emitSpans("prefix spans", a.Table, a.Index, params)
953+
}
954+
if ob.flags.Verbose {
955+
// Vectors can have many dimensions, so don't print them unless verbose.
951956
ob.Expr("query vector", a.QueryVector, nil /* varColumns */)
952957
}
953958

pkg/sql/opt/exec/factory.opt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -860,7 +860,7 @@ define VectorSearch {
860860
Table cat.Table
861861
Index cat.Index
862862
OutCols exec.TableColumnOrdinalSet
863-
PrefixKey constraint.Key
863+
PrefixConstraint *constraint.Constraint
864864
QueryVector tree.TypedExpr
865865
TargetNeighborCount uint64
866866
}

pkg/sql/opt/idxconstraint/BUILD.bazel

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ go_library(
77
visibility = ["//visibility:public"],
88
deps = [
99
"//pkg/sql/opt",
10+
"//pkg/sql/opt/cat",
1011
"//pkg/sql/opt/constraint",
1112
"//pkg/sql/opt/memo",
1213
"//pkg/sql/opt/norm",

0 commit comments

Comments
 (0)