Skip to content

Commit 497e52d

Browse files
committed
opt: add support for prefix columns to vector search rule
This commit adds support for prefix columns to the `GenerateVectorSearch` optimizer rule. Select filters over the matched scan, as well as check constraint filters, can be used if they constrain every prefix column to single-key spans, as in the following examples:

```
prefix1 = 1 AND prefix2 = 2
(prefix1, prefix2) IN ((1, 2), (3, 4))
```

Currently, additional filters beyond those used to constrain the prefix are not yet supported. This will have to wait until the vector index is able to stream results in approximate order of distance.

Fixes #143206

Release note: None
1 parent 808258a commit 497e52d

File tree

10 files changed

+612
-142
lines changed

10 files changed

+612
-142
lines changed

pkg/sql/opt/exec/execbuilder/testdata/vector_search

Lines changed: 53 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -111,12 +111,11 @@ vectorized: false
111111
target count: 5
112112
query vector: '[1,2,3]'
113113

114-
# NOTE: the vector-search rule doesn't yet handle prefix columns.
115114
query T
116115
EXPLAIN (VERBOSE) SELECT * FROM t_multi WHERE a = 1 AND b = 2 ORDER BY v <-> '[1, 2, 3]' LIMIT 1;
117116
----
118117
distribution: local
119-
vectorized: true
118+
vectorized: false
120119
·
121120
• project
122121
│ columns: (x, y, a, b, c, v)
@@ -137,16 +136,60 @@ vectorized: true
137136
│ render c: c
138137
│ render v: v
139138
140-
└── • filter
139+
└── • lookup join (inner)
141140
│ columns: (x, y, a, b, c, v)
142-
│ estimated row count: 1 (missing stats)
143-
│ filter: (a = 1) AND (b = 2)
141+
│ estimated row count: 2 (missing stats)
142+
│ table: t_multi@t_multi_pkey
143+
│ equality: (x, y) = (x, y)
144+
│ equality cols are key
144145
145-
└── • scan
146-
columns: (x, y, a, b, c, v)
147-
estimated row count: 1,000 (missing stats)
148-
table: t_multi@t_multi_pkey
149-
spans: FULL SCAN
146+
└── • vector search
147+
columns: (x, y)
148+
estimated row count: 2 (missing stats)
149+
table: t_multi@t_multi_a_b_v_idx
150+
target count: 1
151+
prefix spans: /1/2-/1/3
152+
query vector: '[1,2,3]'
153+
154+
query T
155+
EXPLAIN (VERBOSE) SELECT * FROM t_multi WHERE (a, b) IN ((1, 2), (3, 4), (5, 6)) ORDER BY v <-> '[1, 2, 3]' LIMIT 1;
156+
----
157+
distribution: local
158+
vectorized: false
159+
·
160+
• project
161+
│ columns: (x, y, a, b, c, v)
162+
163+
└── • top-k
164+
│ columns: (column11, x, y, a, b, c, v)
165+
│ estimated row count: 1 (missing stats)
166+
│ order: +column11
167+
│ k: 1
168+
169+
└── • render
170+
│ columns: (column11, x, y, a, b, c, v)
171+
│ render column11: v <-> '[1,2,3]'
172+
│ render x: x
173+
│ render y: y
174+
│ render a: a
175+
│ render b: b
176+
│ render c: c
177+
│ render v: v
178+
179+
└── • lookup join (inner)
180+
│ columns: (x, y, a, b, c, v)
181+
│ estimated row count: 2 (missing stats)
182+
│ table: t_multi@t_multi_pkey
183+
│ equality: (x, y) = (x, y)
184+
│ equality cols are key
185+
186+
└── • vector search
187+
columns: (x, y)
188+
estimated row count: 2 (missing stats)
189+
table: t_multi@t_multi_a_b_v_idx
190+
target count: 1
191+
prefix spans: /1/2-/1/3 /3/4-/3/5 /5/6-/5/7
192+
query vector: '[1,2,3]'
150193

151194
# ==============================================================================
152195
# Vector Mutation Search Tests

pkg/sql/opt/idxconstraint/BUILD.bazel

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ go_library(
77
visibility = ["//visibility:public"],
88
deps = [
99
"//pkg/sql/opt",
10+
"//pkg/sql/opt/cat",
1011
"//pkg/sql/opt/constraint",
1112
"//pkg/sql/opt/memo",
1213
"//pkg/sql/opt/norm",

pkg/sql/opt/idxconstraint/index_constraints.go

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ import (
1111
"strings"
1212

1313
"github.com/cockroachdb/cockroach/pkg/sql/opt"
14+
"github.com/cockroachdb/cockroach/pkg/sql/opt/cat"
1415
"github.com/cockroachdb/cockroach/pkg/sql/opt/constraint"
1516
"github.com/cockroachdb/cockroach/pkg/sql/opt/memo"
1617
"github.com/cockroachdb/cockroach/pkg/sql/opt/norm"
@@ -1333,3 +1334,88 @@ func (c *indexConstraintCtx) computedColInSuffix(offset int) bool {
13331334
}
13341335
return false
13351336
}
1337+
1338+
// IndexPrefixCols returns a slice of ordering columns for each of the prefix
1339+
// columns of the inverted or vector index. It also returns a set of those
1340+
// columns that are NOT NULL. If the index is a single-column inverted index,
1341+
// the function returns nil ordering columns.
1342+
func IndexPrefixCols(
1343+
tabID opt.TableID, index cat.Index,
1344+
) (_ []opt.OrderingColumn, notNullCols opt.ColSet) {
1345+
prefixColumnCount := index.PrefixColumnCount()
1346+
1347+
// If this is a single-column inverted/vector index, there are no prefix
1348+
// columns.
1349+
if prefixColumnCount == 0 {
1350+
return nil, opt.ColSet{}
1351+
}
1352+
1353+
prefixColumns := make([]opt.OrderingColumn, prefixColumnCount)
1354+
for i := range prefixColumns {
1355+
col := index.Column(i)
1356+
colID := tabID.ColumnID(col.Ordinal())
1357+
prefixColumns[i] = opt.MakeOrderingColumn(colID, col.Descending)
1358+
if !col.IsNullable() {
1359+
notNullCols.Add(colID)
1360+
}
1361+
}
1362+
return prefixColumns, notNullCols
1363+
}
1364+
1365+
// ConstrainIndexPrefixCols attempts to build a constraint for the prefix
1366+
// columns of the given inverted or vector index. If a constraint is
1367+
// successfully built, it is returned along with remaining filters and ok=true.
1368+
// The function is only successful if it can generate a constraint where all
1369+
// spans have the same start and end keys for all prefix columns. This is
1370+
// required for building spans for scanning multi-column inverted/vector indexes
1371+
// (see span.Builder.SpansFromInvertedSpans).
1372+
func ConstrainIndexPrefixCols(
1373+
ctx context.Context,
1374+
evalCtx *eval.Context,
1375+
factory *norm.Factory,
1376+
columns []opt.OrderingColumn,
1377+
notNullCols opt.ColSet,
1378+
filters memo.FiltersExpr,
1379+
optionalFilters memo.FiltersExpr,
1380+
tabID opt.TableID,
1381+
index cat.Index,
1382+
checkCancellation func(),
1383+
) (_ *constraint.Constraint, remainingFilters memo.FiltersExpr, ok bool) {
1384+
tabMeta := factory.Metadata().TableMeta(tabID)
1385+
prefixColumnCount := index.PrefixColumnCount()
1386+
ps := tabMeta.IndexPartitionLocality(index.Ordinal())
1387+
1388+
// Consolidation of a constraint converts contiguous spans into a single
1389+
// span. By definition, the consolidated span would have different start and
1390+
// end keys and could not be used for multi-column inverted index scans.
1391+
// Therefore, we only generate and check the unconsolidated constraint,
1392+
// allowing the optimizer to plan multi-column inverted/vector index scans in
1393+
// more cases.
1394+
//
1395+
// For example, the consolidated constraint for (x IN (1, 2, 3)) is:
1396+
//
1397+
// /x: [/1 - /3]
1398+
// Prefix: 0
1399+
//
1400+
// The unconsolidated constraint for the same expression is:
1401+
//
1402+
// /x: [/1 - /1] [/2 - /2] [/3 - /3]
1403+
// Prefix: 1
1404+
//
1405+
var ic Instance
1406+
ic.Init(
1407+
ctx, filters, optionalFilters,
1408+
columns, notNullCols, tabMeta.ComputedCols,
1409+
tabMeta.ColsInComputedColsExpressions,
1410+
false, /* consolidate */
1411+
evalCtx, factory, ps, checkCancellation,
1412+
)
1413+
var c constraint.Constraint
1414+
ic.UnconsolidatedConstraint(&c)
1415+
if c.Prefix(ctx, evalCtx) != prefixColumnCount {
1416+
// The prefix columns must be constrained to single values.
1417+
return nil, nil, false
1418+
}
1419+
1420+
return &c, ic.RemainingFilters(), true
1421+
}

pkg/sql/opt/invertedidx/inverted_index_expr.go

Lines changed: 3 additions & 88 deletions
Original file line numberDiff line numberDiff line change
@@ -90,9 +90,9 @@ func TryFilterInvertedIndex(
9090
) {
9191
// Attempt to constrain the prefix columns, if there are any. If they cannot
9292
// be constrained to single values, the index cannot be used.
93-
columns, notNullCols := prefixCols(tabID, index)
93+
columns, notNullCols := idxconstraint.IndexPrefixCols(tabID, index)
9494
if len(columns) > 0 {
95-
constraint, filters, ok = constrainNonInvertedCols(
95+
constraint, filters, ok = idxconstraint.ConstrainIndexPrefixCols(
9696
ctx, evalCtx, factory, columns, notNullCols, filters,
9797
optionalFilters, tabID, index, checkCancellation,
9898
)
@@ -299,7 +299,7 @@ func TryFilterInvertedIndexBySimilarity(
299299
// If the index is a multi-column index, then we need to constrain the
300300
// prefix columns.
301301
var prefixConstraint *constraint.Constraint
302-
prefixConstraint, remainingFilters, ok = constrainNonInvertedCols(
302+
prefixConstraint, remainingFilters, ok = idxconstraint.ConstrainIndexPrefixCols(
303303
ctx, evalCtx, f, cols, notNullCols, filters,
304304
optionalFilters, tabID, index, checkCancellation,
305305
)
@@ -634,91 +634,6 @@ func evalInvertedExpr(
634634
}
635635
}
636636

637-
// prefixCols returns a slice of ordering columns for each of the non-inverted
638-
// prefix of the index. It also returns a set of those columns that are NOT
639-
// NULL. If the index is a single-column inverted index, the function returns
640-
// nil ordering columns.
641-
func prefixCols(
642-
tabID opt.TableID, index cat.Index,
643-
) (_ []opt.OrderingColumn, notNullCols opt.ColSet) {
644-
prefixColumnCount := index.PrefixColumnCount()
645-
646-
// If this is a single-column inverted index, there are no prefix columns.
647-
// constrain.
648-
if prefixColumnCount == 0 {
649-
return nil, opt.ColSet{}
650-
}
651-
652-
prefixColumns := make([]opt.OrderingColumn, prefixColumnCount)
653-
for i := range prefixColumns {
654-
col := index.Column(i)
655-
colID := tabID.ColumnID(col.Ordinal())
656-
prefixColumns[i] = opt.MakeOrderingColumn(colID, col.Descending)
657-
if !col.IsNullable() {
658-
notNullCols.Add(colID)
659-
}
660-
}
661-
return prefixColumns, notNullCols
662-
}
663-
664-
// constrainNonInvertedCols attempts to build a constraint for the non-inverted
665-
// prefix columns of the given index. If a constraint is successfully built, it
666-
// is returned along with remaining filters and ok=true. The function is only
667-
// successful if it can generate a constraint where all spans have the same
668-
// start and end keys for all non-inverted prefix columns. This is required for
669-
// building spans for scanning multi-column inverted indexes (see
670-
// span.Builder.SpansFromInvertedSpans).
671-
func constrainNonInvertedCols(
672-
ctx context.Context,
673-
evalCtx *eval.Context,
674-
factory *norm.Factory,
675-
columns []opt.OrderingColumn,
676-
notNullCols opt.ColSet,
677-
filters memo.FiltersExpr,
678-
optionalFilters memo.FiltersExpr,
679-
tabID opt.TableID,
680-
index cat.Index,
681-
checkCancellation func(),
682-
) (_ *constraint.Constraint, remainingFilters memo.FiltersExpr, ok bool) {
683-
tabMeta := factory.Metadata().TableMeta(tabID)
684-
prefixColumnCount := index.PrefixColumnCount()
685-
ps := tabMeta.IndexPartitionLocality(index.Ordinal())
686-
687-
// Consolidation of a constraint converts contiguous spans into a single
688-
// span. By definition, the consolidated span would have different start and
689-
// end keys and could not be used for multi-column inverted index scans.
690-
// Therefore, we only generate and check the unconsolidated constraint,
691-
// allowing the optimizer to plan multi-column inverted index scans in more
692-
// cases.
693-
//
694-
// For example, the consolidated constraint for (x IN (1, 2, 3)) is:
695-
//
696-
// /x: [/1 - /3]
697-
// Prefix: 0
698-
//
699-
// The unconsolidated constraint for the same expression is:
700-
//
701-
// /x: [/1 - /1] [/2 - /2] [/3 - /3]
702-
// Prefix: 1
703-
//
704-
var ic idxconstraint.Instance
705-
ic.Init(
706-
ctx, filters, optionalFilters,
707-
columns, notNullCols, tabMeta.ComputedCols,
708-
tabMeta.ColsInComputedColsExpressions,
709-
false, /* consolidate */
710-
evalCtx, factory, ps, checkCancellation,
711-
)
712-
var c constraint.Constraint
713-
ic.UnconsolidatedConstraint(&c)
714-
if c.Prefix(ctx, evalCtx) != prefixColumnCount {
715-
// The prefix columns must be constrained to single values.
716-
return nil, nil, false
717-
}
718-
719-
return &c, ic.RemainingFilters(), true
720-
}
721-
722637
type invertedFilterPlanner interface {
723638
// extractInvertedFilterConditionFromLeaf extracts an inverted filter
724639
// condition from the given expression, which represents a leaf of an

pkg/sql/opt/xform/general_funcs.go

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -681,3 +681,27 @@ func (c *CustomFuncs) getKnownScanConstraint(
681681
instance.Constraint(&cons)
682682
return &cons, !cons.IsUnconstrained()
683683
}
684+
685+
// GetFilteredCanonicalScan looks at a *ScanExpr or *SelectExpr "relation" and
686+
// returns the input *ScanExpr and FiltersExpr, along with ok=true, if the Scan
687+
// is a canonical scan. If "relation" is a different type, or if it's a
688+
// *SelectExpr with an Input other than a *ScanExpr, ok=false is returned. Scans
689+
// or Selects with no filters may return filters as nil.
690+
func (c *CustomFuncs) GetFilteredCanonicalScan(
691+
relation memo.RelExpr,
692+
) (scanExpr *memo.ScanExpr, filters memo.FiltersExpr, ok bool) {
693+
var selectExpr *memo.SelectExpr
694+
if selectExpr, ok = relation.(*memo.SelectExpr); ok {
695+
if scanExpr, ok = selectExpr.Input.(*memo.ScanExpr); !ok {
696+
return nil, nil, false
697+
}
698+
filters = selectExpr.Filters
699+
} else if scanExpr, ok = relation.(*memo.ScanExpr); !ok {
700+
return nil, nil, false
701+
}
702+
scanPrivate := &scanExpr.ScanPrivate
703+
if !c.IsCanonicalScan(scanPrivate) {
704+
return nil, nil, false
705+
}
706+
return scanExpr, filters, true
707+
}

0 commit comments

Comments
 (0)