Skip to content

Commit 7f3eb70

Browse files
craig[bot] and DrewKimball committed
Merge #144536
144536: opt: allow additional projections in vector search rule r=andy-kimball a=DrewKimball

This commit extends the `GenerateVectorSearch` exploration rule to allow additional projections beyond the distance expression. Since we already add a lookup-join against the primary index to fetch any needed columns, the change is to just pass in the original projections instead of re-constructing the distance expression.

Fixes #143694

Release note (performance improvement): The vector search optimizer rule now supports additional projections beyond the distance column, including the implicit projections added for virtual columns.

Co-authored-by: Drew Kimball <[email protected]>
2 parents b6ae107 + 89d35b9 commit 7f3eb70

File tree

3 files changed

+63
-26
lines changed

3 files changed

+63
-26
lines changed

pkg/sql/opt/xform/limit_funcs.go

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -468,9 +468,11 @@ func (c *CustomFuncs) TryGenerateVectorSearch(
468468
scanExpr *memo.ScanExpr,
469469
filters memo.FiltersExpr,
470470
passthrough opt.ColSet,
471-
vectorCol, distanceCol opt.ColumnID,
472-
distanceExpr, queryVector opt.ScalarExpr,
471+
vectorCol opt.ColumnID,
472+
queryVector opt.ScalarExpr,
473+
projections memo.ProjectionsExpr,
473474
limit tree.Datum,
475+
limitOrd props.OrderingChoice,
474476
) {
475477
sp := &scanExpr.ScanPrivate
476478

@@ -552,15 +554,13 @@ func (c *CustomFuncs) TryGenerateVectorSearch(
552554
}
553555
vectorSearch = c.e.f.ConstructLookupJoin(vectorSearch, nil /* on */, lookupPrivate)
554556

555-
// Project the distance column.
556-
projections := memo.ProjectionsExpr{c.e.f.ConstructProjectionsItem(distanceExpr, distanceCol)}
557+
// Add back the projections, including the distance column.
557558
vectorSearch = c.e.f.ConstructProject(vectorSearch, projections, passthrough)
558559

559560
// Build a top-k operator ordering by the distance column and limited by the
560-
// NN count to obtain the final result.
561-
var ord props.OrderingChoice
562-
ord.AppendCol(distanceCol, false /* descending */)
563-
topKPrivate := memo.TopKPrivate{K: limitInt, Ordering: ord}
561+
// NN count to obtain the final result. We verified when the rule matched
562+
// that the limit is ordering by the distance column.
563+
topKPrivate := memo.TopKPrivate{K: limitInt, Ordering: limitOrd}
564564
c.e.mem.AddTopKToGroup(&memo.TopKExpr{Input: vectorSearch, TopKPrivate: topKPrivate}, grp)
565565
})
566566
}

pkg/sql/opt/xform/rules/limit.opt

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -308,14 +308,16 @@
308308
$ok
309309
)
310310
$projections:[
311+
...
311312
(ProjectionsItem
312-
$distanceExpr:(VectorDistance
313+
(VectorDistance
313314
(Variable $vectorCol:*) &
314315
(IsFixedWidthVectorCol $vectorCol)
315316
$queryVector:(Const | Placeholder)
316317
)
317318
$distanceCol:*
318319
)
320+
...
319321
]
320322
$passthrough:*
321323
) &
@@ -329,8 +331,8 @@
329331
$filters
330332
$passthrough
331333
$vectorCol
332-
$distanceCol
333-
$distanceExpr
334334
$queryVector
335+
$projections
335336
$limit
337+
$ordering
336338
)

pkg/sql/opt/xform/testdata/rules/limit

Lines changed: 50 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -3620,6 +3620,42 @@ top-k
36203620
└── projections
36213621
└── vec1:9 <-> '[3,1,2]' [as=column15:15, outer=(9), immutable]
36223622

3623+
# Case with additional projected expressions.
3624+
opt expect=GenerateVectorSearch
3625+
SELECT id, vec1, latitude*longitude, data1*data2 FROM index_tab
3626+
ORDER BY vec1 <-> '[3,1,2]' LIMIT 5;
3627+
----
3628+
top-k
3629+
├── columns: id:1!null vec1:9 "?column?":15 "?column?":16!null [hidden: column17:17]
3630+
├── internal-ordering: +17
3631+
├── k: 5
3632+
├── cardinality: [0 - 5]
3633+
├── immutable
3634+
├── key: (1)
3635+
├── fd: (1)-->(9,15-17), (9)-->(17)
3636+
├── ordering: +17
3637+
└── project
3638+
├── columns: "?column?":15 "?column?":16!null column17:17 id:1!null vec1:9
3639+
├── immutable
3640+
├── key: (1)
3641+
├── fd: (1)-->(9,15-17), (9)-->(17)
3642+
├── inner-join (lookup index_tab)
3643+
│ ├── columns: id:1!null latitude:4 longitude:5 data1:6!null data2:7!null vec1:9
3644+
│ ├── key columns: [1] = [1]
3645+
│ ├── lookup columns are key
3646+
│ ├── key: (1)
3647+
│ ├── fd: (1)-->(4-7,9)
3648+
│ ├── vector-search index_tab@index_tab_vec1_idx,vector
3649+
│ │ ├── columns: id:1!null
3650+
│ │ ├── target nearest neighbors: 5
3651+
│ │ ├── key: (1)
3652+
│ │ └── '[3,1,2]'
3653+
│ └── filters (true)
3654+
└── projections
3655+
├── latitude:4 * longitude:5 [as="?column?":15, outer=(4,5), immutable]
3656+
├── data1:6 * data2:7 [as="?column?":16, outer=(6,7), immutable]
3657+
└── vec1:9 <-> '[3,1,2]' [as=column17:17, outer=(9), immutable]
3658+
36233659
# Case with prefix columns.
36243660
opt expect=GenerateVectorSearch
36253661
SELECT id, val, vec1 FROM index_tab WHERE data1 = 1 AND data2 = 2 ORDER BY vec1 <-> '[3,1,2]' LIMIT 5;
@@ -3783,8 +3819,7 @@ top-k
37833819
└── projections
37843820
└── vec:5 <-> '[3,1,2]' [as=column9:9, outer=(5), immutable]
37853821

3786-
# Currently, the extra projection for col4 prevents the vector search.
3787-
opt expect-not=GenerateVectorSearch
3822+
opt expect=GenerateVectorSearch
37883823
SELECT * FROM index_with_computed_tab WHERE col3 = 5 ORDER BY vec <-> '[3,1,2]' LIMIT 5;
37893824
----
37903825
top-k
@@ -3798,20 +3833,20 @@ top-k
37983833
└── project
37993834
├── columns: column9:9 col4:4!null col1:1!null col2:2!null col3:3!null vec:5
38003835
├── immutable
3801-
├── fd: ()-->(3), (5)-->(9), (1,2)-->(4)
3802-
├── select
3836+
├── fd: (1,2)-->(3,4), (5)-->(9)
3837+
├── inner-join (lookup index_with_computed_tab)
38033838
│ ├── columns: col1:1!null col2:2!null col3:3!null vec:5
3804-
│ ├── fd: ()-->(3)
3805-
│ ├── scan index_with_computed_tab
3806-
│ │ ├── columns: col1:1!null col2:2!null col3:3!null vec:5
3807-
│ │ ├── computed column expressions
3808-
│ │ │ ├── col3:3
3809-
│ │ │ │ └── col1:1 + col2:2
3810-
│ │ │ └── col4:4
3811-
│ │ │ └── col1:1 * col2:2
3812-
│ │ └── fd: (1,2)-->(3)
3813-
│ └── filters
3814-
│ └── col3:3 = 5 [outer=(3), constraints=(/3: [/5 - /5]; tight), fd=()-->(3)]
3839+
│ ├── key columns: [6] = [6]
3840+
│ ├── lookup columns are key
3841+
│ ├── fd: (1,2)-->(3)
3842+
│ ├── vector-search index_with_computed_tab@index_with_computed_tab_col3_vec_idx,vector
3843+
│ │ ├── columns: rowid:6!null
3844+
│ │ ├── target nearest neighbors: 5
3845+
│ │ ├── prefix constraint: /3
3846+
│ │ │ └── [/5 - /5]
3847+
│ │ ├── key: (6)
3848+
│ │ └── '[3,1,2]'
3849+
│ └── filters (true)
38153850
└── projections
38163851
├── vec:5 <-> '[3,1,2]' [as=column9:9, outer=(5), immutable]
38173852
└── col1:1 * col2:2 [as=col4:4, outer=(1,2), immutable]

0 commit comments

Comments
 (0)