Skip to content

Commit 5cb0795

Browse files
committed
opt: use lookup-join instead of index-join for vector search
Index joins expect all input rows to find a match in the primary index, excluding locked rows when in skip locked mode. Vector indexes can have "dangling" vectors after a delete operation fails to locate a vector in the index. Accordingly, this commit changes the vector-search optimizer rule to use a lookup-join instead of an index-join to avoid violating assumptions made elsewhere in the optimizer.

Epic: CRDB-42943

Release note: None
1 parent 59dedfe commit 5cb0795

File tree

5 files changed

+124
-75
lines changed

5 files changed

+124
-75
lines changed

pkg/sql/opt/exec/execbuilder/testdata/not_visible_index

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -576,8 +576,10 @@ vectorized: false
576576
577577
└── • render
578578
579-
└── • index join
579+
└── • lookup join
580580
│ table: t1@t1_pkey
581+
│ equality: (rowid) = (rowid)
582+
│ equality cols are key
581583
582584
└── • vector search
583585
table: t1@idx_vec_visible

pkg/sql/opt/exec/execbuilder/testdata/vector_search

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,10 @@ vectorized: false
3232
3333
└── • render
3434
35-
└── • index join
35+
└── • lookup join
3636
│ table: t@t_pkey
37+
│ equality: (k) = (k)
38+
│ equality cols are key
3739
3840
└── • vector search
3941
table: t@t_v_idx
@@ -60,11 +62,12 @@ vectorized: false
6062
│ render k: k
6163
│ render v: v
6264
63-
└── • index join
65+
└── • lookup join (inner)
6466
│ columns: (k, v)
6567
│ estimated row count: 2 (missing stats)
6668
│ table: t@t_pkey
67-
│ key columns: k
69+
│ equality: (k) = (k)
70+
│ equality cols are key
6871
6972
└── • vector search
7073
columns: (k)
@@ -94,11 +97,12 @@ vectorized: false
9497
│ render k: k
9598
│ render v: v
9699
97-
└── • index join
100+
└── • lookup join (inner)
98101
│ columns: (k, v)
99102
│ estimated row count: 10 (missing stats)
100103
│ table: t@t_pkey
101-
│ key columns: k
104+
│ equality: (k) = (k)
105+
│ equality cols are key
102106
103107
└── • vector search
104108
columns: (k)

pkg/sql/opt/memo/testdata/stats/vector-search

Lines changed: 30 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -107,17 +107,20 @@ top-k
107107
├── stats: [rows=2]
108108
├── key: (1)
109109
├── fd: (1)-->(2), (2)-->(5)
110-
├── index-join t
110+
├── inner-join (lookup t)
111111
│ ├── columns: x:1(int!null) v:2(vector)
112+
│ ├── key columns: [1] = [1]
113+
│ ├── lookup columns are key
112114
│ ├── stats: [rows=2]
113115
│ ├── key: (1)
114116
│ ├── fd: (1)-->(2)
115-
│ └── vector-search t@t_v_idx,vector
116-
│ ├── columns: x:1(int!null)
117-
│ ├── target nearest neighbors: 1
118-
│ ├── stats: [rows=2]
119-
│ ├── key: (1)
120-
│ └── '[1,2,3]' [type=vector]
117+
│ ├── vector-search t@t_v_idx,vector
118+
│ │ ├── columns: x:1(int!null)
119+
│ │ ├── target nearest neighbors: 1
120+
│ │ ├── stats: [rows=2]
121+
│ │ ├── key: (1)
122+
│ │ └── '[1,2,3]' [type=vector]
123+
│ └── filters (true)
121124
└── projections
122125
└── v:2 <-> '[1,2,3]' [as=column5:5, type=float, outer=(2), immutable]
123126

@@ -140,17 +143,20 @@ top-k
140143
├── stats: [rows=10]
141144
├── key: (1)
142145
├── fd: (1)-->(2), (2)-->(5)
143-
├── index-join t
146+
├── inner-join (lookup t)
144147
│ ├── columns: x:1(int!null) v:2(vector)
148+
│ ├── key columns: [1] = [1]
149+
│ ├── lookup columns are key
145150
│ ├── stats: [rows=10]
146151
│ ├── key: (1)
147152
│ ├── fd: (1)-->(2)
148-
│ └── vector-search t@t_v_idx,vector
149-
│ ├── columns: x:1(int!null)
150-
│ ├── target nearest neighbors: 5
151-
│ ├── stats: [rows=10]
152-
│ ├── key: (1)
153-
│ └── '[1,2,3]' [type=vector]
153+
│ ├── vector-search t@t_v_idx,vector
154+
│ │ ├── columns: x:1(int!null)
155+
│ │ ├── target nearest neighbors: 5
156+
│ │ ├── stats: [rows=10]
157+
│ │ ├── key: (1)
158+
│ │ └── '[1,2,3]' [type=vector]
159+
│ └── filters (true)
154160
└── projections
155161
└── v:2 <-> '[1,2,3]' [as=column5:5, type=float, outer=(2), immutable]
156162

@@ -174,16 +180,19 @@ top-k
174180
├── stats: [rows=1000]
175181
├── key: (1)
176182
├── fd: (1)-->(2), (2)-->(5)
177-
├── index-join t
183+
├── inner-join (lookup t)
178184
│ ├── columns: x:1(int!null) v:2(vector)
185+
│ ├── key columns: [1] = [1]
186+
│ ├── lookup columns are key
179187
│ ├── stats: [rows=1000]
180188
│ ├── key: (1)
181189
│ ├── fd: (1)-->(2)
182-
│ └── vector-search t@t_v_idx,vector
183-
│ ├── columns: x:1(int!null)
184-
│ ├── target nearest neighbors: 10000
185-
│ ├── stats: [rows=1000]
186-
│ ├── key: (1)
187-
│ └── '[1,2,3]' [type=vector]
190+
│ ├── vector-search t@t_v_idx,vector
191+
│ │ ├── columns: x:1(int!null)
192+
│ │ ├── target nearest neighbors: 10000
193+
│ │ ├── stats: [rows=1000]
194+
│ │ ├── key: (1)
195+
│ │ └── '[1,2,3]' [type=vector]
196+
│ └── filters (true)
188197
└── projections
189198
└── v:2 <-> '[1,2,3]' [as=column5:5, type=float, outer=(2), immutable]

pkg/sql/opt/xform/limit_funcs.go

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -487,11 +487,24 @@ func (c *CustomFuncs) TryGenerateVectorSearch(
487487
TargetNeighborCount: limitInt,
488488
},
489489
)
490-
// Add an index join to get the rest of the columns. The index join is
491-
// always necessary because the vector column is not projected by the
492-
// VectorSearch operator.
493-
indexJoinPrivate := memo.IndexJoinPrivate{Table: sp.Table, Cols: sp.Cols}
494-
vectorSearch = c.e.f.ConstructIndexJoin(vectorSearch, &indexJoinPrivate)
490+
491+
// Add a lookup-join against the primary index to get the rest of the
492+
// columns. The primary-key join is always necessary because the vector
493+
// column is not projected by the VectorSearch operator.
494+
primaryIndex := c.e.mem.Metadata().Table(sp.Table).Index(cat.PrimaryIndex)
495+
lookupCols := make(opt.ColList, primaryIndex.KeyColumnCount())
496+
for i := 0; i < primaryIndex.KeyColumnCount(); i++ {
497+
lookupCols[i] = sp.Table.IndexColumnID(primaryIndex, i)
498+
}
499+
lookupPrivate := &memo.LookupJoinPrivate{
500+
JoinType: opt.InnerJoinOp,
501+
Table: sp.Table,
502+
Index: cat.PrimaryIndex,
503+
KeyCols: lookupCols,
504+
Cols: sp.Cols,
505+
LookupColsAreTableKey: true,
506+
}
507+
vectorSearch = c.e.f.ConstructLookupJoin(vectorSearch, nil /* on */, lookupPrivate)
495508

496509
// Project the distance column.
497510
projections := memo.ProjectionsExpr{c.e.f.ConstructProjectionsItem(distanceExpr, distanceCol)}

pkg/sql/opt/xform/testdata/rules/limit

Lines changed: 64 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -3505,15 +3505,18 @@ top-k
35053505
├── immutable
35063506
├── key: (1)
35073507
├── fd: (1)-->(2-11), (9)-->(15)
3508-
├── index-join index_tab
3508+
├── inner-join (lookup index_tab)
35093509
│ ├── columns: id:1!null val:2 region:3 latitude:4 longitude:5 data1:6!null data2:7!null geom:8 vec1:9 vec2:10 vec3:11
3510+
│ ├── key columns: [1] = [1]
3511+
│ ├── lookup columns are key
35103512
│ ├── key: (1)
35113513
│ ├── fd: (1)-->(2-11)
3512-
│ └── vector-search index_tab@index_tab_vec1_idx,vector
3513-
│ ├── columns: id:1!null
3514-
│ ├── target nearest neighbors: 1
3515-
│ ├── key: (1)
3516-
│ └── '[3,1,2]'
3514+
│ ├── vector-search index_tab@index_tab_vec1_idx,vector
3515+
│ │ ├── columns: id:1!null
3516+
│ │ ├── target nearest neighbors: 1
3517+
│ │ ├── key: (1)
3518+
│ │ └── '[3,1,2]'
3519+
│ └── filters (true)
35173520
└── projections
35183521
└── vec1:9 <-> '[3,1,2]' [as=column15:15, outer=(9), immutable]
35193522

@@ -3535,15 +3538,18 @@ top-k
35353538
├── immutable
35363539
├── key: (1)
35373540
├── fd: (1)-->(2-11), (9)-->(15)
3538-
├── index-join index_tab
3541+
├── inner-join (lookup index_tab)
35393542
│ ├── columns: id:1!null val:2 region:3 latitude:4 longitude:5 data1:6!null data2:7!null geom:8 vec1:9 vec2:10 vec3:11
3543+
│ ├── key columns: [1] = [1]
3544+
│ ├── lookup columns are key
35403545
│ ├── key: (1)
35413546
│ ├── fd: (1)-->(2-11)
3542-
│ └── vector-search index_tab@index_tab_vec1_idx,vector
3543-
│ ├── columns: id:1!null
3544-
│ ├── target nearest neighbors: 5
3545-
│ ├── key: (1)
3546-
│ └── '[3,1,2]'
3547+
│ ├── vector-search index_tab@index_tab_vec1_idx,vector
3548+
│ │ ├── columns: id:1!null
3549+
│ │ ├── target nearest neighbors: 5
3550+
│ │ ├── key: (1)
3551+
│ │ └── '[3,1,2]'
3552+
│ └── filters (true)
35473553
└── projections
35483554
└── vec1:9 <-> '[3,1,2]' [as=column15:15, outer=(9), immutable]
35493555

@@ -3565,15 +3571,18 @@ top-k
35653571
├── immutable
35663572
├── key: (1)
35673573
├── fd: (1)-->(2-11), (10)-->(15)
3568-
├── index-join index_tab
3574+
├── inner-join (lookup index_tab)
35693575
│ ├── columns: id:1!null val:2 region:3 latitude:4 longitude:5 data1:6!null data2:7!null geom:8 vec1:9 vec2:10 vec3:11
3576+
│ ├── key columns: [1] = [1]
3577+
│ ├── lookup columns are key
35703578
│ ├── key: (1)
35713579
│ ├── fd: (1)-->(2-11)
3572-
│ └── vector-search index_tab@index_tab_vec2_idx,vector
3573-
│ ├── columns: id:1!null
3574-
│ ├── target nearest neighbors: 5
3575-
│ ├── key: (1)
3576-
│ └── '[3,1,2]'
3580+
│ ├── vector-search index_tab@index_tab_vec2_idx,vector
3581+
│ │ ├── columns: id:1!null
3582+
│ │ ├── target nearest neighbors: 5
3583+
│ │ ├── key: (1)
3584+
│ │ └── '[3,1,2]'
3585+
│ └── filters (true)
35773586
└── projections
35783587
└── vec2:10 <-> '[3,1,2]' [as=column15:15, outer=(10), immutable]
35793588

@@ -3595,15 +3604,18 @@ top-k
35953604
├── immutable
35963605
├── key: (1)
35973606
├── fd: (1)-->(9), (9)-->(15)
3598-
├── index-join index_tab
3607+
├── inner-join (lookup index_tab)
35993608
│ ├── columns: id:1!null vec1:9
3609+
│ ├── key columns: [1] = [1]
3610+
│ ├── lookup columns are key
36003611
│ ├── key: (1)
36013612
│ ├── fd: (1)-->(9)
3602-
│ └── vector-search index_tab@index_tab_vec1_idx,vector
3603-
│ ├── columns: id:1!null
3604-
│ ├── target nearest neighbors: 5
3605-
│ ├── key: (1)
3606-
│ └── '[3,1,2]'
3613+
│ ├── vector-search index_tab@index_tab_vec1_idx,vector
3614+
│ │ ├── columns: id:1!null
3615+
│ │ ├── target nearest neighbors: 5
3616+
│ │ ├── key: (1)
3617+
│ │ └── '[3,1,2]'
3618+
│ └── filters (true)
36073619
└── projections
36083620
└── vec1:9 <-> '[3,1,2]' [as=column15:15, outer=(9), immutable]
36093621

@@ -3625,17 +3637,20 @@ top-k
36253637
├── immutable, has-placeholder
36263638
├── key: (1)
36273639
├── fd: (1)-->(2-11), (9)-->(15)
3628-
├── index-join index_tab
3640+
├── inner-join (lookup index_tab)
36293641
│ ├── columns: id:1!null val:2 region:3 latitude:4 longitude:5 data1:6!null data2:7!null geom:8 vec1:9 vec2:10 vec3:11
3642+
│ ├── key columns: [1] = [1]
3643+
│ ├── lookup columns are key
36303644
│ ├── has-placeholder
36313645
│ ├── key: (1)
36323646
│ ├── fd: (1)-->(2-11)
3633-
│ └── vector-search index_tab@index_tab_vec1_idx,vector
3634-
│ ├── columns: id:1!null
3635-
│ ├── target nearest neighbors: 5
3636-
│ ├── has-placeholder
3637-
│ ├── key: (1)
3638-
│ └── $1
3647+
│ ├── vector-search index_tab@index_tab_vec1_idx,vector
3648+
│ │ ├── columns: id:1!null
3649+
│ │ ├── target nearest neighbors: 5
3650+
│ │ ├── has-placeholder
3651+
│ │ ├── key: (1)
3652+
│ │ └── $1
3653+
│ └── filters (true)
36393654
└── projections
36403655
└── vec1:9 <-> $1 [as=column15:15, outer=(9), immutable]
36413656

@@ -3657,15 +3672,18 @@ top-k
36573672
├── immutable
36583673
├── key: (1)
36593674
├── fd: (1)-->(2-11), (10)-->(15)
3660-
├── index-join index_tab
3675+
├── inner-join (lookup index_tab)
36613676
│ ├── columns: id:1!null val:2 region:3 latitude:4 longitude:5 data1:6!null data2:7!null geom:8 vec1:9 vec2:10 vec3:11
3677+
│ ├── key columns: [1] = [1]
3678+
│ ├── lookup columns are key
36623679
│ ├── key: (1)
36633680
│ ├── fd: (1)-->(2-11)
3664-
│ └── vector-search index_tab@index_tab_vec2_idx,vector
3665-
│ ├── columns: id:1!null
3666-
│ ├── target nearest neighbors: 500
3667-
│ ├── key: (1)
3668-
│ └── '[3,1,2]'
3681+
│ ├── vector-search index_tab@index_tab_vec2_idx,vector
3682+
│ │ ├── columns: id:1!null
3683+
│ │ ├── target nearest neighbors: 500
3684+
│ │ ├── key: (1)
3685+
│ │ └── '[3,1,2]'
3686+
│ └── filters (true)
36693687
└── projections
36703688
└── vec2:10 <-> '[3,1,2]' [as=column15:15, outer=(10), immutable]
36713689

@@ -3903,12 +3921,15 @@ top-k
39033921
└── project
39043922
├── columns: column8:8 photo_id:2!null photo_url:3!null description:4
39053923
├── immutable
3906-
├── index-join image_embeddings
3924+
├── inner-join (lookup image_embeddings)
39073925
│ ├── columns: photo_id:2!null photo_url:3!null description:4 embedding:5!null
3908-
│ └── vector-search image_embeddings@image_embeddings_embedding_idx,vector
3909-
│ ├── columns: id:1!null
3910-
│ ├── target nearest neighbors: 5
3911-
│ ├── key: (1)
3912-
│ └── '[0.016068,-0.033417,-0.020309,-0.031494,-0.014076,0.03653]'
3926+
│ ├── key columns: [1] = [1]
3927+
│ ├── lookup columns are key
3928+
│ ├── vector-search image_embeddings@image_embeddings_embedding_idx,vector
3929+
│ │ ├── columns: id:1!null
3930+
│ │ ├── target nearest neighbors: 5
3931+
│ │ ├── key: (1)
3932+
│ │ └── '[0.016068,-0.033417,-0.020309,-0.031494,-0.014076,0.03653]'
3933+
│ └── filters (true)
39133934
└── projections
39143935
└── embedding:5 <-> '[0.016068,-0.033417,-0.020309,-0.031494,-0.014076,0.03653]' [as=column8:8, outer=(5), immutable]

0 commit comments

Comments
 (0)