Skip to content

Commit a0593d7

Browse files
committed
cspann: fix searcher bugs
Fix a couple of bugs in the tree searcher code found by the concurrency test: - When the tree has more than 5 levels, we were resizing the searcher levels slice, but without copying the root level to the resized slice. This caused an unexpected error during search. - The searcher logic was sometimes exiting too early, before it had considered all available parent results. This was causing deletions to sometimes miss finding vectors in the tree. Epic: CRDB-42943 Release note: None
1 parent 37c46db commit a0593d7

File tree

4 files changed

+35
-10
lines changed

4 files changed

+35
-10
lines changed

pkg/sql/vecindex/cspann/BUILD.bazel

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ go_library(
3434
"//pkg/sql/vecindex/cspann/quantize",
3535
"//pkg/sql/vecindex/cspann/utils",
3636
"//pkg/sql/vecindex/cspann/workspace",
37+
"//pkg/util/buildutil",
3738
"//pkg/util/container/heap",
3839
"//pkg/util/log",
3940
"//pkg/util/num32",

pkg/sql/vecindex/cspann/index.go

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ import (
1616

1717
"github.com/cockroachdb/cockroach/pkg/sql/vecindex/cspann/quantize"
1818
"github.com/cockroachdb/cockroach/pkg/sql/vecindex/cspann/utils"
19+
"github.com/cockroachdb/cockroach/pkg/util/buildutil"
1920
"github.com/cockroachdb/cockroach/pkg/util/log"
2021
"github.com/cockroachdb/cockroach/pkg/util/num32"
2122
"github.com/cockroachdb/cockroach/pkg/util/stop"
@@ -1047,8 +1048,12 @@ func ensureSliceCap[T any](s []T, c int) []T {
10471048
// ensureSliceLen returns a slice of the given length and generic type. If the
10481049
// existing slice has enough capacity, that slice is returned after adjusting
10491050
// its length. Otherwise, a new, larger slice is allocated.
1051+
// NOTE: Every element of the new slice is uninitialized; callers are
1052+
// responsible for initializing the memory.
10501053
func ensureSliceLen[T any](s []T, l int) []T {
1051-
if cap(s) < l {
1054+
// In test builds, always allocate new memory, to catch bugs where callers
1055+
// assume existing slice elements will be copied.
1056+
if cap(s) < l || buildutil.CrdbTestBuild {
10521057
return make([]T, l)
10531058
}
10541059
return s[:l]

pkg/sql/vecindex/cspann/searcher.go

Lines changed: 26 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ func (s *searcher) Init(idx *Index, idxCtx *Context, searchSet *SearchSet) {
7272
func (s *searcher) Next(ctx context.Context) (ok bool, err error) {
7373
if len(s.levels) == 0 {
7474
s.levels = ensureSliceLen(s.levels, 1)
75-
root := &s.levels[0]
75+
root := &s.levelStorage[0]
7676
root.Init(s.idx, s.idxCtx, nil /* parent */, &s.searchSet.Stats)
7777

7878
// Return enough search results to ensure that:
@@ -105,6 +105,7 @@ func (s *searcher) Next(ctx context.Context) (ok bool, err error) {
105105
// Set up remainder of searchers now that we know the root's level.
106106
n := int(root.Level()-s.idxCtx.level) + 1
107107
s.levels = ensureSliceLen(s.levels, n)
108+
s.levels[0] = *root
108109
for i := 1; i < n; i++ {
109110
var maxResults, maxExtraResults int
110111
var matchKey KeyBytes
@@ -238,6 +239,9 @@ func (s *levelSearcher) Init(
238239
searchSet: s.searchSet, // Preserve existing searchSet memory.
239240
}
240241
if parent != nil {
242+
if parent.Level() == InvalidLevel {
243+
panic(errors.AssertionFailedf("parent level cannot be InvalidLevel"))
244+
}
241245
s.level = parent.Level() - 1
242246
}
243247

@@ -293,17 +297,28 @@ func (s *levelSearcher) NextBatch(ctx context.Context) (ok bool, err error) {
293297
// overlap with the previous batch, in terms of ordering and duplicates.
294298
s.searchSet.Clear()
295299

296-
if firstBatch || len(s.parentResults) < s.beamSize {
297-
// Get more results from parent to fetch the next batch of child results.
300+
if firstBatch {
301+
ok, err := s.parent.NextBatch(ctx)
302+
if err != nil || !ok {
303+
return ok, err
304+
}
305+
s.parentResults = s.parent.SearchSet().PopResults()
306+
} else if len(s.parentResults) < s.beamSize {
307+
// Get more results from parent to try and fill the beam size.
298308
parentResults := s.parent.SearchSet().PopResults()
299309
if len(parentResults) == 0 {
300310
// Get next batch of results from parent.
301311
ok, err := s.parent.NextBatch(ctx)
302-
if err != nil || !ok {
303-
return ok, err
312+
if err != nil {
313+
return false, err
314+
}
315+
if !ok && len(s.parentResults) == 0 {
316+
// Only exit if there are no more results to process.
317+
return false, nil
304318
}
305-
s.parentResults = s.parent.SearchSet().PopResults()
306-
} else if len(s.parentResults) == 0 {
319+
parentResults = s.parent.SearchSet().PopResults()
320+
}
321+
if len(s.parentResults) == 0 {
307322
s.parentResults = parentResults
308323
} else {
309324
s.parentResults = append(s.parentResults, parentResults...)
@@ -405,7 +420,10 @@ func (s *levelSearcher) searchChildPartitions(
405420
s.stats.SearchedPartition(level, count)
406421

407422
// If searching for vector to delete, skip partitions that are in a state
408-
// that does not allow add and remove operations.
423+
// that does not allow add and remove operations. This is not possible to
424+
// do here for the insert case, because we do not actually search the
425+
// partition in which to insert; we only search its parent and never get
426+
// the metadata for the insert partition itself.
409427
// TODO(andyk): This should probably be checked in the Store, perhaps by
410428
// passing a "forUpdate" parameter to SearchPartitions, so that the Store
411429
// doesn't even add vectors from partitions that do not allow updates.

pkg/sql/vecindex/vecindex_test.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -121,8 +121,9 @@ func TestVecindexConcurrency(t *testing.T) {
121121
// Query for a vector while inserts happen in the background.
122122
if insertCount.Load() > 0 {
123123
var id int
124+
vec := vectors.At(vecOffset % vectors.Count)
124125
row := runner.QueryRow(t,
125-
`SELECT id FROM t ORDER BY v <-> $1 LIMIT 1`, vectors.At(vecOffset).String())
126+
`SELECT id FROM t ORDER BY v <-> $1 LIMIT 1`, vec.String())
126127
row.Scan(&id)
127128
vecOffset++
128129
}

0 commit comments

Comments
 (0)