Skip to content

Commit 7b9fd1a

Browse files
craig[bot]andy-kimball
andcommitted
Merge #148713
148713: cspann: move vectors from sibling partitions during split r=drewkimball a=andy-kimball #### cspann: move vectors from sibling partitions during split When a partition is split, the left and right target partitions are assigned new centroids. This makes it possible for vectors in other partitions at the same level to now be closer to one of those new centroids than they are to their own centroid. In that case, we need to move those vectors to whichever target partition is now closer. #### cspann: enhance SearchSet Add support in SearchSet for excluding partitions during search. Any vectors in these partitions will not be added to the set. Also add an option that includes the distance of each vector from its centroid in search results. These options will be used by the split operation. #### cspann: add BestCentroids vector index test Add a new "best-centroids" test that prints out the partitions with the closest centroids for a query vector. This is useful when gauging the quality of the index. Also, update the "recall" test to only sample from vectors that are not part of the index. Achieving high recall is more challenging when searching for such vectors. #### cspann: move level searcher init code to Init method Previously, the searcher initialized the search set for each level in the Next method. This commit moves that code into the levelSearcher.Init method. This makes some planned refactoring easier in another commit. #### cspann: add utils.ReplaceWithLast helper function Add a generic ReplaceWithLast function that removes an element from a slice by replacing it with the last element and truncating the slice. #### vecindex: add TryMoveVector method to Store interface Add a new TryMoveVector method to the Store interface. This method moves a single vector from one partition to another as an atomic operation, provided the destination metadata is as expected. Co-authored-by: Andrew Kimball <[email protected]>
2 parents 1dfa255 + 7d42220 commit 7b9fd1a

34 files changed

+1819
-1017
lines changed

pkg/BUILD.bazel

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -658,6 +658,7 @@ ALL_TESTS = [
658658
"//pkg/sql/types:types_test",
659659
"//pkg/sql/vecindex/cspann/memstore:memstore_test",
660660
"//pkg/sql/vecindex/cspann/quantize:quantize_test",
661+
"//pkg/sql/vecindex/cspann/utils:utils_test",
661662
"//pkg/sql/vecindex/cspann/workspace:workspace_test",
662663
"//pkg/sql/vecindex/cspann:cspann_test",
663664
"//pkg/sql/vecindex/vecencoding:vecencoding_test",
@@ -2384,6 +2385,7 @@ GO_TARGETS = [
23842385
"//pkg/sql/vecindex/cspann/quantize:quantize_test",
23852386
"//pkg/sql/vecindex/cspann/testutils:testutils",
23862387
"//pkg/sql/vecindex/cspann/utils:utils",
2388+
"//pkg/sql/vecindex/cspann/utils:utils_test",
23872389
"//pkg/sql/vecindex/cspann/workspace:workspace",
23882390
"//pkg/sql/vecindex/cspann/workspace:workspace_test",
23892391
"//pkg/sql/vecindex/cspann:cspann",

pkg/sql/vecindex/cspann/BUILD.bazel

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,7 @@ go_test(
8989
"@com_github_cockroachdb_errors//:errors",
9090
"@com_github_guptarohit_asciigraph//:asciigraph",
9191
"@com_github_stretchr_testify//require",
92+
"@org_golang_x_exp//slices",
9293
"@org_gonum_v1_gonum//floats/scalar",
9394
"@org_gonum_v1_gonum//stat",
9495
],

pkg/sql/vecindex/cspann/commontest/storetests.go

Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,9 @@
66
package commontest
77

88
import (
9+
"cmp"
910
"context"
11+
"slices"
1012
"testing"
1113

1214
"github.com/cockroachdb/cockroach/pkg/keys"
@@ -987,6 +989,135 @@ func (suite *StoreTestSuite) TestTryRemoveFromPartition() {
987989
}
988990
}
989991

992+
func (suite *StoreTestSuite) TestTryMoveVector() {
993+
store := suite.makeStore(suite.quantizer)
994+
defer store.Close(suite.T())
995+
996+
doTest := func(treeID int) {
997+
treeKey := store.MakeTreeKey(suite.T(), treeID)
998+
999+
// Create source partition with some vectors.
1000+
sourcePartitionKey, _ := suite.createTestPartition(store, treeKey)
1001+
1002+
// Create empty target partition.
1003+
targetPartitionKey := cspann.PartitionKey(20)
1004+
metadata := cspann.PartitionMetadata{
1005+
Level: cspann.SecondLevel,
1006+
Centroid: vector.T{2, 4},
1007+
}
1008+
metadata.StateDetails.MakeReady()
1009+
suite.NoError(store.TryCreateEmptyPartition(suite.ctx, treeKey, targetPartitionKey, metadata))
1010+
targetPartition, err := store.TryGetPartition(suite.ctx, treeKey, targetPartitionKey)
1011+
suite.NoError(err)
1012+
1013+
// Source partition does not yet exist.
1014+
expected := *targetPartition.Metadata()
1015+
moved, err := store.TryMoveVector(
1016+
suite.ctx, treeKey, cspann.PartitionKey(99), targetPartitionKey,
1017+
vec1, partitionKey1, valueBytes1, expected)
1018+
suite.NoError(err)
1019+
suite.False(moved)
1020+
1021+
// Destination partition does not yet exist.
1022+
moved, err = store.TryMoveVector(
1023+
suite.ctx, treeKey, sourcePartitionKey, cspann.PartitionKey(99),
1024+
vec1, partitionKey1, valueBytes1, cspann.PartitionMetadata{})
1025+
suite.NoError(err)
1026+
suite.False(moved)
1027+
1028+
// Source partition is the same as destination partition.
1029+
moved, err = store.TryMoveVector(
1030+
suite.ctx, treeKey, sourcePartitionKey, sourcePartitionKey,
1031+
vec1, partitionKey1, valueBytes1, expected)
1032+
suite.NoError(err)
1033+
suite.False(moved)
1034+
1035+
// Now move should work.
1036+
moved, err = store.TryMoveVector(
1037+
suite.ctx, treeKey, sourcePartitionKey, targetPartitionKey,
1038+
vec1, partitionKey1, valueBytes1, expected)
1039+
suite.NoError(err)
1040+
suite.True(moved)
1041+
1042+
// Fetch back the target partition and validate it.
1043+
targetPartition, err = store.TryGetPartition(suite.ctx, treeKey, targetPartitionKey)
1044+
suite.NoError(err)
1045+
suite.Equal([]cspann.ChildKey{partitionKey1}, targetPartition.ChildKeys())
1046+
suite.Equal([]cspann.ValueBytes{valueBytes1}, targetPartition.ValueBytes())
1047+
1048+
// Try to move again, but with mismatched expected metadata.
1049+
var errConditionFailed *cspann.ConditionFailedError
1050+
metadata = expected
1051+
metadata.StateDetails.State = cspann.DrainingForMergeState
1052+
moved, err = store.TryMoveVector(
1053+
suite.ctx, treeKey, sourcePartitionKey, targetPartitionKey,
1054+
vec1, partitionKey3, valueBytes3, metadata)
1055+
suite.ErrorAs(err, &errConditionFailed)
1056+
suite.False(moved)
1057+
suite.True(errConditionFailed.Actual.Equal(&expected))
1058+
1059+
// Try again, this time with correct expected metadata.
1060+
moved, err = store.TryMoveVector(
1061+
suite.ctx, treeKey, sourcePartitionKey, targetPartitionKey,
1062+
vec1, partitionKey3, valueBytes3, expected)
1063+
suite.NoError(err)
1064+
suite.True(moved)
1065+
1066+
// Fetch back the source partition and validate it.
1067+
sourcePartition, err := store.TryGetPartition(suite.ctx, treeKey, sourcePartitionKey)
1068+
suite.NoError(err)
1069+
suite.Equal([]cspann.ChildKey{partitionKey2}, sourcePartition.ChildKeys())
1070+
suite.Equal([]cspann.ValueBytes{valueBytes2}, sourcePartition.ValueBytes())
1071+
1072+
// Try to move a vector that no longer exists in the source partition.
1073+
moved, err = store.TryMoveVector(
1074+
suite.ctx, treeKey, sourcePartitionKey, targetPartitionKey,
1075+
vec1, partitionKey3, valueBytes3, expected)
1076+
suite.NoError(err)
1077+
suite.False(moved)
1078+
1079+
// Try to move a vector that already exists in the target partition.
1080+
added, err := store.TryAddToPartition(
1081+
suite.ctx, treeKey, targetPartitionKey, vec2.AsSet(),
1082+
[]cspann.ChildKey{partitionKey2}, []cspann.ValueBytes{valueBytes2}, expected)
1083+
suite.NoError(err)
1084+
suite.True(added)
1085+
1086+
moved, err = store.TryMoveVector(
1087+
suite.ctx, treeKey, sourcePartitionKey, targetPartitionKey,
1088+
vec1, partitionKey2, valueBytes2, expected)
1089+
suite.NoError(err)
1090+
suite.False(moved)
1091+
1092+
// Ensure that the vector was not removed from the source partition.
1093+
sourcePartition, err = store.TryGetPartition(suite.ctx, treeKey, sourcePartitionKey)
1094+
suite.NoError(err)
1095+
suite.Equal([]cspann.ChildKey{partitionKey2}, sourcePartition.ChildKeys())
1096+
suite.Equal([]cspann.ValueBytes{valueBytes2}, sourcePartition.ValueBytes())
1097+
1098+
// Ensure that the target partition now has all three vectors.
1099+
targetPartition, err = store.TryGetPartition(suite.ctx, treeKey, targetPartitionKey)
1100+
suite.NoError(err)
1101+
suite.Equal(3, targetPartition.Count())
1102+
childKeys := slices.Clone(targetPartition.ChildKeys())
1103+
slices.SortFunc(childKeys, func(a, b cspann.ChildKey) int {
1104+
return cmp.Compare(a.PartitionKey, b.PartitionKey)
1105+
})
1106+
suite.Equal([]cspann.ChildKey{partitionKey1, partitionKey2, partitionKey3}, childKeys)
1107+
}
1108+
1109+
suite.Run("default tree", func() {
1110+
doTest(0)
1111+
})
1112+
1113+
if store.AllowMultipleTrees() {
1114+
// Ensure that vectors are independent across trees.
1115+
suite.Run("different tree", func() {
1116+
doTest(1)
1117+
})
1118+
}
1119+
}
1120+
9901121
func (suite *StoreTestSuite) TestTryClearPartition() {
9911122
store := suite.makeStore(suite.quantizer)
9921123
defer store.Close(suite.T())

pkg/sql/vecindex/cspann/fixup_processor.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -486,7 +486,8 @@ func (fp *FixupProcessor) nextFixup(ctx context.Context) (next fixup, ok bool) {
486486
}()
487487
}
488488

489-
if discard {
489+
// Always process fixup if it's single-stepping.
490+
if discard && !next.SingleStep {
490491
fp.removeFixup(next)
491492
continue
492493
}

0 commit comments

Comments
 (0)