Commit e60506c
cspann: add ChildKey de-duplicator
Add a new de-duplicator class for ChildKeys. Duplicate ChildKeys are going to
become much more common with non-transactional fixups.

Add an O(N) implementation that minimizes allocations by avoiding hashing
KeyBytes values as strings. Instead, it hashes each KeyBytes value into a
uint64, which is then used as the key in a regular Go map. uint64 collisions
are handled by rehashing.

Use the new de-duper in the index. In a later PR, it will be used directly in
SearchSet as well, to detect duplicates as early as possible.

Epic: CRDB-42943

Release note: None
1 parent 32facf3 commit e60506c
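For orientation, here is a minimal usage sketch of the new de-duper, based on
the childKeyDeDup API added below (the capacity hint and key values are made
up for illustration):

	var dedup childKeyDeDup
	dedup.Init(16 /* capacity hint, chosen arbitrarily */)

	// Partition keys and primary key bytes are de-duplicated independently.
	_ = dedup.TryAdd(ChildKey{PartitionKey: 42})         // true: first time seen
	_ = dedup.TryAdd(ChildKey{PartitionKey: 42})         // false: duplicate
	_ = dedup.TryAdd(ChildKey{KeyBytes: []byte("pk-1")}) // true: first time seen
	_ = dedup.TryAdd(ChildKey{KeyBytes: []byte("pk-1")}) // false: duplicate

	// Reuse the same instance across searches without reallocating the maps.
	dedup.Clear()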

File tree

14 files changed: +410 -154 lines changed

pkg/sql/vecindex/cspann/BUILD.bazel

Lines changed: 2 additions & 0 deletions

@@ -11,6 +11,7 @@ filegroup(
 go_library(
     name = "cspann",
     srcs = [
+        "childkey_dedup.go",
         "cspannpb.go",
         "fixup_processor.go",
         "fixup_split.go",
@@ -49,6 +50,7 @@ go_library(
 go_test(
     name = "cspann_test",
     srcs = [
+        "childkey_dedup_test.go",
         "cspannpb_test.go",
         "fixup_processor_test.go",
         "fixup_worker_test.go",
pkg/sql/vecindex/cspann/childkey_dedup.go (new file)

Lines changed: 126 additions & 0 deletions

// Copyright 2025 The Cockroach Authors.
//
// Use of this software is governed by the CockroachDB Software License
// included in the /LICENSE file.

package cspann

import (
	"bytes"
	"hash/maphash"

	"github.com/cockroachdb/errors"
)

// hashKeyFunc is a function type for hashing KeyBytes.
type hashKeyFunc func(KeyBytes) uint64

// childKeyDeDup provides de-duplication for ChildKey values. It supports both
// PartitionKey and KeyBytes child keys efficiently without making unnecessary
// allocations.
type childKeyDeDup struct {
	// initialCapacity is used to initialize the size of the data structures used
	// by the de-duplicator.
	initialCapacity int

	// partitionKeys is used for PartitionKey deduplication.
	partitionKeys map[PartitionKey]struct{}

	// keyBytesMap maps from a KeyBytes hash to the actual KeyBytes for direct
	// lookup and deduplication.
	keyBytesMap map[uint64]KeyBytes

	// seed pseudo-randomizes the hash function used by the de-duplicator.
	seed maphash.Seed

	// hashKeyBytes is the function used to hash KeyBytes. This is primarily used
	// for testing to override the default hash function.
	hashKeyBytes hashKeyFunc
}

// Init initializes the de-duplicator.
func (dd *childKeyDeDup) Init(capacity int) {
	dd.initialCapacity = capacity
	dd.seed = maphash.MakeSeed()
	dd.hashKeyBytes = dd.defaultHashKeyBytes
	dd.Clear()
}

// TryAdd attempts to add a child key to the deduplication set. It returns true
// if the key was added (wasn't a duplicate), or false if the key already exists
// (is a duplicate).
func (dd *childKeyDeDup) TryAdd(childKey ChildKey) bool {
	// Handle PartitionKey case - simple map lookup.
	if childKey.PartitionKey != 0 {
		// Lazily initialize the partitionKeys map.
		if dd.partitionKeys == nil {
			dd.partitionKeys = make(map[PartitionKey]struct{}, dd.initialCapacity)
		}

		if _, exists := dd.partitionKeys[childKey.PartitionKey]; exists {
			return false
		}
		dd.partitionKeys[childKey.PartitionKey] = struct{}{}
		return true
	}

	// Handle KeyBytes case. Lazily initialize the KeyBytes structures.
	if dd.keyBytesMap == nil {
		dd.keyBytesMap = make(map[uint64]KeyBytes, dd.initialCapacity)
	}

	// Calculate original hash for the key bytes.
	hash := dd.hashKeyBytes(childKey.KeyBytes)

	// Check for the key, possibly having to look at multiple rehashed slots.
	iterations := 0
	for {
		existingKey, exists := dd.keyBytesMap[hash]
		if !exists {
			// No collision, we can use this hash.
			break
		}

		// Check if this is the same key.
		if bytes.Equal(existingKey, childKey.KeyBytes) {
			return false
		}

		// Hash collision, rehash to find a new slot.
		hash = dd.rehash(hash)

		iterations++
		if iterations >= 100000 {
			// We must have hit a cycle, which should never happen.
			panic(errors.AssertionFailedf("rehash function cycled"))
		}
	}

	// Add the key to the map.
	dd.keyBytesMap[hash] = childKey.KeyBytes
	return true
}

// Clear removes all entries from the deduplication set.
func (dd *childKeyDeDup) Clear() {
	// Reset all the data structures.
	clear(dd.partitionKeys)
	clear(dd.keyBytesMap)
}

// defaultHashKeyBytes is the default implementation of hashKeyBytes.
func (dd *childKeyDeDup) defaultHashKeyBytes(key KeyBytes) uint64 {
	return maphash.Bytes(dd.seed, key)
}

// rehash creates a new hash from an existing hash to resolve collisions.
func (dd *childKeyDeDup) rehash(hash uint64) uint64 {
	// These constants are large 64-bit primes.
	hash ^= 0xc3a5c85c97cb3127
	hash ^= hash >> 33
	hash *= 0xff51afd7ed558ccd
	hash ^= hash >> 33
	hash *= 0xc4ceb9fe1a85ec53
	hash ^= hash >> 33
	return hash
}
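A note on the rehash constants: the multiply/xor-shift-by-33 structure mirrors
the 64-bit finalizer ("fmix64") popularized by MurmurHash3, whose mixing
constants are 0xff51afd7ed558ccd and 0xc4ceb9fe1a85ec53, and the initial XOR
constant also appears in CityHash. The point of this construction is good bit
avalanche, which makes it very unlikely that successive rehashes of the same
value cycle or keep colliding.

To make the collision handling concrete, here is a sketch of a read-only
lookup that follows the same probe chain as TryAdd. The contains helper is
hypothetical and not part of this commit:

	// contains reports whether key is already in the de-dup set. Like TryAdd,
	// it starts at the key's hash and keeps rehashing until it finds either
	// the key itself or an empty slot. (A production version would cap the
	// number of probes, as TryAdd does.)
	func (dd *childKeyDeDup) contains(key KeyBytes) bool {
		hash := dd.hashKeyBytes(key)
		for {
			existing, ok := dd.keyBytesMap[hash]
			if !ok {
				// An empty slot ends the probe chain: the key is absent.
				return false
			}
			if bytes.Equal(existing, key) {
				return true
			}
			// Different key with the same hash: advance to the next slot.
			hash = dd.rehash(hash)
		}
	}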
pkg/sql/vecindex/cspann/childkey_dedup_test.go (new file)

Lines changed: 121 additions & 0 deletions

// Copyright 2025 The Cockroach Authors.
//
// Use of this software is governed by the CockroachDB Software License
// included in the /LICENSE file.

package cspann

import (
	"hash/maphash"
	"strconv"
	"testing"

	"github.com/cockroachdb/cockroach/pkg/util/leaktest"
	"github.com/cockroachdb/cockroach/pkg/util/log"
	"github.com/stretchr/testify/require"
)

func TestChildKeyDeDupAddPartitionKey(t *testing.T) {
	defer leaktest.AfterTest(t)()
	defer log.Scope(t).Close(t)

	var dd childKeyDeDup
	dd.Init(10)

	// Add a new PartitionKey.
	added := dd.TryAdd(ChildKey{PartitionKey: 123})
	require.True(t, added)

	// Try to add the same key again (should be a duplicate).
	added = dd.TryAdd(ChildKey{PartitionKey: 123})
	require.False(t, added)

	// Add a different PartitionKey.
	added = dd.TryAdd(ChildKey{PartitionKey: 456})
	require.True(t, added)
}

func TestChildKeyDeDupAddKeyBytes(t *testing.T) {
	defer leaktest.AfterTest(t)()
	defer log.Scope(t).Close(t)

	var dd childKeyDeDup
	dd.Init(10)

	// Add a new KeyBytes.
	key1 := []byte("key1")
	added := dd.TryAdd(ChildKey{KeyBytes: key1})
	require.True(t, added)

	// Try to add the same key again (should be a duplicate).
	added = dd.TryAdd(ChildKey{KeyBytes: key1})
	require.False(t, added)

	// Add a different KeyBytes.
	key2 := []byte("key2")
	added = dd.TryAdd(ChildKey{KeyBytes: key2})
	require.True(t, added)

	// Verify that both keys are properly stored by checking for duplicates.
	added = dd.TryAdd(ChildKey{KeyBytes: key1})
	require.False(t, added, "Key1 should be detected as duplicate.")
	added = dd.TryAdd(ChildKey{KeyBytes: key2})
	require.False(t, added, "Key2 should be detected as duplicate.")
}

func TestChildKeyDeDupClear(t *testing.T) {
	defer leaktest.AfterTest(t)()
	defer log.Scope(t).Close(t)

	var dd childKeyDeDup
	dd.Init(10)

	// Add a mix of keys.
	require.True(t, dd.TryAdd(ChildKey{PartitionKey: 123}))
	require.True(t, dd.TryAdd(ChildKey{KeyBytes: []byte("key1")}))

	// Clear the deduplicator.
	dd.Clear()

	// Verify keys were cleared.
	require.True(t, dd.TryAdd(ChildKey{PartitionKey: 123}))
	require.True(t, dd.TryAdd(ChildKey{KeyBytes: []byte("key1")}))
}

func TestChildKeyDeDupRehashing(t *testing.T) {
	defer leaktest.AfterTest(t)()
	defer log.Scope(t).Close(t)

	// This test simulates a scenario with many keys and random collisions.

	// Create a custom hash function that has a small range, forcing collisions.
	seed := maphash.MakeSeed()
	customHashFunc := func(key KeyBytes) uint64 {
		// Use only 10 possible hash values.
		return maphash.Bytes(seed, key) % 10
	}

	var dd childKeyDeDup
	dd.Init(100)
	dd.hashKeyBytes = customHashFunc

	// Create 50 keys, which have guaranteed collisions with only 10 possible
	// hash values.
	const numKeys = 50
	keys := make([]KeyBytes, 0, numKeys)
	for i := 0; i < numKeys; i++ {
		key := []byte("test-key-" + strconv.Itoa(i))
		keys = append(keys, key)
	}

	// Add all keys.
	for _, key := range keys {
		require.True(t, dd.TryAdd(ChildKey{KeyBytes: key}))
	}

	// Verify all keys are findable.
	for _, key := range keys {
		require.False(t, dd.TryAdd(ChildKey{KeyBytes: key}))
	}
}

pkg/sql/vecindex/cspann/fixup_worker.go

Lines changed: 1 addition & 1 deletion

@@ -639,7 +639,7 @@ func (fw *fixupWorker) linkNearbyVectors(
 	defer fw.workspace.FreeVector(tempVector)

 	// Filter the results.
-	results := idxCtx.tempSearchSet.PopUnsortedResults()
+	results := idxCtx.tempSearchSet.PopResults()
 	for i := range results {
 		result := &results[i]
pkg/sql/vecindex/cspann/index.go

Lines changed: 6 additions & 45 deletions

@@ -11,7 +11,6 @@ import (
 	"math"
 	"math/rand"
 	"runtime"
-	"slices"
 	"strconv"
 	"strings"

@@ -336,9 +335,8 @@ func (vi *Index) Close() {
 // NOTE: This can result in two vectors with the same primary key being inserted
 // into the index. To minimize this possibility, callers should call Delete
 // before Insert when a vector is updated. Even then, it's not guaranteed that
-// Delete will find the old vector. Vector index methods handle this rare case
-// by checking for duplicates when returning search results. For details, see
-// Index.pruneDuplicates.
+// Delete will find the old vector. The search set handles this rare case by
+// filtering out results with duplicate key bytes.
 func (vi *Index) Insert(
 	ctx context.Context, idxCtx *Context, treeKey TreeKey, vec vector.T, key KeyBytes,
 ) error {
@@ -456,7 +454,7 @@ func (vi *Index) SearchForDelete(
 	if err != nil {
 		return nil, err
 	}
-	results := idxCtx.tempSearchSet.PopUnsortedResults()
+	results := idxCtx.tempSearchSet.PopResults()
 	if len(results) == 0 {
 		// Retry search with significantly higher beam size.
 		baseBeamSize *= 8
@@ -564,7 +562,7 @@ func (vi *Index) searchForInsertHelper(
 	if err != nil {
 		return nil, err
 	}
-	results := idxCtx.tempSearchSet.PopUnsortedResults()
+	results := idxCtx.tempSearchSet.PopResults()
 	if len(results) != 1 {
 		return nil, errors.AssertionFailedf(
 			"SearchForInsert should return exactly one result, got %d", len(results))
@@ -658,7 +656,7 @@ func (vi *Index) searchHelper(ctx context.Context, idxCtx *Context, searchSet *S
 	}

 	for {
-		results := subSearchSet.PopUnsortedResults()
+		results := subSearchSet.PopResults()
 		if len(results) == 0 && searchLevel > LeafLevel {
 			// This should never happen, as it means that interior partition(s)
 			// have no children. The vector deletion logic should prevent that.
@@ -668,10 +666,6 @@ func (vi *Index) searchHelper(ctx context.Context, idxCtx *Context, searchSet *S

 		var zscore float64
 		if searchLevel > LeafLevel {
-			// Results need to be sorted in order to calculate their "spread". This
-			// also sorts them for determining which partitions to search next.
-			results.Sort()
-
 			// Compute the Z-score of the candidate list if there are enough
 			// samples. Otherwise, use the default Z-score of 0.
 			if len(results) >= vi.options.QualitySamples {
@@ -695,7 +689,6 @@ func (vi *Index) searchHelper(ctx context.Context, idxCtx *Context, searchSet *S
 		// Aggregate all stats from searching lower levels of the tree.
 		searchSet.Stats.Add(&subSearchSet.Stats)

-		results = vi.pruneDuplicates(results)
 		if !idxCtx.options.SkipRerank || idxCtx.options.ReturnVectors {
 			// Re-rank search results with full vectors.
 			searchSet.Stats.FullVectorCount += len(results)
@@ -803,7 +796,7 @@ func (vi *Index) searchChildPartitions(
 	// If one of the searched partitions has only 1 vector remaining, do not
 	// return that vector when "ignoreLonelyVector" is true.
 	if idxCtx.ignoreLonelyVector && idxCtx.level == level && count == 1 {
-		searchSet.RemoveResults(parentResults[i].ChildKey.PartitionKey)
+		searchSet.RemoveByParent(parentResults[i].ChildKey.PartitionKey)
 	}

 	// Enqueue background fixup if a split or merge operation needs to be
@@ -825,38 +818,6 @@ func (vi *Index) searchChildPartitions(
 	return level, nil
 }

-// pruneDuplicates removes candidates with duplicate child keys. This is rare,
-// but it can happen when a vector updated in the primary index cannot be
-// located in the secondary index.
-// NOTE: This logic will reorder the candidates slice.
-// NOTE: This logic can remove the "wrong" duplicate, with a quantized distance
-// that doesn't correspond to the true distance. However, this has no impact as
-// long as we rerank candidates using the original full-size vectors. Even if
-// we're not reranking, the impact of this should be minimal, since duplicates
-// are so rare and there's already quite a bit of inaccuracy when not reranking.
-func (vi *Index) pruneDuplicates(candidates []SearchResult) []SearchResult {
-	if len(candidates) <= 1 {
-		// No possibility of duplicates.
-		return candidates
-	}
-
-	if candidates[0].ChildKey.KeyBytes == nil {
-		// Only leaf partitions can have duplicates.
-		return candidates
-	}
-
-	// TODO DURING REVIEW: this is O(n * log(n)) instead of O(n) like the previous
-	// code, but is probably faster in practice for small values of n because it
-	// is allocation free. It is also cleaner and easier to understand. Choose an
-	// approach.
-	slices.SortFunc(candidates, func(a, b SearchResult) int {
-		return bytes.Compare(a.ChildKey.KeyBytes, b.ChildKey.KeyBytes)
-	})
-	return slices.CompactFunc(candidates, func(a, b SearchResult) bool {
-		return bytes.Equal(a.ChildKey.KeyBytes, b.ChildKey.KeyBytes)
-	})
-}
-
 // rerankSearchResults updates the given set of candidates with their exact
 // distances from the query vector. It does this by fetching the original full
 // size vectors from the store, in order to re-rank the top candidates for
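Two observations on the index.go changes above: the explicit results.Sort()
call can be dropped because PopResults, unlike PopUnsortedResults, evidently
returns results already sorted (which the spread calculation and the choice of
partitions to search next rely on), and the pruneDuplicates pass is removed
because duplicate filtering now happens inside the search set itself, as
described in the updated Insert comment.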
