@@ -11,7 +11,6 @@ import (
1111 "math"
1212 "math/rand"
1313 "runtime"
14- "slices"
1514 "strconv"
1615 "strings"
1716
@@ -336,9 +335,8 @@ func (vi *Index) Close() {
336335// NOTE: This can result in two vectors with the same primary key being inserted
337336// into the index. To minimize this possibility, callers should call Delete
338337// before Insert when a vector is updated. Even then, it's not guaranteed that
339- // Delete will find the old vector. Vector index methods handle this rare case
340- // by checking for duplicates when returning search results. For details, see
341- // Index.pruneDuplicates.
338+ // Delete will find the old vector. The search set handles this rare case by
339+ // filtering out results with duplicate key bytes.
342340func (vi * Index ) Insert (
343341 ctx context.Context , idxCtx * Context , treeKey TreeKey , vec vector.T , key KeyBytes ,
344342) error {
@@ -456,7 +454,7 @@ func (vi *Index) SearchForDelete(
456454 if err != nil {
457455 return nil , err
458456 }
459- results := idxCtx .tempSearchSet .PopUnsortedResults ()
457+ results := idxCtx .tempSearchSet .PopResults ()
460458 if len (results ) == 0 {
461459 // Retry search with significantly higher beam size.
462460 baseBeamSize *= 8
@@ -564,7 +562,7 @@ func (vi *Index) searchForInsertHelper(
564562 if err != nil {
565563 return nil , err
566564 }
567- results := idxCtx .tempSearchSet .PopUnsortedResults ()
565+ results := idxCtx .tempSearchSet .PopResults ()
568566 if len (results ) != 1 {
569567 return nil , errors .AssertionFailedf (
570568 "SearchForInsert should return exactly one result, got %d" , len (results ))
@@ -658,7 +656,7 @@ func (vi *Index) searchHelper(ctx context.Context, idxCtx *Context, searchSet *S
658656 }
659657
660658 for {
661- results := subSearchSet .PopUnsortedResults ()
659+ results := subSearchSet .PopResults ()
662660 if len (results ) == 0 && searchLevel > LeafLevel {
663661 // This should never happen, as it means that interior partition(s)
664662 // have no children. The vector deletion logic should prevent that.
@@ -668,10 +666,6 @@ func (vi *Index) searchHelper(ctx context.Context, idxCtx *Context, searchSet *S
668666
669667 var zscore float64
670668 if searchLevel > LeafLevel {
671- // Results need to be sorted in order to calculate their "spread". This
672- // also sorts them for determining which partitions to search next.
673- results .Sort ()
674-
675669 // Compute the Z-score of the candidate list if there are enough
676670 // samples. Otherwise, use the default Z-score of 0.
677671 if len (results ) >= vi .options .QualitySamples {
@@ -695,7 +689,6 @@ func (vi *Index) searchHelper(ctx context.Context, idxCtx *Context, searchSet *S
695689 // Aggregate all stats from searching lower levels of the tree.
696690 searchSet .Stats .Add (& subSearchSet .Stats )
697691
698- results = vi .pruneDuplicates (results )
699692 if ! idxCtx .options .SkipRerank || idxCtx .options .ReturnVectors {
700693 // Re-rank search results with full vectors.
701694 searchSet .Stats .FullVectorCount += len (results )
@@ -803,7 +796,7 @@ func (vi *Index) searchChildPartitions(
803796 // If one of the searched partitions has only 1 vector remaining, do not
804797 // return that vector when "ignoreLonelyVector" is true.
805798 if idxCtx .ignoreLonelyVector && idxCtx .level == level && count == 1 {
806- searchSet .RemoveResults (parentResults [i ].ChildKey .PartitionKey )
799+ searchSet .RemoveByParent (parentResults [i ].ChildKey .PartitionKey )
807800 }
808801
809802 // Enqueue background fixup if a split or merge operation needs to be
@@ -825,38 +818,6 @@ func (vi *Index) searchChildPartitions(
825818 return level , nil
826819}
827820
828- // pruneDuplicates removes candidates with duplicate child keys. This is rare,
829- // but it can happen when a vector updated in the primary index cannot be
830- // located in the secondary index.
831- // NOTE: This logic will reorder the candidates slice.
832- // NOTE: This logic can remove the "wrong" duplicate, with a quantized distance
833- // that doesn't correspond to the true distance. However, this has no impact as
834- // long as we rerank candidates using the original full-size vectors. Even if
835- // we're not reranking, the impact of this should be minimal, since duplicates
836- // are so rare and there's already quite a bit of inaccuracy when not reranking.
837- func (vi * Index ) pruneDuplicates (candidates []SearchResult ) []SearchResult {
838- if len (candidates ) <= 1 {
839- // No possibility of duplicates.
840- return candidates
841- }
842-
843- if candidates [0 ].ChildKey .KeyBytes == nil {
844- // Only leaf partitions can have duplicates.
845- return candidates
846- }
847-
848- // TODO DURING REVIEW: this is O(n * log(n)) instead of O(n) like the previous
849- // code, but is probably faster in practice for small values of n because it
850- // is allocation free. It is also cleaner and easier to understand. Choose an
851- // approach.
852- slices .SortFunc (candidates , func (a , b SearchResult ) int {
853- return bytes .Compare (a .ChildKey .KeyBytes , b .ChildKey .KeyBytes )
854- })
855- return slices .CompactFunc (candidates , func (a , b SearchResult ) bool {
856- return bytes .Equal (a .ChildKey .KeyBytes , b .ChildKey .KeyBytes )
857- })
858- }
859-
860821// rerankSearchResults updates the given set of candidates with their exact
861822// distances from the query vector. It does this by fetching the original full
862823// size vectors from the store, in order to re-rank the top candidates for
0 commit comments