@@ -115,6 +115,11 @@ type Context struct {
115115 // randomized is the original vector after it has been randomized by applying
116116 // a random orthogonal transformation (ROT).
117117 randomized vector.T
118+ // ignoreLonelyVector, if true, prohibits searches from returning a vector
119+ // that is the last remaining in its partition. This is used to avoid moving
120+ // the last remaining vector to another partition, thereby creating an empty
121+ // non-leaf partition, which is not allowed in a balanced K-means tree.
122+ ignoreLonelyVector bool
118123
119124 tempSearchSet SearchSet
120125 tempSubSearchSet SearchSet
@@ -364,9 +369,8 @@ func (vi *Index) Delete(
364369 }
365370
366371 // Remove the vector from its partition in the store.
367- _ , err = vi .removeFromPartition (
372+ return vi .removeFromPartition (
368373 ctx , idxCtx .txn , treeKey , result .ParentPartitionKey , result .ChildKey )
369- return err
370374}
371375
372376// Search finds vectors in the index that are closest to the given query vector
@@ -404,16 +408,12 @@ func (vi *Index) SearchForInsert(
404408 return nil , err
405409 }
406410
407- // Now fetch the centroid of the insert partition. This has the side effect
408- // of checking the size of the partition, in case it's over-sized.
411+ // Now fetch the centroid of the insert partition.
409412 partitionKey := result .ChildKey .PartitionKey
410413 metadata , err := idxCtx .txn .GetPartitionMetadata (ctx , treeKey , partitionKey , true /* forUpdate */ )
411414 if err != nil {
412415 return nil , err
413416 }
414- if metadata .Count > vi .options .MaxPartitionSize {
415- vi .fixups .AddSplit (ctx , treeKey , result .ParentPartitionKey , partitionKey )
416- }
417417
418418 result .Vector = metadata .Centroid
419419 return result , nil
@@ -474,18 +474,11 @@ func (vi *Index) ProcessFixups() {
474474 vi .fixups .Process ()
475475}
476476
477- // ForceSplit enqueues a split fixup. It is used for testing.
478- func (vi * Index ) ForceSplit (
477+ // ForceSplitOrMerge enqueues a split or merge fixup. It is used for testing.
478+ func (vi * Index ) ForceSplitOrMerge (
479479 ctx context.Context , treeKey TreeKey , parentPartitionKey PartitionKey , partitionKey PartitionKey ,
480480) {
481- vi .fixups .AddSplit (ctx , treeKey , parentPartitionKey , partitionKey )
482- }
483-
484- // ForceMerge enqueues a merge fixup. It is used for testing.
485- func (vi * Index ) ForceMerge (
486- ctx context.Context , treeKey TreeKey , parentPartitionKey PartitionKey , partitionKey PartitionKey ,
487- ) {
488- vi .fixups .AddMerge (ctx , treeKey , parentPartitionKey , partitionKey )
481+ vi .fixups .AddSplitOrMergeCheck (ctx , treeKey , parentPartitionKey , partitionKey )
489482}
490483
491484// setupInsertContext sets up the given context for an insert operation. Before
@@ -526,10 +519,9 @@ func (vi *Index) insertHelper(
526519 if err != nil {
527520 return err
528521 }
529- parentPartitionKey := result .ParentPartitionKey
530522 partitionKey := result .ChildKey .PartitionKey
531- err = vi .addToPartition (ctx , idxCtx . txn , idxCtx . treeKey , parentPartitionKey ,
532- partitionKey , idxCtx .randomized , childKey , valueBytes )
523+ err = vi .addToPartition (
524+ ctx , idxCtx . txn , idxCtx . treeKey , partitionKey , idxCtx .randomized , childKey , valueBytes )
533525 if errors .Is (err , ErrRestartOperation ) {
534526 return vi .insertHelper (ctx , idxCtx , childKey , valueBytes )
535527 }
@@ -551,7 +543,11 @@ func (vi *Index) searchForInsertHelper(
551543 return nil , errors .AssertionFailedf (
552544 "SearchForInsert should return exactly one result, got %d" , len (results ))
553545 }
554- return & results [0 ], err
546+
547+ vi .fixups .AddSplitOrMergeCheck (
548+ ctx , idxCtx .treeKey , results [0 ].ParentPartitionKey , results [0 ].ChildKey .PartitionKey )
549+
550+ return & results [0 ], nil
555551}
556552
557553// addToPartition calls the store to add the given vector to an existing
@@ -561,36 +557,31 @@ func (vi *Index) addToPartition(
561557 ctx context.Context ,
562558 txn Txn ,
563559 treeKey TreeKey ,
564- parentPartitionKey PartitionKey ,
565560 partitionKey PartitionKey ,
566561 vec vector.T ,
567562 childKey ChildKey ,
568563 valueBytes ValueBytes ,
569564) error {
570- metadata , err := txn .AddToPartition (ctx , treeKey , partitionKey , vec , childKey , valueBytes )
565+ err := txn .AddToPartition (ctx , treeKey , partitionKey , vec , childKey , valueBytes )
571566 if err != nil {
572567 return errors .Wrapf (err , "adding vector to partition %d" , partitionKey )
573568 }
574- if metadata .Count > vi .options .MaxPartitionSize {
575- vi .fixups .AddSplit (ctx , treeKey , parentPartitionKey , partitionKey )
576- }
577569 return vi .stats .OnAddOrRemoveVector (ctx )
578570}
579571
580572// removeFromPartition calls the store to remove a vector, by its key, from an
581573// existing partition.
582574func (vi * Index ) removeFromPartition (
583575 ctx context.Context , txn Txn , treeKey TreeKey , partitionKey PartitionKey , childKey ChildKey ,
584- ) ( metadata PartitionMetadata , err error ) {
585- metadata , err = txn .RemoveFromPartition (ctx , treeKey , partitionKey , childKey )
576+ ) error {
577+ err : = txn .RemoveFromPartition (ctx , treeKey , partitionKey , childKey )
586578 if err != nil {
587- return PartitionMetadata {},
588- errors .Wrapf (err , "removing vector from partition %d" , partitionKey )
579+ return errors .Wrapf (err , "removing vector from partition %d" , partitionKey )
589580 }
590- if err : = vi .stats .OnAddOrRemoveVector (ctx ); err != nil {
591- return PartitionMetadata {}, err
581+ if err = vi .stats .OnAddOrRemoveVector (ctx ); err != nil {
582+ return err
592583 }
593- return metadata , nil
584+ return nil
594585}
595586
596587// searchHelper contains the core search logic for the K-means tree. It begins
@@ -768,11 +759,19 @@ func (vi *Index) searchChildPartitions(
768759 count := idxCtx .tempCounts [i ]
769760 searchSet .Stats .SearchedPartition (level , count )
770761
762+ // If one of the searched partitions has only 1 vector remaining, do not
763+ // return that vector when "ignoreLonelyVector" is true.
764+ if idxCtx .ignoreLonelyVector && idxCtx .level == level && count == 1 {
765+ searchSet .RemoveResults (parentResults [i ].ChildKey .PartitionKey )
766+ }
767+
771768 partitionKey := parentResults [i ].ChildKey .PartitionKey
772769 if count < vi .options .MinPartitionSize && partitionKey != RootKey {
773- vi .fixups .AddMerge (ctx , idxCtx .treeKey , parentResults [i ].ParentPartitionKey , partitionKey )
770+ vi .fixups .AddSplitOrMergeCheck (
771+ ctx , idxCtx .treeKey , parentResults [i ].ParentPartitionKey , partitionKey )
774772 } else if count > vi .options .MaxPartitionSize {
775- vi .fixups .AddSplit (ctx , idxCtx .treeKey , parentResults [i ].ParentPartitionKey , partitionKey )
773+ vi .fixups .AddSplitOrMergeCheck (
774+ ctx , idxCtx .treeKey , parentResults [i ].ParentPartitionKey , partitionKey )
776775 }
777776 }
778777
0 commit comments