@@ -711,33 +711,33 @@ func (fw *fixupWorker) copyToSplitSubPartitions(
711
711
vectors vector.Set ,
712
712
leftMetadata , rightMetadata PartitionMetadata ,
713
713
) (err error ) {
714
- var leftOffsets , rightOffsets [] uint64
714
+ var leftCount int
715
715
sourceState := sourcePartition .Metadata ().StateDetails
716
716
717
717
defer func () {
718
718
err = errors .Wrapf (err ,
719
719
"assigning %d vectors to left partition %d and %d vectors to right partition %d" ,
720
- len ( leftOffsets ) , sourceState .Target1 , len ( rightOffsets ) , sourceState .Target2 )
720
+ leftCount , sourceState .Target1 , vectors . Count - leftCount , sourceState .Target2 )
721
721
}()
722
722
723
- tempOffsets := fw .workspace .AllocUint64s (vectors .Count )
724
- defer fw .workspace .FreeUint64s (tempOffsets )
723
+ tempAssignments := fw .workspace .AllocUint64s (vectors .Count )
724
+ defer fw .workspace .FreeUint64s (tempAssignments )
725
725
726
726
// Assign vectors to the partition with the nearest centroid.
727
727
kmeans := BalancedKmeans {Workspace : & fw .workspace , Rand : fw .rng }
728
- leftOffsets , rightOffsets = kmeans .AssignPartitions (
729
- vectors , leftMetadata .Centroid , rightMetadata .Centroid , tempOffsets )
728
+ leftCount = kmeans .AssignPartitions (
729
+ vectors , leftMetadata .Centroid , rightMetadata .Centroid , tempAssignments )
730
730
731
731
// Assign vectors and associated keys and values into contiguous left and right groupings.
732
732
childKeys := slices .Clone (sourcePartition .ChildKeys ())
733
733
valueBytes := slices .Clone (sourcePartition .ValueBytes ())
734
- splitPartitionData (& fw .workspace , vectors , childKeys , valueBytes , leftOffsets , rightOffsets )
734
+ splitPartitionData (& fw .workspace , vectors , childKeys , valueBytes , tempAssignments )
735
735
leftVectors := vectors
736
- rightVectors := leftVectors .SplitAt (len ( leftOffsets ) )
737
- leftChildKeys := childKeys [:len ( leftOffsets ) ]
738
- rightChildKeys := childKeys [len ( leftOffsets ) :]
739
- leftValueBytes := valueBytes [:len ( leftOffsets ) ]
740
- rightValueBytes := valueBytes [len ( leftOffsets ) :]
736
+ rightVectors := leftVectors .SplitAt (leftCount )
737
+ leftChildKeys := childKeys [:leftCount ]
738
+ rightChildKeys := childKeys [leftCount :]
739
+ leftValueBytes := valueBytes [:leftCount ]
740
+ rightValueBytes := valueBytes [leftCount :]
741
741
742
742
// Add vectors to left and right sub-partitions. Note that this may not be
743
743
// transactional; if an error occurs, any vectors already added may not be
@@ -748,7 +748,7 @@ func (fw *fixupWorker) copyToSplitSubPartitions(
748
748
leftPartitionKey , leftVectors , leftChildKeys , leftValueBytes , leftMetadata )
749
749
if added {
750
750
log .VEventf (ctx , 2 , "assigned %d vectors to left partition %d (level=%d, state=%s)" ,
751
- len ( leftOffsets ) , leftPartitionKey , leftMetadata .Level , leftMetadata .StateDetails .String ())
751
+ leftCount , leftPartitionKey , leftMetadata .Level , leftMetadata .StateDetails .String ())
752
752
}
753
753
if err != nil {
754
754
return err
@@ -768,7 +768,7 @@ func (fw *fixupWorker) copyToSplitSubPartitions(
768
768
rightPartitionKey , rightVectors , rightChildKeys , rightValueBytes , rightMetadata )
769
769
if added {
770
770
log .VEventf (ctx , 2 , "assigned %d vectors to right partition %d (level=%d, state=%s)" ,
771
- len ( rightOffsets ) , rightPartitionKey ,
771
+ vectors . Count - leftCount , rightPartitionKey ,
772
772
rightMetadata .Level , rightMetadata .StateDetails .String ())
773
773
}
774
774
if err != nil {
@@ -798,62 +798,55 @@ func suppressRaceErrors(err error) (PartitionMetadata, error) {
798
798
}
799
799
800
800
// splitPartitionData groups the provided partition data according to the left
801
- // and right offsets. All data referenced by left offsets will be moved to the
802
- // left of each set or slice. All data referenced by right offsets will be moved
803
- // to the right. The internal ordering of elements on each side is not defined.
804
- //
805
- // TODO(andyk): Passing in left and right offsets makes this overly complex. It
806
- // would be better to pass an assignments slice of the same length as the
807
- // partition data, where 0=left and 1=right.
801
+ // and right offsets. The assignments slice specifies which partition the data
802
+ // will be moved into: 0 for left and 1 for right. The internal ordering of
803
+ // elements on each side is not defined.
808
804
func splitPartitionData (
809
805
w * workspace.T ,
810
806
vectors vector.Set ,
811
807
childKeys []ChildKey ,
812
808
valueBytes []ValueBytes ,
813
- leftOffsets , rightOffsets []uint64 ,
809
+ assignments []uint64 ,
814
810
) {
815
811
tempVector := w .AllocFloats (vectors .Dims )
816
812
defer w .FreeFloats (tempVector )
817
813
814
+ // Use a two-pointer approach to partition the data. left points to the next
815
+ // position where a left element should go. right points to the next position
816
+ // where a right element should go (from the end).
818
817
left := 0
819
- right := 0
818
+ right := len (assignments ) - 1
819
+
820
820
for {
821
- // Find a misplaced "right" element from the left side.
822
- var leftOffset int
823
- for {
824
- if left >= len (leftOffsets ) {
825
- return
826
- }
827
- leftOffset = int (leftOffsets [left ])
821
+ // Find a misplaced element on the left side (should be 0 but is 1).
822
+ for left < right && assignments [left ] == 0 {
828
823
left ++
829
- if leftOffset >= len (leftOffsets ) {
830
- break
831
- }
832
824
}
833
825
834
- // There must be a misplaced "left" element from the right side.
835
- var rightOffset int
836
- for {
837
- rightOffset = int ( rightOffsets [ right ])
838
- right ++
839
- if rightOffset < len ( leftOffsets ) {
840
- break
841
- }
826
+ // Find a misplaced element on the right side (should be 1 but is 0) .
827
+ for left < right && assignments [ right ] == 1 {
828
+ right --
829
+ }
830
+
831
+ if left >= right {
832
+ // No more misplaced elements, so break.
833
+ break
842
834
}
843
835
844
- // Swap the two elements.
845
- rightToLeft := vectors .At (leftOffset )
846
- leftToRight := vectors .At (rightOffset )
847
- copy (tempVector , rightToLeft )
848
- copy (rightToLeft , leftToRight )
849
- copy (leftToRight , tempVector )
836
+ // Swap vectors.
837
+ leftVector := vectors .At (left )
838
+ rightVector := vectors .At (right )
839
+ copy (tempVector , leftVector )
840
+ copy (leftVector , rightVector )
841
+ copy (rightVector , tempVector )
842
+
843
+ // Swap child keys.
844
+ childKeys [left ], childKeys [right ] = childKeys [right ], childKeys [left ]
850
845
851
- tempChildKey := childKeys [leftOffset ]
852
- childKeys [leftOffset ] = childKeys [rightOffset ]
853
- childKeys [rightOffset ] = tempChildKey
846
+ // Swap value bytes.
847
+ valueBytes [left ], valueBytes [right ] = valueBytes [right ], valueBytes [left ]
854
848
855
- tempValueBytes := valueBytes [leftOffset ]
856
- valueBytes [leftOffset ] = valueBytes [rightOffset ]
857
- valueBytes [rightOffset ] = tempValueBytes
849
+ left ++
850
+ right --
858
851
}
859
852
}
0 commit comments