2626 */
2727class KMeansLocal {
2828
29+ // The minimum squared distance between a vector and its assigned centroid for the vector to be considered
30+ // "far enough" from that centroid to compute the SOAR distance. For vectors at or below this threshold, the
31+ // plain squared distance is used instead to find the second-closest centroid.
32+ private static final float SOAR_MIN_DISTANCE = 1e-16f ;
33+
2934 final int sampleSize ;
3035 final int maxIterations ;
3136 final int clustersPerNeighborhood ;
@@ -190,15 +195,18 @@ private int[] assignSpilled(FloatVectorValues vectors, List<int[]> neighborhoods
190195
191196 int currAssignment = assignments [i ];
192197 float [] currentCentroid = centroids [currAssignment ];
193- for (int j = 0 ; j < vectors .dimension (); j ++) {
194- float diff = vector [j ] - currentCentroid [j ];
195- diffs [j ] = diff ;
196- }
197198
198199 // TODO: cache these?
199200 // float vectorCentroidDist = assignmentDistances[i];
200201 float vectorCentroidDist = VectorUtil .squareDistance (vector , currentCentroid );
201202
203+ if (vectorCentroidDist > SOAR_MIN_DISTANCE ) {
204+ for (int j = 0 ; j < vectors .dimension (); j ++) {
205+ float diff = vector [j ] - currentCentroid [j ];
206+ diffs [j ] = diff ;
207+ }
208+ }
209+
202210 int bestAssignment = -1 ;
203211 float minSoar = Float .MAX_VALUE ;
204212 assert neighborhoods .get (currAssignment ) != null ;
@@ -207,13 +215,19 @@ private int[] assignSpilled(FloatVectorValues vectors, List<int[]> neighborhoods
207215 continue ;
208216 }
209217 float [] neighborCentroid = centroids [neighbor ];
210- float soar = ESVectorUtil .soarDistance (vector , neighborCentroid , diffs , soarLambda , vectorCentroidDist );
218+ final float soar ;
219+ if (vectorCentroidDist > SOAR_MIN_DISTANCE ) {
220+ soar = ESVectorUtil .soarDistance (vector , neighborCentroid , diffs , soarLambda , vectorCentroidDist );
221+ } else {
222+ // if the vector is very close to the centroid, we look for the second-nearest centroid
223+ soar = VectorUtil .squareDistance (vector , neighborCentroid );
224+ }
211225 if (soar < minSoar ) {
212226 bestAssignment = neighbor ;
213227 minSoar = soar ;
214228 }
215229 }
216-
230+ assert bestAssignment != - 1 : "Failed to assign soar vector to centroid" ;
217231 spilledAssignments [i ] = bestAssignment ;
218232 }
219233
0 commit comments