Skip to content

Commit dfcb185

Browse files
committed
Handle soar assignments when vector and centroid are very close
1 parent 404eb61 commit dfcb185

File tree

1 file changed

+20
-6
lines changed
  • server/src/main/java/org/elasticsearch/index/codec/vectors/cluster

1 file changed

+20
-6
lines changed

server/src/main/java/org/elasticsearch/index/codec/vectors/cluster/KMeansLocal.java

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,11 @@
2626
*/
2727
class KMeansLocal {
2828

29+
// the minimum squared distance from a vector to its assigned centroid that is considered "far enough" to compute the soar distance.
30+
// For vectors whose squared distance to their centroid is at or below this threshold, we use the plain squared distance to find the
31+
// second-closest centroid instead.
32+
private static final float SOAR_MIN_DISTANCE = 1e-16f;
33+
2934
final int sampleSize;
3035
final int maxIterations;
3136
final int clustersPerNeighborhood;
@@ -190,15 +195,18 @@ private int[] assignSpilled(FloatVectorValues vectors, List<int[]> neighborhoods
190195

191196
int currAssignment = assignments[i];
192197
float[] currentCentroid = centroids[currAssignment];
193-
for (int j = 0; j < vectors.dimension(); j++) {
194-
float diff = vector[j] - currentCentroid[j];
195-
diffs[j] = diff;
196-
}
197198

198199
// TODO: cache these?
199200
// float vectorCentroidDist = assignmentDistances[i];
200201
float vectorCentroidDist = VectorUtil.squareDistance(vector, currentCentroid);
201202

203+
if (vectorCentroidDist > SOAR_MIN_DISTANCE) {
204+
for (int j = 0; j < vectors.dimension(); j++) {
205+
float diff = vector[j] - currentCentroid[j];
206+
diffs[j] = diff;
207+
}
208+
}
209+
202210
int bestAssignment = -1;
203211
float minSoar = Float.MAX_VALUE;
204212
assert neighborhoods.get(currAssignment) != null;
@@ -207,13 +215,19 @@ private int[] assignSpilled(FloatVectorValues vectors, List<int[]> neighborhoods
207215
continue;
208216
}
209217
float[] neighborCentroid = centroids[neighbor];
210-
float soar = ESVectorUtil.soarDistance(vector, neighborCentroid, diffs, soarLambda, vectorCentroidDist);
218+
final float soar;
219+
if (vectorCentroidDist > SOAR_MIN_DISTANCE) {
220+
soar = ESVectorUtil.soarDistance(vector, neighborCentroid, diffs, soarLambda, vectorCentroidDist);
221+
} else {
222+
// if the vector is very close to the centroid, we look for the second-nearest centroid
223+
soar = VectorUtil.squareDistance(vector, neighborCentroid);
224+
}
211225
if (soar < minSoar) {
212226
bestAssignment = neighbor;
213227
minSoar = soar;
214228
}
215229
}
216-
230+
assert bestAssignment != -1 : "Failed to assign soar vector to centroid";
217231
spilledAssignments[i] = bestAssignment;
218232
}
219233

0 commit comments

Comments
 (0)