diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/cluster/HierarchicalKMeans.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/cluster/HierarchicalKMeans.java index 34d802e5f0aaf..43643e9a002d9 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/cluster/HierarchicalKMeans.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/cluster/HierarchicalKMeans.java @@ -95,13 +95,11 @@ KMeansIntermediate clusterAndSplit(final FloatVectorValues vectors, final int ta // TODO: consider adding cluster size counts to the kmeans algo // handle assignment here so we can track distance and cluster size int[] centroidVectorCount = new int[centroids.length]; + int effectiveK = 0; for (int assigment : assignments) { centroidVectorCount[assigment]++; - } - - int effectiveK = 0; - for (int j : centroidVectorCount) { - if (j > 0) { + // this cluster has received an assignment, it's now effective, but only count it once + if (centroidVectorCount[assigment] == 1) { effectiveK++; } }