Skip to content

Commit 742b39b

Browse files
authored
Update the HKMeansTest to Account for Allowing -1 SOAR Assignments (#135544)
1 parent b7dbb26 commit 742b39b

File tree

5 files changed

+14
-10
lines changed

5 files changed

+14
-10
lines changed

muted-tests.yml

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -663,9 +663,6 @@ tests:
663663
- class: org.elasticsearch.upgrades.MatchOnlyTextRollingUpgradeIT
664664
method: testIndexing {upgradedNodes=1}
665665
issue: https://github.com/elastic/elasticsearch/issues/135525
666-
- class: org.elasticsearch.index.codec.vectors.cluster.HierarchicalKMeansTests
667-
method: testFewDifferentValues
668-
issue: https://github.com/elastic/elasticsearch/issues/135538
669666

670667
# Examples:
671668
#

server/src/main/java/org/elasticsearch/index/codec/vectors/cluster/HierarchicalKMeans.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ public class HierarchicalKMeans {
2424
public static final int MAX_ITERATIONS_DEFAULT = 6;
2525
public static final int SAMPLES_PER_CLUSTER_DEFAULT = 64;
2626
public static final float DEFAULT_SOAR_LAMBDA = 1.0f;
27+
public static final int NO_SOAR_ASSIGNMENT = -1;
2728

2829
final int dimension;
2930
final int maxIterations;

server/src/main/java/org/elasticsearch/index/codec/vectors/cluster/KMeansLocal.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@
1919
import java.util.Arrays;
2020
import java.util.Random;
2121

22+
import static org.elasticsearch.index.codec.vectors.cluster.HierarchicalKMeans.NO_SOAR_ASSIGNMENT;
23+
2224
/**
2325
* k-means implementation specific to the needs of the {@link HierarchicalKMeans} algorithm that deals specifically
2426
* with finalizing nearby pre-established clusters and generate
@@ -240,7 +242,7 @@ private void assignSpilled(
240242
// TODO: cache these?
241243
float vectorCentroidDist = VectorUtil.squareDistance(vector, currentCentroid);
242244
if (vectorCentroidDist <= SOAR_MIN_DISTANCE) {
243-
spilledAssignments[i] = -1; // no SOAR assignment
245+
spilledAssignments[i] = NO_SOAR_ASSIGNMENT; // no SOAR assignment
244246
continue;
245247
}
246248

server/src/main/java/org/elasticsearch/index/codec/vectors/diskbbq/ES920DiskBBQVectorsWriter.java

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,8 @@
3838
import java.util.AbstractList;
3939
import java.util.Arrays;
4040

41+
import static org.elasticsearch.index.codec.vectors.cluster.HierarchicalKMeans.NO_SOAR_ASSIGNMENT;
42+
4143
/**
4244
* Default implementation of {@link IVFVectorsWriter}. It uses {@link HierarchicalKMeans} algorithm to
4345
* partition the vector space, and then stores the centroids and posting list in a sequential
@@ -75,7 +77,7 @@ CentroidOffsetAndLength buildAndWritePostingsLists(
7577
for (int i = 0; i < assignments.length; i++) {
7678
centroidVectorCount[assignments[i]]++;
7779
// if soar assignments are present, count them as well
78-
if (overspillAssignments.length > i && overspillAssignments[i] != -1) {
80+
if (overspillAssignments.length > i && overspillAssignments[i] != NO_SOAR_ASSIGNMENT) {
7981
centroidVectorCount[overspillAssignments[i]]++;
8082
}
8183
}
@@ -95,7 +97,7 @@ CentroidOffsetAndLength buildAndWritePostingsLists(
9597
// if soar assignments are present, add them to the cluster as well
9698
if (overspillAssignments.length > i) {
9799
int s = overspillAssignments[i];
98-
if (s != -1) {
100+
if (s != NO_SOAR_ASSIGNMENT) {
99101
assignmentsByCluster[s][centroidVectorCount[s]++] = i;
100102
}
101103
}
@@ -187,7 +189,7 @@ CentroidOffsetAndLength buildAndWritePostingsLists(
187189
int c = assignments[i];
188190
float[] centroid = centroidSupplier.centroid(c);
189191
float[] vector = floatVectorValues.vectorValue(i);
190-
boolean overspill = overspillAssignments.length > i && overspillAssignments[i] != -1;
192+
boolean overspill = overspillAssignments.length > i && overspillAssignments[i] != NO_SOAR_ASSIGNMENT;
191193
OptimizedScalarQuantizer.QuantizationResult result = quantizer.scalarQuantize(
192194
vector,
193195
scratch,
@@ -220,7 +222,7 @@ CentroidOffsetAndLength buildAndWritePostingsLists(
220222
for (int i = 0; i < assignments.length; i++) {
221223
centroidVectorCount[assignments[i]]++;
222224
// if soar assignments are present, count them as well
223-
if (overspillAssignments.length > i && overspillAssignments[i] != -1) {
225+
if (overspillAssignments.length > i && overspillAssignments[i] != NO_SOAR_ASSIGNMENT) {
224226
centroidVectorCount[overspillAssignments[i]]++;
225227
}
226228
}
@@ -242,7 +244,7 @@ CentroidOffsetAndLength buildAndWritePostingsLists(
242244
// if soar assignments are present, add them to the cluster as well
243245
if (overspillAssignments.length > i) {
244246
int s = overspillAssignments[i];
245-
if (s != -1) {
247+
if (s != NO_SOAR_ASSIGNMENT) {
246248
assignmentsByCluster[s][centroidVectorCount[s]] = i;
247249
isOverspillByCluster[s][centroidVectorCount[s]++] = true;
248250
}

server/src/test/java/org/elasticsearch/index/codec/vectors/cluster/HierarchicalKMeansTests.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@
1515
import java.util.ArrayList;
1616
import java.util.List;
1717

18+
import static org.elasticsearch.index.codec.vectors.cluster.HierarchicalKMeans.NO_SOAR_ASSIGNMENT;
19+
1820
public class HierarchicalKMeansTests extends ESTestCase {
1921

2022
public void testHKmeans() throws IOException {
@@ -127,7 +129,7 @@ public void testFewDifferentValues() throws IOException {
127129
assertEquals(nVectors, soarAssignments.length);
128130
// verify no duplicates exist
129131
for (int i = 0; i < assignments.length; i++) {
130-
assertTrue(soarAssignments[i] >= 0 && soarAssignments[i] < centroids.length);
132+
assertTrue((soarAssignments[i] == NO_SOAR_ASSIGNMENT || soarAssignments[i] >= 0) && soarAssignments[i] < centroids.length);
131133
assertNotEquals(assignments[i], soarAssignments[i]);
132134
}
133135
} else {

0 commit comments

Comments
 (0)