@@ -148,6 +148,7 @@ static void writeCentroids(float[][] centroids, FieldInfo fieldInfo, float[] glo
148148 }
149149 }
150150
151+ @ Override
151152 CentroidAssignments calculateAndWriteCentroids (
152153 FieldInfo fieldInfo ,
153154 FloatVectorValues floatVectorValues ,
@@ -156,16 +157,7 @@ CentroidAssignments calculateAndWriteCentroids(
156157 float [] globalCentroid
157158 ) throws IOException {
158159 // TODO: take advantage of prior generated clusters from mergeState in the future
159- return calculateAndWriteCentroids (fieldInfo , floatVectorValues , centroidOutput , globalCentroid , false );
160- }
161-
162- CentroidAssignments calculateAndWriteCentroids (
163- FieldInfo fieldInfo ,
164- FloatVectorValues floatVectorValues ,
165- IndexOutput centroidOutput ,
166- float [] globalCentroid
167- ) throws IOException {
168- return calculateAndWriteCentroids (fieldInfo , floatVectorValues , centroidOutput , globalCentroid , true );
160+ return calculateAndWriteCentroids (fieldInfo , floatVectorValues , centroidOutput , globalCentroid );
169161 }
170162
171163 /**
@@ -176,26 +168,22 @@ CentroidAssignments calculateAndWriteCentroids(
176168 * @param floatVectorValues the float vector values to merge
177169 * @param centroidOutput the centroid output
178170 * @param globalCentroid the global centroid, calculated by this method and used to quantize the centroids
179- * @param cacheCentroids whether the centroids are kept or discarded once computed
180171 * @return the vector assignments, soar assignments, and the centroids themselves that were computed
181172 * @throws IOException if an I/O error occurs
182173 */
174+ @ Override
183175 CentroidAssignments calculateAndWriteCentroids (
184176 FieldInfo fieldInfo ,
185177 FloatVectorValues floatVectorValues ,
186178 IndexOutput centroidOutput ,
187- float [] globalCentroid ,
188- boolean cacheCentroids
189- ) throws IOException {
179+ float [] globalCentroid
180+ ) throws IOException {
190181
191182 long nanoTime = System .nanoTime ();
192183
193184 // TODO: consider hinting / bootstrapping hierarchical kmeans with the prior segments centroids
194185 KMeansResult kMeansResult = new HierarchicalKMeans (floatVectorValues .dimension ()).cluster (floatVectorValues , vectorPerCluster );
195186 float [][] centroids = kMeansResult .centroids ();
196- int [] assignments = kMeansResult .assignments ();
197- int [] soarAssignments = kMeansResult .soarAssignments ();
198-
199187 // TODO: for flush we are doing this over the vectors and here centroids which seems duplicative
200188 // preliminary tests suggest recall is good using only centroids but need to do further evaluation
201189 // TODO: push this logic into vector util?
@@ -215,7 +203,13 @@ CentroidAssignments calculateAndWriteCentroids(
215203 logger .debug ("calculate centroids and assign vectors time ms: {}" , (System .nanoTime () - nanoTime ) / 1000000.0 );
216204 logger .debug ("final centroid count: {}" , centroids .length );
217205 }
206+ return buildCentroidAssignments (kMeansResult );
207+ }
218208
209+ static CentroidAssignments buildCentroidAssignments (KMeansResult kMeansResult ) {
210+ float [][] centroids = kMeansResult .centroids ();
211+ int [] assignments = kMeansResult .assignments ();
212+ int [] soarAssignments = kMeansResult .soarAssignments ();
219213 int [] centroidVectorCount = new int [centroids .length ];
220214 for (int i = 0 ; i < assignments .length ; i ++) {
221215 centroidVectorCount [assignments [i ]]++;
@@ -242,12 +236,7 @@ CentroidAssignments calculateAndWriteCentroids(
242236 }
243237 }
244238 }
245-
246- if (cacheCentroids ) {
247- return new CentroidAssignments (centroids , assignmentsByCluster );
248- } else {
249- return new CentroidAssignments (centroids .length , assignmentsByCluster );
250- }
239+ return new CentroidAssignments (centroids , assignmentsByCluster );
251240 }
252241
253242 static void writeQuantizedValue (IndexOutput indexOutput , byte [] binaryValue , OptimizedScalarQuantizer .QuantizationResult corrections )
0 commit comments