@@ -139,40 +139,40 @@ private static int getBestCentroidFromNeighbours(
139139 NeighborHood neighborhood ,
140140 float [] distances
141141 ) {
142- final int limit = neighborhood .neighbors .length - 3 ;
142+ final int limit = neighborhood .neighbors () .length - 3 ;
143143 int bestCentroidOffset = centroidIdx ;
144144 assert centroidIdx >= 0 && centroidIdx < centroids .length ;
145145 float minDsq = VectorUtil .squareDistance (vector , centroids [centroidIdx ]);
146146 int i = 0 ;
147147 for (; i < limit ; i += 4 ) {
148- if (minDsq < neighborhood .maxIntraDistance ) {
148+ if (minDsq < neighborhood .maxIntraDistance () ) {
149149 // if the distance found is smaller than the maximum intra-cluster distance
150150 // we don't consider it for further re-assignment
151151 return bestCentroidOffset ;
152152 }
153153 ESVectorUtil .squareDistanceBulk (
154154 vector ,
155- centroids [neighborhood .neighbors [i ]],
156- centroids [neighborhood .neighbors [i + 1 ]],
157- centroids [neighborhood .neighbors [i + 2 ]],
158- centroids [neighborhood .neighbors [i + 3 ]],
155+ centroids [neighborhood .neighbors () [i ]],
156+ centroids [neighborhood .neighbors () [i + 1 ]],
157+ centroids [neighborhood .neighbors () [i + 2 ]],
158+ centroids [neighborhood .neighbors () [i + 3 ]],
159159 distances
160160 );
161161 for (int j = 0 ; j < distances .length ; j ++) {
162162 float dsq = distances [j ];
163163 if (dsq < minDsq ) {
164164 minDsq = dsq ;
165- bestCentroidOffset = neighborhood .neighbors [i + j ];
165+ bestCentroidOffset = neighborhood .neighbors () [i + j ];
166166 }
167167 }
168168 }
169- for (; i < neighborhood .neighbors .length ; i ++) {
170- if (minDsq < neighborhood .maxIntraDistance ) {
169+ for (; i < neighborhood .neighbors () .length ; i ++) {
170+ if (minDsq < neighborhood .maxIntraDistance () ) {
171171 // if the distance found is smaller than the maximum intra-cluster distance
172172 // we don't consider it for further re-assignment
173173 return bestCentroidOffset ;
174174 }
175- int offset = neighborhood .neighbors [i ];
175+ int offset = neighborhood .neighbors () [i ];
176176 // float score = neighborhood.scores[i];
177177 assert offset >= 0 && offset < centroids .length : "Invalid neighbor offset: " + offset ;
178178 // compute the distance to the centroid
@@ -210,52 +210,6 @@ private static int getBestCentroid(float[][] centroids, float[] vector, float[]
210210 return bestCentroidOffset ;
211211 }
212212
213- private NeighborHood [] computeNeighborhoods (float [][] centers , int clustersPerNeighborhood ) { // builds one NeighborHood per center from the pairwise squared distances between centers
214- int k = centers .length ;
215- assert k > clustersPerNeighborhood ;
216- NeighborQueue [] neighborQueues = new NeighborQueue [k ];
217- for (int i = 0 ; i < k ; i ++) {
218- neighborQueues [i ] = new NeighborQueue (clustersPerNeighborhood , true ); // one queue per center; NOTE(review): the meaning of the `true` flag is not visible here - confirm against NeighborQueue
219- }
220- final float [] scores = new float [4 ]; // output buffer reused by every 4-wide squareDistanceBulk call
221- final int limit = k - 3 ; // the bulk loop only runs while j + 3 is still a valid index
222- for (int i = 0 ; i < k - 1 ; i ++) {
223- float [] center = centers [i ];
224- int j = i + 1 ; // only pairs with j > i are computed; each distance is inserted into both centers' queues below
225- for (; j < limit ; j += 4 ) {
226- ESVectorUtil .squareDistanceBulk (center , centers [j ], centers [j + 1 ], centers [j + 2 ], centers [j + 3 ], scores );
227- for (int h = 0 ; h < 4 ; h ++) {
228- neighborQueues [j + h ].insertWithOverflow (i , scores [h ]);
229- neighborQueues [i ].insertWithOverflow (j + h , scores [h ]);
230- }
231- }
232- for (; j < k ; j ++) { // remaining centers that did not fill a group of four
233- float dsq = VectorUtil .squareDistance (center , centers [j ]);
234- neighborQueues [j ].insertWithOverflow (i , dsq );
235- neighborQueues [i ].insertWithOverflow (j , dsq );
236- }
237- }
238-
239- NeighborHood [] neighborhoods = new NeighborHood [k ];
240- for (int i = 0 ; i < k ; i ++) {
241- NeighborQueue queue = neighborQueues [i ];
242- if (queue .size () == 0 ) {
243- // nothing was queued for this center: use the shared EMPTY neighborhood
244- neighborhoods [i ] = NeighborHood .EMPTY ;
245- continue ;
246- }
247- // read the queue's top score as maxIntraDistance before draining, then pop every entry into the array back to front
248- int [] neighbors = new int [queue .size ()];
249- float maxIntraDistance = queue .topScore ();
250- int iter = 0 ;
251- while (queue .size () > 0 ) {
252- neighbors [neighbors .length - ++iter ] = queue .pop (); // first pop lands in the last slot; presumably this leaves the nearest neighbor first - confirm NeighborQueue pop order
253- }
254- neighborhoods [i ] = new NeighborHood (neighbors , maxIntraDistance );
255- }
256- return neighborhoods ;
257- }
258-
259213 private void assignSpilled (
260214 FloatVectorValues vectors ,
261215 KMeansIntermediate kmeansIntermediate ,
@@ -299,8 +253,8 @@ private void assignSpilled(
299253 if (neighborhoods != null ) {
300254 assert neighborhoods [currAssignment ] != null ;
301255 NeighborHood neighborhood = neighborhoods [currAssignment ];
302- centroidCount = neighborhood .neighbors .length ;
303- centroidOrds = c -> neighborhood .neighbors [c ];
256+ centroidCount = neighborhood .neighbors () .length ;
257+ centroidOrds = c -> neighborhood .neighbors () [c ];
304258 } else {
305259 centroidCount = centroids .length - 1 ;
306260 centroidOrds = c -> c < currAssignment ? c : c + 1 ; // skip the current centroid
@@ -344,10 +298,6 @@ private void assignSpilled(
344298 }
345299 }
346300
347- record NeighborHood (int [] neighbors , float maxIntraDistance ) { // neighbor centroid offsets plus the distance threshold compared against during re-assignment
348- static final NeighborHood EMPTY = new NeighborHood (new int [0 ], Float .POSITIVE_INFINITY ); // shared instance with no neighbors and an infinite maxIntraDistance
349- }
350-
351301 /**
352302 * cluster using a Lloyd k-means algorithm that is not neighbor aware
353303 *
@@ -390,7 +340,7 @@ private void doCluster(FloatVectorValues vectors, KMeansIntermediate kMeansInter
390340 NeighborHood [] neighborhoods = null ;
391341 // if there are very few centroids, don't bother with neighborhoods or neighbor aware clustering
392342 if (neighborAware && centroids .length > clustersPerNeighborhood ) {
393- neighborhoods = computeNeighborhoods (centroids , clustersPerNeighborhood );
343+ neighborhoods = NeighborHood . computeNeighborhoods (centroids , clustersPerNeighborhood );
394344 }
395345 cluster (vectors , kMeansIntermediate , neighborhoods );
396346 if (neighborAware && soarLambda >= 0 ) {
0 commit comments