1717import org .apache .lucene .internal .hppc .IntArrayList ;
1818import org .apache .lucene .store .IndexInput ;
1919import org .apache .lucene .store .IndexOutput ;
20- import org .apache .lucene .util .InfoStream ;
2120import org .apache .lucene .util .VectorUtil ;
2221import org .apache .lucene .util .quantization .OptimizedScalarQuantizer ;
2322import org .elasticsearch .index .codec .vectors .cluster .HierarchicalKMeans ;
2423import org .elasticsearch .index .codec .vectors .cluster .KMeansResult ;
24+ import org .elasticsearch .logging .LogManager ;
25+ import org .elasticsearch .logging .Logger ;
2526import org .elasticsearch .simdvec .ES91OSQVectorsScorer ;
2627
2728import java .io .IOException ;
3132import static org .apache .lucene .codecs .lucene102 .Lucene102BinaryQuantizedVectorsFormat .INDEX_BITS ;
3233import static org .apache .lucene .util .quantization .OptimizedScalarQuantizer .discretize ;
3334import static org .apache .lucene .util .quantization .OptimizedScalarQuantizer .packAsBinary ;
34- import static org .elasticsearch .index .codec .vectors .IVFVectorsFormat .IVF_VECTOR_COMPONENT ;
3535
3636/**
3737 * Default implementation of {@link IVFVectorsWriter}. It uses {@link HierarchicalKMeans} algorithm to
3838 * partition the vector space, and then stores the centroids and posting list in a sequential
3939 * fashion.
4040 */
4141public class DefaultIVFVectorsWriter extends IVFVectorsWriter {
42+ private static final Logger logger = LogManager .getLogger (DefaultIVFVectorsWriter .class );
4243
4344 private final int vectorPerCluster ;
4445
@@ -53,7 +54,6 @@ long[] buildAndWritePostingsLists(
5354 CentroidSupplier centroidSupplier ,
5455 FloatVectorValues floatVectorValues ,
5556 IndexOutput postingsOutput ,
56- InfoStream infoStream ,
5757 IntArrayList [] assignmentsByCluster
5858 ) throws IOException {
5959 // write the posting lists
@@ -79,14 +79,14 @@ long[] buildAndWritePostingsLists(
7979 writePostingList (cluster , postingsOutput , binarizedByteVectorValues );
8080 }
8181
82- if (infoStream . isEnabled ( IVF_VECTOR_COMPONENT )) {
83- printClusterQualityStatistics (assignmentsByCluster , infoStream );
82+ if (logger . isDebugEnabled ( )) {
83+ printClusterQualityStatistics (assignmentsByCluster );
8484 }
8585
8686 return offsets ;
8787 }
8888
89- private static void printClusterQualityStatistics (IntArrayList [] clusters , InfoStream infoStream ) {
89+ private static void printClusterQualityStatistics (IntArrayList [] clusters ) {
9090 float min = Float .MAX_VALUE ;
9191 float max = Float .MIN_VALUE ;
9292 float mean = 0 ;
@@ -105,20 +105,9 @@ private static void printClusterQualityStatistics(IntArrayList[] clusters, InfoS
105105 max = Math .max (max , cluster .size ());
106106 }
107107 float variance = m2 / (clusters .length - 1 );
108- infoStream .message (
109- IVF_VECTOR_COMPONENT ,
110- "Centroid count: "
111- + clusters .length
112- + " min: "
113- + min
114- + " max: "
115- + max
116- + " mean: "
117- + mean
118- + " stdDev: "
119- + Math .sqrt (variance )
120- + " variance: "
121- + variance
108+ logger .debug (
109+ "Centroid count: {} min: {} max: {} mean: {} stdDev: {} variance: {}" ,
110+ clusters .length , min , max , mean , Math .sqrt (variance ), variance
122111 );
123112 }
124113
@@ -208,17 +197,16 @@ CentroidAssignments calculateAndWriteCentroids(
208197 float [] globalCentroid
209198 ) throws IOException {
210199 // TODO: take advantage of prior generated clusters from mergeState in the future
211- return calculateAndWriteCentroids (fieldInfo , floatVectorValues , centroidOutput , mergeState . infoStream , globalCentroid , false );
200+ return calculateAndWriteCentroids (fieldInfo , floatVectorValues , centroidOutput , globalCentroid , false );
212201 }
213202
214203 CentroidAssignments calculateAndWriteCentroids (
215204 FieldInfo fieldInfo ,
216205 FloatVectorValues floatVectorValues ,
217206 IndexOutput centroidOutput ,
218- InfoStream infoStream ,
219207 float [] globalCentroid
220208 ) throws IOException {
221- return calculateAndWriteCentroids (fieldInfo , floatVectorValues , centroidOutput , infoStream , globalCentroid , true );
209+ return calculateAndWriteCentroids (fieldInfo , floatVectorValues , centroidOutput , globalCentroid , true );
222210 }
223211
224212 /**
@@ -228,7 +216,6 @@ CentroidAssignments calculateAndWriteCentroids(
228216 * @param fieldInfo merging field info
229217 * @param floatVectorValues the float vector values to merge
230218 * @param centroidOutput the centroid output
231- * @param infoStream the merge state
232219 * @param globalCentroid the global centroid, calculated by this method and used to quantize the centroids
233220 * @param cacheCentroids whether the centroids are kept or discarded once computed
234221 * @return the vector assignments, soar assignments, and if asked the centroids themselves that were computed
@@ -238,7 +225,6 @@ CentroidAssignments calculateAndWriteCentroids(
238225 FieldInfo fieldInfo ,
239226 FloatVectorValues floatVectorValues ,
240227 IndexOutput centroidOutput ,
241- InfoStream infoStream ,
242228 float [] globalCentroid ,
243229 boolean cacheCentroids
244230 ) throws IOException {
@@ -266,12 +252,9 @@ CentroidAssignments calculateAndWriteCentroids(
266252 // write centroids
267253 writeCentroids (centroids , fieldInfo , globalCentroid , centroidOutput );
268254
269- if (infoStream .isEnabled (IVF_VECTOR_COMPONENT )) {
270- infoStream .message (
271- IVF_VECTOR_COMPONENT ,
272- "calculate centroids and assign vectors time ms: " + ((System .nanoTime () - nanoTime ) / 1000000.0 )
273- );
274- infoStream .message (IVF_VECTOR_COMPONENT , "final centroid count: " + centroids .length );
255+ if (logger .isDebugEnabled ()) {
256+ logger .debug ("calculate centroids and assign vectors time ms: {}" , (System .nanoTime () - nanoTime ) / 1000000.0 );
257+ logger .debug ("final centroid count: {}" , centroids .length );
275258 }
276259
277260 IntArrayList [] assignmentsByCluster = new IntArrayList [centroids .length ];
0 commit comments