99
1010package org .elasticsearch .index .codec .vectors ;
1111
12+ import org .apache .logging .log4j .LogManager ;
13+ import org .apache .logging .log4j .Logger ;
1214import org .apache .lucene .codecs .hnsw .FlatVectorsWriter ;
1315import org .apache .lucene .index .FieldInfo ;
1416import org .apache .lucene .index .FloatVectorValues ;
1719import org .apache .lucene .internal .hppc .IntArrayList ;
1820import org .apache .lucene .store .IndexInput ;
1921import org .apache .lucene .store .IndexOutput ;
20- import org .apache .lucene .util .InfoStream ;
2122import org .apache .lucene .util .VectorUtil ;
2223import org .apache .lucene .util .quantization .OptimizedScalarQuantizer ;
2324import org .elasticsearch .index .codec .vectors .cluster .HierarchicalKMeans ;
3132import static org .apache .lucene .codecs .lucene102 .Lucene102BinaryQuantizedVectorsFormat .INDEX_BITS ;
3233import static org .apache .lucene .util .quantization .OptimizedScalarQuantizer .discretize ;
3334import static org .apache .lucene .util .quantization .OptimizedScalarQuantizer .packAsBinary ;
34- import static org .elasticsearch .index .codec .vectors .IVFVectorsFormat .IVF_VECTOR_COMPONENT ;
3535
3636/**
3737 * Default implementation of {@link IVFVectorsWriter}. It uses the {@link HierarchicalKMeans} algorithm to
3838 * partition the vector space, and then stores the centroids and posting lists in a sequential
3939 * fashion.
4040 */
4141public class DefaultIVFVectorsWriter extends IVFVectorsWriter {
42+ private static final Logger logger = LogManager .getLogger (DefaultIVFVectorsWriter .class );
4243
4344 private final int vectorPerCluster ;
4445
@@ -53,7 +54,6 @@ long[] buildAndWritePostingsLists(
5354 CentroidSupplier centroidSupplier ,
5455 FloatVectorValues floatVectorValues ,
5556 IndexOutput postingsOutput ,
56- InfoStream infoStream ,
5757 IntArrayList [] assignmentsByCluster
5858 ) throws IOException {
5959 // write the posting lists
@@ -79,14 +79,14 @@ long[] buildAndWritePostingsLists(
7979 writePostingList (cluster , postingsOutput , binarizedByteVectorValues );
8080 }
8181
82- if ( infoStream . isEnabled ( IVF_VECTOR_COMPONENT )) {
83- printClusterQualityStatistics (assignmentsByCluster , infoStream );
82+ if ( logger . isDebugEnabled ( )) {
83+ printClusterQualityStatistics (assignmentsByCluster );
8484 }
8585
8686 return offsets ;
8787 }
8888
89- private static void printClusterQualityStatistics (IntArrayList [] clusters , InfoStream infoStream ) {
89+ private static void printClusterQualityStatistics (IntArrayList [] clusters ) {
9090 float min = Float .MAX_VALUE ;
9191 float max = Float .MIN_VALUE ;
9292 float mean = 0 ;
@@ -105,21 +105,8 @@ private static void printClusterQualityStatistics(IntArrayList[] clusters, InfoS
105105 max = Math .max (max , cluster .size ());
106106 }
107107 float variance = m2 / (clusters .length - 1 );
108- infoStream .message (
109- IVF_VECTOR_COMPONENT ,
110- "Centroid count: "
111- + clusters .length
112- + " min: "
113- + min
114- + " max: "
115- + max
116- + " mean: "
117- + mean
118- + " stdDev: "
119- + Math .sqrt (variance )
120- + " variance: "
121- + variance
122- );
108+ logger .debug ("Centroid count: {} min: {} max: {} mean: {} stdDev: {} variance: {}" ,
109+ clusters .length , min , max , mean , Math .sqrt (variance ), variance );
123110 }
124111
125112 private void writePostingList (IntArrayList cluster , IndexOutput postingsOutput , BinarizedFloatVectorValues binarizedByteVectorValues )
@@ -208,17 +195,16 @@ CentroidAssignments calculateAndWriteCentroids(
208195 float [] globalCentroid
209196 ) throws IOException {
210197 // TODO: take advantage of prior generated clusters from mergeState in the future
211- return calculateAndWriteCentroids (fieldInfo , floatVectorValues , centroidOutput , mergeState . infoStream , globalCentroid , false );
198+ return calculateAndWriteCentroids (fieldInfo , floatVectorValues , centroidOutput , globalCentroid , false );
212199 }
213200
214201 CentroidAssignments calculateAndWriteCentroids (
215202 FieldInfo fieldInfo ,
216203 FloatVectorValues floatVectorValues ,
217204 IndexOutput centroidOutput ,
218- InfoStream infoStream ,
219205 float [] globalCentroid
220206 ) throws IOException {
221- return calculateAndWriteCentroids (fieldInfo , floatVectorValues , centroidOutput , infoStream , globalCentroid , true );
207+ return calculateAndWriteCentroids (fieldInfo , floatVectorValues , centroidOutput , globalCentroid , true );
222208 }
223209
224210 /**
@@ -228,7 +214,6 @@ CentroidAssignments calculateAndWriteCentroids(
228214 * @param fieldInfo merging field info
229215 * @param floatVectorValues the float vector values to merge
230216 * @param centroidOutput the centroid output
231- * @param infoStream the merge state
232217 * @param globalCentroid the global centroid, calculated by this method and used to quantize the centroids
233218 * @param cacheCentroids whether the centroids are kept or discarded once computed
234219 * @return the vector assignments, soar assignments, and if asked the centroids themselves that were computed
@@ -238,7 +223,6 @@ CentroidAssignments calculateAndWriteCentroids(
238223 FieldInfo fieldInfo ,
239224 FloatVectorValues floatVectorValues ,
240225 IndexOutput centroidOutput ,
241- InfoStream infoStream ,
242226 float [] globalCentroid ,
243227 boolean cacheCentroids
244228 ) throws IOException {
@@ -266,12 +250,9 @@ CentroidAssignments calculateAndWriteCentroids(
266250 // write centroids
267251 writeCentroids (centroids , fieldInfo , globalCentroid , centroidOutput );
268252
269- if (infoStream .isEnabled (IVF_VECTOR_COMPONENT )) {
270- infoStream .message (
271- IVF_VECTOR_COMPONENT ,
272- "calculate centroids and assign vectors time ms: " + ((System .nanoTime () - nanoTime ) / 1000000.0 )
273- );
274- infoStream .message (IVF_VECTOR_COMPONENT , "final centroid count: " + centroids .length );
253+ if (logger .isDebugEnabled ()) {
254+ logger .debug ("calculate centroids and assign vectors time ms: {}" , (System .nanoTime () - nanoTime ) / 1000000.0 );
255+ logger .debug ("final centroid count: {}" , centroids .length );
275256 }
276257
277258 IntArrayList [] assignmentsByCluster = new IntArrayList [centroids .length ];
0 commit comments