@@ -17,8 +17,11 @@
 import org.apache.lucene.store.IOContext;
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.util.LongValues;
 import org.apache.lucene.util.VectorUtil;
 import org.apache.lucene.util.hnsw.IntToIntFunction;
+import org.apache.lucene.util.packed.PackedInts;
+import org.apache.lucene.util.packed.PackedLongValues;
 import org.elasticsearch.index.codec.vectors.cluster.HierarchicalKMeans;
 import org.elasticsearch.index.codec.vectors.cluster.KMeansResult;
 import org.elasticsearch.logging.LogManager;
@@ -46,7 +49,7 @@ public DefaultIVFVectorsWriter(SegmentWriteState state, FlatVectorsWriter rawVec
     }
 
     @Override
-    long[] buildAndWritePostingsLists(
+    LongValues buildAndWritePostingsLists(
         FieldInfo fieldInfo,
         CentroidSupplier centroidSupplier,
         FloatVectorValues floatVectorValues,
@@ -81,7 +84,7 @@ long[] buildAndWritePostingsLists(
             }
         }
         // write the posting lists
-        final long[] offsets = new long[centroidSupplier.size()];
+        final PackedLongValues.Builder offsets = PackedLongValues.monotonicBuilder(PackedInts.COMPACT);
         DocIdsWriter docIdsWriter = new DocIdsWriter();
         DiskBBQBulkWriter bulkWriter = new DiskBBQBulkWriter.OneBitDiskBBQBulkWriter(ES91OSQVectorsScorer.BULK_SIZE, postingsOutput);
         OnHeapQuantizedVectors onHeapQuantizedVectors = new OnHeapQuantizedVectors(
@@ -93,7 +96,7 @@ long[] buildAndWritePostingsLists(
             float[] centroid = centroidSupplier.centroid(c);
             int[] cluster = assignmentsByCluster[c];
             // TODO align???
-            offsets[c] = postingsOutput.getFilePointer();
+            offsets.add(postingsOutput.getFilePointer());
             int size = cluster.length;
             postingsOutput.writeVInt(size);
             postingsOutput.writeInt(Float.floatToIntBits(VectorUtil.dotProduct(centroid, centroid)));
@@ -109,11 +112,11 @@ long[] buildAndWritePostingsLists(
             printClusterQualityStatistics(assignmentsByCluster);
         }
 
-        return offsets;
+        return offsets.build();
     }
 
     @Override
-    long[] buildAndWritePostingsLists(
+    LongValues buildAndWritePostingsLists(
         FieldInfo fieldInfo,
         CentroidSupplier centroidSupplier,
         FloatVectorValues floatVectorValues,
@@ -199,7 +202,7 @@ long[] buildAndWritePostingsLists(
         }
         // now we can read the quantized vectors from the temporary file
        try (IndexInput quantizedVectorsInput = mergeState.segmentInfo.dir.openInput(quantizedVectorsTempName, IOContext.DEFAULT)) {
-            final long[] offsets = new long[centroidSupplier.size()];
+            final PackedLongValues.Builder offsets = PackedLongValues.monotonicBuilder(PackedInts.COMPACT);
             OffHeapQuantizedVectors offHeapQuantizedVectors = new OffHeapQuantizedVectors(
                 quantizedVectorsInput,
                 fieldInfo.getVectorDimension()
@@ -210,9 +213,9 @@ long[] buildAndWritePostingsLists(
                 float[] centroid = centroidSupplier.centroid(c);
                 int[] cluster = assignmentsByCluster[c];
                 boolean[] isOverspill = isOverspillByCluster[c];
-                // TODO align???
-                offsets[c] = postingsOutput.getFilePointer();
+                offsets.add(postingsOutput.getFilePointer());
                 int size = cluster.length;
+                // TODO align???
                 postingsOutput.writeVInt(size);
                 postingsOutput.writeInt(Float.floatToIntBits(VectorUtil.dotProduct(centroid, centroid)));
                 offHeapQuantizedVectors.reset(size, ord -> isOverspill[ord], ord -> cluster[ord]);
@@ -226,7 +229,7 @@ long[] buildAndWritePostingsLists(
             if (logger.isDebugEnabled()) {
                 printClusterQualityStatistics(assignmentsByCluster);
             }
-            return offsets;
+            return offsets.build();
         }
     }
 
@@ -270,7 +273,7 @@ void writeCentroids(
         FieldInfo fieldInfo,
         CentroidSupplier centroidSupplier,
         float[] globalCentroid,
-        long[] offsets,
+        LongValues offsets,
         IndexOutput centroidOutput
     ) throws IOException {
 
@@ -302,7 +305,7 @@ void writeCentroids(
             // write the centroids
             centroidOutput.writeBytes(buffer.array(), buffer.array().length);
             // write the offset of this posting list
-            centroidOutput.writeLong(offsets[i]);
+            centroidOutput.writeLong(offsets.get(i));
         }
     }
 
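For context on the data-structure swap (not part of the diff itself): `postingsOutput.getFilePointer()` only grows as posting lists are appended, so the offsets form a monotonically increasing sequence, which is what `PackedLongValues.monotonicBuilder` compresses well; the frozen `LongValues` returned by `build()` is then read back positionally in `writeCentroids` via `get(i)`. Below is a minimal standalone sketch of that lifecycle; the class name, the `main` driver, and the fake posting-list sizes are illustrative assumptions, not code from this PR.

```java
import org.apache.lucene.util.LongValues;
import org.apache.lucene.util.packed.PackedInts;
import org.apache.lucene.util.packed.PackedLongValues;

public class MonotonicOffsetsSketch {
    public static void main(String[] args) {
        // Posting-list offsets are file pointers, so they only ever increase.
        // A monotonic builder stores each value's deviation from a linear
        // approximation of the sequence, packed with no padding (COMPACT).
        PackedLongValues.Builder offsets = PackedLongValues.monotonicBuilder(PackedInts.COMPACT);

        long filePointer = 0;                              // stand-in for postingsOutput.getFilePointer()
        int[] postingListSizes = {1024, 4096, 512, 2048};  // hypothetical byte sizes per cluster
        for (int size : postingListSizes) {
            offsets.add(filePointer);                      // record where this posting list starts
            filePointer += size;                           // pretend we wrote `size` bytes
        }

        LongValues packed = offsets.build();               // frozen, read-only view
        for (int c = 0; c < postingListSizes.length; c++) {
            // writeCentroids reads the offsets back the same way: offsets.get(i)
            System.out.println("posting list " + c + " starts at byte " + packed.get(c));
        }
    }
}
```

Compared with the previous `new long[centroidSupplier.size()]`, the packed form typically needs well under 64 bits per offset, which matters when a segment has many centroids; the trade-off is going through `get(i)` instead of a raw array read.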