1717import org .apache .lucene .store .IOContext ;
1818import org .apache .lucene .store .IndexInput ;
1919import org .apache .lucene .store .IndexOutput ;
20+ import org .apache .lucene .util .LongValues ;
2021import org .apache .lucene .util .VectorUtil ;
2122import org .apache .lucene .util .hnsw .IntToIntFunction ;
23+ import org .apache .lucene .util .packed .PackedInts ;
24+ import org .apache .lucene .util .packed .PackedLongValues ;
2225import org .elasticsearch .index .codec .vectors .cluster .HierarchicalKMeans ;
2326import org .elasticsearch .index .codec .vectors .cluster .KMeansResult ;
2427import org .elasticsearch .logging .LogManager ;
@@ -46,7 +49,7 @@ public DefaultIVFVectorsWriter(SegmentWriteState state, FlatVectorsWriter rawVec
4649 }
4750
4851 @ Override
49- long [] buildAndWritePostingsLists (
52+ LongValues buildAndWritePostingsLists (
5053 FieldInfo fieldInfo ,
5154 CentroidSupplier centroidSupplier ,
5255 FloatVectorValues floatVectorValues ,
@@ -81,7 +84,7 @@ long[] buildAndWritePostingsLists(
8184 }
8285 }
8386 // write the posting lists
84- final long [] offsets = new long [ centroidSupplier . size ()] ;
87+ final PackedLongValues . Builder offsets = PackedLongValues . monotonicBuilder ( PackedInts . COMPACT ) ;
8588 DocIdsWriter docIdsWriter = new DocIdsWriter ();
8689 DiskBBQBulkWriter bulkWriter = new DiskBBQBulkWriter .OneBitDiskBBQBulkWriter (ES91OSQVectorsScorer .BULK_SIZE , postingsOutput );
8790 OnHeapQuantizedVectors onHeapQuantizedVectors = new OnHeapQuantizedVectors (
@@ -93,7 +96,7 @@ long[] buildAndWritePostingsLists(
9396 float [] centroid = centroidSupplier .centroid (c );
9497 int [] cluster = assignmentsByCluster [c ];
9598 // TODO align???
96- offsets [ c ] = postingsOutput .getFilePointer ();
99+ offsets . add ( postingsOutput .getFilePointer () );
97100 int size = cluster .length ;
98101 postingsOutput .writeVInt (size );
99102 postingsOutput .writeInt (Float .floatToIntBits (VectorUtil .dotProduct (centroid , centroid )));
@@ -109,11 +112,11 @@ long[] buildAndWritePostingsLists(
109112 printClusterQualityStatistics (assignmentsByCluster );
110113 }
111114
112- return offsets ;
115+ return offsets . build () ;
113116 }
114117
115118 @ Override
116- long [] buildAndWritePostingsLists (
119+ LongValues buildAndWritePostingsLists (
117120 FieldInfo fieldInfo ,
118121 CentroidSupplier centroidSupplier ,
119122 FloatVectorValues floatVectorValues ,
@@ -199,7 +202,7 @@ long[] buildAndWritePostingsLists(
199202 }
200203 // now we can read the quantized vectors from the temporary file
201204 try (IndexInput quantizedVectorsInput = mergeState .segmentInfo .dir .openInput (quantizedVectorsTempName , IOContext .DEFAULT )) {
202- final long [] offsets = new long [ centroidSupplier . size ()] ;
205+ final PackedLongValues . Builder offsets = PackedLongValues . monotonicBuilder ( PackedInts . COMPACT ) ;
203206 OffHeapQuantizedVectors offHeapQuantizedVectors = new OffHeapQuantizedVectors (
204207 quantizedVectorsInput ,
205208 fieldInfo .getVectorDimension ()
@@ -210,9 +213,9 @@ long[] buildAndWritePostingsLists(
210213 float [] centroid = centroidSupplier .centroid (c );
211214 int [] cluster = assignmentsByCluster [c ];
212215 boolean [] isOverspill = isOverspillByCluster [c ];
213- // TODO align???
214- offsets [c ] = postingsOutput .getFilePointer ();
216+ offsets .add (postingsOutput .getFilePointer ());
215217 int size = cluster .length ;
218+ // TODO align???
216219 postingsOutput .writeVInt (size );
217220 postingsOutput .writeInt (Float .floatToIntBits (VectorUtil .dotProduct (centroid , centroid )));
218221 offHeapQuantizedVectors .reset (size , ord -> isOverspill [ord ], ord -> cluster [ord ]);
@@ -226,7 +229,7 @@ long[] buildAndWritePostingsLists(
226229 if (logger .isDebugEnabled ()) {
227230 printClusterQualityStatistics (assignmentsByCluster );
228231 }
229- return offsets ;
232+ return offsets . build () ;
230233 }
231234 }
232235
@@ -270,7 +273,7 @@ void writeCentroids(
270273 FieldInfo fieldInfo ,
271274 CentroidSupplier centroidSupplier ,
272275 float [] globalCentroid ,
273- long [] offsets ,
276+ LongValues offsets ,
274277 IndexOutput centroidOutput
275278 ) throws IOException {
276279
@@ -302,7 +305,7 @@ void writeCentroids(
302305 // write the centroids
303306 centroidOutput .writeBytes (buffer .array (), buffer .array ().length );
304307 // write the offset of this posting list
305- centroidOutput .writeLong (offsets [ i ] );
308+ centroidOutput .writeLong (offsets . get ( i ) );
306309 }
307310 }
308311
0 commit comments