@@ -109,9 +109,9 @@ LongValues buildAndWritePostingsLists(
109109 new OptimizedScalarQuantizer (fieldInfo .getVectorSimilarityFunction ())
110110 );
111111 final ByteBuffer buffer = ByteBuffer .allocate (fieldInfo .getVectorDimension () * Float .BYTES ).order (ByteOrder .LITTLE_ENDIAN );
112- int [] docIds = null ;
113- int [] docDeltas = null ;
114- int [] clusterOrds = null ;
112+ final int [] docIds = new int [ maxPostingListSize ] ;
113+ final int [] docDeltas = new int [ maxPostingListSize ] ;
114+ final int [] clusterOrds = new int [ maxPostingListSize ] ;
115115 DocIdsWriter idsWriter = new DocIdsWriter ();
116116 for (int c = 0 ; c < centroidSupplier .size (); c ++) {
117117 float [] centroid = centroidSupplier .centroid (c );
@@ -125,29 +125,21 @@ LongValues buildAndWritePostingsLists(
125125 int size = cluster .length ;
126126 // write docIds
127127 postingsOutput .writeVInt (size );
128- if (docIds == null || docIds .length < cluster .length ) {
129- docIds = new int [cluster .length ];
130- clusterOrds = new int [cluster .length ];
131- docDeltas = new int [cluster .length ];
132- }
133128 for (int j = 0 ; j < size ; j ++) {
134129 docIds [j ] = floatVectorValues .ordToDoc (cluster [j ]);
135130 clusterOrds [j ] = j ;
136131 }
137- final int [] finalDocs = docIds ;
138- final int [] finalOrds = clusterOrds ;
139132 // sort cluster.buffer by docIds values, this way cluster ordinals are sorted by docIds
140- new IntSorter (clusterOrds , i -> finalDocs [i ]).sort (0 , size );
133+ new IntSorter (clusterOrds , i -> docIds [i ]).sort (0 , size );
141134 // encode doc deltas
142135 for (int j = 0 ; j < size ; j ++) {
143- docDeltas [j ] = j == 0 ? finalDocs [ finalOrds [j ]] : finalDocs [ finalOrds [j ]] - finalDocs [ finalOrds [j - 1 ]];
136+ docDeltas [j ] = j == 0 ? docIds [ clusterOrds [j ]] : docIds [ clusterOrds [j ]] - docIds [ clusterOrds [j - 1 ]];
144137 }
145- final int [] finalDocDeltas = docDeltas ;
146- onHeapQuantizedVectors .reset (centroid , size , ord -> cluster [finalOrds [ord ]]);
138+ onHeapQuantizedVectors .reset (centroid , size , ord -> cluster [clusterOrds [ord ]]);
147139 // TODO we might want to consider putting the docIds in a separate file
148140 // to aid with only having to fetch vectors from slower storage when they are required
149141 // keeping them in the same file indicates we pull the entire file into cache
150- idsWriter .writeDocIds (i -> finalDocDeltas [i ], size , postingsOutput );
142+ idsWriter .writeDocIds (i -> docDeltas [i ], size , postingsOutput );
151143 // write vectors
152144 bulkWriter .writeVectors (onHeapQuantizedVectors );
153145 }
@@ -260,9 +252,9 @@ LongValues buildAndWritePostingsLists(
260252 // write the max posting list size
261253 postingsOutput .writeVInt (maxPostingListSize );
262254 // write the posting lists
263- int [] docIds = null ;
264- int [] docDeltas = null ;
265- int [] clusterOrds = null ;
255+ final int [] docIds = new int [ maxPostingListSize ] ;
256+ final int [] docDeltas = new int [ maxPostingListSize ] ;
257+ final int [] clusterOrds = new int [ maxPostingListSize ] ;
266258 DocIdsWriter idsWriter = new DocIdsWriter ();
267259 for (int c = 0 ; c < centroidSupplier .size (); c ++) {
268260 float [] centroid = centroidSupplier .centroid (c );
@@ -277,29 +269,21 @@ LongValues buildAndWritePostingsLists(
277269 // write docIds
278270 int size = cluster .length ;
279271 postingsOutput .writeVInt (size );
280- if (docIds == null || docIds .length < cluster .length ) {
281- docIds = new int [cluster .length ];
282- clusterOrds = new int [cluster .length ];
283- docDeltas = new int [cluster .length ];
284- }
285272 for (int j = 0 ; j < size ; j ++) {
286273 docIds [j ] = floatVectorValues .ordToDoc (cluster [j ]);
287274 clusterOrds [j ] = j ;
288275 }
289- final int [] finalDocs = docIds ;
290- final int [] finalOrds = clusterOrds ;
291276 // sort cluster.buffer by docIds values, this way cluster ordinals are sorted by docIds
292- new IntSorter (clusterOrds , i -> finalDocs [i ]).sort (0 , size );
277+ new IntSorter (clusterOrds , i -> docIds [i ]).sort (0 , size );
293278 // encode doc deltas
294279 for (int j = 0 ; j < size ; j ++) {
295- docDeltas [j ] = j == 0 ? finalDocs [ finalOrds [j ]] : finalDocs [ finalOrds [j ]] - finalDocs [ finalOrds [j - 1 ]];
280+ docDeltas [j ] = j == 0 ? docIds [ clusterOrds [j ]] : docIds [ clusterOrds [j ]] - docIds [ clusterOrds [j - 1 ]];
296281 }
297- final int [] finalDocDeltas = docDeltas ;
298- offHeapQuantizedVectors .reset (size , ord -> isOverspill [finalOrds [ord ]], ord -> cluster [finalOrds [ord ]]);
282+ offHeapQuantizedVectors .reset (size , ord -> isOverspill [clusterOrds [ord ]], ord -> cluster [clusterOrds [ord ]]);
299283 // TODO we might want to consider putting the docIds in a separate file
300284 // to aid with only having to fetch vectors from slower storage when they are required
301285 // keeping them in the same file indicates we pull the entire file into cache
302- idsWriter .writeDocIds (i -> finalDocDeltas [i ], size , postingsOutput );
286+ idsWriter .writeDocIds (i -> docDeltas [i ], size , postingsOutput );
303287 // write vectors
304288 bulkWriter .writeVectors (offHeapQuantizedVectors );
305289 }
0 commit comments