@@ -225,102 +225,107 @@ public int ordToDoc(int ord) {
225
225
}
226
226
227
227
@ Override
228
- @ SuppressForbidden (reason = "require usage of Lucene's IOUtils#deleteFilesIgnoringExceptions(...)" )
229
228
public final void mergeOneField (FieldInfo fieldInfo , MergeState mergeState ) throws IOException {
230
- rawVectorDelegate .mergeOneField (fieldInfo , mergeState );
231
229
if (fieldInfo .getVectorEncoding ().equals (VectorEncoding .FLOAT32 )) {
232
- final int numVectors ;
233
- String tempRawVectorsFileName = null ;
234
- boolean success = false ;
235
- // build a float vector values with random access. In order to do that we dump the vectors to
236
- // a temporary file
237
- // and write the docID follow by the vector
238
- try (IndexOutput out = mergeState .segmentInfo .dir .createTempOutput (mergeState .segmentInfo .name , "ivf_" , IOContext .DEFAULT )) {
239
- tempRawVectorsFileName = out .getName ();
240
- // TODO do this better, we shouldn't have to write to a temp file, we should be able to
241
- // to just from the merged vector values, the tricky part is the random access.
242
- numVectors = writeFloatVectorValues (fieldInfo , out , MergedVectorValues .mergeFloatVectorValues (fieldInfo , mergeState ));
243
- CodecUtil .writeFooter (out );
230
+ mergeOneFieldIVF (fieldInfo , mergeState );
231
+ }
232
+ // we merge the vectors at the end so we only have two copies of the vectors on disk at the same time.
233
+ rawVectorDelegate .mergeOneField (fieldInfo , mergeState );
234
+ }
235
+
236
+ @ SuppressForbidden (reason = "require usage of Lucene's IOUtils#deleteFilesIgnoringExceptions(...)" )
237
+ private void mergeOneFieldIVF (FieldInfo fieldInfo , MergeState mergeState ) throws IOException {
238
+ final int numVectors ;
239
+ String tempRawVectorsFileName = null ;
240
+ boolean success = false ;
241
+ // build a float vector values with random access. In order to do that we dump the vectors to
242
+ // a temporary file
243
+ // and write the docID followed by the vector
244
+ try (IndexOutput out = mergeState .segmentInfo .dir .createTempOutput (mergeState .segmentInfo .name , "ivf_" , IOContext .DEFAULT )) {
245
+ tempRawVectorsFileName = out .getName ();
246
+ // TODO do this better, we shouldn't have to write to a temp file, we should be able to
247
+ // read directly from the merged vector values; the tricky part is the random access.
248
+ numVectors = writeFloatVectorValues (fieldInfo , out , MergedVectorValues .mergeFloatVectorValues (fieldInfo , mergeState ));
249
+ CodecUtil .writeFooter (out );
250
+ success = true ;
251
+ } finally {
252
+ if (success == false && tempRawVectorsFileName != null ) {
253
+ org .apache .lucene .util .IOUtils .deleteFilesIgnoringExceptions (mergeState .segmentInfo .dir , tempRawVectorsFileName );
254
+ }
255
+ }
256
+ try (IndexInput in = mergeState .segmentInfo .dir .openInput (tempRawVectorsFileName , IOContext .DEFAULT )) {
257
+ float [] calculatedGlobalCentroid = new float [fieldInfo .getVectorDimension ()];
258
+ final FloatVectorValues floatVectorValues = getFloatVectorValues (fieldInfo , in , numVectors );
259
+ success = false ;
260
+ long centroidOffset ;
261
+ long centroidLength ;
262
+ String centroidTempName = null ;
263
+ int numCentroids ;
264
+ IndexOutput centroidTemp = null ;
265
+ CentroidAssignments centroidAssignments ;
266
+ try {
267
+ centroidTemp = mergeState .segmentInfo .dir .createTempOutput (mergeState .segmentInfo .name , "civf_" , IOContext .DEFAULT );
268
+ centroidTempName = centroidTemp .getName ();
269
+
270
+ centroidAssignments = calculateAndWriteCentroids (
271
+ fieldInfo ,
272
+ floatVectorValues ,
273
+ centroidTemp ,
274
+ mergeState ,
275
+ calculatedGlobalCentroid
276
+ );
277
+ numCentroids = centroidAssignments .numCentroids ();
278
+
244
279
success = true ;
245
280
} finally {
246
- if (success == false && tempRawVectorsFileName != null ) {
247
- org .apache .lucene .util .IOUtils .deleteFilesIgnoringExceptions (mergeState .segmentInfo .dir , tempRawVectorsFileName );
281
+ if (success == false && centroidTempName != null ) {
282
+ IOUtils .closeWhileHandlingException (centroidTemp );
283
+ org .apache .lucene .util .IOUtils .deleteFilesIgnoringExceptions (mergeState .segmentInfo .dir , centroidTempName );
248
284
}
249
285
}
250
- try (IndexInput in = mergeState .segmentInfo .dir .openInput (tempRawVectorsFileName , IOContext .DEFAULT )) {
251
- float [] calculatedGlobalCentroid = new float [fieldInfo .getVectorDimension ()];
252
- final FloatVectorValues floatVectorValues = getFloatVectorValues (fieldInfo , in , numVectors );
253
- success = false ;
254
- long centroidOffset ;
255
- long centroidLength ;
256
- String centroidTempName = null ;
257
- int numCentroids ;
258
- IndexOutput centroidTemp = null ;
259
- CentroidAssignments centroidAssignments ;
260
- try {
261
- centroidTemp = mergeState .segmentInfo .dir .createTempOutput (mergeState .segmentInfo .name , "civf_" , IOContext .DEFAULT );
262
- centroidTempName = centroidTemp .getName ();
263
-
264
- centroidAssignments = calculateAndWriteCentroids (
286
+ try {
287
+ if (numCentroids == 0 ) {
288
+ centroidOffset = ivfCentroids .getFilePointer ();
289
+ writeMeta (fieldInfo , centroidOffset , 0 , new long [0 ], null );
290
+ CodecUtil .writeFooter (centroidTemp );
291
+ IOUtils .close (centroidTemp );
292
+ return ;
293
+ }
294
+ CodecUtil .writeFooter (centroidTemp );
295
+ IOUtils .close (centroidTemp );
296
+ centroidOffset = ivfCentroids .alignFilePointer (Float .BYTES );
297
+ try (IndexInput centroidsInput = mergeState .segmentInfo .dir .openInput (centroidTempName , IOContext .DEFAULT )) {
298
+ ivfCentroids .copyBytes (centroidsInput , centroidsInput .length () - CodecUtil .footerLength ());
299
+ centroidLength = ivfCentroids .getFilePointer () - centroidOffset ;
300
+
301
+ CentroidSupplier centroidSupplier = createCentroidSupplier (
302
+ centroidsInput ,
303
+ numCentroids ,
265
304
fieldInfo ,
266
- floatVectorValues ,
267
- centroidTemp ,
268
- mergeState ,
269
305
calculatedGlobalCentroid
270
306
);
271
- numCentroids = centroidAssignments .numCentroids ();
272
-
273
- success = true ;
274
- } finally {
275
- if (success == false && centroidTempName != null ) {
276
- IOUtils .closeWhileHandlingException (centroidTemp );
277
- org .apache .lucene .util .IOUtils .deleteFilesIgnoringExceptions (mergeState .segmentInfo .dir , centroidTempName );
278
- }
279
- }
280
- try {
281
- if (numCentroids == 0 ) {
282
- centroidOffset = ivfCentroids .getFilePointer ();
283
- writeMeta (fieldInfo , centroidOffset , 0 , new long [0 ], null );
284
- CodecUtil .writeFooter (centroidTemp );
285
- IOUtils .close (centroidTemp );
286
- return ;
287
- }
288
- CodecUtil .writeFooter (centroidTemp );
289
- IOUtils .close (centroidTemp );
290
- centroidOffset = ivfCentroids .alignFilePointer (Float .BYTES );
291
- try (IndexInput centroidsInput = mergeState .segmentInfo .dir .openInput (centroidTempName , IOContext .DEFAULT )) {
292
- ivfCentroids .copyBytes (centroidsInput , centroidsInput .length () - CodecUtil .footerLength ());
293
- centroidLength = ivfCentroids .getFilePointer () - centroidOffset ;
294
-
295
- CentroidSupplier centroidSupplier = createCentroidSupplier (
296
- centroidsInput ,
297
- numCentroids ,
298
- fieldInfo ,
299
- calculatedGlobalCentroid
300
- );
301
-
302
- // build a float vector values with random access
303
- // build centroids
304
- final long [] offsets = buildAndWritePostingsLists (
305
- fieldInfo ,
306
- centroidSupplier ,
307
- floatVectorValues ,
308
- ivfClusters ,
309
- centroidAssignments .assignmentsByCluster ()
310
- );
311
- assert offsets .length == centroidSupplier .size ();
312
- writeMeta (fieldInfo , centroidOffset , centroidLength , offsets , calculatedGlobalCentroid );
313
- }
314
- } finally {
315
- org .apache .lucene .util .IOUtils .deleteFilesIgnoringExceptions (
316
- mergeState .segmentInfo .dir ,
317
- tempRawVectorsFileName ,
318
- centroidTempName
307
+
308
+ // build a float vector values with random access
309
+ // build centroids
310
+ final long [] offsets = buildAndWritePostingsLists (
311
+ fieldInfo ,
312
+ centroidSupplier ,
313
+ floatVectorValues ,
314
+ ivfClusters ,
315
+ centroidAssignments .assignmentsByCluster ()
319
316
);
317
+ assert offsets .length == centroidSupplier .size ();
318
+ writeMeta (fieldInfo , centroidOffset , centroidLength , offsets , calculatedGlobalCentroid );
320
319
}
321
320
} finally {
322
- org .apache .lucene .util .IOUtils .deleteFilesIgnoringExceptions (mergeState .segmentInfo .dir , tempRawVectorsFileName );
321
+ org .apache .lucene .util .IOUtils .deleteFilesIgnoringExceptions (
322
+ mergeState .segmentInfo .dir ,
323
+ tempRawVectorsFileName ,
324
+ centroidTempName
325
+ );
323
326
}
327
+ } finally {
328
+ org .apache .lucene .util .IOUtils .deleteFilesIgnoringExceptions (mergeState .segmentInfo .dir , tempRawVectorsFileName );
324
329
}
325
330
}
326
331
0 commit comments