@@ -225,102 +225,107 @@ public int ordToDoc(int ord) {
225225 }
226226
227227 @ Override
228- @ SuppressForbidden (reason = "require usage of Lucene's IOUtils#deleteFilesIgnoringExceptions(...)" )
229228 public final void mergeOneField (FieldInfo fieldInfo , MergeState mergeState ) throws IOException {
230- rawVectorDelegate .mergeOneField (fieldInfo , mergeState );
231229 if (fieldInfo .getVectorEncoding ().equals (VectorEncoding .FLOAT32 )) {
232- final int numVectors ;
233- String tempRawVectorsFileName = null ;
234- boolean success = false ;
235- // build a float vector values with random access. In order to do that we dump the vectors to
236- // a temporary file
237- // and write the docID follow by the vector
238- try (IndexOutput out = mergeState .segmentInfo .dir .createTempOutput (mergeState .segmentInfo .name , "ivf_" , IOContext .DEFAULT )) {
239- tempRawVectorsFileName = out .getName ();
240- // TODO do this better, we shouldn't have to write to a temp file, we should be able to
241- // to just from the merged vector values, the tricky part is the random access.
242- numVectors = writeFloatVectorValues (fieldInfo , out , MergedVectorValues .mergeFloatVectorValues (fieldInfo , mergeState ));
243- CodecUtil .writeFooter (out );
230+ mergeOneFieldIVF (fieldInfo , mergeState );
231+ }
232+ // we merge the vectors at the end so we only have two copies of the vectors on disk at the same time.
233+ rawVectorDelegate .mergeOneField (fieldInfo , mergeState );
234+ }
235+
236+ @ SuppressForbidden (reason = "require usage of Lucene's IOUtils#deleteFilesIgnoringExceptions(...)" )
237+ private void mergeOneFieldIVF (FieldInfo fieldInfo , MergeState mergeState ) throws IOException {
238+ final int numVectors ;
239+ String tempRawVectorsFileName = null ;
240+ boolean success = false ;
241+ // build a float vector values with random access. In order to do that we dump the vectors to
242+ // a temporary file
243+ // and write the docID followed by the vector
244+ try (IndexOutput out = mergeState .segmentInfo .dir .createTempOutput (mergeState .segmentInfo .name , "ivf_" , IOContext .DEFAULT )) {
245+ tempRawVectorsFileName = out .getName ();
246+ // TODO do this better, we shouldn't have to write to a temp file, we should be able to
246+ // work directly from the merged vector values, the tricky part is the random access.
248+ numVectors = writeFloatVectorValues (fieldInfo , out , MergedVectorValues .mergeFloatVectorValues (fieldInfo , mergeState ));
249+ CodecUtil .writeFooter (out );
250+ success = true ;
251+ } finally {
252+ if (success == false && tempRawVectorsFileName != null ) {
253+ org .apache .lucene .util .IOUtils .deleteFilesIgnoringExceptions (mergeState .segmentInfo .dir , tempRawVectorsFileName );
254+ }
255+ }
256+ try (IndexInput in = mergeState .segmentInfo .dir .openInput (tempRawVectorsFileName , IOContext .DEFAULT )) {
257+ float [] calculatedGlobalCentroid = new float [fieldInfo .getVectorDimension ()];
258+ final FloatVectorValues floatVectorValues = getFloatVectorValues (fieldInfo , in , numVectors );
259+ success = false ;
260+ long centroidOffset ;
261+ long centroidLength ;
262+ String centroidTempName = null ;
263+ int numCentroids ;
264+ IndexOutput centroidTemp = null ;
265+ CentroidAssignments centroidAssignments ;
266+ try {
267+ centroidTemp = mergeState .segmentInfo .dir .createTempOutput (mergeState .segmentInfo .name , "civf_" , IOContext .DEFAULT );
268+ centroidTempName = centroidTemp .getName ();
269+
270+ centroidAssignments = calculateAndWriteCentroids (
271+ fieldInfo ,
272+ floatVectorValues ,
273+ centroidTemp ,
274+ mergeState ,
275+ calculatedGlobalCentroid
276+ );
277+ numCentroids = centroidAssignments .numCentroids ();
278+
244279 success = true ;
245280 } finally {
246- if (success == false && tempRawVectorsFileName != null ) {
247- org .apache .lucene .util .IOUtils .deleteFilesIgnoringExceptions (mergeState .segmentInfo .dir , tempRawVectorsFileName );
281+ if (success == false && centroidTempName != null ) {
282+ IOUtils .closeWhileHandlingException (centroidTemp );
283+ org .apache .lucene .util .IOUtils .deleteFilesIgnoringExceptions (mergeState .segmentInfo .dir , centroidTempName );
248284 }
249285 }
250- try (IndexInput in = mergeState .segmentInfo .dir .openInput (tempRawVectorsFileName , IOContext .DEFAULT )) {
251- float [] calculatedGlobalCentroid = new float [fieldInfo .getVectorDimension ()];
252- final FloatVectorValues floatVectorValues = getFloatVectorValues (fieldInfo , in , numVectors );
253- success = false ;
254- long centroidOffset ;
255- long centroidLength ;
256- String centroidTempName = null ;
257- int numCentroids ;
258- IndexOutput centroidTemp = null ;
259- CentroidAssignments centroidAssignments ;
260- try {
261- centroidTemp = mergeState .segmentInfo .dir .createTempOutput (mergeState .segmentInfo .name , "civf_" , IOContext .DEFAULT );
262- centroidTempName = centroidTemp .getName ();
263-
264- centroidAssignments = calculateAndWriteCentroids (
286+ try {
287+ if (numCentroids == 0 ) {
288+ centroidOffset = ivfCentroids .getFilePointer ();
289+ writeMeta (fieldInfo , centroidOffset , 0 , new long [0 ], null );
290+ CodecUtil .writeFooter (centroidTemp );
291+ IOUtils .close (centroidTemp );
292+ return ;
293+ }
294+ CodecUtil .writeFooter (centroidTemp );
295+ IOUtils .close (centroidTemp );
296+ centroidOffset = ivfCentroids .alignFilePointer (Float .BYTES );
297+ try (IndexInput centroidsInput = mergeState .segmentInfo .dir .openInput (centroidTempName , IOContext .DEFAULT )) {
298+ ivfCentroids .copyBytes (centroidsInput , centroidsInput .length () - CodecUtil .footerLength ());
299+ centroidLength = ivfCentroids .getFilePointer () - centroidOffset ;
300+
301+ CentroidSupplier centroidSupplier = createCentroidSupplier (
302+ centroidsInput ,
303+ numCentroids ,
265304 fieldInfo ,
266- floatVectorValues ,
267- centroidTemp ,
268- mergeState ,
269305 calculatedGlobalCentroid
270306 );
271- numCentroids = centroidAssignments .numCentroids ();
272-
273- success = true ;
274- } finally {
275- if (success == false && centroidTempName != null ) {
276- IOUtils .closeWhileHandlingException (centroidTemp );
277- org .apache .lucene .util .IOUtils .deleteFilesIgnoringExceptions (mergeState .segmentInfo .dir , centroidTempName );
278- }
279- }
280- try {
281- if (numCentroids == 0 ) {
282- centroidOffset = ivfCentroids .getFilePointer ();
283- writeMeta (fieldInfo , centroidOffset , 0 , new long [0 ], null );
284- CodecUtil .writeFooter (centroidTemp );
285- IOUtils .close (centroidTemp );
286- return ;
287- }
288- CodecUtil .writeFooter (centroidTemp );
289- IOUtils .close (centroidTemp );
290- centroidOffset = ivfCentroids .alignFilePointer (Float .BYTES );
291- try (IndexInput centroidsInput = mergeState .segmentInfo .dir .openInput (centroidTempName , IOContext .DEFAULT )) {
292- ivfCentroids .copyBytes (centroidsInput , centroidsInput .length () - CodecUtil .footerLength ());
293- centroidLength = ivfCentroids .getFilePointer () - centroidOffset ;
294-
295- CentroidSupplier centroidSupplier = createCentroidSupplier (
296- centroidsInput ,
297- numCentroids ,
298- fieldInfo ,
299- calculatedGlobalCentroid
300- );
301-
302- // build a float vector values with random access
303- // build centroids
304- final long [] offsets = buildAndWritePostingsLists (
305- fieldInfo ,
306- centroidSupplier ,
307- floatVectorValues ,
308- ivfClusters ,
309- centroidAssignments .assignmentsByCluster ()
310- );
311- assert offsets .length == centroidSupplier .size ();
312- writeMeta (fieldInfo , centroidOffset , centroidLength , offsets , calculatedGlobalCentroid );
313- }
314- } finally {
315- org .apache .lucene .util .IOUtils .deleteFilesIgnoringExceptions (
316- mergeState .segmentInfo .dir ,
317- tempRawVectorsFileName ,
318- centroidTempName
307+
308+ // build a float vector values with random access
309+ // build centroids
310+ final long [] offsets = buildAndWritePostingsLists (
311+ fieldInfo ,
312+ centroidSupplier ,
313+ floatVectorValues ,
314+ ivfClusters ,
315+ centroidAssignments .assignmentsByCluster ()
319316 );
317+ assert offsets .length == centroidSupplier .size ();
318+ writeMeta (fieldInfo , centroidOffset , centroidLength , offsets , calculatedGlobalCentroid );
320319 }
321320 } finally {
322- org .apache .lucene .util .IOUtils .deleteFilesIgnoringExceptions (mergeState .segmentInfo .dir , tempRawVectorsFileName );
321+ org .apache .lucene .util .IOUtils .deleteFilesIgnoringExceptions (
322+ mergeState .segmentInfo .dir ,
323+ tempRawVectorsFileName ,
324+ centroidTempName
325+ );
323326 }
327+ } finally {
328+ org .apache .lucene .util .IOUtils .deleteFilesIgnoringExceptions (mergeState .segmentInfo .dir , tempRawVectorsFileName );
324329 }
325330 }
326331
0 commit comments