Skip to content

Commit cabb630

Browse files
committed
Fix MergedByteVectorValues lastOrd tracking bug
The fix mirrors the behavior of MergedFloat32VectorValues: (1) increment lastOrd in nextDoc() when advancing to a new document; (2) change the vectorValue(ord) check from 'ord != lastOrd + 1' to 'ord != lastOrd'. This ensures that skipping vectors via nextDoc() without loading them (as done in multipart upload partitioning) works correctly.
1 parent cca46d3 commit cabb630

File tree

2 files changed

+115
-104
lines changed

2 files changed

+115
-104
lines changed

lucene/core/src/java/org/apache/lucene/codecs/KnnVectorsWriter.java

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -414,11 +414,9 @@ private MergedByteVectorValues(List<ByteVectorValuesSub> subs, MergeState mergeS
414414

415415
@Override
416416
public byte[] vectorValue(int ord) throws IOException {
417-
if (ord != lastOrd + 1) {
417+
if (ord != lastOrd) {
418418
throw new IllegalStateException(
419419
"only supports forward iteration: ord=" + ord + ", lastOrd=" + lastOrd);
420-
} else {
421-
lastOrd = ord;
422420
}
423421
return current.values.vectorValue(current.index());
424422
}
@@ -446,6 +444,7 @@ public int nextDoc() throws IOException {
446444
index = NO_MORE_DOCS;
447445
} else {
448446
docId = current.mappedDocID;
447+
++lastOrd;
449448
++index;
450449
}
451450
return docId;

lucene/core/src/test/org/apache/lucene/codecs/TestMergedByteVectorValues.java

Lines changed: 113 additions & 101 deletions
Original file line numberDiff line numberDiff line change
@@ -32,25 +32,22 @@
3232
import org.apache.lucene.index.MergeState;
3333
import org.apache.lucene.index.SegmentReadState;
3434
import org.apache.lucene.index.SegmentWriteState;
35-
import org.apache.lucene.index.SlowCodecReaderWrapper;
3635
import org.apache.lucene.index.Sorter;
3736
import org.apache.lucene.index.VectorSimilarityFunction;
3837
import org.apache.lucene.store.Directory;
3938
import org.apache.lucene.tests.codecs.asserting.AssertingCodec;
4039
import org.apache.lucene.tests.util.LuceneTestCase;
4140
import org.apache.lucene.tests.util.TestUtil;
4241

43-
/**
44-
* Tests for MergedByteVectorValues to ensure lastOrd is properly incremented during iteration.
45-
*/
42+
/** Tests for MergedByteVectorValues to ensure lastOrd is properly incremented during iteration. */
4643
public class TestMergedByteVectorValues extends LuceneTestCase {
4744

4845
/**
49-
* Tests that skipping vectors via nextDoc() and then loading a vector works correctly
50-
* during merge. This verifies the fix for the lastOrd tracking bug in MergedByteVectorValues.
51-
*
52-
* The bug: MergedByteVectorValues.nextDoc() does not increment lastOrd, so when you
53-
* skip N vectors and then try to load vectorValue(N), it fails because lastOrd is still -1.
46+
* Tests that skipping vectors via nextDoc() and then loading a vector works correctly during
47+
* merge. This verifies the fix for the lastOrd tracking bug in MergedByteVectorValues.
48+
*
49+
* <p>The bug: MergedByteVectorValues.nextDoc() does not increment lastOrd, so when you skip N
50+
* vectors and then try to load vectorValue(N), it fails because lastOrd is still -1.
5451
*/
5552
public void testSkipThenLoadByteVectorDuringMerge() throws IOException {
5653
try (Directory dir = newDirectory()) {
@@ -60,15 +57,23 @@ public void testSkipThenLoadByteVectorDuringMerge() throws IOException {
6057
// First segment
6158
for (int i = 0; i < 3; i++) {
6259
Document doc = new Document();
63-
doc.add(new KnnByteVectorField("field", new byte[] {(byte) i, (byte) (i + 1)}, VectorSimilarityFunction.EUCLIDEAN));
60+
doc.add(
61+
new KnnByteVectorField(
62+
"field",
63+
new byte[] {(byte) i, (byte) (i + 1)},
64+
VectorSimilarityFunction.EUCLIDEAN));
6465
writer.addDocument(doc);
6566
}
6667
writer.commit();
6768

6869
// Second segment
6970
for (int i = 3; i < 6; i++) {
7071
Document doc = new Document();
71-
doc.add(new KnnByteVectorField("field", new byte[] {(byte) i, (byte) (i + 1)}, VectorSimilarityFunction.EUCLIDEAN));
72+
doc.add(
73+
new KnnByteVectorField(
74+
"field",
75+
new byte[] {(byte) i, (byte) (i + 1)},
76+
VectorSimilarityFunction.EUCLIDEAN));
7277
writer.addDocument(doc);
7378
}
7479
writer.commit();
@@ -77,126 +82,133 @@ public void testSkipThenLoadByteVectorDuringMerge() throws IOException {
7782
// Open reader with multiple segments
7883
try (DirectoryReader reader = DirectoryReader.open(dir)) {
7984
assertEquals("Should have 2 segments", 2, reader.leaves().size());
80-
81-
// Get CodecReaders for merge
85+
86+
// Get CodecReaders for merge - SegmentReader is already a CodecReader
8287
List<CodecReader> codecReaders = new ArrayList<>();
8388
for (LeafReaderContext ctx : reader.leaves()) {
84-
codecReaders.add(SlowCodecReaderWrapper.wrap(ctx.reader()));
89+
codecReaders.add((CodecReader) ctx.reader());
8590
}
8691

8792
// Create a custom KnnVectorsFormat that tests the MergedByteVectorValues during merge
8893
final boolean[] testPassed = {false};
8994
final Exception[] testException = {null};
90-
95+
9196
KnnVectorsFormat delegate = TestUtil.getDefaultKnnVectorsFormat();
92-
KnnVectorsFormat testFormat = new KnnVectorsFormat(delegate.getName()) {
93-
@Override
94-
public int getMaxDimensions(String fieldName) {
95-
return delegate.getMaxDimensions(fieldName);
96-
}
97-
98-
@Override
99-
public KnnVectorsWriter fieldsWriter(SegmentWriteState state) throws IOException {
100-
KnnVectorsWriter delegateWriter = delegate.fieldsWriter(state);
101-
return new KnnVectorsWriter() {
97+
KnnVectorsFormat testFormat =
98+
new KnnVectorsFormat(delegate.getName()) {
10299
@Override
103-
public KnnFieldVectorsWriter<?> addField(FieldInfo fieldInfo) throws IOException {
104-
return delegateWriter.addField(fieldInfo);
100+
public int getMaxDimensions(String fieldName) {
101+
return delegate.getMaxDimensions(fieldName);
105102
}
106-
107-
@Override
108-
public void flush(int maxDoc, Sorter.DocMap sortMap) throws IOException {
109-
delegateWriter.flush(maxDoc, sortMap);
110-
}
111-
103+
112104
@Override
113-
public void mergeOneField(FieldInfo fieldInfo, MergeState mergeState) throws IOException {
114-
// Get the MergedByteVectorValues and test the skip-then-load pattern
115-
try {
116-
ByteVectorValues mergedValues = KnnVectorsWriter.MergedVectorValues.mergeByteVectorValues(
117-
fieldInfo, mergeState);
118-
119-
KnnVectorValues.DocIndexIterator iterator = mergedValues.iterator();
120-
121-
// Skip first 3 vectors without loading them
122-
for (int i = 0; i < 3; i++) {
123-
int docId = iterator.nextDoc();
124-
if (docId == KnnVectorValues.DocIndexIterator.NO_MORE_DOCS) {
125-
throw new AssertionError("Unexpected NO_MORE_DOCS at iteration " + i);
126-
}
105+
public KnnVectorsWriter fieldsWriter(SegmentWriteState state) throws IOException {
106+
KnnVectorsWriter delegateWriter = delegate.fieldsWriter(state);
107+
return new KnnVectorsWriter() {
108+
@Override
109+
public KnnFieldVectorsWriter<?> addField(FieldInfo fieldInfo) throws IOException {
110+
return delegateWriter.addField(fieldInfo);
127111
}
128-
129-
// Now advance one more and load the vector
130-
int docId = iterator.nextDoc();
131-
if (docId == KnnVectorValues.DocIndexIterator.NO_MORE_DOCS) {
132-
throw new AssertionError("Unexpected NO_MORE_DOCS for 4th doc");
112+
113+
@Override
114+
public void flush(int maxDoc, Sorter.DocMap sortMap) throws IOException {
115+
delegateWriter.flush(maxDoc, sortMap);
133116
}
134-
135-
// This is the call that fails without the fix:
136-
// - lastOrd is still -1 (never incremented in nextDoc)
137-
// - vectorValue(3) checks: 3 != -1 + 1 → 3 != 0 → throws IllegalStateException
138-
byte[] vector = mergedValues.vectorValue(iterator.index());
139-
140-
if (vector == null) {
141-
throw new AssertionError("Vector should not be null");
117+
118+
@Override
119+
public void mergeOneField(FieldInfo fieldInfo, MergeState mergeState)
120+
throws IOException {
121+
// Get the MergedByteVectorValues and test the skip-then-load pattern
122+
try {
123+
ByteVectorValues mergedValues =
124+
KnnVectorsWriter.MergedVectorValues.mergeByteVectorValues(
125+
fieldInfo, mergeState);
126+
127+
KnnVectorValues.DocIndexIterator iterator = mergedValues.iterator();
128+
129+
// Skip first 3 vectors without loading them
130+
for (int i = 0; i < 3; i++) {
131+
int docId = iterator.nextDoc();
132+
if (docId == KnnVectorValues.DocIndexIterator.NO_MORE_DOCS) {
133+
throw new AssertionError("Unexpected NO_MORE_DOCS at iteration " + i);
134+
}
135+
}
136+
137+
// Now advance one more and load the vector
138+
int docId = iterator.nextDoc();
139+
if (docId == KnnVectorValues.DocIndexIterator.NO_MORE_DOCS) {
140+
throw new AssertionError("Unexpected NO_MORE_DOCS for 4th doc");
141+
}
142+
143+
// This is the call that fails without the fix:
144+
// - lastOrd is still -1 (never incremented in nextDoc)
145+
// - vectorValue(3) checks: 3 != -1 + 1 → 3 != 0 → throws
146+
// IllegalStateException
147+
byte[] vector = mergedValues.vectorValue(iterator.index());
148+
149+
if (vector == null) {
150+
throw new AssertionError("Vector should not be null");
151+
}
152+
if (vector.length != 2) {
153+
throw new AssertionError(
154+
"Vector dimension should be 2, got " + vector.length);
155+
}
156+
// The 4th vector (index 3) should have values {3, 4}
157+
if (vector[0] != 3 || vector[1] != 4) {
158+
throw new AssertionError(
159+
"Expected vector {3, 4}, got {" + vector[0] + ", " + vector[1] + "}");
160+
}
161+
162+
testPassed[0] = true;
163+
} catch (Exception e) {
164+
testException[0] = e;
165+
}
166+
167+
// Still perform the actual merge
168+
delegateWriter.mergeOneField(fieldInfo, mergeState);
142169
}
143-
if (vector.length != 2) {
144-
throw new AssertionError("Vector dimension should be 2, got " + vector.length);
170+
171+
@Override
172+
public void finish() throws IOException {
173+
delegateWriter.finish();
145174
}
146-
// The 4th vector (index 3) should have values {3, 4}
147-
if (vector[0] != 3 || vector[1] != 4) {
148-
throw new AssertionError("Expected vector {3, 4}, got {" + vector[0] + ", " + vector[1] + "}");
175+
176+
@Override
177+
public void close() throws IOException {
178+
delegateWriter.close();
149179
}
150-
151-
testPassed[0] = true;
152-
} catch (Exception e) {
153-
testException[0] = e;
154-
}
155-
156-
// Still perform the actual merge
157-
delegateWriter.mergeOneField(fieldInfo, mergeState);
158-
}
159-
160-
@Override
161-
public void finish() throws IOException {
162-
delegateWriter.finish();
163-
}
164-
165-
@Override
166-
public void close() throws IOException {
167-
delegateWriter.close();
180+
181+
@Override
182+
public long ramBytesUsed() {
183+
return delegateWriter.ramBytesUsed();
184+
}
185+
};
168186
}
169-
187+
170188
@Override
171-
public long ramBytesUsed() {
172-
return delegateWriter.ramBytesUsed();
189+
public KnnVectorsReader fieldsReader(SegmentReadState state) throws IOException {
190+
return delegate.fieldsReader(state);
173191
}
174192
};
175-
}
176-
177-
@Override
178-
public KnnVectorsReader fieldsReader(SegmentReadState state) throws IOException {
179-
return delegate.fieldsReader(state);
180-
}
181-
};
182193

183194
// Create a new directory for the merged segment with our test format
184195
try (Directory mergeDir = newDirectory()) {
185196
IndexWriterConfig mergeConfig = new IndexWriterConfig();
186-
mergeConfig.setCodec(new AssertingCodec() {
187-
@Override
188-
public KnnVectorsFormat getKnnVectorsFormatForField(String field) {
189-
return testFormat;
190-
}
191-
});
192-
197+
mergeConfig.setCodec(
198+
new AssertingCodec() {
199+
@Override
200+
public KnnVectorsFormat getKnnVectorsFormatForField(String field) {
201+
return testFormat;
202+
}
203+
});
204+
193205
try (IndexWriter mergeWriter = new IndexWriter(mergeDir, mergeConfig)) {
194206
// Add the segments - this triggers the merge code path and our test
195207
mergeWriter.addIndexes(codecReaders.toArray(new CodecReader[0]));
196208
mergeWriter.commit();
197209
}
198210
}
199-
211+
200212
// Check if the test passed
201213
if (testException[0] != null) {
202214
throw new AssertionError("Test failed during merge", testException[0]);

0 commit comments

Comments
 (0)