Skip to content

Commit cabb630

Browse files
committed
Fix MergedByteVectorValues lastOrd tracking bug
The fix mirrors the behavior of MergedFloat32VectorValues: (1) increment lastOrd in nextDoc() when advancing to a new document; (2) change the vectorValue(ord) check from 'ord != lastOrd + 1' to 'ord != lastOrd'. This ensures that skipping vectors via nextDoc() without loading them (as done in multipart upload partitioning) works correctly.
1 parent cca46d3 commit cabb630

File tree

2 files changed

+115
-104
lines changed

2 files changed

+115
-104
lines changed

lucene/core/src/java/org/apache/lucene/codecs/KnnVectorsWriter.java

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -414,11 +414,9 @@ private MergedByteVectorValues(List<ByteVectorValuesSub> subs, MergeState mergeS
414414

415415
@Override
416416
public byte[] vectorValue(int ord) throws IOException {
417-
if (ord != lastOrd + 1) {
417+
if (ord != lastOrd) {
418418
throw new IllegalStateException(
419419
"only supports forward iteration: ord=" + ord + ", lastOrd=" + lastOrd);
420-
} else {
421-
lastOrd = ord;
422420
}
423421
return current.values.vectorValue(current.index());
424422
}
@@ -446,6 +444,7 @@ public int nextDoc() throws IOException {
446444
index = NO_MORE_DOCS;
447445
} else {
448446
docId = current.mappedDocID;
447+
++lastOrd;
449448
++index;
450449
}
451450
return docId;

lucene/core/src/test/org/apache/lucene/codecs/TestMergedByteVectorValues.java

Lines changed: 113 additions & 101 deletions
Original file line numberDiff line numberDiff line change
@@ -32,25 +32,22 @@
3232
import org.apache.lucene.index.MergeState;
3333
import org.apache.lucene.index.SegmentReadState;
3434
import org.apache.lucene.index.SegmentWriteState;
35-
import org.apache.lucene.index.SlowCodecReaderWrapper;
3635
import org.apache.lucene.index.Sorter;
3736
import org.apache.lucene.index.VectorSimilarityFunction;
3837
import org.apache.lucene.store.Directory;
3938
import org.apache.lucene.tests.codecs.asserting.AssertingCodec;
4039
import org.apache.lucene.tests.util.LuceneTestCase;
4140
import org.apache.lucene.tests.util.TestUtil;
4241

43-
/**
44-
* Tests for MergedByteVectorValues to ensure lastOrd is properly incremented during iteration.
45-
*/
42+
/** Tests for MergedByteVectorValues to ensure lastOrd is properly incremented during iteration. */
4643
public class TestMergedByteVectorValues extends LuceneTestCase {
4744

4845
/**
49-
* Tests that skipping vectors via nextDoc() and then loading a vector works correctly
50-
* during merge. This verifies the fix for the lastOrd tracking bug in MergedByteVectorValues.
51-
*
52-
* The bug: MergedByteVectorValues.nextDoc() does not increment lastOrd, so when you
53-
* skip N vectors and then try to load vectorValue(N), it fails because lastOrd is still -1.
46+
* Tests that skipping vectors via nextDoc() and then loading a vector works correctly during
47+
* merge. This verifies the fix for the lastOrd tracking bug in MergedByteVectorValues.
48+
*
49+
* <p>The bug: MergedByteVectorValues.nextDoc() does not increment lastOrd, so when you skip N
50+
* vectors and then try to load vectorValue(N), it fails because lastOrd is still -1.
5451
*/
5552
public void testSkipThenLoadByteVectorDuringMerge() throws IOException {
5653
try (Directory dir = newDirectory()) {
@@ -60,15 +57,23 @@ public void testSkipThenLoadByteVectorDuringMerge() throws IOException {
6057
// First segment
6158
for (int i = 0; i < 3; i++) {
6259
Document doc = new Document();
63-
doc.add(new KnnByteVectorField("field", new byte[] {(byte) i, (byte) (i + 1)}, VectorSimilarityFunction.EUCLIDEAN));
60+
doc.add(
61+
new KnnByteVectorField(
62+
"field",
63+
new byte[] {(byte) i, (byte) (i + 1)},
64+
VectorSimilarityFunction.EUCLIDEAN));
6465
writer.addDocument(doc);
6566
}
6667
writer.commit();
6768

6869
// Second segment
6970
for (int i = 3; i < 6; i++) {
7071
Document doc = new Document();
71-
doc.add(new KnnByteVectorField("field", new byte[] {(byte) i, (byte) (i + 1)}, VectorSimilarityFunction.EUCLIDEAN));
72+
doc.add(
73+
new KnnByteVectorField(
74+
"field",
75+
new byte[] {(byte) i, (byte) (i + 1)},
76+
VectorSimilarityFunction.EUCLIDEAN));
7277
writer.addDocument(doc);
7378
}
7479
writer.commit();
@@ -77,126 +82,133 @@ public void testSkipThenLoadByteVectorDuringMerge() throws IOException {
7782
// Open reader with multiple segments
7883
try (DirectoryReader reader = DirectoryReader.open(dir)) {
7984
assertEquals("Should have 2 segments", 2, reader.leaves().size());
80-
81-
// Get CodecReaders for merge
85+
86+
// Get CodecReaders for merge - SegmentReader is already a CodecReader
8287
List<CodecReader> codecReaders = new ArrayList<>();
8388
for (LeafReaderContext ctx : reader.leaves()) {
84-
codecReaders.add(SlowCodecReaderWrapper.wrap(ctx.reader()));
89+
codecReaders.add((CodecReader) ctx.reader());
8590
}
8691

8792
// Create a custom KnnVectorsFormat that tests the MergedByteVectorValues during merge
8893
final boolean[] testPassed = {false};
8994
final Exception[] testException = {null};
90-
95+
9196
KnnVectorsFormat delegate = TestUtil.getDefaultKnnVectorsFormat();
92-
KnnVectorsFormat testFormat = new KnnVectorsFormat(delegate.getName()) {
93-
@Override
94-
public int getMaxDimensions(String fieldName) {
95-
return delegate.getMaxDimensions(fieldName);
96-
}
97-
98-
@Override
99-
public KnnVectorsWriter fieldsWriter(SegmentWriteState state) throws IOException {
100-
KnnVectorsWriter delegateWriter = delegate.fieldsWriter(state);
101-
return new KnnVectorsWriter() {
97+
KnnVectorsFormat testFormat =
98+
new KnnVectorsFormat(delegate.getName()) {
10299
@Override
103-
public KnnFieldVectorsWriter<?> addField(FieldInfo fieldInfo) throws IOException {
104-
return delegateWriter.addField(fieldInfo);
100+
public int getMaxDimensions(String fieldName) {
101+
return delegate.getMaxDimensions(fieldName);
105102
}
106-
107-
@Override
108-
public void flush(int maxDoc, Sorter.DocMap sortMap) throws IOException {
109-
delegateWriter.flush(maxDoc, sortMap);
110-
}
111-
103+
112104
@Override
113-
public void mergeOneField(FieldInfo fieldInfo, MergeState mergeState) throws IOException {
114-
// Get the MergedByteVectorValues and test the skip-then-load pattern
115-
try {
116-
ByteVectorValues mergedValues = KnnVectorsWriter.MergedVectorValues.mergeByteVectorValues(
117-
fieldInfo, mergeState);
118-
119-
KnnVectorValues.DocIndexIterator iterator = mergedValues.iterator();
120-
121-
// Skip first 3 vectors without loading them
122-
for (int i = 0; i < 3; i++) {
123-
int docId = iterator.nextDoc();
124-
if (docId == KnnVectorValues.DocIndexIterator.NO_MORE_DOCS) {
125-
throw new AssertionError("Unexpected NO_MORE_DOCS at iteration " + i);
126-
}
105+
public KnnVectorsWriter fieldsWriter(SegmentWriteState state) throws IOException {
106+
KnnVectorsWriter delegateWriter = delegate.fieldsWriter(state);
107+
return new KnnVectorsWriter() {
108+
@Override
109+
public KnnFieldVectorsWriter<?> addField(FieldInfo fieldInfo) throws IOException {
110+
return delegateWriter.addField(fieldInfo);
127111
}
128-
129-
// Now advance one more and load the vector
130-
int docId = iterator.nextDoc();
131-
if (docId == KnnVectorValues.DocIndexIterator.NO_MORE_DOCS) {
132-
throw new AssertionError("Unexpected NO_MORE_DOCS for 4th doc");
112+
113+
@Override
114+
public void flush(int maxDoc, Sorter.DocMap sortMap) throws IOException {
115+
delegateWriter.flush(maxDoc, sortMap);
133116
}
134-
135-
// This is the call that fails without the fix:
136-
// - lastOrd is still -1 (never incremented in nextDoc)
137-
// - vectorValue(3) checks: 3 != -1 + 1 → 3 != 0 → throws IllegalStateException
138-
byte[] vector = mergedValues.vectorValue(iterator.index());
139-
140-
if (vector == null) {
141-
throw new AssertionError("Vector should not be null");
117+
118+
@Override
119+
public void mergeOneField(FieldInfo fieldInfo, MergeState mergeState)
120+
throws IOException {
121+
// Get the MergedByteVectorValues and test the skip-then-load pattern
122+
try {
123+
ByteVectorValues mergedValues =
124+
KnnVectorsWriter.MergedVectorValues.mergeByteVectorValues(
125+
fieldInfo, mergeState);
126+
127+
KnnVectorValues.DocIndexIterator iterator = mergedValues.iterator();
128+
129+
// Skip first 3 vectors without loading them
130+
for (int i = 0; i < 3; i++) {
131+
int docId = iterator.nextDoc();
132+
if (docId == KnnVectorValues.DocIndexIterator.NO_MORE_DOCS) {
133+
throw new AssertionError("Unexpected NO_MORE_DOCS at iteration " + i);
134+
}
135+
}
136+
137+
// Now advance one more and load the vector
138+
int docId = iterator.nextDoc();
139+
if (docId == KnnVectorValues.DocIndexIterator.NO_MORE_DOCS) {
140+
throw new AssertionError("Unexpected NO_MORE_DOCS for 4th doc");
141+
}
142+
143+
// This is the call that fails without the fix:
144+
// - lastOrd is still -1 (never incremented in nextDoc)
145+
// - vectorValue(3) checks: 3 != -1 + 1 → 3 != 0 → throws
146+
// IllegalStateException
147+
byte[] vector = mergedValues.vectorValue(iterator.index());
148+
149+
if (vector == null) {
150+
throw new AssertionError("Vector should not be null");
151+
}
152+
if (vector.length != 2) {
153+
throw new AssertionError(
154+
"Vector dimension should be 2, got " + vector.length);
155+
}
156+
// The 4th vector (index 3) should have values {3, 4}
157+
if (vector[0] != 3 || vector[1] != 4) {
158+
throw new AssertionError(
159+
"Expected vector {3, 4}, got {" + vector[0] + ", " + vector[1] + "}");
160+
}
161+
162+
testPassed[0] = true;
163+
} catch (Exception e) {
164+
testException[0] = e;
165+
}
166+
167+
// Still perform the actual merge
168+
delegateWriter.mergeOneField(fieldInfo, mergeState);
142169
}
143-
if (vector.length != 2) {
144-
throw new AssertionError("Vector dimension should be 2, got " + vector.length);
170+
171+
@Override
172+
public void finish() throws IOException {
173+
delegateWriter.finish();
145174
}
146-
// The 4th vector (index 3) should have values {3, 4}
147-
if (vector[0] != 3 || vector[1] != 4) {
148-
throw new AssertionError("Expected vector {3, 4}, got {" + vector[0] + ", " + vector[1] + "}");
175+
176+
@Override
177+
public void close() throws IOException {
178+
delegateWriter.close();
149179
}
150-
151-
testPassed[0] = true;
152-
} catch (Exception e) {
153-
testException[0] = e;
154-
}
155-
156-
// Still perform the actual merge
157-
delegateWriter.mergeOneField(fieldInfo, mergeState);
158-
}
159-
160-
@Override
161-
public void finish() throws IOException {
162-
delegateWriter.finish();
163-
}
164-
165-
@Override
166-
public void close() throws IOException {
167-
delegateWriter.close();
180+
181+
@Override
182+
public long ramBytesUsed() {
183+
return delegateWriter.ramBytesUsed();
184+
}
185+
};
168186
}
169-
187+
170188
@Override
171-
public long ramBytesUsed() {
172-
return delegateWriter.ramBytesUsed();
189+
public KnnVectorsReader fieldsReader(SegmentReadState state) throws IOException {
190+
return delegate.fieldsReader(state);
173191
}
174192
};
175-
}
176-
177-
@Override
178-
public KnnVectorsReader fieldsReader(SegmentReadState state) throws IOException {
179-
return delegate.fieldsReader(state);
180-
}
181-
};
182193

183194
// Create a new directory for the merged segment with our test format
184195
try (Directory mergeDir = newDirectory()) {
185196
IndexWriterConfig mergeConfig = new IndexWriterConfig();
186-
mergeConfig.setCodec(new AssertingCodec() {
187-
@Override
188-
public KnnVectorsFormat getKnnVectorsFormatForField(String field) {
189-
return testFormat;
190-
}
191-
});
192-
197+
mergeConfig.setCodec(
198+
new AssertingCodec() {
199+
@Override
200+
public KnnVectorsFormat getKnnVectorsFormatForField(String field) {
201+
return testFormat;
202+
}
203+
});
204+
193205
try (IndexWriter mergeWriter = new IndexWriter(mergeDir, mergeConfig)) {
194206
// Add the segments - this triggers the merge code path and our test
195207
mergeWriter.addIndexes(codecReaders.toArray(new CodecReader[0]));
196208
mergeWriter.commit();
197209
}
198210
}
199-
211+
200212
// Check if the test passed
201213
if (testException[0] != null) {
202214
throw new AssertionError("Test failed during merge", testException[0]);

0 commit comments

Comments
 (0)