3232import org .apache .lucene .index .MergeState ;
3333import org .apache .lucene .index .SegmentReadState ;
3434import org .apache .lucene .index .SegmentWriteState ;
35- import org .apache .lucene .index .SlowCodecReaderWrapper ;
3635import org .apache .lucene .index .Sorter ;
3736import org .apache .lucene .index .VectorSimilarityFunction ;
3837import org .apache .lucene .store .Directory ;
3938import org .apache .lucene .tests .codecs .asserting .AssertingCodec ;
4039import org .apache .lucene .tests .util .LuceneTestCase ;
4140import org .apache .lucene .tests .util .TestUtil ;
4241
43- /**
44- * Tests for MergedByteVectorValues to ensure lastOrd is properly incremented during iteration.
45- */
42+ /** Tests for MergedByteVectorValues to ensure lastOrd is properly incremented during iteration. */
4643public class TestMergedByteVectorValues extends LuceneTestCase {
4744
4845 /**
49- * Tests that skipping vectors via nextDoc() and then loading a vector works correctly
50- * during merge. This verifies the fix for the lastOrd tracking bug in MergedByteVectorValues.
51- *
52- * The bug: MergedByteVectorValues.nextDoc() does not increment lastOrd, so when you
53- * skip N vectors and then try to load vectorValue(N), it fails because lastOrd is still -1.
46+ * Tests that skipping vectors via nextDoc() and then loading a vector works correctly during
47+ * merge. This verifies the fix for the lastOrd tracking bug in MergedByteVectorValues.
48+ *
49+ * <p> The bug: MergedByteVectorValues.nextDoc() does not increment lastOrd, so when you skip N
50+ * vectors and then try to load vectorValue(N), it fails because lastOrd is still -1.
5451 */
5552 public void testSkipThenLoadByteVectorDuringMerge () throws IOException {
5653 try (Directory dir = newDirectory ()) {
@@ -60,15 +57,23 @@ public void testSkipThenLoadByteVectorDuringMerge() throws IOException {
6057 // First segment
6158 for (int i = 0 ; i < 3 ; i ++) {
6259 Document doc = new Document ();
63- doc .add (new KnnByteVectorField ("field" , new byte [] {(byte ) i , (byte ) (i + 1 )}, VectorSimilarityFunction .EUCLIDEAN ));
60+ doc .add (
61+ new KnnByteVectorField (
62+ "field" ,
63+ new byte [] {(byte ) i , (byte ) (i + 1 )},
64+ VectorSimilarityFunction .EUCLIDEAN ));
6465 writer .addDocument (doc );
6566 }
6667 writer .commit ();
6768
6869 // Second segment
6970 for (int i = 3 ; i < 6 ; i ++) {
7071 Document doc = new Document ();
71- doc .add (new KnnByteVectorField ("field" , new byte [] {(byte ) i , (byte ) (i + 1 )}, VectorSimilarityFunction .EUCLIDEAN ));
72+ doc .add (
73+ new KnnByteVectorField (
74+ "field" ,
75+ new byte [] {(byte ) i , (byte ) (i + 1 )},
76+ VectorSimilarityFunction .EUCLIDEAN ));
7277 writer .addDocument (doc );
7378 }
7479 writer .commit ();
@@ -77,126 +82,133 @@ public void testSkipThenLoadByteVectorDuringMerge() throws IOException {
7782 // Open reader with multiple segments
7883 try (DirectoryReader reader = DirectoryReader .open (dir )) {
7984 assertEquals ("Should have 2 segments" , 2 , reader .leaves ().size ());
80-
81- // Get CodecReaders for merge
85+
86+ // Get CodecReaders for merge - SegmentReader is already a CodecReader
8287 List <CodecReader > codecReaders = new ArrayList <>();
8388 for (LeafReaderContext ctx : reader .leaves ()) {
84- codecReaders .add (SlowCodecReaderWrapper . wrap ( ctx .reader () ));
89+ codecReaders .add (( CodecReader ) ctx .reader ());
8590 }
8691
8792 // Create a custom KnnVectorsFormat that tests the MergedByteVectorValues during merge
8893 final boolean [] testPassed = {false };
8994 final Exception [] testException = {null };
90-
95+
9196 KnnVectorsFormat delegate = TestUtil .getDefaultKnnVectorsFormat ();
92- KnnVectorsFormat testFormat = new KnnVectorsFormat (delegate .getName ()) {
93- @ Override
94- public int getMaxDimensions (String fieldName ) {
95- return delegate .getMaxDimensions (fieldName );
96- }
97-
98- @ Override
99- public KnnVectorsWriter fieldsWriter (SegmentWriteState state ) throws IOException {
100- KnnVectorsWriter delegateWriter = delegate .fieldsWriter (state );
101- return new KnnVectorsWriter () {
97+ KnnVectorsFormat testFormat =
98+ new KnnVectorsFormat (delegate .getName ()) {
10299 @ Override
103- public KnnFieldVectorsWriter <?> addField ( FieldInfo fieldInfo ) throws IOException {
104- return delegateWriter . addField ( fieldInfo );
100+ public int getMaxDimensions ( String fieldName ) {
101+ return delegate . getMaxDimensions ( fieldName );
105102 }
106-
107- @ Override
108- public void flush (int maxDoc , Sorter .DocMap sortMap ) throws IOException {
109- delegateWriter .flush (maxDoc , sortMap );
110- }
111-
103+
112104 @ Override
113- public void mergeOneField (FieldInfo fieldInfo , MergeState mergeState ) throws IOException {
114- // Get the MergedByteVectorValues and test the skip-then-load pattern
115- try {
116- ByteVectorValues mergedValues = KnnVectorsWriter .MergedVectorValues .mergeByteVectorValues (
117- fieldInfo , mergeState );
118-
119- KnnVectorValues .DocIndexIterator iterator = mergedValues .iterator ();
120-
121- // Skip first 3 vectors without loading them
122- for (int i = 0 ; i < 3 ; i ++) {
123- int docId = iterator .nextDoc ();
124- if (docId == KnnVectorValues .DocIndexIterator .NO_MORE_DOCS ) {
125- throw new AssertionError ("Unexpected NO_MORE_DOCS at iteration " + i );
126- }
105+ public KnnVectorsWriter fieldsWriter (SegmentWriteState state ) throws IOException {
106+ KnnVectorsWriter delegateWriter = delegate .fieldsWriter (state );
107+ return new KnnVectorsWriter () {
108+ @ Override
109+ public KnnFieldVectorsWriter <?> addField (FieldInfo fieldInfo ) throws IOException {
110+ return delegateWriter .addField (fieldInfo );
127111 }
128-
129- // Now advance one more and load the vector
130- int docId = iterator .nextDoc ();
131- if (docId == KnnVectorValues .DocIndexIterator .NO_MORE_DOCS ) {
132- throw new AssertionError ("Unexpected NO_MORE_DOCS for 4th doc" );
112+
113+ @ Override
114+ public void flush (int maxDoc , Sorter .DocMap sortMap ) throws IOException {
115+ delegateWriter .flush (maxDoc , sortMap );
133116 }
134-
135- // This is the call that fails without the fix:
136- // - lastOrd is still -1 (never incremented in nextDoc)
137- // - vectorValue(3) checks: 3 != -1 + 1 → 3 != 0 → throws IllegalStateException
138- byte [] vector = mergedValues .vectorValue (iterator .index ());
139-
140- if (vector == null ) {
141- throw new AssertionError ("Vector should not be null" );
117+
118+ @ Override
119+ public void mergeOneField (FieldInfo fieldInfo , MergeState mergeState )
120+ throws IOException {
121+ // Get the MergedByteVectorValues and test the skip-then-load pattern
122+ try {
123+ ByteVectorValues mergedValues =
124+ KnnVectorsWriter .MergedVectorValues .mergeByteVectorValues (
125+ fieldInfo , mergeState );
126+
127+ KnnVectorValues .DocIndexIterator iterator = mergedValues .iterator ();
128+
129+ // Skip first 3 vectors without loading them
130+ for (int i = 0 ; i < 3 ; i ++) {
131+ int docId = iterator .nextDoc ();
132+ if (docId == KnnVectorValues .DocIndexIterator .NO_MORE_DOCS ) {
133+ throw new AssertionError ("Unexpected NO_MORE_DOCS at iteration " + i );
134+ }
135+ }
136+
137+ // Now advance one more and load the vector
138+ int docId = iterator .nextDoc ();
139+ if (docId == KnnVectorValues .DocIndexIterator .NO_MORE_DOCS ) {
140+ throw new AssertionError ("Unexpected NO_MORE_DOCS for 4th doc" );
141+ }
142+
143+ // This is the call that fails without the fix:
144+ // - lastOrd is still -1 (never incremented in nextDoc)
145+ // - vectorValue(3) checks: 3 != -1 + 1 → 3 != 0 → throws
146+ // IllegalStateException
147+ byte [] vector = mergedValues .vectorValue (iterator .index ());
148+
149+ if (vector == null ) {
150+ throw new AssertionError ("Vector should not be null" );
151+ }
152+ if (vector .length != 2 ) {
153+ throw new AssertionError (
154+ "Vector dimension should be 2, got " + vector .length );
155+ }
156+ // The 4th vector (index 3) should have values {3, 4}
157+ if (vector [0 ] != 3 || vector [1 ] != 4 ) {
158+ throw new AssertionError (
159+ "Expected vector {3, 4}, got {" + vector [0 ] + ", " + vector [1 ] + "}" );
160+ }
161+
162+ testPassed [0 ] = true ;
163+ } catch (Exception e ) {
164+ testException [0 ] = e ;
165+ }
166+
167+ // Still perform the actual merge
168+ delegateWriter .mergeOneField (fieldInfo , mergeState );
142169 }
143- if (vector .length != 2 ) {
144- throw new AssertionError ("Vector dimension should be 2, got " + vector .length );
170+
171+ @ Override
172+ public void finish () throws IOException {
173+ delegateWriter .finish ();
145174 }
146- // The 4th vector (index 3) should have values {3, 4}
147- if (vector [0 ] != 3 || vector [1 ] != 4 ) {
148- throw new AssertionError ("Expected vector {3, 4}, got {" + vector [0 ] + ", " + vector [1 ] + "}" );
175+
176+ @ Override
177+ public void close () throws IOException {
178+ delegateWriter .close ();
149179 }
150-
151- testPassed [0 ] = true ;
152- } catch (Exception e ) {
153- testException [0 ] = e ;
154- }
155-
156- // Still perform the actual merge
157- delegateWriter .mergeOneField (fieldInfo , mergeState );
158- }
159-
160- @ Override
161- public void finish () throws IOException {
162- delegateWriter .finish ();
163- }
164-
165- @ Override
166- public void close () throws IOException {
167- delegateWriter .close ();
180+
181+ @ Override
182+ public long ramBytesUsed () {
183+ return delegateWriter .ramBytesUsed ();
184+ }
185+ };
168186 }
169-
187+
170188 @ Override
171- public long ramBytesUsed () {
172- return delegateWriter . ramBytesUsed ( );
189+ public KnnVectorsReader fieldsReader ( SegmentReadState state ) throws IOException {
190+ return delegate . fieldsReader ( state );
173191 }
174192 };
175- }
176-
177- @ Override
178- public KnnVectorsReader fieldsReader (SegmentReadState state ) throws IOException {
179- return delegate .fieldsReader (state );
180- }
181- };
182193
183194 // Create a new directory for the merged segment with our test format
184195 try (Directory mergeDir = newDirectory ()) {
185196 IndexWriterConfig mergeConfig = new IndexWriterConfig ();
186- mergeConfig .setCodec (new AssertingCodec () {
187- @ Override
188- public KnnVectorsFormat getKnnVectorsFormatForField (String field ) {
189- return testFormat ;
190- }
191- });
192-
197+ mergeConfig .setCodec (
198+ new AssertingCodec () {
199+ @ Override
200+ public KnnVectorsFormat getKnnVectorsFormatForField (String field ) {
201+ return testFormat ;
202+ }
203+ });
204+
193205 try (IndexWriter mergeWriter = new IndexWriter (mergeDir , mergeConfig )) {
194206 // Add the segments - this triggers the merge code path and our test
195207 mergeWriter .addIndexes (codecReaders .toArray (new CodecReader [0 ]));
196208 mergeWriter .commit ();
197209 }
198210 }
199-
211+
200212 // Check if the test passed
201213 if (testException [0 ] != null ) {
202214 throw new AssertionError ("Test failed during merge" , testException [0 ]);
0 commit comments