4545import org .apache .lucene .util .hnsw .RandomVectorScorer ;
4646
4747import java .io .IOException ;
48+ import java .io .UncheckedIOException ;
4849
4950import static org .apache .lucene .codecs .lucene99 .Lucene99HnswVectorsReader .readSimilarityFunction ;
5051import static org .apache .lucene .codecs .lucene99 .Lucene99HnswVectorsReader .readVectorEncoding ;
@@ -57,69 +58,31 @@ public class DirectIOLucene99FlatVectorsReader extends FlatVectorsReader {
5758
5859 private static final long SHALLOW_SIZE = RamUsageEstimator .shallowSizeOfInstance (DirectIOLucene99FlatVectorsReader .class );
5960
60- private final IntObjectHashMap <FieldEntry > fields ;
61+ private final IntObjectHashMap <FieldEntry > fields = new IntObjectHashMap <>() ;
6162 private final IndexInput vectorData ;
62- private final IndexInput vectorDataDirect ;
63- private final IndexInput vectorDataMerge ;
6463 private final FieldInfos fieldInfos ;
65- private final boolean isClone ;
6664
6765 public DirectIOLucene99FlatVectorsReader (SegmentReadState state , FlatVectorsScorer scorer ) throws IOException {
6866 super (scorer );
69- this .fields = new IntObjectHashMap <>();
7067 int versionMeta = readMetadata (state );
7168 this .fieldInfos = state .fieldInfos ;
7269 boolean success = false ;
7370 try {
74- vectorDataDirect = openDataInput (
71+ vectorData = openDataInput (
7572 state ,
7673 versionMeta ,
7774 DirectIOLucene99FlatVectorsFormat .VECTOR_DATA_EXTENSION ,
7875 DirectIOLucene99FlatVectorsFormat .VECTOR_DATA_CODEC_NAME ,
79- state .context ,
80- true
76+ // Flat formats are used to randomly access vectors from their node ID that is stored
77+ // in the HNSW graph.
78+ state .context .withReadAdvice (ReadAdvice .RANDOM )
8179 );
8280 success = true ;
8381 } finally {
8482 if (success == false ) {
8583 IOUtils .closeWhileHandlingException (this );
8684 }
8785 }
88-
89- success = false ;
90- try {
91- vectorDataMerge = openDataInput (
92- state ,
93- versionMeta ,
94- DirectIOLucene99FlatVectorsFormat .VECTOR_DATA_EXTENSION ,
95- DirectIOLucene99FlatVectorsFormat .VECTOR_DATA_CODEC_NAME ,
96- // We use sequential access since this input is only used for merging
97- USE_DIRECT_IO ? state .context .withReadAdvice (ReadAdvice .SEQUENTIAL ) : state .context ,
98- false
99- );
100- success = true ;
101- } finally {
102- if (success == false ) {
103- IOUtils .closeWhileHandlingException (this );
104- }
105- }
106- this .vectorData = vectorDataDirect ;
107- this .isClone = false ;
108- }
109-
110- /**
111- * Returns a {@link DirectIOLucene99FlatVectorsReader} that switch the raw vector data to use
112- * the provided {@link IndexInput}.
113- * This is useful for merges since we want to switch from directIO to sequential reads.
114- */
115- private DirectIOLucene99FlatVectorsReader (DirectIOLucene99FlatVectorsReader clone , IndexInput vectorData ) {
116- super (clone .vectorScorer );
117- this .fields = clone .fields ;
118- this .vectorData = vectorData ;
119- this .vectorDataDirect = clone .vectorDataDirect ;
120- this .vectorDataMerge = clone .vectorDataMerge ;
121- this .fieldInfos = clone .fieldInfos ;
122- this .isClone = true ;
12386 }
12487
12588 private int readMetadata (SegmentReadState state ) throws IOException {
@@ -155,13 +118,11 @@ private static IndexInput openDataInput(
155118 int versionMeta ,
156119 String fileExtension ,
157120 String codecName ,
158- IOContext context ,
159- boolean useDirectIO
121+ IOContext context
160122 ) throws IOException {
161123 String fileName = IndexFileNames .segmentFileName (state .segmentInfo .name , state .segmentSuffix , fileExtension );
162124 // use direct IO for accessing raw vector data for searches
163- IndexInput in = useDirectIO
164- && USE_DIRECT_IO
125+ IndexInput in = USE_DIRECT_IO
165126 && context .context () == IOContext .Context .DEFAULT
166127 && state .directory instanceof DirectIOIndexInputSupplier did
167128 ? did .openInputDirect (fileName , context )
@@ -192,11 +153,6 @@ private static IndexInput openDataInput(
192153 }
193154 }
194155
195- // only for tests
196- IndexInput getRawVectorsInput () {
197- return vectorData ;
198- }
199-
200156 private void readFields (ChecksumIndexInput meta , FieldInfos infos ) throws IOException {
201157 for (int fieldNumber = meta .readInt (); fieldNumber != -1 ; fieldNumber = meta .readInt ()) {
202158 FieldInfo info = infos .fieldInfo (fieldNumber );
@@ -220,8 +176,13 @@ public void checkIntegrity() throws IOException {
220176
221177 @ Override
222178 public FlatVectorsReader getMergeInstance () {
223- // for merges we use sequential access instead of direct IO
224- return new DirectIOLucene99FlatVectorsReader (this , vectorDataMerge );
179+ try {
180+ // Update the read advice since vectors are guaranteed to be accessed sequentially for merge
181+ this .vectorData .updateReadAdvice (ReadAdvice .SEQUENTIAL );
182+ return this ;
183+ } catch (IOException exception ) {
184+ throw new UncheckedIOException (exception );
185+ }
225186 }
226187
227188 private FieldEntry getFieldEntry (String field , VectorEncoding expectedEncoding ) {
@@ -306,11 +267,16 @@ public RandomVectorScorer getRandomVectorScorer(String field, byte[] target) thr
306267 );
307268 }
308269
270+ @ Override
271+ public void finishMerge () throws IOException {
272+ // This makes sure that the access pattern hint is reverted back since HNSW implementation
273+ // needs it
274+ this .vectorData .updateReadAdvice (ReadAdvice .RANDOM );
275+ }
276+
309277 @ Override
310278 public void close () throws IOException {
311- if (isClone == false ) {
312- IOUtils .close (vectorDataMerge , vectorDataDirect );
313- }
279+ IOUtils .close (vectorData );
314280 }
315281
316282 private record FieldEntry (
0 commit comments