Skip to content

Commit 7e6b988

Browse files
committed
Revert "Avoid using direct I/O during vector merges"
This reverts commit e842eeb.
1 parent 8a5d672 commit 7e6b988

File tree

2 files changed

+23
-62
lines changed

2 files changed

+23
-62
lines changed

server/src/main/java/org/elasticsearch/index/codec/vectors/es818/DirectIOLucene99FlatVectorsReader.java

Lines changed: 23 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@
4545
import org.apache.lucene.util.hnsw.RandomVectorScorer;
4646

4747
import java.io.IOException;
48+
import java.io.UncheckedIOException;
4849

4950
import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsReader.readSimilarityFunction;
5051
import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsReader.readVectorEncoding;
@@ -57,69 +58,31 @@ public class DirectIOLucene99FlatVectorsReader extends FlatVectorsReader {
5758

5859
private static final long SHALLOW_SIZE = RamUsageEstimator.shallowSizeOfInstance(DirectIOLucene99FlatVectorsReader.class);
5960

60-
private final IntObjectHashMap<FieldEntry> fields;
61+
private final IntObjectHashMap<FieldEntry> fields = new IntObjectHashMap<>();
6162
private final IndexInput vectorData;
62-
private final IndexInput vectorDataDirect;
63-
private final IndexInput vectorDataMerge;
6463
private final FieldInfos fieldInfos;
65-
private final boolean isClone;
6664

6765
public DirectIOLucene99FlatVectorsReader(SegmentReadState state, FlatVectorsScorer scorer) throws IOException {
6866
super(scorer);
69-
this.fields = new IntObjectHashMap<>();
7067
int versionMeta = readMetadata(state);
7168
this.fieldInfos = state.fieldInfos;
7269
boolean success = false;
7370
try {
74-
vectorDataDirect = openDataInput(
71+
vectorData = openDataInput(
7572
state,
7673
versionMeta,
7774
DirectIOLucene99FlatVectorsFormat.VECTOR_DATA_EXTENSION,
7875
DirectIOLucene99FlatVectorsFormat.VECTOR_DATA_CODEC_NAME,
79-
state.context,
80-
true
76+
// Flat formats are used to randomly access vectors from their node ID that is stored
77+
// in the HNSW graph.
78+
state.context.withReadAdvice(ReadAdvice.RANDOM)
8179
);
8280
success = true;
8381
} finally {
8482
if (success == false) {
8583
IOUtils.closeWhileHandlingException(this);
8684
}
8785
}
88-
89-
success = false;
90-
try {
91-
vectorDataMerge = openDataInput(
92-
state,
93-
versionMeta,
94-
DirectIOLucene99FlatVectorsFormat.VECTOR_DATA_EXTENSION,
95-
DirectIOLucene99FlatVectorsFormat.VECTOR_DATA_CODEC_NAME,
96-
// We use sequential access since this input is only used for merging
97-
USE_DIRECT_IO ? state.context.withReadAdvice(ReadAdvice.SEQUENTIAL) : state.context,
98-
false
99-
);
100-
success = true;
101-
} finally {
102-
if (success == false) {
103-
IOUtils.closeWhileHandlingException(this);
104-
}
105-
}
106-
this.vectorData = vectorDataDirect;
107-
this.isClone = false;
108-
}
109-
110-
/**
111-
* Returns a {@link DirectIOLucene99FlatVectorsReader} that switch the raw vector data to use
112-
* the provided {@link IndexInput}.
113-
* This is useful for merges since we want to switch from directIO to sequential reads.
114-
*/
115-
private DirectIOLucene99FlatVectorsReader(DirectIOLucene99FlatVectorsReader clone, IndexInput vectorData) {
116-
super(clone.vectorScorer);
117-
this.fields = clone.fields;
118-
this.vectorData = vectorData;
119-
this.vectorDataDirect = clone.vectorDataDirect;
120-
this.vectorDataMerge = clone.vectorDataMerge;
121-
this.fieldInfos = clone.fieldInfos;
122-
this.isClone = true;
12386
}
12487

12588
private int readMetadata(SegmentReadState state) throws IOException {
@@ -155,13 +118,11 @@ private static IndexInput openDataInput(
155118
int versionMeta,
156119
String fileExtension,
157120
String codecName,
158-
IOContext context,
159-
boolean useDirectIO
121+
IOContext context
160122
) throws IOException {
161123
String fileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, fileExtension);
162124
// use direct IO for accessing raw vector data for searches
163-
IndexInput in = useDirectIO
164-
&& USE_DIRECT_IO
125+
IndexInput in = USE_DIRECT_IO
165126
&& context.context() == IOContext.Context.DEFAULT
166127
&& state.directory instanceof DirectIOIndexInputSupplier did
167128
? did.openInputDirect(fileName, context)
@@ -192,11 +153,6 @@ private static IndexInput openDataInput(
192153
}
193154
}
194155

195-
// only for tests
196-
IndexInput getRawVectorsInput() {
197-
return vectorData;
198-
}
199-
200156
private void readFields(ChecksumIndexInput meta, FieldInfos infos) throws IOException {
201157
for (int fieldNumber = meta.readInt(); fieldNumber != -1; fieldNumber = meta.readInt()) {
202158
FieldInfo info = infos.fieldInfo(fieldNumber);
@@ -220,8 +176,13 @@ public void checkIntegrity() throws IOException {
220176

221177
@Override
222178
public FlatVectorsReader getMergeInstance() {
223-
// for merges we use sequential access instead of direct IO
224-
return new DirectIOLucene99FlatVectorsReader(this, vectorDataMerge);
179+
try {
180+
// Update the read advice since vectors are guaranteed to be accessed sequentially for merge
181+
this.vectorData.updateReadAdvice(ReadAdvice.SEQUENTIAL);
182+
return this;
183+
} catch (IOException exception) {
184+
throw new UncheckedIOException(exception);
185+
}
225186
}
226187

227188
private FieldEntry getFieldEntry(String field, VectorEncoding expectedEncoding) {
@@ -306,11 +267,16 @@ public RandomVectorScorer getRandomVectorScorer(String field, byte[] target) thr
306267
);
307268
}
308269

270+
@Override
271+
public void finishMerge() throws IOException {
272+
// This makes sure that the access pattern hint is reverted back since HNSW implementation
273+
// needs it
274+
this.vectorData.updateReadAdvice(ReadAdvice.RANDOM);
275+
}
276+
309277
@Override
310278
public void close() throws IOException {
311-
if (isClone == false) {
312-
IOUtils.close(vectorDataMerge, vectorDataDirect);
313-
}
279+
IOUtils.close(vectorData);
314280
}
315281

316282
private record FieldEntry(

server/src/main/java/org/elasticsearch/index/codec/vectors/es818/ES818BinaryQuantizedVectorsReader.java

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -119,11 +119,6 @@ public class ES818BinaryQuantizedVectorsReader extends FlatVectorsReader impleme
119119
}
120120
}
121121

122-
// only for tests
123-
FlatVectorsReader getRawVectorsReader() {
124-
return rawVectorsReader;
125-
}
126-
127122
private void readFields(ChecksumIndexInput meta, FieldInfos infos) throws IOException {
128123
for (int fieldNumber = meta.readInt(); fieldNumber != -1; fieldNumber = meta.readInt()) {
129124
FieldInfo info = infos.fieldInfo(fieldNumber);

0 commit comments

Comments
 (0)