Skip to content

Commit a43c02d

Browse files
authored
Reapply "Update the IOContext rather than the ReadAdvice on IndexInput (#14702)" (#14844)
1 parent 319f010 commit a43c02d

File tree

11 files changed

+61
-77
lines changed

11 files changed

+61
-77
lines changed

lucene/CHANGES.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,8 @@ API Changes
2929
* GITHUB#14873: Change PriorityQueue to take a LessThan interface implementation
3030
in its constructor rather than overriding the lessThan abstract method (Simon Cooper)
3131

32+
* GITHUB#14844: Change IndexInput.updateReadAdvice to take an IOContext instead (Simon Cooper)
33+
3234
New Features
3335
---------------------
3436
* GITHUB#14097: Binary partitioning merge policy over float-valued vector field. (Mike Sokolov)

lucene/core/src/java/org/apache/lucene/codecs/KnnVectorsReader.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,7 @@ public abstract void search(
124124
*
125125
* <p>The default implementation returns {@code this}
126126
*/
127-
public KnnVectorsReader getMergeInstance() {
127+
public KnnVectorsReader getMergeInstance() throws IOException {
128128
return this;
129129
}
130130

lucene/core/src/java/org/apache/lucene/codecs/hnsw/FlatVectorsReader.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ public abstract RandomVectorScorer getRandomVectorScorer(String field, byte[] ta
9696
* <p>The default implementation returns {@code this}
9797
*/
9898
@Override
99-
public FlatVectorsReader getMergeInstance() {
99+
public FlatVectorsReader getMergeInstance() throws IOException {
100100
return this;
101101
}
102102
}

lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99FlatVectorsReader.java

Lines changed: 11 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@
2121
import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsReader.readVectorEncoding;
2222

2323
import java.io.IOException;
24-
import java.io.UncheckedIOException;
2524
import java.util.Map;
2625
import org.apache.lucene.codecs.CodecUtil;
2726
import org.apache.lucene.codecs.hnsw.FlatVectorsReader;
@@ -45,7 +44,6 @@
4544
import org.apache.lucene.store.FileTypeHint;
4645
import org.apache.lucene.store.IOContext;
4746
import org.apache.lucene.store.IndexInput;
48-
import org.apache.lucene.store.ReadAdvice;
4947
import org.apache.lucene.util.IOUtils;
5048
import org.apache.lucene.util.RamUsageEstimator;
5149
import org.apache.lucene.util.hnsw.RandomVectorScorer;
@@ -63,23 +61,25 @@ public final class Lucene99FlatVectorsReader extends FlatVectorsReader {
6361
private final IntObjectHashMap<FieldEntry> fields = new IntObjectHashMap<>();
6462
private final IndexInput vectorData;
6563
private final FieldInfos fieldInfos;
64+
private final IOContext dataContext;
6665

6766
public Lucene99FlatVectorsReader(SegmentReadState state, FlatVectorsScorer scorer)
6867
throws IOException {
6968
super(scorer);
7069
int versionMeta = readMetadata(state);
7170
this.fieldInfos = state.fieldInfos;
71+
// Flat formats are used to randomly access vectors from their node ID that is stored
72+
// in the HNSW graph.
73+
dataContext =
74+
state.context.withHints(FileTypeHint.DATA, FileDataHint.KNN_VECTORS, DataAccessHint.RANDOM);
7275
try {
7376
vectorData =
7477
openDataInput(
7578
state,
7679
versionMeta,
7780
Lucene99FlatVectorsFormat.VECTOR_DATA_EXTENSION,
7881
Lucene99FlatVectorsFormat.VECTOR_DATA_CODEC_NAME,
79-
// Flat formats are used to randomly access vectors from their node ID that is stored
80-
// in the HNSW graph.
81-
state.context.withHints(
82-
FileTypeHint.DATA, FileDataHint.KNN_VECTORS, DataAccessHint.RANDOM));
82+
dataContext);
8383
} catch (Throwable t) {
8484
IOUtils.closeWhileSuppressingExceptions(t, this);
8585
throw t;
@@ -177,14 +177,10 @@ public void checkIntegrity() throws IOException {
177177
}
178178

179179
@Override
180-
public FlatVectorsReader getMergeInstance() {
181-
try {
182-
// Update the read advice since vectors are guaranteed to be accessed sequentially for merge
183-
this.vectorData.updateReadAdvice(ReadAdvice.SEQUENTIAL);
184-
return this;
185-
} catch (IOException exception) {
186-
throw new UncheckedIOException(exception);
187-
}
180+
public FlatVectorsReader getMergeInstance() throws IOException {
181+
// Update the read advice since vectors are guaranteed to be accessed sequentially for merge
182+
vectorData.updateIOContext(dataContext.withHints(DataAccessHint.SEQUENTIAL));
183+
return this;
188184
}
189185

190186
private FieldEntry getFieldEntryOrThrow(String field) {
@@ -276,7 +272,7 @@ public RandomVectorScorer getRandomVectorScorer(String field, byte[] target) thr
276272
public void finishMerge() throws IOException {
277273
// This makes sure that the access pattern hint is reverted back since HNSW implementation
278274
// needs it
279-
this.vectorData.updateReadAdvice(ReadAdvice.RANDOM);
275+
vectorData.updateIOContext(dataContext);
280276
}
281277

282278
@Override

lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99HnswVectorsReader.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -138,7 +138,7 @@ private Lucene99HnswVectorsReader(
138138
}
139139

140140
@Override
141-
public KnnVectorsReader getMergeInstance() {
141+
public KnnVectorsReader getMergeInstance() throws IOException {
142142
return new Lucene99HnswVectorsReader(this, this.flatVectorsReader.getMergeInstance());
143143
}
144144

lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldKnnVectorsFormat.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -238,7 +238,7 @@ public FieldsReader(final SegmentReadState readState) throws IOException {
238238
}
239239
}
240240

241-
private FieldsReader(final FieldsReader fieldsReader) {
241+
private FieldsReader(final FieldsReader fieldsReader) throws IOException {
242242
this.fieldInfos = fieldsReader.fieldInfos;
243243
for (FieldInfo fi : this.fieldInfos) {
244244
if (fi.hasVectorValues() && fieldsReader.fields.containsKey(fi.number)) {
@@ -248,7 +248,7 @@ private FieldsReader(final FieldsReader fieldsReader) {
248248
}
249249

250250
@Override
251-
public KnnVectorsReader getMergeInstance() {
251+
public KnnVectorsReader getMergeInstance() throws IOException {
252252
return new FieldsReader(this);
253253
}
254254

lucene/core/src/java/org/apache/lucene/store/IndexInput.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -227,12 +227,12 @@ public String toString() {
227227
public void prefetch(long offset, long length) throws IOException {}
228228

229229
/**
230-
* Optional method: Give a hint to this input about the change in read access pattern. IndexInput
230+
* Optional method: Updates the {@code IOContext} to specify a new read access pattern. IndexInput
231231
* implementations may take advantage of this hint to optimize reads from storage.
232232
*
233233
* <p>The default implementation is a no-op.
234234
*/
235-
public void updateReadAdvice(ReadAdvice readAdvice) throws IOException {}
235+
public void updateIOContext(IOContext context) throws IOException {}
236236

237237
/**
238238
* Returns a hint whether all the contents of this input are resident in physical memory. It's a

lucene/core/src/java/org/apache/lucene/store/MemorySegmentIndexInput.java

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -357,7 +357,11 @@ public void prefetch(long offset, long length) throws IOException {
357357
}
358358

359359
@Override
360-
public void updateReadAdvice(ReadAdvice readAdvice) throws IOException {
360+
public void updateIOContext(IOContext context) throws IOException {
361+
updateReadAdvice(toReadAdvice.apply(context));
362+
}
363+
364+
private void updateReadAdvice(ReadAdvice readAdvice) throws IOException {
361365
if (NATIVE_ACCESS.isEmpty()) {
362366
return;
363367
}

lucene/test-framework/src/java/org/apache/lucene/tests/codecs/asserting/AssertingKnnVectorsFormat.java

Lines changed: 34 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -113,16 +113,17 @@ public long ramBytesUsed() {
113113
public static class AssertingKnnVectorsReader extends KnnVectorsReader
114114
implements HnswGraphProvider {
115115
public final KnnVectorsReader delegate;
116-
final FieldInfos fis;
117-
final boolean mergeInstance;
118-
AtomicInteger mergeInstanceCount = new AtomicInteger();
119-
AtomicInteger finishMergeCount = new AtomicInteger();
116+
private final FieldInfos fis;
117+
private final boolean mergeInstance;
118+
private final AtomicInteger mergeInstanceCount = new AtomicInteger();
119+
private final AtomicInteger finishMergeCount = new AtomicInteger();
120120

121-
AssertingKnnVectorsReader(KnnVectorsReader delegate, FieldInfos fis) {
121+
private AssertingKnnVectorsReader(KnnVectorsReader delegate, FieldInfos fis) {
122122
this(delegate, fis, false);
123123
}
124124

125-
AssertingKnnVectorsReader(KnnVectorsReader delegate, FieldInfos fis, boolean mergeInstance) {
125+
private AssertingKnnVectorsReader(
126+
KnnVectorsReader delegate, FieldInfos fis, boolean mergeInstance) {
126127
assert delegate != null;
127128
this.delegate = delegate;
128129
this.fis = fis;
@@ -136,6 +137,8 @@ public void checkIntegrity() throws IOException {
136137

137138
@Override
138139
public FloatVectorValues getFloatVectorValues(String field) throws IOException {
140+
assert mergeInstanceCount.get() == finishMergeCount.get() || mergeInstance
141+
: "Called on the wrong instance";
139142
FieldInfo fi = fis.fieldInfo(field);
140143
assert fi != null
141144
&& fi.getVectorDimension() > 0
@@ -150,6 +153,8 @@ public FloatVectorValues getFloatVectorValues(String field) throws IOException {
150153

151154
@Override
152155
public ByteVectorValues getByteVectorValues(String field) throws IOException {
156+
assert mergeInstanceCount.get() == finishMergeCount.get() || mergeInstance
157+
: "Called on the wrong instance";
153158
FieldInfo fi = fis.fieldInfo(field);
154159
assert fi != null
155160
&& fi.getVectorDimension() > 0
@@ -165,7 +170,7 @@ public ByteVectorValues getByteVectorValues(String field) throws IOException {
165170
@Override
166171
public void search(String field, float[] target, KnnCollector knnCollector, Bits acceptDocs)
167172
throws IOException {
168-
assert !mergeInstance;
173+
assert mergeInstanceCount.get() == finishMergeCount.get() : "There is an open merge instance";
169174
FieldInfo fi = fis.fieldInfo(field);
170175
assert fi != null
171176
&& fi.getVectorDimension() > 0
@@ -176,7 +181,7 @@ public void search(String field, float[] target, KnnCollector knnCollector, Bits
176181
@Override
177182
public void search(String field, byte[] target, KnnCollector knnCollector, Bits acceptDocs)
178183
throws IOException {
179-
assert !mergeInstance;
184+
assert mergeInstanceCount.get() == finishMergeCount.get() : "There is an open merge instance";
180185
FieldInfo fi = fis.fieldInfo(field);
181186
assert fi != null
182187
&& fi.getVectorDimension() > 0
@@ -185,15 +190,28 @@ public void search(String field, byte[] target, KnnCollector knnCollector, Bits
185190
}
186191

187192
@Override
188-
public KnnVectorsReader getMergeInstance() {
189-
assert !mergeInstance;
193+
public KnnVectorsReader getMergeInstance() throws IOException {
190194
var mergeVectorsReader = delegate.getMergeInstance();
191195
assert mergeVectorsReader != null;
192196
mergeInstanceCount.incrementAndGet();
197+
AtomicInteger parentMergeFinishCount = this.finishMergeCount;
193198

194-
final var parent = this;
195199
return new AssertingKnnVectorsReader(
196200
mergeVectorsReader, AssertingKnnVectorsReader.this.fis, true) {
201+
private boolean finished;
202+
203+
@Override
204+
public void search(
205+
String field, float[] target, KnnCollector knnCollector, Bits acceptDocs) {
206+
assert false : "This instance should only be used for merging";
207+
}
208+
209+
@Override
210+
public void search(
211+
String field, byte[] target, KnnCollector knnCollector, Bits acceptDocs) {
212+
assert false : "This instance should only be used for merging";
213+
}
214+
197215
@Override
198216
public KnnVectorsReader getMergeInstance() {
199217
assert false; // merging from a merge instance it not allowed
@@ -202,9 +220,10 @@ public KnnVectorsReader getMergeInstance() {
202220

203221
@Override
204222
public void finishMerge() throws IOException {
205-
assert mergeInstance;
223+
assert !finished : "Merging already finished";
224+
finished = true;
206225
delegate.finishMerge();
207-
parent.finishMergeCount.incrementAndGet();
226+
parentMergeFinishCount.incrementAndGet();
208227
}
209228

210229
@Override
@@ -216,9 +235,7 @@ public void close() {
216235

217236
@Override
218237
public void finishMerge() throws IOException {
219-
assert mergeInstance;
220-
delegate.finishMerge();
221-
finishMergeCount.incrementAndGet();
238+
assert false; // can only finish merge on the merge instance
222239
}
223240

224241
@Override
@@ -228,9 +245,8 @@ public Map<String, Long> getOffHeapByteSize(FieldInfo fieldInfo) {
228245

229246
@Override
230247
public void close() throws IOException {
231-
assert !mergeInstance;
232-
delegate.close();
233248
delegate.close();
249+
delegate.close(); // impls should be able to handle multiple closes
234250
assert finishMergeCount.get() <= 0 || mergeInstanceCount.get() == finishMergeCount.get();
235251
}
236252

lucene/test-framework/src/java/org/apache/lucene/tests/store/BaseDirectoryTestCase.java

Lines changed: 0 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,6 @@
6161
import org.apache.lucene.store.IndexOutput;
6262
import org.apache.lucene.store.MMapDirectory;
6363
import org.apache.lucene.store.RandomAccessInput;
64-
import org.apache.lucene.store.ReadAdvice;
6564
import org.apache.lucene.tests.mockfile.ExtrasFS;
6665
import org.apache.lucene.tests.util.LuceneTestCase;
6766
import org.apache.lucene.tests.util.TestUtil;
@@ -1571,38 +1570,6 @@ public void testPrefetchOnSlice() throws IOException {
15711570
doTestPrefetch(TestUtil.nextInt(random(), 1, 1024));
15721571
}
15731572

1574-
public void testUpdateReadAdvice() throws IOException {
1575-
try (Directory dir = getDirectory(createTempDir("testUpdateReadAdvice"))) {
1576-
final int totalLength = TestUtil.nextInt(random(), 16384, 65536);
1577-
byte[] arr = new byte[totalLength];
1578-
random().nextBytes(arr);
1579-
try (IndexOutput out = dir.createOutput("temp.bin", IOContext.DEFAULT)) {
1580-
out.writeBytes(arr, arr.length);
1581-
}
1582-
1583-
try (IndexInput orig = dir.openInput("temp.bin", IOContext.DEFAULT)) {
1584-
IndexInput in = random().nextBoolean() ? orig.clone() : orig;
1585-
// Read advice updated at start
1586-
in.updateReadAdvice(randomFrom(random(), ReadAdvice.values()));
1587-
for (int i = 0; i < totalLength; i++) {
1588-
int offset = TestUtil.nextInt(random(), 0, (int) in.length() - 1);
1589-
in.seek(offset);
1590-
assertEquals(arr[offset], in.readByte());
1591-
}
1592-
1593-
// Updating readAdvice in the middle
1594-
for (int i = 0; i < 10_000; ++i) {
1595-
int offset = TestUtil.nextInt(random(), 0, (int) in.length() - 1);
1596-
in.seek(offset);
1597-
assertEquals(arr[offset], in.readByte());
1598-
if (random().nextBoolean()) {
1599-
in.updateReadAdvice(randomFrom(random(), ReadAdvice.values()));
1600-
}
1601-
}
1602-
}
1603-
}
1604-
}
1605-
16061573
private void doTestPrefetch(int startOffset) throws IOException {
16071574
try (Directory dir = getDirectory(createTempDir())) {
16081575
final int totalLength = startOffset + TestUtil.nextInt(random(), 16384, 65536);

0 commit comments

Comments
 (0)