Skip to content

Commit fb95510

Browse files
authored
Fix BlobCacheBufferedIndexInput large read after clone (#98970)
After a clone the delegate is positioned as if it has just filled the buffer, but the buffer is empty. On most paths we go via `refill()` which repositions the delegate correctly, but if we skip the buffer and read directly we must also account for this case.
1 parent 896c1b6 commit fb95510

File tree

3 files changed

+34
-7
lines changed

3 files changed

+34
-7
lines changed

docs/changelog/98970.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 98970
2+
summary: Fix `BlobCacheBufferedIndexInput` large read after clone
3+
area: Snapshot/Restore
4+
type: bug
5+
issues: []

x-pack/plugin/blob-cache/src/main/java/org/elasticsearch/blobcache/common/BlobCacheBufferedIndexInput.java

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@
2020
/**
2121
* Copy of {@link org.apache.lucene.store.BufferedIndexInput} that contains optimizations that haven't made it to the Lucene version used
2222
* by Elasticsearch yet or that are only applicable to Elasticsearch.
23+
* <p>
24+
* Deviates from Lucene's implementation slightly to fix a bug - see [NOTE: Missing Seek] below, and #98970 for more details.
2325
*/
2426
public abstract class BlobCacheBufferedIndexInput extends IndexInput implements RandomAccessInput {
2527

@@ -106,13 +108,14 @@ public final void readBytes(byte[] b, int offset, int len, boolean useBuffer) th
106108
buffer.get(b, offset, len);
107109
}
108110
} else {
109-
// The amount left to read is larger than the buffer
110-
// or we've been asked to not use our buffer -
111-
// there's no performance reason not to read it all
112-
// at once. Note that unlike the previous code of
113-
// this function, there is no need to do a seek
114-
// here, because there's no need to reread what we
115-
// had in the buffer.
111+
// The amount left to read is larger than the buffer or we've been asked to not use our buffer - there's no performance
112+
// reason not to read it all at once.
113+
if (buffer == EMPTY_BYTEBUFFER) {
114+
// fresh clone, must seek
115+
// [NOTE: Missing Seek] This deviates from Lucene's BufferedIndexInput implementation - see #98970
116+
seekInternal(bufferStart);
117+
} // else there's no need to do a seek here because we are already positioned correctly
118+
116119
long after = bufferStart + buffer.position() + len;
117120
if (after > length()) throw new EOFException("read past EOF: " + this);
118121
readInternal(ByteBuffer.wrap(b, offset, len));

x-pack/plugin/searchable-snapshots/src/test/java/org/elasticsearch/xpack/searchablesnapshots/store/input/DirectBlobContainerIndexInputTests.java

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,25 @@ public void testRandomReads() throws IOException {
142142
}
143143
}
144144

145+
public void testCloneAndLargeRead() throws IOException {
146+
final Tuple<String, byte[]> bytes = randomChecksumBytes(between(ByteSizeUnit.KB.toIntBytes(2), ByteSizeUnit.KB.toIntBytes(10)));
147+
try (var indexInput = createIndexInput(bytes)) {
148+
indexInput.readLong();
149+
150+
final var clone = indexInput.clone();
151+
152+
// do a read which is large enough to exercise the path which bypasses the buffer and fills the output directly
153+
154+
final var originalBytes = new byte[2048];
155+
indexInput.readBytes(originalBytes, 0, originalBytes.length);
156+
157+
final var cloneBytes = new byte[originalBytes.length];
158+
clone.readBytes(cloneBytes, 0, cloneBytes.length);
159+
160+
assertArrayEquals(originalBytes, cloneBytes);
161+
}
162+
}
163+
145164
public void testRandomOverflow() throws IOException {
146165
for (int i = 0; i < 100; i++) {
147166
final Tuple<String, byte[]> bytes = randomChecksumBytes(randomIntBetween(1, 1000));

0 commit comments

Comments
 (0)