Skip to content

Commit 20b6da6

Browse files
authored
Fix bug in compressed binary doc values bulk decoding. (#138755)
Reuse firstBlockId as the blockId on the first loop iteration instead of calling findAndUpdateBlockByScanning again for the first doc.
1 parent abaef06 commit 20b6da6

File tree

2 files changed

+47
-1
lines changed

2 files changed

+47
-1
lines changed

server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -617,7 +617,7 @@ void decodeBulk(int numBlocks, int firstDoc, int count, BlockLoader.SingletonByt
617617
byte[] bytes = new byte[bufferSize];
618618

619619
while (remainingCount > 0) {
620-
long blockId = findAndUpdateBlockByScanning(nextDoc);
620+
long blockId = remainingCount == count ? firstBlockId : findAndUpdateBlockByScanning(nextDoc);
621621
int numDocsInBlock = (int) (limitDocNumForBlock - startDocNumForBlock);
622622
int idxFirstDocInBlock = (int) (nextDoc - startDocNumForBlock);
623623
int countInBlock = Math.min(numDocsInBlock - idxFirstDocInBlock, remainingCount);

server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormatTests.java

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1197,6 +1197,52 @@ public void testOptionalColumnAtATimeReaderReadAsInt() throws Exception {
11971197
}
11981198
}
11991199

1200+
/**
 * Checks that bulk-reading a binary doc values field through {@code tryRead} only returns values that were
 * actually indexed, when the requested doc range begins at a randomly chosen doc ID of each leaf.
 * <p>
 * Regression test for https://github.com/elastic/elasticsearch/issues/138750.
 *
 * @throws Exception if indexing or reading fails
 */
public void testOptionalColumnAtATimeReaderBinary() throws Exception {
1201+
final String binaryFieldOne = "binary_1";
1202+
1203+
var config = new IndexWriterConfig();
1204+
config.setMergePolicy(new LogByteSizeMergePolicy());
1205+
config.setCodec(getCodec());
1206+
1207+
try (var dir = newDirectory(); var iw = new IndexWriter(dir, config)) {
1208+
// Pool of candidate values; every document gets one value drawn from this pool.
Set<String> binaryValues = new HashSet<>();
1209+
int numDocs = 10_000 * randomIntBetween(2, 20);
1210+
1211+
int numValues = randomIntBetween(8, 256);
1212+
for (int i = 0; i < numValues; i++) {
1213+
// Because this is a Set, a duplicate random string would collapse, so the pool may hold fewer than numValues entries.
binaryValues.add(randomAlphaOfLength(between(128, 256)));
1214+
}
1215+
1216+
for (int i = 0; i < numDocs; i++) {
1217+
var d = new Document();
1218+
d.add(new BinaryDocValuesField(binaryFieldOne, new BytesRef(randomFrom(binaryValues))));
1219+
1220+
iw.addDocument(d);
1221+
// Commit periodically (including at i == 0); presumably this is meant to produce multiple segments - TODO confirm.
if (i % 1000 == 0) {
1222+
iw.commit();
1223+
}
1224+
}
1225+
iw.commit();
1226+
var factory = TestBlock.factory();
1227+
try (var reader = DirectoryReader.open(iw)) {
1228+
for (var leaf : reader.leaves()) {
1229+
int maxDoc = leaf.reader().maxDoc();
1230+
var binaryFixedDV = getDenseBinaryValues(leaf.reader(), binaryFieldOne);
1231+
// Randomize the start doc: starting from a doc ID that belongs to a later block triggers
1232+
// https://github.com/elastic/elasticsearch/issues/138750
1233+
// The requested docs run from the random start doc up to, but not including, maxDoc.
var docs = TestBlock.docs(IntStream.range(between(0, maxDoc - 1), maxDoc).toArray());
1234+
// NOTE(review): the meaning of the positional arguments (0, random boolean, null, false) is not visible here - check tryRead's signature.
var block = (TestBlock) binaryFixedDV.tryRead(factory, docs, 0, random().nextBoolean(), null, false);
1235+
assertNotNull(block);
1236+
assertTrue(block.size() > 0);
1237+
// Every value read back must be one of the generated values.
for (int j = 0; j < block.size(); j++) {
1238+
var actual = ((BytesRef) block.get(j)).utf8ToString();
1239+
assertTrue("actual [" + actual + "] not in generated values", binaryValues.contains(actual));
1240+
}
1241+
}
1242+
}
1243+
}
1244+
}
1245+
12001246
public void testOptionalColumnAtATimeReaderWithSparseDocs() throws Exception {
12011247
final String counterField = "counter";
12021248
final String counterAsStringField = "counter_as_string";

0 commit comments

Comments
 (0)