Skip to content

Commit bfc83f2

Browse files
parkertimmins authored and ncordon committed
Simple bulk loading of compressed binary doc values (elastic#138541)
Add bulk loading to compressed binary doc values. This just pushes the value iteration down into the codec; future optimizations will allow adjacent values to be read directly from compressed blocks.
1 parent da6ff8d commit bfc83f2

File tree

3 files changed

+74
-43
lines changed

3 files changed

+74
-43
lines changed

docs/changelog/138541.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 138541
2+
summary: Simple bulk loading of compressed binary doc values
3+
area: Codec
4+
type: enhancement
5+
issues: []

server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java

Lines changed: 34 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -380,6 +380,26 @@ private BinaryDocValues getCompressedBinary(BinaryEntry entry) throws IOExceptio
380380
public BytesRef binaryValue() throws IOException {
381381
return decoder.decode(doc, entry.numCompressedBlocks);
382382
}
383+
384+
@Override
385+
public BlockLoader.Block tryRead(
386+
BlockLoader.BlockFactory factory,
387+
BlockLoader.Docs docs,
388+
int offset,
389+
boolean nullsFiltered,
390+
BlockDocValuesReader.ToDouble toDouble,
391+
boolean toInt
392+
) throws IOException {
393+
int count = docs.count() - offset;
394+
try (var builder = factory.bytesRefs(count)) {
395+
for (int i = offset; i < docs.count(); i++) {
396+
doc = docs.get(i);
397+
var bytes = decoder.decode(doc, entry.numCompressedBlocks);
398+
builder.appendBytesRef(bytes);
399+
}
400+
return builder.build();
401+
}
402+
}
383403
};
384404
} else {
385405
// sparse
@@ -583,7 +603,7 @@ public BlockLoader.Block tryRead(
583603
}
584604
}
585605

586-
private abstract static class SparseBinaryDocValues extends BinaryDocValues {
606+
abstract static class SparseBinaryDocValues extends BinaryDocValues implements BlockLoader.OptionalColumnAtATimeReader {
587607

588608
final IndexedDISI disi;
589609

@@ -620,6 +640,19 @@ public boolean advanceExact(int target) throws IOException {
620640
public int docIDRunEnd() throws IOException {
621641
return disi.docIDRunEnd();
622642
}
643+
644+
@Override
645+
@Nullable
646+
public BlockLoader.Block tryRead(
647+
BlockLoader.BlockFactory factory,
648+
BlockLoader.Docs docs,
649+
int offset,
650+
boolean nullsFiltered,
651+
BlockDocValuesReader.ToDouble toDouble,
652+
boolean toInt
653+
) throws IOException {
654+
return null;
655+
}
623656
}
624657

625658
@Override

server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormatTests.java

Lines changed: 35 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@
4949
import org.elasticsearch.index.codec.tsdb.es819.ES819TSDBDocValuesProducer.BaseDenseNumericValues;
5050
import org.elasticsearch.index.codec.tsdb.es819.ES819TSDBDocValuesProducer.BaseSortedDocValues;
5151
import org.elasticsearch.index.codec.tsdb.es819.ES819TSDBDocValuesProducer.DenseBinaryDocValues;
52+
import org.elasticsearch.index.codec.tsdb.es819.ES819TSDBDocValuesProducer.SparseBinaryDocValues;
5253
import org.elasticsearch.index.mapper.BlockLoader;
5354
import org.elasticsearch.index.mapper.BlockLoader.OptionalColumnAtATimeReader;
5455
import org.elasticsearch.index.mapper.TestBlock;
@@ -978,13 +979,9 @@ public void testOptionalColumnAtATimeReader() throws Exception {
978979
assertEquals(expectedGauge, actualGauge);
979980
}
980981
}
981-
982-
// TODO add bulk loading to compressed values so this is not necessary
983-
var block = (TestBlock) binaryFixedDV.tryRead(factory, docs, 0, random().nextBoolean(), null, false);
984-
if (isCompressed(config, binaryFixedField)) {
985-
assertNull(block);
986-
} else {
982+
{
987983
// bulk loading binary fixed length field:
984+
var block = (TestBlock) binaryFixedDV.tryRead(factory, docs, 0, random().nextBoolean(), null, false);
988985
assertNotNull(block);
989986
assertEquals(size, block.size());
990987
for (int j = 0; j < block.size(); j++) {
@@ -993,13 +990,9 @@ public void testOptionalColumnAtATimeReader() throws Exception {
993990
assertEquals(expected, actual);
994991
}
995992
}
996-
997-
// TODO add bulk loading to compressed values so this is not necessary
998-
block = (TestBlock) binaryVariableDV.tryRead(factory, docs, 0, random().nextBoolean(), null, false);
999-
if (isCompressed(config, binaryVariableField)) {
1000-
assertNull(block);
1001-
} else {
993+
{
1002994
// bulk loading binary variable length field:
995+
var block = (TestBlock) binaryVariableDV.tryRead(factory, docs, 0, random().nextBoolean(), null, false);
1003996
assertNotNull(block);
1004997
assertEquals(size, block.size());
1005998
for (int j = 0; j < block.size(); j++) {
@@ -1212,6 +1205,7 @@ public void testOptionalColumnAtATimeReaderWithSparseDocs() throws Exception {
12121205
final String binaryFixedField = "binary_variable";
12131206
final String binaryVariableField = "binary_fixed";
12141207
final int binaryFieldMaxLength = randomIntBetween(1, 20);
1208+
boolean denseBinaryData = randomBoolean();
12151209

12161210
long currentTimestamp = 1704067200000L;
12171211
long currentCounter = 10_000_000;
@@ -1233,10 +1227,12 @@ public void testOptionalColumnAtATimeReaderWithSparseDocs() throws Exception {
12331227
d.add(new SortedDocValuesField(counterAsStringField, new BytesRef(Long.toString(currentCounter))));
12341228
d.add(new SortedNumericDocValuesField(queryField, q));
12351229

1236-
binaryFixed[numDocs - i] = new BytesRef(randomAlphaOfLength(binaryFieldMaxLength));
1237-
d.add(new BinaryDocValuesField(binaryFixedField, binaryFixed[numDocs - i]));
1238-
binaryVariable[numDocs - i] = new BytesRef(randomAlphaOfLength(between(0, binaryFieldMaxLength)));
1239-
d.add(new BinaryDocValuesField(binaryVariableField, binaryVariable[numDocs - i]));
1230+
if (denseBinaryData || random().nextBoolean()) {
1231+
binaryFixed[numDocs - i] = new BytesRef(randomAlphaOfLength(binaryFieldMaxLength));
1232+
d.add(new BinaryDocValuesField(binaryFixedField, binaryFixed[numDocs - i]));
1233+
binaryVariable[numDocs - i] = new BytesRef(randomAlphaOfLength(between(0, binaryFieldMaxLength)));
1234+
d.add(new BinaryDocValuesField(binaryVariableField, binaryVariable[numDocs - i]));
1235+
}
12401236

12411237
if (i % 120 == 0) {
12421238
q++;
@@ -1365,35 +1361,38 @@ public void testOptionalColumnAtATimeReaderWithSparseDocs() throws Exception {
13651361
}
13661362

13671363
{
1368-
// Bulk binary loader can only handle sparse queries over dense documents
1364+
// Bulk binary loader handles sparse queries over dense documents; for sparse documents tryRead returns null
13691365
List<Integer> testDocs = IntStream.range(0, numDocs - 1).filter(i -> randomBoolean()).boxed().toList();
13701366
docs = TestBlock.docs(testDocs.stream().mapToInt(n -> n).toArray());
13711367
if (testDocs.isEmpty() == false) {
1372-
{
1373-
var dv = getDenseBinaryValues(leafReader, binaryFixedField);
1374-
var block = (TestBlock) dv.tryRead(factory, docs, 0, random().nextBoolean(), null, false);
1375-
// TODO add bulk loading to compressed values so this is not necessary
1376-
if (isCompressed(config, binaryFixedField)) {
1377-
assertNull(block);
1378-
} else {
1368+
if (denseBinaryData) {
1369+
{
1370+
var dv = getDenseBinaryValues(leafReader, binaryFixedField);
1371+
var block = (TestBlock) dv.tryRead(factory, docs, 0, random().nextBoolean(), null, false);
13791372
assertNotNull(block);
13801373
for (int i = 0; i < testDocs.size(); i++) {
13811374
assertThat(block.get(i), equalTo(binaryFixed[testDocs.get(i)]));
13821375
}
13831376
}
1384-
}
1385-
{
1386-
var dv = getDenseBinaryValues(leafReader, binaryVariableField);
1387-
var block = (TestBlock) dv.tryRead(factory, docs, 0, random().nextBoolean(), null, false);
1388-
// TODO add bulk loading to compressed values so this is not necessary
1389-
if (isCompressed(config, binaryVariableField)) {
1390-
assertNull(block);
1391-
} else {
1377+
{
1378+
var dv = getDenseBinaryValues(leafReader, binaryVariableField);
1379+
var block = (TestBlock) dv.tryRead(factory, docs, 0, random().nextBoolean(), null, false);
13921380
assertNotNull(block);
13931381
for (int i = 0; i < testDocs.size(); i++) {
13941382
assertThat(block.get(i), equalTo(binaryVariable[testDocs.get(i)]));
13951383
}
13961384
}
1385+
} else {
1386+
{
1387+
var dv = getSparseBinaryValues(leafReader, binaryFixedField);
1388+
var block = (TestBlock) dv.tryRead(factory, docs, 0, random().nextBoolean(), null, false);
1389+
assertNull(block);
1390+
}
1391+
{
1392+
var dv = getSparseBinaryValues(leafReader, binaryVariableField);
1393+
var block = (TestBlock) dv.tryRead(factory, docs, 0, random().nextBoolean(), null, false);
1394+
assertNull(block);
1395+
}
13971396
}
13981397
}
13991398
}
@@ -1627,6 +1626,10 @@ private static DenseBinaryDocValues getDenseBinaryValues(LeafReader leafReader,
16271626
return (DenseBinaryDocValues) leafReader.getBinaryDocValues(field);
16281627
}
16291628

1629+
private static SparseBinaryDocValues getSparseBinaryValues(LeafReader leafReader, String field) throws IOException {
1630+
return (SparseBinaryDocValues) leafReader.getBinaryDocValues(field);
1631+
}
1632+
16301633
private static BaseDenseNumericValues getBaseDenseNumericValues(LeafReader leafReader, String field) throws IOException {
16311634
return (BaseDenseNumericValues) DocValues.unwrapSingleton(leafReader.getSortedNumericDocValues(field));
16321635
}
@@ -1793,14 +1796,4 @@ public static BinaryDVCompressionMode randomBinaryCompressionMode() {
17931796
BinaryDVCompressionMode[] modes = BinaryDVCompressionMode.values();
17941797
return modes[random().nextInt(modes.length)];
17951798
}
1796-
1797-
private boolean isCompressed(IndexWriterConfig config, String field) {
1798-
if (config.getCodec() instanceof Elasticsearch92Lucene103Codec codec) {
1799-
if (codec.getDocValuesFormatForField(field) instanceof ES819TSDBDocValuesFormat format) {
1800-
return format.binaryDVCompressionMode != BinaryDVCompressionMode.NO_COMPRESS;
1801-
}
1802-
}
1803-
return false;
1804-
}
1805-
18061799
}

0 commit comments

Comments
 (0)