Skip to content

Commit 29b03fd

Browse files
Simple bulk loading for binary doc values (#137860)
Add bulk loading for binary doc values. This version of bulk loading likely isn't faster than loading single values, since it just loads single values in a loop. But it adds some wiring and a test. This will be followed up with a PR that loads chunks of adjacent byte arrays values. Relates to #135711
1 parent e45481d commit 29b03fd

File tree

4 files changed

+102
-1
lines changed

4 files changed

+102
-1
lines changed

docs/changelog/137860.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 137860
2+
summary: Simple bulk loading for binary doc values
3+
area: Mapping
4+
type: feature
5+
issues: []

server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java

Lines changed: 43 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -209,6 +209,26 @@ public BytesRef binaryValue() throws IOException {
209209
bytesSlice.readBytes((long) doc * length, bytes.bytes, 0, length);
210210
return bytes;
211211
}
212+
213+
@Override
214+
public BlockLoader.Block tryRead(
215+
BlockLoader.BlockFactory factory,
216+
BlockLoader.Docs docs,
217+
int offset,
218+
boolean nullsFiltered,
219+
BlockDocValuesReader.ToDouble toDouble,
220+
boolean toInt
221+
) throws IOException {
222+
int count = docs.count() - offset;
223+
try (var builder = factory.bytesRefs(count)) {
224+
for (int i = offset; i < docs.count(); i++) {
225+
doc = docs.get(i);
226+
bytesSlice.readBytes((long) doc * length, bytes.bytes, 0, length);
227+
builder.appendBytesRef(bytes);
228+
}
229+
return builder.build();
230+
}
231+
}
212232
};
213233
} else {
214234
// variable length
@@ -224,6 +244,28 @@ public BytesRef binaryValue() throws IOException {
224244
bytesSlice.readBytes(startOffset, bytes.bytes, 0, bytes.length);
225245
return bytes;
226246
}
247+
248+
@Override
249+
public BlockLoader.Block tryRead(
250+
BlockLoader.BlockFactory factory,
251+
BlockLoader.Docs docs,
252+
int offset,
253+
boolean nullsFiltered,
254+
BlockDocValuesReader.ToDouble toDouble,
255+
boolean toInt
256+
) throws IOException {
257+
int count = docs.count() - offset;
258+
try (var builder = factory.bytesRefs(count)) {
259+
for (int i = offset; i < docs.count(); i++) {
260+
doc = docs.get(i);
261+
long startOffset = addresses.get(doc);
262+
bytes.length = (int) (addresses.get(doc + 1L) - startOffset);
263+
bytesSlice.readBytes(startOffset, bytes.bytes, 0, bytes.length);
264+
builder.appendBytesRef(bytes);
265+
}
266+
return builder.build();
267+
}
268+
}
227269
};
228270
}
229271
} else {
@@ -268,7 +310,7 @@ public BytesRef binaryValue() throws IOException {
268310
}
269311
}
270312

271-
private abstract static class DenseBinaryDocValues extends BinaryDocValues {
313+
abstract static class DenseBinaryDocValues extends BinaryDocValues implements BlockLoader.OptionalColumnAtATimeReader {
272314

273315
final int maxDoc;
274316
int doc = -1;

server/src/main/java/org/elasticsearch/index/mapper/blockloader/docvalues/BytesRefsFromBinaryBlockLoader.java

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
import org.apache.lucene.index.LeafReaderContext;
1414
import org.apache.lucene.util.BytesRef;
1515
import org.elasticsearch.core.Nullable;
16+
import org.elasticsearch.index.mapper.BlockLoader;
1617

1718
import java.io.IOException;
1819

@@ -52,6 +53,17 @@ static class BytesRefsFromBinary extends BytesRefsFromCustomBinaryBlockLoader.Ab
5253
super(docValues);
5354
}
5455

56+
@Override
57+
public BlockLoader.Block read(BlockFactory factory, Docs docs, int offset, boolean nullsFiltered) throws IOException {
58+
if (docValues instanceof BlockLoader.OptionalColumnAtATimeReader direct) {
59+
BlockLoader.Block block = direct.tryRead(factory, docs, offset, nullsFiltered, null, false);
60+
if (block != null) {
61+
return block;
62+
}
63+
}
64+
return super.read(factory, docs, offset, nullsFiltered);
65+
}
66+
5567
@Override
5668
public void read(int doc, BytesRefBuilder builder) throws IOException {
5769
if (false == docValues.advanceExact(doc)) {

server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormatTests.java

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@
4747
import org.elasticsearch.index.codec.tsdb.ES87TSDBDocValuesFormatTests;
4848
import org.elasticsearch.index.codec.tsdb.es819.ES819TSDBDocValuesProducer.BaseDenseNumericValues;
4949
import org.elasticsearch.index.codec.tsdb.es819.ES819TSDBDocValuesProducer.BaseSortedDocValues;
50+
import org.elasticsearch.index.codec.tsdb.es819.ES819TSDBDocValuesProducer.DenseBinaryDocValues;
5051
import org.elasticsearch.index.mapper.BlockLoader;
5152
import org.elasticsearch.index.mapper.BlockLoader.OptionalColumnAtATimeReader;
5253
import org.elasticsearch.index.mapper.TestBlock;
@@ -64,6 +65,8 @@
6465
import java.util.function.Supplier;
6566
import java.util.stream.IntStream;
6667

68+
import static org.elasticsearch.test.ESTestCase.between;
69+
import static org.elasticsearch.test.ESTestCase.randomAlphaOfLength;
6770
import static org.elasticsearch.test.ESTestCase.randomFrom;
6871
import static org.hamcrest.Matchers.equalTo;
6972
import static org.hamcrest.Matchers.instanceOf;
@@ -757,22 +760,30 @@ public void testOptionalColumnAtATimeReader() throws Exception {
757760
final String counterFieldAsString = "counter_as_string";
758761
final String timestampField = "@timestamp";
759762
final String gaugeField = "gauge";
763+
final String binaryFixedField = "binary_variable";
764+
final String binaryVariableField = "binary_fixed";
760765
long currentTimestamp = 1704067200000L;
761766
long currentCounter = 10_000_000;
762767

763768
var config = getTimeSeriesIndexWriterConfig(null, timestampField);
764769
try (var dir = newDirectory(); var iw = new IndexWriter(dir, config)) {
765770
long[] gauge1Values = new long[] { 2, 4, 6, 8, 10, 12, 14, 16 };
771+
List<BytesRef> binaryFixedValues = new ArrayList<>();
772+
List<BytesRef> binaryVariableValues = new ArrayList<>();
766773
int numDocs = 256 + random().nextInt(8096);
767774

768775
for (int i = 0; i < numDocs; i++) {
776+
binaryFixedValues.add(new BytesRef(randomAlphaOfLength(10)));
777+
binaryVariableValues.add(new BytesRef(randomAlphaOfLength(between(0, 10))));
769778
var d = new Document();
770779
long timestamp = currentTimestamp;
771780
// Index sorting doesn't work with NumericDocValuesField:
772781
d.add(SortedNumericDocValuesField.indexedField(timestampField, timestamp));
773782
d.add(new SortedNumericDocValuesField(counterField, currentCounter));
774783
d.add(new SortedSetDocValuesField(counterFieldAsString, new BytesRef(Long.toString(currentCounter))));
775784
d.add(new SortedNumericDocValuesField(gaugeField, gauge1Values[i % gauge1Values.length]));
785+
d.add(new BinaryDocValuesField(binaryFixedField, binaryFixedValues.getLast()));
786+
d.add(new BinaryDocValuesField(binaryVariableField, binaryVariableValues.getLast()));
776787

777788
iw.addDocument(d);
778789
if (i % 100 == 0) {
@@ -794,6 +805,9 @@ public void testOptionalColumnAtATimeReader() throws Exception {
794805
var counterDV = getBaseDenseNumericValues(leaf.reader(), counterField);
795806
var gaugeDV = getBaseDenseNumericValues(leaf.reader(), gaugeField);
796807
var stringCounterDV = getBaseSortedDocValues(leaf.reader(), counterFieldAsString);
808+
var binaryFixedDV = getDenseBinaryValues(leaf.reader(), binaryFixedField);
809+
var binaryVariableDV = getDenseBinaryValues(leaf.reader(), binaryVariableField);
810+
797811
int maxDoc = leaf.reader().maxDoc();
798812
for (int i = 0; i < maxDoc;) {
799813
int size = Math.max(1, random().nextInt(0, maxDoc - i));
@@ -843,6 +857,30 @@ public void testOptionalColumnAtATimeReader() throws Exception {
843857
}
844858
}
845859

860+
{
861+
// bulk loading binary fixed length field:
862+
var block = (TestBlock) binaryFixedDV.tryRead(factory, docs, 0, random().nextBoolean(), null, false);
863+
assertNotNull(block);
864+
assertEquals(size, block.size());
865+
for (int j = 0; j < block.size(); j++) {
866+
var actual = (BytesRef) block.get(j);
867+
var expected = binaryFixedValues.removeLast();
868+
assertEquals(expected, actual);
869+
}
870+
}
871+
872+
{
873+
// bulk loading binary variable length field:
874+
var block = (TestBlock) binaryVariableDV.tryRead(factory, docs, 0, random().nextBoolean(), null, false);
875+
assertNotNull(block);
876+
assertEquals(size, block.size());
877+
for (int j = 0; j < block.size(); j++) {
878+
var actual = (BytesRef) block.get(j);
879+
var expected = binaryVariableValues.removeLast();
880+
assertEquals(expected, actual);
881+
}
882+
}
883+
846884
i += size;
847885
}
848886
}
@@ -1405,6 +1443,10 @@ public void testEncodeRangeWithSortedSetPrimarySortField() throws Exception {
14051443
}
14061444
}
14071445

1446+
private static DenseBinaryDocValues getDenseBinaryValues(LeafReader leafReader, String field) throws IOException {
1447+
return (DenseBinaryDocValues) leafReader.getBinaryDocValues(field);
1448+
}
1449+
14081450
private static BaseDenseNumericValues getBaseDenseNumericValues(LeafReader leafReader, String field) throws IOException {
14091451
return (BaseDenseNumericValues) DocValues.unwrapSingleton(leafReader.getSortedNumericDocValues(field));
14101452
}

0 commit comments

Comments
 (0)