Skip to content

Commit 1a0e927

Browse files
committed
Push compute engine value loading for dense singleton ordinals down to tsdb codec.
This change targets reading field values in bulk mode at the codec level when the doc values type is sorted doc values or sorted set doc values, each document has exactly one value, and the field is dense (every document has a value). Relates to #128445
1 parent 66107f1 commit 1a0e927

File tree

7 files changed

+123
-1
lines changed

7 files changed

+123
-1
lines changed

server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/BulkNumericDocValues.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,4 +24,7 @@ public abstract class BulkNumericDocValues extends NumericDocValues {
2424
*/
2525
public abstract BlockLoader.Block read(BlockLoader.BlockFactory factory, BlockLoader.Docs docs, int offset) throws IOException;
2626

27+
/**
 * Reads ordinals for the given documents in bulk, handing them to {@code builder}.
 *
 * @param builder the ordinals builder that receives the values
 * @param docs    the documents to read
 * @param offset  position within {@code docs} of the first document to read
 * @return the resulting block; NOTE(review): presumably the block produced by {@code builder} - confirm against implementations
 * @throws IOException if reading the underlying values fails
 */
public abstract BlockLoader.Block readOrdinals(BlockLoader.SingletonOrdinalsBuilder builder, BlockLoader.Docs docs, int offset)
28+
throws IOException;
29+
2730
}
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the "Elastic License
4+
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
5+
* Public License v 1"; you may not use this file except in compliance with, at
6+
* your election, the "Elastic License 2.0", the "GNU Affero General Public
7+
* License v3.0 only", or the "Server Side Public License, v 1".
8+
*/
9+
10+
package org.elasticsearch.index.codec.tsdb.es819;
11+
12+
import org.apache.lucene.index.SortedDocValues;
13+
import org.elasticsearch.index.mapper.BlockLoader;
14+
15+
import java.io.IOException;
16+
17+
public abstract class BulkSortedDocValues extends SortedDocValues {
18+
19+
public BlockLoader.Block read(BlockLoader.BlockFactory factory, BlockLoader.Docs docs, int offset) throws IOException {
20+
throw new UnsupportedOperationException();
21+
}
22+
23+
public boolean supportsBlockRead() {
24+
return false;
25+
}
26+
27+
}

server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java

Lines changed: 70 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -369,10 +369,26 @@ public int advance(int target) throws IOException {
369369
public long cost() {
370370
return ords.cost();
371371
}
372+
373+
@Override
374+
public BlockLoader.Block read(BlockLoader.BlockFactory factory, BlockLoader.Docs docs, int offset) throws IOException {
375+
if (ords instanceof BulkNumericDocValues b) {
376+
try (var builder = factory.singletonOrdinalsBuilder(this, docs.count() - offset)) {
377+
return b.readOrdinals(builder, docs, offset);
378+
}
379+
} else {
380+
throw new UnsupportedOperationException();
381+
}
382+
}
383+
384+
@Override
385+
public boolean supportsBlockRead() {
386+
return ords instanceof BulkNumericDocValues;
387+
}
372388
};
373389
}
374390

375-
abstract class BaseSortedDocValues extends SortedDocValues {
391+
abstract class BaseSortedDocValues extends BulkSortedDocValues {
376392

377393
final SortedEntry entry;
378394
final TermsEnum termsEnum;
@@ -1238,6 +1254,59 @@ public BlockLoader.Block read(BlockLoader.BlockFactory factory, BlockLoader.Docs
12381254
}
12391255
}
12401256

1257+
@Override
1258+
public BlockLoader.Block readOrdinals(BlockLoader.SingletonOrdinalsBuilder builder, BlockLoader.Docs docs, int offset)
1259+
throws IOException {
1260+
assert maxOrd >= 0 : "unexpected maxOrd[" + maxOrd + "]";
1261+
final int docsCount = docs.count();
1262+
doc = docs.get(docsCount - 1);
1263+
for (int i = offset; i < docsCount;) {
1264+
int index = docs.get(i);
1265+
final int blockIndex = index >>> ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SHIFT;
1266+
final int blockInIndex = index & ES819TSDBDocValuesFormat.NUMERIC_BLOCK_MASK;
1267+
if (blockIndex != currentBlockIndex) {
1268+
assert blockIndex > currentBlockIndex : blockIndex + " < " + currentBlockIndex;
1269+
// no need to seek if the loading block is the next block
1270+
if (currentBlockIndex + 1 != blockIndex) {
1271+
valuesData.seek(indexReader.get(blockIndex));
1272+
}
1273+
currentBlockIndex = blockIndex;
1274+
decoder.decodeOrdinals(valuesData, currentBlock, bitsPerOrd);
1275+
}
1276+
1277+
// Try to append more than just one value:
1278+
// Instead of iterating over docs and find the max length, take an optimistic approach to avoid as
1279+
// many comparisons as there are remaining docs and instead do at most 7 comparisons:
1280+
int length = 1;
1281+
int remainingBlockLength = Math.min(ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE - blockInIndex, docsCount - i);
1282+
for (int newLength = remainingBlockLength; newLength > 1; newLength = newLength >> 1) {
1283+
int lastIndex = i + newLength - 1;
1284+
if (isDense(index, docs.get(lastIndex), newLength)) {
1285+
length = newLength;
1286+
break;
1287+
}
1288+
}
1289+
1290+
// Unfortunately, no array copy here...
1291+
// Since we need to loop here, let's also keep track of min/max.
1292+
int minOrd = Integer.MAX_VALUE;
1293+
int maxOrd = Integer.MIN_VALUE;
1294+
int counter = 0;
1295+
int[] convertedOrds = new int[length];
1296+
int endBlockInIndex = blockInIndex + length;
1297+
for (int j = blockInIndex; j < endBlockInIndex; j++) {
1298+
int ord = Math.toIntExact(currentBlock[j]);
1299+
convertedOrds[counter++] = ord;
1300+
minOrd = Math.min(minOrd, ord);
1301+
maxOrd = Math.max(maxOrd, ord);
1302+
}
1303+
builder.appendOrds(convertedOrds, 0, length, minOrd, maxOrd);
1304+
i += length;
1305+
}
1306+
return builder.build();
1307+
1308+
}
1309+
12411310
static boolean isDense(int firstDocId, int lastDocId, int length) {
12421311
// This does not detect duplicate docids (e.g [1, 1, 2, 4] would be detected as dense),
12431312
// this can happen with enrich or lookup. However this codec isn't used for enrich / lookup.

server/src/main/java/org/elasticsearch/index/mapper/BlockDocValuesReader.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
import org.elasticsearch.common.io.stream.ByteArrayStreamInput;
2323
import org.elasticsearch.index.IndexVersion;
2424
import org.elasticsearch.index.codec.tsdb.es819.BulkNumericDocValues;
25+
import org.elasticsearch.index.codec.tsdb.es819.BulkSortedDocValues;
2526
import org.elasticsearch.index.mapper.BlockLoader.BlockFactory;
2627
import org.elasticsearch.index.mapper.BlockLoader.BooleanBuilder;
2728
import org.elasticsearch.index.mapper.BlockLoader.Builder;
@@ -659,6 +660,9 @@ public BlockLoader.Block read(BlockFactory factory, Docs docs, int offset) throw
659660
if (docs.count() - offset == 1) {
660661
return readSingleDoc(factory, docs.get(offset));
661662
}
663+
if (ordinals instanceof BulkSortedDocValues bulkDv && bulkDv.supportsBlockRead()) {
664+
return bulkDv.read(factory, docs, offset);
665+
}
662666
try (var builder = factory.singletonOrdinalsBuilder(ordinals, docs.count() - offset)) {
663667
for (int i = offset; i < docs.count(); i++) {
664668
int doc = docs.get(i);

server/src/main/java/org/elasticsearch/index/mapper/BlockLoader.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -531,6 +531,8 @@ interface SingletonOrdinalsBuilder extends Builder {
531531
* Appends an ordinal to the builder.
532532
*/
533533
SingletonOrdinalsBuilder appendOrd(int value);
534+
535+
/**
 * Appends {@code length} ordinals taken from {@code values}, starting at index {@code from}.
 * {@code minOrd} and {@code maxOrd} are supplied by the caller alongside the range; presumably they are the
 * smallest and largest ordinal within it - confirm against implementations.
 */
SingletonOrdinalsBuilder appendOrds(int[] values, int from, int length, int minOrd, int maxOrd);
534536
}
535537

536538
interface SortedSetOrdinalsBuilder extends Builder {

test/framework/src/main/java/org/elasticsearch/index/mapper/TestBlock.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -283,6 +283,14 @@ public SingletonOrdsBuilder appendOrd(int value) {
283283
throw new UncheckedIOException(e);
284284
}
285285
}
286+
287+
@Override
288+
public BlockLoader.SingletonOrdinalsBuilder appendOrds(int[] values, int from, int length, int minOrd, int maxOrd) {
289+
for (int i = from; i < from + length; i++) {
290+
appendOrd(values[i]);
291+
}
292+
return this;
293+
}
286294
}
287295
return new SingletonOrdsBuilder();
288296
}

x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/read/SingletonOrdinalsBuilder.java

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,15 @@ public SingletonOrdinalsBuilder appendOrd(int ord) {
5555
return this;
5656
}
5757

58+
@Override
59+
public BlockLoader.SingletonOrdinalsBuilder appendOrds(int[] values, int from, int length, int minOrd, int maxOrd) {
60+
System.arraycopy(values, from, ords, count, length);
61+
this.count += length;
62+
this.minOrd = Math.min(this.minOrd, minOrd);
63+
this.maxOrd = Math.max(this.maxOrd, maxOrd);
64+
return this;
65+
}
66+
5867
@Override
5968
public SingletonOrdinalsBuilder beginPositionEntry() {
6069
throw new UnsupportedOperationException("should only have one value per doc");

0 commit comments

Comments
 (0)