Skip to content

Commit 64f8209

Browse files
authored
Speed up loading keyword fields with index sorts (#132950)
Reading keyword fields that are the primary sort in the index can be sped up by skip reading, as identical values are stored together. In this case, we can use values from the doc_values skipper instead of loading values from doc_values. However, the doc_values skipper is not enabled yet. Here, we use two buffers when reading ordinals: one for the beginning of the block and one for the end. If both return the same value, we can skip the middle. There is a follow-up step where we fill the values in the middle until we reach the last value. This optimization should speed up time-series queries.
1 parent 61575f0 commit 64f8209

File tree

11 files changed

+323
-95
lines changed

11 files changed

+323
-95
lines changed

docs/changelog/132950.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 132950
2+
summary: Speed up loading keyword fields with index sorts
3+
area: "ES|QL"
4+
type: enhancement
5+
issues: []

server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/BulkNumericDocValues.java

Lines changed: 0 additions & 27 deletions
This file was deleted.

server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java

Lines changed: 101 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@
5353

5454
final class ES819TSDBDocValuesProducer extends DocValuesProducer {
5555
final IntObjectHashMap<NumericEntry> numerics;
56+
private int primarySortFieldNumber = -1;
5657
final IntObjectHashMap<BinaryEntry> binaries;
5758
final IntObjectHashMap<SortedEntry> sorted;
5859
final IntObjectHashMap<SortedSetEntry> sortedSets;
@@ -91,7 +92,14 @@ final class ES819TSDBDocValuesProducer extends DocValuesProducer {
9192
);
9293

9394
readFields(in, state.fieldInfos);
94-
95+
final var indexSort = state.segmentInfo.getIndexSort();
96+
if (indexSort != null && indexSort.getSort().length > 0) {
97+
var primarySortField = indexSort.getSort()[0];
98+
var sortField = state.fieldInfos.fieldInfo(primarySortField.getField());
99+
if (sortField != null) {
100+
primarySortFieldNumber = sortField.number;
101+
}
102+
}
95103
} catch (Throwable exception) {
96104
priorE = exception;
97105
} finally {
@@ -333,10 +341,10 @@ public boolean advanceExact(int target) throws IOException {
333341
@Override
334342
public SortedDocValues getSorted(FieldInfo field) throws IOException {
335343
SortedEntry entry = sorted.get(field.number);
336-
return getSorted(entry);
344+
return getSorted(entry, field.number == primarySortFieldNumber);
337345
}
338346

339-
private SortedDocValues getSorted(SortedEntry entry) throws IOException {
347+
private SortedDocValues getSorted(SortedEntry entry, boolean valuesSorted) throws IOException {
340348
final NumericDocValues ords = getNumeric(entry.ordsEntry, entry.termsDictEntry.termsDictSize);
341349
return new BaseSortedDocValues(entry) {
342350

@@ -369,10 +377,29 @@ public int advance(int target) throws IOException {
369377
public long cost() {
370378
return ords.cost();
371379
}
380+
381+
@Override
382+
public BlockLoader.Block tryRead(BlockLoader.BlockFactory factory, BlockLoader.Docs docs, int offset) throws IOException {
383+
if (valuesSorted && ords instanceof BaseDenseNumericValues denseOrds) {
384+
int firstDoc = docs.get(offset);
385+
denseOrds.advanceExact(firstDoc);
386+
long startValue = denseOrds.longValue();
387+
final int docCount = docs.count();
388+
int lastDoc = docs.get(docCount - 1);
389+
long lastValue = denseOrds.lookAheadValueAt(lastDoc);
390+
if (lastValue == startValue) {
391+
BytesRef b = lookupOrd(Math.toIntExact(startValue));
392+
return factory.constantBytes(BytesRef.deepCopyOf(b), docCount - offset);
393+
}
394+
// TODO: Since ordinals are sorted, start at 0 (offset by startValue), scan until lastValue,
395+
// then fill remaining positions with lastValue.
396+
}
397+
return null;
398+
}
372399
};
373400
}
374401

375-
abstract class BaseSortedDocValues extends SortedDocValues {
402+
abstract class BaseSortedDocValues extends SortedDocValues implements BlockLoader.OptionalColumnAtATimeReader {
376403

377404
final SortedEntry entry;
378405
final TermsEnum termsEnum;
@@ -406,6 +433,15 @@ public int lookupTerm(BytesRef key) throws IOException {
406433
public TermsEnum termsEnum() throws IOException {
407434
return new TermsDict(entry.termsDictEntry, data, merging);
408435
}
436+
437+
@Override
438+
public BlockLoader.Block tryRead(BlockLoader.BlockFactory factory, BlockLoader.Docs docs, int offset) throws IOException {
439+
return null;
440+
}
441+
}
442+
443+
abstract static class BaseDenseNumericValues extends NumericDocValues implements BlockLoader.OptionalColumnAtATimeReader {
444+
abstract long lookAheadValueAt(int targetDoc) throws IOException;
409445
}
410446

411447
abstract static class BaseSortedSetDocValues extends SortedSetDocValues {
@@ -695,7 +731,7 @@ public SortedNumericDocValues getSortedNumeric(FieldInfo field) throws IOExcepti
695731
public SortedSetDocValues getSortedSet(FieldInfo field) throws IOException {
696732
SortedSetEntry entry = sortedSets.get(field.number);
697733
if (entry.singleValueEntry != null) {
698-
return DocValues.singleton(getSorted(entry.singleValueEntry));
734+
return DocValues.singleton(getSorted(entry.singleValueEntry, field.number == primarySortFieldNumber));
699735
}
700736

701737
SortedNumericEntry ordsEntry = entry.ordsEntry;
@@ -1047,7 +1083,7 @@ private NumericDocValues getNumeric(NumericEntry entry, long maxOrd) throws IOEx
10471083
// Special case for maxOrd 1, no need to read blocks and use ordinal 0 as only value
10481084
if (entry.docsWithFieldOffset == -1) {
10491085
// Special case when all docs have a value
1050-
return new NumericDocValues() {
1086+
return new BaseDenseNumericValues() {
10511087

10521088
private final int maxDoc = ES819TSDBDocValuesProducer.this.maxDoc;
10531089
private int doc = -1;
@@ -1086,6 +1122,17 @@ public boolean advanceExact(int target) {
10861122
public long cost() {
10871123
return maxDoc;
10881124
}
1125+
1126+
@Override
1127+
long lookAheadValueAt(int targetDoc) throws IOException {
1128+
return 0L; // Only one ordinal!
1129+
}
1130+
1131+
@Override
1132+
public BlockLoader.Block tryRead(BlockLoader.BlockFactory factory, BlockLoader.Docs docs, int offset)
1133+
throws IOException {
1134+
return null;
1135+
}
10891136
};
10901137
} else {
10911138
final IndexedDISI disi = new IndexedDISI(
@@ -1141,13 +1188,17 @@ public long longValue() {
11411188
final int bitsPerOrd = maxOrd >= 0 ? PackedInts.bitsRequired(maxOrd - 1) : -1;
11421189
if (entry.docsWithFieldOffset == -1) {
11431190
// dense
1144-
return new BulkNumericDocValues() {
1191+
return new BaseDenseNumericValues() {
11451192

11461193
private final int maxDoc = ES819TSDBDocValuesProducer.this.maxDoc;
11471194
private int doc = -1;
11481195
private final TSDBDocValuesEncoder decoder = new TSDBDocValuesEncoder(ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE);
11491196
private long currentBlockIndex = -1;
11501197
private final long[] currentBlock = new long[ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE];
1198+
// lookahead block
1199+
private long lookaheadBlockIndex = -1;
1200+
private long[] lookaheadBlock;
1201+
private IndexInput lookaheadData = null;
11511202

11521203
@Override
11531204
public int docID() {
@@ -1183,24 +1234,28 @@ public long longValue() throws IOException {
11831234
final int index = doc;
11841235
final int blockIndex = index >>> ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SHIFT;
11851236
final int blockInIndex = index & ES819TSDBDocValuesFormat.NUMERIC_BLOCK_MASK;
1186-
if (blockIndex != currentBlockIndex) {
1187-
assert blockIndex > currentBlockIndex : blockIndex + " < " + currentBlockIndex;
1188-
// no need to seek if the loading block is the next block
1189-
if (currentBlockIndex + 1 != blockIndex) {
1190-
valuesData.seek(indexReader.get(blockIndex));
1191-
}
1192-
currentBlockIndex = blockIndex;
1193-
if (maxOrd >= 0) {
1194-
decoder.decodeOrdinals(valuesData, currentBlock, bitsPerOrd);
1195-
} else {
1196-
decoder.decode(valuesData, currentBlock);
1197-
}
1237+
if (blockIndex == currentBlockIndex) {
1238+
return currentBlock[blockInIndex];
1239+
}
1240+
if (blockIndex == lookaheadBlockIndex) {
1241+
return lookaheadBlock[blockInIndex];
1242+
}
1243+
assert blockIndex > currentBlockIndex : blockIndex + " < " + currentBlockIndex;
1244+
// no need to seek if the loading block is the next block
1245+
if (currentBlockIndex + 1 != blockIndex) {
1246+
valuesData.seek(indexReader.get(blockIndex));
1247+
}
1248+
currentBlockIndex = blockIndex;
1249+
if (maxOrd >= 0) {
1250+
decoder.decodeOrdinals(valuesData, currentBlock, bitsPerOrd);
1251+
} else {
1252+
decoder.decode(valuesData, currentBlock);
11981253
}
11991254
return currentBlock[blockInIndex];
12001255
}
12011256

12021257
@Override
1203-
public BlockLoader.Block read(BlockLoader.BlockFactory factory, BlockLoader.Docs docs, int offset) throws IOException {
1258+
public BlockLoader.Block tryRead(BlockLoader.BlockFactory factory, BlockLoader.Docs docs, int offset) throws IOException {
12041259
assert maxOrd == -1 : "unexpected maxOrd[" + maxOrd + "]";
12051260
final int docsCount = docs.count();
12061261
doc = docs.get(docsCount - 1);
@@ -1238,6 +1293,32 @@ public BlockLoader.Block read(BlockLoader.BlockFactory factory, BlockLoader.Docs
12381293
}
12391294
}
12401295

1296+
@Override
1297+
long lookAheadValueAt(int targetDoc) throws IOException {
1298+
final int blockIndex = targetDoc >>> ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SHIFT;
1299+
final int valueIndex = targetDoc & ES819TSDBDocValuesFormat.NUMERIC_BLOCK_MASK;
1300+
if (blockIndex == currentBlockIndex) {
1301+
return currentBlock[valueIndex];
1302+
}
1303+
// load data to the lookahead block
1304+
if (lookaheadBlockIndex != blockIndex) {
1305+
if (lookaheadBlock == null) {
1306+
lookaheadBlock = new long[ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE];
1307+
lookaheadData = data.slice("look_ahead_values", entry.valuesOffset, entry.valuesLength);
1308+
}
1309+
if (lookaheadBlockIndex + 1 != blockIndex) {
1310+
lookaheadData.seek(indexReader.get(blockIndex));
1311+
}
1312+
if (maxOrd == -1L) {
1313+
decoder.decode(lookaheadData, lookaheadBlock);
1314+
} else {
1315+
decoder.decodeOrdinals(lookaheadData, lookaheadBlock, bitsPerOrd);
1316+
}
1317+
lookaheadBlockIndex = blockIndex;
1318+
}
1319+
return lookaheadBlock[valueIndex];
1320+
}
1321+
12411322
static boolean isDense(int firstDocId, int lastDocId, int length) {
12421323
// This does not detect duplicate docids (e.g [1, 1, 2, 4] would be detected as dense),
12431324
// this can happen with enrich or lookup. However this codec isn't used for enrich / lookup.

server/src/main/java/org/elasticsearch/index/mapper/BlockDocValuesReader.java

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,6 @@
2222
import org.apache.lucene.util.BytesRef;
2323
import org.elasticsearch.common.io.stream.ByteArrayStreamInput;
2424
import org.elasticsearch.index.IndexVersion;
25-
import org.elasticsearch.index.codec.tsdb.es819.BulkNumericDocValues;
2625
import org.elasticsearch.index.mapper.BlockLoader.BlockFactory;
2726
import org.elasticsearch.index.mapper.BlockLoader.BooleanBuilder;
2827
import org.elasticsearch.index.mapper.BlockLoader.Builder;
@@ -133,8 +132,11 @@ static class SingletonLongs extends BlockDocValuesReader {
133132

134133
@Override
135134
public BlockLoader.Block read(BlockFactory factory, Docs docs, int offset) throws IOException {
136-
if (numericDocValues instanceof BulkNumericDocValues bulkDv) {
137-
return bulkDv.read(factory, docs, offset);
135+
if (numericDocValues instanceof BlockLoader.OptionalColumnAtATimeReader direct) {
136+
BlockLoader.Block result = direct.tryRead(factory, docs, offset);
137+
if (result != null) {
138+
return result;
139+
}
138140
}
139141
try (BlockLoader.LongBuilder builder = factory.longsFromDocValues(docs.count() - offset)) {
140142
int lastDoc = -1;
@@ -748,6 +750,12 @@ public BlockLoader.Block read(BlockFactory factory, Docs docs, int offset) throw
748750
if (docs.count() - offset == 1) {
749751
return readSingleDoc(factory, docs.get(offset));
750752
}
753+
if (ordinals instanceof BlockLoader.OptionalColumnAtATimeReader direct) {
754+
BlockLoader.Block block = direct.tryRead(factory, docs, offset);
755+
if (block != null) {
756+
return block;
757+
}
758+
}
751759
try (var builder = factory.singletonOrdinalsBuilder(ordinals, docs.count() - offset)) {
752760
for (int i = offset; i < docs.count(); i++) {
753761
int doc = docs.get(i);

server/src/main/java/org/elasticsearch/index/mapper/BlockLoader.java

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
import org.apache.lucene.index.SortedDocValues;
1414
import org.apache.lucene.index.SortedSetDocValues;
1515
import org.apache.lucene.util.BytesRef;
16+
import org.elasticsearch.core.Nullable;
1617
import org.elasticsearch.core.Releasable;
1718
import org.elasticsearch.search.fetch.StoredFieldsSpec;
1819
import org.elasticsearch.search.lookup.Source;
@@ -46,6 +47,22 @@ interface ColumnAtATimeReader extends Reader {
4647
BlockLoader.Block read(BlockFactory factory, Docs docs, int offset) throws IOException;
4748
}
4849

50+
/**
51+
* An interface for readers that attempt to load all document values in a column-at-a-time fashion.
52+
* <p>
53+
* Unlike {@link ColumnAtATimeReader}, implementations may return {@code null} if they are unable
54+
* to load the requested values, for example due to unsupported underlying data.
55+
* This allows callers to optimistically try optimized loading strategies first, and fall back if necessary.
56+
*/
57+
interface OptionalColumnAtATimeReader {
58+
/**
59+
* Attempts to read the values of all documents in {@code docs}
60+
* Returns {@code null} if unable to load the values.
61+
*/
62+
@Nullable
63+
BlockLoader.Block tryRead(BlockFactory factory, Docs docs, int offset) throws IOException;
64+
}
65+
4966
interface RowStrideReader extends Reader {
5067
/**
5168
* Reads the values of the given document into the builder.
@@ -549,6 +566,5 @@ interface AggregateMetricDoubleBuilder extends Builder {
549566
DoubleBuilder sum();
550567

551568
IntBuilder count();
552-
553569
}
554570
}

0 commit comments

Comments
 (0)