Skip to content

Commit dc2fa8b

Browse files
authored
Merge branch 'main' into esql/add-month-name
2 parents 30893f1 + 64f8209 commit dc2fa8b

File tree

11 files changed

+323
-95
lines changed

11 files changed

+323
-95
lines changed

docs/changelog/132950.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 132950
2+
summary: Speed up loading keyword fields with index sorts
3+
area: "ES|QL"
4+
type: enhancement
5+
issues: []

server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/BulkNumericDocValues.java

Lines changed: 0 additions & 27 deletions
This file was deleted.

server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java

Lines changed: 101 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@
5353

5454
final class ES819TSDBDocValuesProducer extends DocValuesProducer {
5555
final IntObjectHashMap<NumericEntry> numerics;
56+
private int primarySortFieldNumber = -1;
5657
final IntObjectHashMap<BinaryEntry> binaries;
5758
final IntObjectHashMap<SortedEntry> sorted;
5859
final IntObjectHashMap<SortedSetEntry> sortedSets;
@@ -91,7 +92,14 @@ final class ES819TSDBDocValuesProducer extends DocValuesProducer {
9192
);
9293

9394
readFields(in, state.fieldInfos);
94-
95+
final var indexSort = state.segmentInfo.getIndexSort();
96+
if (indexSort != null && indexSort.getSort().length > 0) {
97+
var primarySortField = indexSort.getSort()[0];
98+
var sortField = state.fieldInfos.fieldInfo(primarySortField.getField());
99+
if (sortField != null) {
100+
primarySortFieldNumber = sortField.number;
101+
}
102+
}
95103
} catch (Throwable exception) {
96104
priorE = exception;
97105
} finally {
@@ -333,10 +341,10 @@ public boolean advanceExact(int target) throws IOException {
333341
@Override
334342
public SortedDocValues getSorted(FieldInfo field) throws IOException {
335343
SortedEntry entry = sorted.get(field.number);
336-
return getSorted(entry);
344+
return getSorted(entry, field.number == primarySortFieldNumber);
337345
}
338346

339-
private SortedDocValues getSorted(SortedEntry entry) throws IOException {
347+
private SortedDocValues getSorted(SortedEntry entry, boolean valuesSorted) throws IOException {
340348
final NumericDocValues ords = getNumeric(entry.ordsEntry, entry.termsDictEntry.termsDictSize);
341349
return new BaseSortedDocValues(entry) {
342350

@@ -369,10 +377,29 @@ public int advance(int target) throws IOException {
369377
public long cost() {
370378
return ords.cost();
371379
}
380+
381+
@Override
382+
public BlockLoader.Block tryRead(BlockLoader.BlockFactory factory, BlockLoader.Docs docs, int offset) throws IOException {
383+
if (valuesSorted && ords instanceof BaseDenseNumericValues denseOrds) {
384+
int firstDoc = docs.get(offset);
385+
denseOrds.advanceExact(firstDoc);
386+
long startValue = denseOrds.longValue();
387+
final int docCount = docs.count();
388+
int lastDoc = docs.get(docCount - 1);
389+
long lastValue = denseOrds.lookAheadValueAt(lastDoc);
390+
if (lastValue == startValue) {
391+
BytesRef b = lookupOrd(Math.toIntExact(startValue));
392+
return factory.constantBytes(BytesRef.deepCopyOf(b), docCount - offset);
393+
}
394+
// TODO: Since ordinals are sorted, start at 0 (offset by startValue), scan until lastValue,
395+
// then fill remaining positions with lastValue.
396+
}
397+
return null;
398+
}
372399
};
373400
}
374401

375-
abstract class BaseSortedDocValues extends SortedDocValues {
402+
abstract class BaseSortedDocValues extends SortedDocValues implements BlockLoader.OptionalColumnAtATimeReader {
376403

377404
final SortedEntry entry;
378405
final TermsEnum termsEnum;
@@ -406,6 +433,15 @@ public int lookupTerm(BytesRef key) throws IOException {
406433
public TermsEnum termsEnum() throws IOException {
407434
return new TermsDict(entry.termsDictEntry, data, merging);
408435
}
436+
437+
@Override
438+
public BlockLoader.Block tryRead(BlockLoader.BlockFactory factory, BlockLoader.Docs docs, int offset) throws IOException {
439+
return null;
440+
}
441+
}
442+
443+
abstract static class BaseDenseNumericValues extends NumericDocValues implements BlockLoader.OptionalColumnAtATimeReader {
444+
abstract long lookAheadValueAt(int targetDoc) throws IOException;
409445
}
410446

411447
abstract static class BaseSortedSetDocValues extends SortedSetDocValues {
@@ -695,7 +731,7 @@ public SortedNumericDocValues getSortedNumeric(FieldInfo field) throws IOExcepti
695731
public SortedSetDocValues getSortedSet(FieldInfo field) throws IOException {
696732
SortedSetEntry entry = sortedSets.get(field.number);
697733
if (entry.singleValueEntry != null) {
698-
return DocValues.singleton(getSorted(entry.singleValueEntry));
734+
return DocValues.singleton(getSorted(entry.singleValueEntry, field.number == primarySortFieldNumber));
699735
}
700736

701737
SortedNumericEntry ordsEntry = entry.ordsEntry;
@@ -1047,7 +1083,7 @@ private NumericDocValues getNumeric(NumericEntry entry, long maxOrd) throws IOEx
10471083
// Special case for maxOrd 1, no need to read blocks and use ordinal 0 as only value
10481084
if (entry.docsWithFieldOffset == -1) {
10491085
// Special case when all docs have a value
1050-
return new NumericDocValues() {
1086+
return new BaseDenseNumericValues() {
10511087

10521088
private final int maxDoc = ES819TSDBDocValuesProducer.this.maxDoc;
10531089
private int doc = -1;
@@ -1086,6 +1122,17 @@ public boolean advanceExact(int target) {
10861122
public long cost() {
10871123
return maxDoc;
10881124
}
1125+
1126+
@Override
1127+
long lookAheadValueAt(int targetDoc) throws IOException {
1128+
return 0L; // Only one ordinal!
1129+
}
1130+
1131+
@Override
1132+
public BlockLoader.Block tryRead(BlockLoader.BlockFactory factory, BlockLoader.Docs docs, int offset)
1133+
throws IOException {
1134+
return null;
1135+
}
10891136
};
10901137
} else {
10911138
final IndexedDISI disi = new IndexedDISI(
@@ -1141,13 +1188,17 @@ public long longValue() {
11411188
final int bitsPerOrd = maxOrd >= 0 ? PackedInts.bitsRequired(maxOrd - 1) : -1;
11421189
if (entry.docsWithFieldOffset == -1) {
11431190
// dense
1144-
return new BulkNumericDocValues() {
1191+
return new BaseDenseNumericValues() {
11451192

11461193
private final int maxDoc = ES819TSDBDocValuesProducer.this.maxDoc;
11471194
private int doc = -1;
11481195
private final TSDBDocValuesEncoder decoder = new TSDBDocValuesEncoder(ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE);
11491196
private long currentBlockIndex = -1;
11501197
private final long[] currentBlock = new long[ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE];
1198+
// lookahead block
1199+
private long lookaheadBlockIndex = -1;
1200+
private long[] lookaheadBlock;
1201+
private IndexInput lookaheadData = null;
11511202

11521203
@Override
11531204
public int docID() {
@@ -1183,24 +1234,28 @@ public long longValue() throws IOException {
11831234
final int index = doc;
11841235
final int blockIndex = index >>> ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SHIFT;
11851236
final int blockInIndex = index & ES819TSDBDocValuesFormat.NUMERIC_BLOCK_MASK;
1186-
if (blockIndex != currentBlockIndex) {
1187-
assert blockIndex > currentBlockIndex : blockIndex + " < " + currentBlockIndex;
1188-
// no need to seek if the loading block is the next block
1189-
if (currentBlockIndex + 1 != blockIndex) {
1190-
valuesData.seek(indexReader.get(blockIndex));
1191-
}
1192-
currentBlockIndex = blockIndex;
1193-
if (maxOrd >= 0) {
1194-
decoder.decodeOrdinals(valuesData, currentBlock, bitsPerOrd);
1195-
} else {
1196-
decoder.decode(valuesData, currentBlock);
1197-
}
1237+
if (blockIndex == currentBlockIndex) {
1238+
return currentBlock[blockInIndex];
1239+
}
1240+
if (blockIndex == lookaheadBlockIndex) {
1241+
return lookaheadBlock[blockInIndex];
1242+
}
1243+
assert blockIndex > currentBlockIndex : blockIndex + " < " + currentBlockIndex;
1244+
// no need to seek if the loading block is the next block
1245+
if (currentBlockIndex + 1 != blockIndex) {
1246+
valuesData.seek(indexReader.get(blockIndex));
1247+
}
1248+
currentBlockIndex = blockIndex;
1249+
if (maxOrd >= 0) {
1250+
decoder.decodeOrdinals(valuesData, currentBlock, bitsPerOrd);
1251+
} else {
1252+
decoder.decode(valuesData, currentBlock);
11981253
}
11991254
return currentBlock[blockInIndex];
12001255
}
12011256

12021257
@Override
1203-
public BlockLoader.Block read(BlockLoader.BlockFactory factory, BlockLoader.Docs docs, int offset) throws IOException {
1258+
public BlockLoader.Block tryRead(BlockLoader.BlockFactory factory, BlockLoader.Docs docs, int offset) throws IOException {
12041259
assert maxOrd == -1 : "unexpected maxOrd[" + maxOrd + "]";
12051260
final int docsCount = docs.count();
12061261
doc = docs.get(docsCount - 1);
@@ -1238,6 +1293,32 @@ public BlockLoader.Block read(BlockLoader.BlockFactory factory, BlockLoader.Docs
12381293
}
12391294
}
12401295

1296+
@Override
1297+
long lookAheadValueAt(int targetDoc) throws IOException {
1298+
final int blockIndex = targetDoc >>> ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SHIFT;
1299+
final int valueIndex = targetDoc & ES819TSDBDocValuesFormat.NUMERIC_BLOCK_MASK;
1300+
if (blockIndex == currentBlockIndex) {
1301+
return currentBlock[valueIndex];
1302+
}
1303+
// load data to the lookahead block
1304+
if (lookaheadBlockIndex != blockIndex) {
1305+
if (lookaheadBlock == null) {
1306+
lookaheadBlock = new long[ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE];
1307+
lookaheadData = data.slice("look_ahead_values", entry.valuesOffset, entry.valuesLength);
1308+
}
1309+
if (lookaheadBlockIndex + 1 != blockIndex) {
1310+
lookaheadData.seek(indexReader.get(blockIndex));
1311+
}
1312+
if (maxOrd == -1L) {
1313+
decoder.decode(lookaheadData, lookaheadBlock);
1314+
} else {
1315+
decoder.decodeOrdinals(lookaheadData, lookaheadBlock, bitsPerOrd);
1316+
}
1317+
lookaheadBlockIndex = blockIndex;
1318+
}
1319+
return lookaheadBlock[valueIndex];
1320+
}
1321+
12411322
static boolean isDense(int firstDocId, int lastDocId, int length) {
12421323
// This does not detect duplicate docids (e.g [1, 1, 2, 4] would be detected as dense),
12431324
// this can happen with enrich or lookup. However this codec isn't used for enrich / lookup.

server/src/main/java/org/elasticsearch/index/mapper/BlockDocValuesReader.java

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,6 @@
2222
import org.apache.lucene.util.BytesRef;
2323
import org.elasticsearch.common.io.stream.ByteArrayStreamInput;
2424
import org.elasticsearch.index.IndexVersion;
25-
import org.elasticsearch.index.codec.tsdb.es819.BulkNumericDocValues;
2625
import org.elasticsearch.index.mapper.BlockLoader.BlockFactory;
2726
import org.elasticsearch.index.mapper.BlockLoader.BooleanBuilder;
2827
import org.elasticsearch.index.mapper.BlockLoader.Builder;
@@ -133,8 +132,11 @@ static class SingletonLongs extends BlockDocValuesReader {
133132

134133
@Override
135134
public BlockLoader.Block read(BlockFactory factory, Docs docs, int offset) throws IOException {
136-
if (numericDocValues instanceof BulkNumericDocValues bulkDv) {
137-
return bulkDv.read(factory, docs, offset);
135+
if (numericDocValues instanceof BlockLoader.OptionalColumnAtATimeReader direct) {
136+
BlockLoader.Block result = direct.tryRead(factory, docs, offset);
137+
if (result != null) {
138+
return result;
139+
}
138140
}
139141
try (BlockLoader.LongBuilder builder = factory.longsFromDocValues(docs.count() - offset)) {
140142
int lastDoc = -1;
@@ -748,6 +750,12 @@ public BlockLoader.Block read(BlockFactory factory, Docs docs, int offset) throw
748750
if (docs.count() - offset == 1) {
749751
return readSingleDoc(factory, docs.get(offset));
750752
}
753+
if (ordinals instanceof BlockLoader.OptionalColumnAtATimeReader direct) {
754+
BlockLoader.Block block = direct.tryRead(factory, docs, offset);
755+
if (block != null) {
756+
return block;
757+
}
758+
}
751759
try (var builder = factory.singletonOrdinalsBuilder(ordinals, docs.count() - offset)) {
752760
for (int i = offset; i < docs.count(); i++) {
753761
int doc = docs.get(i);

server/src/main/java/org/elasticsearch/index/mapper/BlockLoader.java

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
import org.apache.lucene.index.SortedDocValues;
1414
import org.apache.lucene.index.SortedSetDocValues;
1515
import org.apache.lucene.util.BytesRef;
16+
import org.elasticsearch.core.Nullable;
1617
import org.elasticsearch.core.Releasable;
1718
import org.elasticsearch.search.fetch.StoredFieldsSpec;
1819
import org.elasticsearch.search.lookup.Source;
@@ -46,6 +47,22 @@ interface ColumnAtATimeReader extends Reader {
4647
BlockLoader.Block read(BlockFactory factory, Docs docs, int offset) throws IOException;
4748
}
4849

50+
/**
51+
* An interface for readers that attempt to load all document values in a column-at-a-time fashion.
52+
* <p>
53+
* Unlike {@link ColumnAtATimeReader}, implementations may return {@code null} if they are unable
54+
* to load the requested values, for example due to unsupported underlying data.
55+
* This allows callers to optimistically try optimized loading strategies first, and fall back if necessary.
56+
*/
57+
interface OptionalColumnAtATimeReader {
58+
/**
59+
* Attempts to read the values of all documents in {@code docs}
60+
* Returns {@code null} if unable to load the values.
61+
*/
62+
@Nullable
63+
BlockLoader.Block tryRead(BlockFactory factory, Docs docs, int offset) throws IOException;
64+
}
65+
4966
interface RowStrideReader extends Reader {
5067
/**
5168
* Reads the values of the given document into the builder.
@@ -549,6 +566,5 @@ interface AggregateMetricDoubleBuilder extends Builder {
549566
DoubleBuilder sum();
550567

551568
IntBuilder count();
552-
553569
}
554570
}

0 commit comments

Comments
 (0)