Skip to content

Commit b2ae4bf

Browse files
committed
Speed up loading keyword fields with index sorts
1 parent 2864dd8 commit b2ae4bf

File tree

10 files changed

+180
-94
lines changed

10 files changed

+180
-94
lines changed

server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/BulkNumericDocValues.java

Lines changed: 0 additions & 27 deletions
This file was deleted.

server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java

Lines changed: 84 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@
5353

5454
final class ES819TSDBDocValuesProducer extends DocValuesProducer {
5555
final IntObjectHashMap<NumericEntry> numerics;
56+
private int primarySortFieldNumber = -1;
5657
final IntObjectHashMap<BinaryEntry> binaries;
5758
final IntObjectHashMap<SortedEntry> sorted;
5859
final IntObjectHashMap<SortedSetEntry> sortedSets;
@@ -91,7 +92,11 @@ final class ES819TSDBDocValuesProducer extends DocValuesProducer {
9192
);
9293

9394
readFields(in, state.fieldInfos);
94-
95+
final var indexSort = state.segmentInfo.getIndexSort();
96+
if (indexSort != null) {
97+
var primaryField = indexSort.getSort()[0];
98+
primarySortFieldNumber = state.fieldInfos.fieldInfo(primaryField.getField()).number;
99+
}
95100
} catch (Throwable exception) {
96101
priorE = exception;
97102
} finally {
@@ -333,10 +338,10 @@ public boolean advanceExact(int target) throws IOException {
333338
@Override
334339
public SortedDocValues getSorted(FieldInfo field) throws IOException {
335340
SortedEntry entry = sorted.get(field.number);
336-
return getSorted(entry);
341+
return getSorted(entry, field.number == primarySortFieldNumber);
337342
}
338343

339-
private SortedDocValues getSorted(SortedEntry entry) throws IOException {
344+
private SortedDocValues getSorted(SortedEntry entry, boolean valuesSorted) throws IOException {
340345
final NumericDocValues ords = getNumeric(entry.ordsEntry, entry.termsDictEntry.termsDictSize);
341346
return new BaseSortedDocValues(entry) {
342347

@@ -369,10 +374,27 @@ public int advance(int target) throws IOException {
369374
public long cost() {
370375
return ords.cost();
371376
}
377+
378+
@Override
379+
public BlockLoader.Block tryRead(BlockLoader.BlockFactory factory, BlockLoader.Docs docs, int offset) throws IOException {
380+
if (valuesSorted && ords instanceof BaseDenseNumericValues denseOrds) {
381+
int firstDoc = docs.get(offset);
382+
denseOrds.advanceExact(firstDoc);
383+
long startValue = denseOrds.longValue();
384+
final int docCount = docs.count();
385+
int lastDoc = docs.get(docCount - 1);
386+
long lastValue = denseOrds.lookAheadValueAt(lastDoc);
387+
if (lastValue == startValue) {
388+
BytesRef b = lookupOrd(Math.toIntExact(startValue));
389+
return factory.constantBytes(BytesRef.deepCopyOf(b), docCount - offset);
390+
}
391+
}
392+
return null;
393+
}
372394
};
373395
}
374396

375-
abstract class BaseSortedDocValues extends SortedDocValues {
397+
abstract class BaseSortedDocValues extends SortedDocValues implements BlockLoader.OptionalColumnAtATimeReader {
376398

377399
final SortedEntry entry;
378400
final TermsEnum termsEnum;
@@ -406,6 +428,15 @@ public int lookupTerm(BytesRef key) throws IOException {
406428
public TermsEnum termsEnum() throws IOException {
407429
return new TermsDict(entry.termsDictEntry, data, merging);
408430
}
431+
432+
@Override
433+
public BlockLoader.Block tryRead(BlockLoader.BlockFactory factory, BlockLoader.Docs docs, int offset) throws IOException {
434+
return null;
435+
}
436+
}
437+
438+
abstract static class BaseDenseNumericValues extends NumericDocValues implements BlockLoader.OptionalColumnAtATimeReader {
439+
abstract long lookAheadValueAt(int targetDoc) throws IOException;
409440
}
410441

411442
abstract static class BaseSortedSetDocValues extends SortedSetDocValues {
@@ -695,7 +726,7 @@ public SortedNumericDocValues getSortedNumeric(FieldInfo field) throws IOExcepti
695726
public SortedSetDocValues getSortedSet(FieldInfo field) throws IOException {
696727
SortedSetEntry entry = sortedSets.get(field.number);
697728
if (entry.singleValueEntry != null) {
698-
return DocValues.singleton(getSorted(entry.singleValueEntry));
729+
return DocValues.singleton(getSorted(entry.singleValueEntry, field.number == primarySortFieldNumber));
699730
}
700731

701732
SortedNumericEntry ordsEntry = entry.ordsEntry;
@@ -1141,13 +1172,17 @@ public long longValue() {
11411172
final int bitsPerOrd = maxOrd >= 0 ? PackedInts.bitsRequired(maxOrd - 1) : -1;
11421173
if (entry.docsWithFieldOffset == -1) {
11431174
// dense
1144-
return new BulkNumericDocValues() {
1175+
return new BaseDenseNumericValues() {
11451176

11461177
private final int maxDoc = ES819TSDBDocValuesProducer.this.maxDoc;
11471178
private int doc = -1;
11481179
private final TSDBDocValuesEncoder decoder = new TSDBDocValuesEncoder(ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE);
11491180
private long currentBlockIndex = -1;
11501181
private final long[] currentBlock = new long[ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE];
1182+
// lookahead block
1183+
private long lookaheadBlockIndex = -1;
1184+
private long[] lookaheadBlock;
1185+
private IndexInput lookaheadData = null;
11511186

11521187
@Override
11531188
public int docID() {
@@ -1183,24 +1218,28 @@ public long longValue() throws IOException {
11831218
final int index = doc;
11841219
final int blockIndex = index >>> ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SHIFT;
11851220
final int blockInIndex = index & ES819TSDBDocValuesFormat.NUMERIC_BLOCK_MASK;
1186-
if (blockIndex != currentBlockIndex) {
1187-
assert blockIndex > currentBlockIndex : blockIndex + " < " + currentBlockIndex;
1188-
// no need to seek if the loading block is the next block
1189-
if (currentBlockIndex + 1 != blockIndex) {
1190-
valuesData.seek(indexReader.get(blockIndex));
1191-
}
1192-
currentBlockIndex = blockIndex;
1193-
if (maxOrd >= 0) {
1194-
decoder.decodeOrdinals(valuesData, currentBlock, bitsPerOrd);
1195-
} else {
1196-
decoder.decode(valuesData, currentBlock);
1197-
}
1221+
if (blockIndex == currentBlockIndex) {
1222+
return currentBlock[blockInIndex];
1223+
}
1224+
if (blockIndex == lookaheadBlockIndex) {
1225+
return lookaheadBlock[blockInIndex];
1226+
}
1227+
assert blockIndex > currentBlockIndex : blockIndex + " < " + currentBlockIndex;
1228+
// no need to seek if the loading block is the next block
1229+
if (currentBlockIndex + 1 != blockIndex) {
1230+
valuesData.seek(indexReader.get(blockIndex));
1231+
}
1232+
currentBlockIndex = blockIndex;
1233+
if (maxOrd >= 0) {
1234+
decoder.decodeOrdinals(valuesData, currentBlock, bitsPerOrd);
1235+
} else {
1236+
decoder.decode(valuesData, currentBlock);
11981237
}
11991238
return currentBlock[blockInIndex];
12001239
}
12011240

12021241
@Override
1203-
public BlockLoader.Block read(BlockLoader.BlockFactory factory, BlockLoader.Docs docs, int offset) throws IOException {
1242+
public BlockLoader.Block tryRead(BlockLoader.BlockFactory factory, BlockLoader.Docs docs, int offset) throws IOException {
12041243
assert maxOrd == -1 : "unexpected maxOrd[" + maxOrd + "]";
12051244
final int docsCount = docs.count();
12061245
doc = docs.get(docsCount - 1);
@@ -1238,6 +1277,32 @@ public BlockLoader.Block read(BlockLoader.BlockFactory factory, BlockLoader.Docs
12381277
}
12391278
}
12401279

1280+
@Override
1281+
long lookAheadValueAt(int targetDoc) throws IOException {
1282+
final int blockIndex = targetDoc >>> ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SHIFT;
1283+
final int valueIndex = targetDoc & ES819TSDBDocValuesFormat.NUMERIC_BLOCK_MASK;
1284+
if (blockIndex == currentBlockIndex) {
1285+
return currentBlock[valueIndex];
1286+
}
1287+
// load data to the lookahead block
1288+
if (lookaheadBlockIndex != blockIndex) {
1289+
if (lookaheadBlock == null) {
1290+
lookaheadBlock = new long[ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE];
1291+
lookaheadData = data.slice("look_ahead_values", entry.valuesOffset, entry.valuesLength);
1292+
}
1293+
if (lookaheadBlockIndex + 1 != blockIndex) {
1294+
lookaheadData.seek(indexReader.get(blockIndex));
1295+
}
1296+
if (maxOrd >= 0) {
1297+
decoder.decodeOrdinals(lookaheadData, lookaheadBlock, bitsPerOrd);
1298+
} else {
1299+
decoder.decode(lookaheadData, lookaheadBlock);
1300+
}
1301+
lookaheadBlockIndex = blockIndex;
1302+
}
1303+
return lookaheadBlock[valueIndex];
1304+
}
1305+
12411306
static boolean isDense(int firstDocId, int lastDocId, int length) {
12421307
// This does not detect duplicate docids (e.g [1, 1, 2, 4] would be detected as dense),
12431308
// this can happen with enrich or lookup. However this codec isn't used for enrich / lookup.

server/src/main/java/org/elasticsearch/index/mapper/BlockDocValuesReader.java

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@
2121
import org.apache.lucene.util.BytesRef;
2222
import org.elasticsearch.common.io.stream.ByteArrayStreamInput;
2323
import org.elasticsearch.index.IndexVersion;
24-
import org.elasticsearch.index.codec.tsdb.es819.BulkNumericDocValues;
2524
import org.elasticsearch.index.mapper.BlockLoader.BlockFactory;
2625
import org.elasticsearch.index.mapper.BlockLoader.BooleanBuilder;
2726
import org.elasticsearch.index.mapper.BlockLoader.Builder;
@@ -127,8 +126,11 @@ static class SingletonLongs extends BlockDocValuesReader {
127126

128127
@Override
129128
public BlockLoader.Block read(BlockFactory factory, Docs docs, int offset) throws IOException {
130-
if (numericDocValues instanceof BulkNumericDocValues bulkDv) {
131-
return bulkDv.read(factory, docs, offset);
129+
if (numericDocValues instanceof BlockLoader.OptionalColumnAtATimeReader direct) {
130+
BlockLoader.Block result = direct.tryRead(factory, docs, offset);
131+
if (result != null) {
132+
return result;
133+
}
132134
}
133135
try (BlockLoader.LongBuilder builder = factory.longsFromDocValues(docs.count() - offset)) {
134136
int lastDoc = -1;
@@ -659,6 +661,12 @@ public BlockLoader.Block read(BlockFactory factory, Docs docs, int offset) throw
659661
if (docs.count() - offset == 1) {
660662
return readSingleDoc(factory, docs.get(offset));
661663
}
664+
if (ordinals instanceof BlockLoader.OptionalColumnAtATimeReader direct) {
665+
BlockLoader.Block block = direct.tryRead(factory, docs, offset);
666+
if (block != null) {
667+
return block;
668+
}
669+
}
662670
try (var builder = factory.singletonOrdinalsBuilder(ordinals, docs.count() - offset)) {
663671
for (int i = offset; i < docs.count(); i++) {
664672
int doc = docs.get(i);

server/src/main/java/org/elasticsearch/index/mapper/BlockLoader.java

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
import org.apache.lucene.index.SortedDocValues;
1414
import org.apache.lucene.index.SortedSetDocValues;
1515
import org.apache.lucene.util.BytesRef;
16+
import org.elasticsearch.core.Nullable;
1617
import org.elasticsearch.core.Releasable;
1718
import org.elasticsearch.search.fetch.StoredFieldsSpec;
1819
import org.elasticsearch.search.lookup.Source;
@@ -46,6 +47,22 @@ interface ColumnAtATimeReader extends Reader {
4647
BlockLoader.Block read(BlockFactory factory, Docs docs, int offset) throws IOException;
4748
}
4849

50+
/**
51+
* An interface for readers that attempt to load all document values in a column-at-a-time fashion.
52+
* <p>
53+
* Unlike {@link ColumnAtATimeReader}, implementations may return {@code null} if they are unable
54+
* to load the requested values, for example due to unsupported underlying data.
55+
* This allows callers to optimistically try optimized loading strategies first, and fall back if necessary.
56+
*/
57+
interface OptionalColumnAtATimeReader {
58+
/**
59+
* Attempts to read the values of all documents in {@code docs}
60+
* Returns {@code null} if unable to load the values.
61+
*/
62+
@Nullable
63+
BlockLoader.Block tryRead(BlockFactory factory, Docs docs, int offset) throws IOException;
64+
}
65+
4966
interface RowStrideReader extends Reader {
5067
/**
5168
* Reads the values of the given document into the builder.
@@ -549,6 +566,5 @@ interface AggregateMetricDoubleBuilder extends Builder {
549566
DoubleBuilder sum();
550567

551568
IntBuilder count();
552-
553569
}
554570
}

0 commit comments

Comments
 (0)