Skip to content

Commit 503b524

Browse files
committed
Speed up loading keyword fields with index sorts
1 parent d557309 commit 503b524

File tree

10 files changed

+182
-94
lines changed

10 files changed

+182
-94
lines changed

server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/BulkNumericDocValues.java

Lines changed: 0 additions & 27 deletions
This file was deleted.

server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java

Lines changed: 86 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@
5353

5454
final class ES819TSDBDocValuesProducer extends DocValuesProducer {
5555
final IntObjectHashMap<NumericEntry> numerics;
56+
private int primarySortFieldNumber = -1;
5657
final IntObjectHashMap<BinaryEntry> binaries;
5758
final IntObjectHashMap<SortedEntry> sorted;
5859
final IntObjectHashMap<SortedSetEntry> sortedSets;
@@ -91,7 +92,11 @@ final class ES819TSDBDocValuesProducer extends DocValuesProducer {
9192
);
9293

9394
readFields(in, state.fieldInfos);
94-
95+
final var indexSort = state.segmentInfo.getIndexSort();
96+
if (indexSort != null) {
97+
var primaryField = indexSort.getSort()[0];
98+
primarySortFieldNumber = state.fieldInfos.fieldInfo(primaryField.getField()).number;
99+
}
95100
} catch (Throwable exception) {
96101
priorE = exception;
97102
} finally {
@@ -333,10 +338,10 @@ public boolean advanceExact(int target) throws IOException {
333338
@Override
334339
public SortedDocValues getSorted(FieldInfo field) throws IOException {
335340
SortedEntry entry = sorted.get(field.number);
336-
return getSorted(entry);
341+
return getSorted(entry, field.number == primarySortFieldNumber);
337342
}
338343

339-
private SortedDocValues getSorted(SortedEntry entry) throws IOException {
344+
private SortedDocValues getSorted(SortedEntry entry, boolean valuesSorted) throws IOException {
340345
final NumericDocValues ords = getNumeric(entry.ordsEntry, entry.termsDictEntry.termsDictSize);
341346
return new BaseSortedDocValues(entry) {
342347

@@ -369,10 +374,29 @@ public int advance(int target) throws IOException {
369374
public long cost() {
370375
return ords.cost();
371376
}
377+
378+
@Override
379+
public BlockLoader.Block tryRead(BlockLoader.BlockFactory factory, BlockLoader.Docs docs, int offset) throws IOException {
380+
if (valuesSorted && ords instanceof BaseDenseNumericValues denseOrds) {
381+
int firstDoc = docs.get(offset);
382+
denseOrds.advanceExact(firstDoc);
383+
long startValue = denseOrds.longValue();
384+
final int docCount = docs.count();
385+
int lastDoc = docs.get(docCount - 1);
386+
long lastValue = denseOrds.lookAheadValueAt(lastDoc);
387+
if (lastValue == startValue) {
388+
BytesRef b = lookupOrd(Math.toIntExact(startValue));
389+
return factory.constantBytes(BytesRef.deepCopyOf(b), docCount - offset);
390+
}
391+
// TODO: Since ordinals are sorted, start at 0 (offset by startValue), scan until lastValue,
392+
// then fill remaining positions with lastValue.
393+
}
394+
return null;
395+
}
372396
};
373397
}
374398

375-
abstract class BaseSortedDocValues extends SortedDocValues {
399+
abstract class BaseSortedDocValues extends SortedDocValues implements BlockLoader.OptionalColumnAtATimeReader {
376400

377401
final SortedEntry entry;
378402
final TermsEnum termsEnum;
@@ -406,6 +430,15 @@ public int lookupTerm(BytesRef key) throws IOException {
406430
public TermsEnum termsEnum() throws IOException {
407431
return new TermsDict(entry.termsDictEntry, data, merging);
408432
}
433+
434+
@Override
435+
public BlockLoader.Block tryRead(BlockLoader.BlockFactory factory, BlockLoader.Docs docs, int offset) throws IOException {
436+
return null;
437+
}
438+
}
439+
440+
abstract static class BaseDenseNumericValues extends NumericDocValues implements BlockLoader.OptionalColumnAtATimeReader {
441+
abstract long lookAheadValueAt(int targetDoc) throws IOException;
409442
}
410443

411444
abstract static class BaseSortedSetDocValues extends SortedSetDocValues {
@@ -695,7 +728,7 @@ public SortedNumericDocValues getSortedNumeric(FieldInfo field) throws IOExcepti
695728
public SortedSetDocValues getSortedSet(FieldInfo field) throws IOException {
696729
SortedSetEntry entry = sortedSets.get(field.number);
697730
if (entry.singleValueEntry != null) {
698-
return DocValues.singleton(getSorted(entry.singleValueEntry));
731+
return DocValues.singleton(getSorted(entry.singleValueEntry, field.number == primarySortFieldNumber));
699732
}
700733

701734
SortedNumericEntry ordsEntry = entry.ordsEntry;
@@ -1141,13 +1174,17 @@ public long longValue() {
11411174
final int bitsPerOrd = maxOrd >= 0 ? PackedInts.bitsRequired(maxOrd - 1) : -1;
11421175
if (entry.docsWithFieldOffset == -1) {
11431176
// dense
1144-
return new BulkNumericDocValues() {
1177+
return new BaseDenseNumericValues() {
11451178

11461179
private final int maxDoc = ES819TSDBDocValuesProducer.this.maxDoc;
11471180
private int doc = -1;
11481181
private final TSDBDocValuesEncoder decoder = new TSDBDocValuesEncoder(ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE);
11491182
private long currentBlockIndex = -1;
11501183
private final long[] currentBlock = new long[ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE];
1184+
// lookahead block
1185+
private long lookaheadBlockIndex = -1;
1186+
private long[] lookaheadBlock;
1187+
private IndexInput lookaheadData = null;
11511188

11521189
@Override
11531190
public int docID() {
@@ -1183,24 +1220,28 @@ public long longValue() throws IOException {
11831220
final int index = doc;
11841221
final int blockIndex = index >>> ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SHIFT;
11851222
final int blockInIndex = index & ES819TSDBDocValuesFormat.NUMERIC_BLOCK_MASK;
1186-
if (blockIndex != currentBlockIndex) {
1187-
assert blockIndex > currentBlockIndex : blockIndex + " < " + currentBlockIndex;
1188-
// no need to seek if the loading block is the next block
1189-
if (currentBlockIndex + 1 != blockIndex) {
1190-
valuesData.seek(indexReader.get(blockIndex));
1191-
}
1192-
currentBlockIndex = blockIndex;
1193-
if (maxOrd >= 0) {
1194-
decoder.decodeOrdinals(valuesData, currentBlock, bitsPerOrd);
1195-
} else {
1196-
decoder.decode(valuesData, currentBlock);
1197-
}
1223+
if (blockIndex == currentBlockIndex) {
1224+
return currentBlock[blockInIndex];
1225+
}
1226+
if (blockIndex == lookaheadBlockIndex) {
1227+
return lookaheadBlock[blockInIndex];
1228+
}
1229+
assert blockIndex > currentBlockIndex : blockIndex + " < " + currentBlockIndex;
1230+
// no need to seek if the loading block is the next block
1231+
if (currentBlockIndex + 1 != blockIndex) {
1232+
valuesData.seek(indexReader.get(blockIndex));
1233+
}
1234+
currentBlockIndex = blockIndex;
1235+
if (maxOrd >= 0) {
1236+
decoder.decodeOrdinals(valuesData, currentBlock, bitsPerOrd);
1237+
} else {
1238+
decoder.decode(valuesData, currentBlock);
11981239
}
11991240
return currentBlock[blockInIndex];
12001241
}
12011242

12021243
@Override
1203-
public BlockLoader.Block read(BlockLoader.BlockFactory factory, BlockLoader.Docs docs, int offset) throws IOException {
1244+
public BlockLoader.Block tryRead(BlockLoader.BlockFactory factory, BlockLoader.Docs docs, int offset) throws IOException {
12041245
assert maxOrd == -1 : "unexpected maxOrd[" + maxOrd + "]";
12051246
final int docsCount = docs.count();
12061247
doc = docs.get(docsCount - 1);
@@ -1238,6 +1279,32 @@ public BlockLoader.Block read(BlockLoader.BlockFactory factory, BlockLoader.Docs
12381279
}
12391280
}
12401281

1282+
@Override
1283+
long lookAheadValueAt(int targetDoc) throws IOException {
1284+
final int blockIndex = targetDoc >>> ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SHIFT;
1285+
final int valueIndex = targetDoc & ES819TSDBDocValuesFormat.NUMERIC_BLOCK_MASK;
1286+
if (blockIndex == currentBlockIndex) {
1287+
return currentBlock[valueIndex];
1288+
}
1289+
// load data to the lookahead block
1290+
if (lookaheadBlockIndex != blockIndex) {
1291+
if (lookaheadBlock == null) {
1292+
lookaheadBlock = new long[ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE];
1293+
lookaheadData = data.slice("look_ahead_values", entry.valuesOffset, entry.valuesLength);
1294+
}
1295+
if (lookaheadBlockIndex + 1 != blockIndex) {
1296+
lookaheadData.seek(indexReader.get(blockIndex));
1297+
}
1298+
if (maxOrd >= 0) {
1299+
decoder.decodeOrdinals(lookaheadData, lookaheadBlock, bitsPerOrd);
1300+
} else {
1301+
decoder.decode(lookaheadData, lookaheadBlock);
1302+
}
1303+
lookaheadBlockIndex = blockIndex;
1304+
}
1305+
return lookaheadBlock[valueIndex];
1306+
}
1307+
12411308
static boolean isDense(int firstDocId, int lastDocId, int length) {
12421309
// This does not detect duplicate docids (e.g [1, 1, 2, 4] would be detected as dense),
12431310
// this can happen with enrich or lookup. However this codec isn't used for enrich / lookup.

server/src/main/java/org/elasticsearch/index/mapper/BlockDocValuesReader.java

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,6 @@
2222
import org.apache.lucene.util.BytesRef;
2323
import org.elasticsearch.common.io.stream.ByteArrayStreamInput;
2424
import org.elasticsearch.index.IndexVersion;
25-
import org.elasticsearch.index.codec.tsdb.es819.BulkNumericDocValues;
2625
import org.elasticsearch.index.mapper.BlockLoader.BlockFactory;
2726
import org.elasticsearch.index.mapper.BlockLoader.BooleanBuilder;
2827
import org.elasticsearch.index.mapper.BlockLoader.Builder;
@@ -133,8 +132,11 @@ static class SingletonLongs extends BlockDocValuesReader {
133132

134133
@Override
135134
public BlockLoader.Block read(BlockFactory factory, Docs docs, int offset) throws IOException {
136-
if (numericDocValues instanceof BulkNumericDocValues bulkDv) {
137-
return bulkDv.read(factory, docs, offset);
135+
if (numericDocValues instanceof BlockLoader.OptionalColumnAtATimeReader direct) {
136+
BlockLoader.Block result = direct.tryRead(factory, docs, offset);
137+
if (result != null) {
138+
return result;
139+
}
138140
}
139141
try (BlockLoader.LongBuilder builder = factory.longsFromDocValues(docs.count() - offset)) {
140142
int lastDoc = -1;
@@ -748,6 +750,12 @@ public BlockLoader.Block read(BlockFactory factory, Docs docs, int offset) throw
748750
if (docs.count() - offset == 1) {
749751
return readSingleDoc(factory, docs.get(offset));
750752
}
753+
if (ordinals instanceof BlockLoader.OptionalColumnAtATimeReader direct) {
754+
BlockLoader.Block block = direct.tryRead(factory, docs, offset);
755+
if (block != null) {
756+
return block;
757+
}
758+
}
751759
try (var builder = factory.singletonOrdinalsBuilder(ordinals, docs.count() - offset)) {
752760
for (int i = offset; i < docs.count(); i++) {
753761
int doc = docs.get(i);

server/src/main/java/org/elasticsearch/index/mapper/BlockLoader.java

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
import org.apache.lucene.index.SortedDocValues;
1414
import org.apache.lucene.index.SortedSetDocValues;
1515
import org.apache.lucene.util.BytesRef;
16+
import org.elasticsearch.core.Nullable;
1617
import org.elasticsearch.core.Releasable;
1718
import org.elasticsearch.search.fetch.StoredFieldsSpec;
1819
import org.elasticsearch.search.lookup.Source;
@@ -46,6 +47,22 @@ interface ColumnAtATimeReader extends Reader {
4647
BlockLoader.Block read(BlockFactory factory, Docs docs, int offset) throws IOException;
4748
}
4849

50+
/**
51+
* An interface for readers that attempt to load all document values in a column-at-a-time fashion.
52+
* <p>
53+
* Unlike {@link ColumnAtATimeReader}, implementations may return {@code null} if they are unable
54+
* to load the requested values, for example due to unsupported underlying data.
55+
* This allows callers to optimistically try optimized loading strategies first, and fall back if necessary.
56+
*/
57+
interface OptionalColumnAtATimeReader {
58+
/**
59+
* Attempts to read the values of all documents in {@code docs}
60+
* Returns {@code null} if unable to load the values.
61+
*/
62+
@Nullable
63+
BlockLoader.Block tryRead(BlockFactory factory, Docs docs, int offset) throws IOException;
64+
}
65+
4966
interface RowStrideReader extends Reader {
5067
/**
5168
* Reads the values of the given document into the builder.
@@ -549,6 +566,5 @@ interface AggregateMetricDoubleBuilder extends Builder {
549566
DoubleBuilder sum();
550567

551568
IntBuilder count();
552-
553569
}
554570
}

0 commit comments

Comments
 (0)