Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions docs/changelog/132950.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 132950
summary: Speed up loading keyword fields with index sorts
area: "ES|QL"
type: enhancement
issues: []

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@

final class ES819TSDBDocValuesProducer extends DocValuesProducer {
final IntObjectHashMap<NumericEntry> numerics;
private int primarySortFieldNumber = -1;
final IntObjectHashMap<BinaryEntry> binaries;
final IntObjectHashMap<SortedEntry> sorted;
final IntObjectHashMap<SortedSetEntry> sortedSets;
Expand Down Expand Up @@ -91,7 +92,14 @@ final class ES819TSDBDocValuesProducer extends DocValuesProducer {
);

readFields(in, state.fieldInfos);

final var indexSort = state.segmentInfo.getIndexSort();
if (indexSort != null && indexSort.getSort().length > 0) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍 - good idea to extract this information from segment info, no need to check for index setting.

var primarySortField = indexSort.getSort()[0];
var sortField = state.fieldInfos.fieldInfo(primarySortField.getField());
if (sortField != null) {
primarySortFieldNumber = sortField.number;
}
}
} catch (Throwable exception) {
priorE = exception;
} finally {
Expand Down Expand Up @@ -333,10 +341,10 @@ public boolean advanceExact(int target) throws IOException {
@Override
public SortedDocValues getSorted(FieldInfo field) throws IOException {
SortedEntry entry = sorted.get(field.number);
return getSorted(entry);
return getSorted(entry, field.number == primarySortFieldNumber);
}

private SortedDocValues getSorted(SortedEntry entry) throws IOException {
private SortedDocValues getSorted(SortedEntry entry, boolean valuesSorted) throws IOException {
final NumericDocValues ords = getNumeric(entry.ordsEntry, entry.termsDictEntry.termsDictSize);
return new BaseSortedDocValues(entry) {

Expand Down Expand Up @@ -369,10 +377,29 @@ public int advance(int target) throws IOException {
public long cost() {
return ords.cost();
}

@Override
public BlockLoader.Block tryRead(BlockLoader.BlockFactory factory, BlockLoader.Docs docs, int offset) throws IOException {
if (valuesSorted && ords instanceof BaseDenseNumericValues denseOrds) {
int firstDoc = docs.get(offset);
denseOrds.advanceExact(firstDoc);
long startValue = denseOrds.longValue();
final int docCount = docs.count();
int lastDoc = docs.get(docCount - 1);
long lastValue = denseOrds.lookAheadValueAt(lastDoc);
if (lastValue == startValue) {
BytesRef b = lookupOrd(Math.toIntExact(startValue));
return factory.constantBytes(BytesRef.deepCopyOf(b), docCount - offset);
}
// TODO: Since ordinals are sorted, start at 0 (offset by startValue), scan until lastValue,
// then fill remaining positions with lastValue.
}
return null;
}
};
}

abstract class BaseSortedDocValues extends SortedDocValues {
abstract class BaseSortedDocValues extends SortedDocValues implements BlockLoader.OptionalColumnAtATimeReader {

final SortedEntry entry;
final TermsEnum termsEnum;
Expand Down Expand Up @@ -406,6 +433,15 @@ public int lookupTerm(BytesRef key) throws IOException {
public TermsEnum termsEnum() throws IOException {
return new TermsDict(entry.termsDictEntry, data, merging);
}

@Override
public BlockLoader.Block tryRead(BlockLoader.BlockFactory factory, BlockLoader.Docs docs, int offset) throws IOException {
return null;
}
}

abstract static class BaseDenseNumericValues extends NumericDocValues implements BlockLoader.OptionalColumnAtATimeReader {
abstract long lookAheadValueAt(int targetDoc) throws IOException;
}

abstract static class BaseSortedSetDocValues extends SortedSetDocValues {
Expand Down Expand Up @@ -695,7 +731,7 @@ public SortedNumericDocValues getSortedNumeric(FieldInfo field) throws IOExcepti
public SortedSetDocValues getSortedSet(FieldInfo field) throws IOException {
SortedSetEntry entry = sortedSets.get(field.number);
if (entry.singleValueEntry != null) {
return DocValues.singleton(getSorted(entry.singleValueEntry));
return DocValues.singleton(getSorted(entry.singleValueEntry, field.number == primarySortFieldNumber));
}

SortedNumericEntry ordsEntry = entry.ordsEntry;
Expand Down Expand Up @@ -1047,7 +1083,7 @@ private NumericDocValues getNumeric(NumericEntry entry, long maxOrd) throws IOEx
// Special case for maxOrd 1, no need to read blocks and use ordinal 0 as only value
if (entry.docsWithFieldOffset == -1) {
// Special case when all docs have a value
return new NumericDocValues() {
return new BaseDenseNumericValues() {

private final int maxDoc = ES819TSDBDocValuesProducer.this.maxDoc;
private int doc = -1;
Expand Down Expand Up @@ -1086,6 +1122,17 @@ public boolean advanceExact(int target) {
public long cost() {
return maxDoc;
}

@Override
long lookAheadValueAt(int targetDoc) throws IOException {
return 0L; // Only one ordinal!
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

easy one :)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the test found it :)

}

@Override
public BlockLoader.Block tryRead(BlockLoader.BlockFactory factory, BlockLoader.Docs docs, int offset)
throws IOException {
return null;
}
};
} else {
final IndexedDISI disi = new IndexedDISI(
Expand Down Expand Up @@ -1141,13 +1188,17 @@ public long longValue() {
final int bitsPerOrd = maxOrd >= 0 ? PackedInts.bitsRequired(maxOrd - 1) : -1;
if (entry.docsWithFieldOffset == -1) {
// dense
return new BulkNumericDocValues() {
return new BaseDenseNumericValues() {

private final int maxDoc = ES819TSDBDocValuesProducer.this.maxDoc;
private int doc = -1;
private final TSDBDocValuesEncoder decoder = new TSDBDocValuesEncoder(ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE);
private long currentBlockIndex = -1;
private final long[] currentBlock = new long[ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE];
// lookahead block
private long lookaheadBlockIndex = -1;
private long[] lookaheadBlock;
private IndexInput lookaheadData = null;

@Override
public int docID() {
Expand Down Expand Up @@ -1183,24 +1234,28 @@ public long longValue() throws IOException {
final int index = doc;
final int blockIndex = index >>> ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SHIFT;
final int blockInIndex = index & ES819TSDBDocValuesFormat.NUMERIC_BLOCK_MASK;
if (blockIndex != currentBlockIndex) {
assert blockIndex > currentBlockIndex : blockIndex + " < " + currentBlockIndex;
// no need to seek if the loading block is the next block
if (currentBlockIndex + 1 != blockIndex) {
valuesData.seek(indexReader.get(blockIndex));
}
currentBlockIndex = blockIndex;
if (maxOrd >= 0) {
decoder.decodeOrdinals(valuesData, currentBlock, bitsPerOrd);
} else {
decoder.decode(valuesData, currentBlock);
}
if (blockIndex == currentBlockIndex) {
return currentBlock[blockInIndex];
}
if (blockIndex == lookaheadBlockIndex) {
return lookaheadBlock[blockInIndex];
}
assert blockIndex > currentBlockIndex : blockIndex + " < " + currentBlockIndex;
// no need to seek if the loading block is the next block
if (currentBlockIndex + 1 != blockIndex) {
valuesData.seek(indexReader.get(blockIndex));
}
currentBlockIndex = blockIndex;
if (maxOrd >= 0) {
decoder.decodeOrdinals(valuesData, currentBlock, bitsPerOrd);
} else {
decoder.decode(valuesData, currentBlock);
}
return currentBlock[blockInIndex];
}

@Override
public BlockLoader.Block read(BlockLoader.BlockFactory factory, BlockLoader.Docs docs, int offset) throws IOException {
public BlockLoader.Block tryRead(BlockLoader.BlockFactory factory, BlockLoader.Docs docs, int offset) throws IOException {
assert maxOrd == -1 : "unexpected maxOrd[" + maxOrd + "]";
final int docsCount = docs.count();
doc = docs.get(docsCount - 1);
Expand Down Expand Up @@ -1238,6 +1293,32 @@ public BlockLoader.Block read(BlockLoader.BlockFactory factory, BlockLoader.Docs
}
}

@Override
long lookAheadValueAt(int targetDoc) throws IOException {
final int blockIndex = targetDoc >>> ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SHIFT;
final int valueIndex = targetDoc & ES819TSDBDocValuesFormat.NUMERIC_BLOCK_MASK;
if (blockIndex == currentBlockIndex) {
return currentBlock[valueIndex];
}
// load data to the lookahead block
if (lookaheadBlockIndex != blockIndex) {
if (lookaheadBlock == null) {
lookaheadBlock = new long[ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE];
lookaheadData = data.slice("look_ahead_values", entry.valuesOffset, entry.valuesLength);
}
if (lookaheadBlockIndex + 1 != blockIndex) {
lookaheadData.seek(indexReader.get(blockIndex));
}
if (maxOrd == -1L) {
decoder.decode(lookaheadData, lookaheadBlock);
} else {
decoder.decodeOrdinals(lookaheadData, lookaheadBlock, bitsPerOrd);
}
lookaheadBlockIndex = blockIndex;
}
return lookaheadBlock[valueIndex];
}

static boolean isDense(int firstDocId, int lastDocId, int length) {
// This does not detect duplicate docids (e.g [1, 1, 2, 4] would be detected as dense),
// this can happen with enrich or lookup. However this codec isn't used for enrich / lookup.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.io.stream.ByteArrayStreamInput;
import org.elasticsearch.index.IndexVersion;
import org.elasticsearch.index.codec.tsdb.es819.BulkNumericDocValues;
import org.elasticsearch.index.mapper.BlockLoader.BlockFactory;
import org.elasticsearch.index.mapper.BlockLoader.BooleanBuilder;
import org.elasticsearch.index.mapper.BlockLoader.Builder;
Expand Down Expand Up @@ -133,8 +132,11 @@ static class SingletonLongs extends BlockDocValuesReader {

@Override
public BlockLoader.Block read(BlockFactory factory, Docs docs, int offset) throws IOException {
if (numericDocValues instanceof BulkNumericDocValues bulkDv) {
return bulkDv.read(factory, docs, offset);
if (numericDocValues instanceof BlockLoader.OptionalColumnAtATimeReader direct) {
BlockLoader.Block result = direct.tryRead(factory, docs, offset);
if (result != null) {
return result;
}
}
try (BlockLoader.LongBuilder builder = factory.longsFromDocValues(docs.count() - offset)) {
int lastDoc = -1;
Expand Down Expand Up @@ -748,6 +750,12 @@ public BlockLoader.Block read(BlockFactory factory, Docs docs, int offset) throw
if (docs.count() - offset == 1) {
return readSingleDoc(factory, docs.get(offset));
}
if (ordinals instanceof BlockLoader.OptionalColumnAtATimeReader direct) {
BlockLoader.Block block = direct.tryRead(factory, docs, offset);
if (block != null) {
return block;
}
}
try (var builder = factory.singletonOrdinalsBuilder(ordinals, docs.count() - offset)) {
for (int i = offset; i < docs.count(); i++) {
int doc = docs.get(i);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.core.Nullable;
import org.elasticsearch.core.Releasable;
import org.elasticsearch.search.fetch.StoredFieldsSpec;
import org.elasticsearch.search.lookup.Source;
Expand Down Expand Up @@ -46,6 +47,22 @@ interface ColumnAtATimeReader extends Reader {
BlockLoader.Block read(BlockFactory factory, Docs docs, int offset) throws IOException;
}

/**
* An interface for readers that attempt to load all document values in a column-at-a-time fashion.
* <p>
* Unlike {@link ColumnAtATimeReader}, implementations may return {@code null} if they are unable
* to load the requested values, for example due to unsupported underlying data.
* This allows callers to optimistically try optimized loading strategies first, and fall back if necessary.
*/
interface OptionalColumnAtATimeReader {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍 This is better than previous abstraction (BulkNumericDocValues).

/**
* Attempts to read the values of all documents in {@code docs}
* Returns {@code null} if unable to load the values.
*/
@Nullable
BlockLoader.Block tryRead(BlockFactory factory, Docs docs, int offset) throws IOException;
}

interface RowStrideReader extends Reader {
/**
* Reads the values of the given document into the builder.
Expand Down Expand Up @@ -549,6 +566,5 @@ interface AggregateMetricDoubleBuilder extends Builder {
DoubleBuilder sum();

IntBuilder count();

}
}
Loading