Skip to content
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
import org.apache.lucene.util.NumericUtils;
import org.elasticsearch.common.breaker.NoopCircuitBreaker;
import org.elasticsearch.common.lucene.Lucene;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.util.BigArrays;
import org.elasticsearch.compute.data.BlockFactory;
import org.elasticsearch.compute.data.BytesRefBlock;
Expand All @@ -50,6 +51,7 @@
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.NumberFieldMapper;
import org.elasticsearch.search.lookup.SearchLookup;
import org.elasticsearch.xpack.esql.plugin.EsqlPlugin;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.Fork;
Expand Down Expand Up @@ -335,7 +337,7 @@ public void benchmark() {
fields(name),
List.of(new ValuesSourceReaderOperator.ShardContext(reader, () -> {
throw new UnsupportedOperationException("can't load _source here");
})),
}, EsqlPlugin.STORED_FIELDS_SEQUENTIAL_PROPORTION.getDefault(Settings.EMPTY))),
0
);
long sum = 0;
Expand Down
5 changes: 5 additions & 0 deletions docs/changelog/127348.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 127348
summary: Speed up loading stored fields
area: ES|QL
type: enhancement
issues: []
7 changes: 6 additions & 1 deletion docs/reference/elasticsearch/index-settings/index-modules.md
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ $$$index-codec$$$ `index.codec`

$$$index-mode-setting$$$ `index.mode`
: The `index.mode` setting is used to control settings applied in specific domains like ingestion of time series data or logs. Different mutually exclusive modes exist, which are used to apply settings or default values controlling indexing of documents, sorting and other parameters whose value affects indexing or query performance.

**Example**

```console
Expand Down Expand Up @@ -248,3 +248,8 @@ $$$index-final-pipeline$$$

$$$index-hidden$$$ `index.hidden`
: Indicates whether the index should be hidden by default. Hidden indices are not returned by default when using a wildcard expression. This behavior is controlled per request through the use of the `expand_wildcards` parameter. Possible values are `true` and `false` (default).

$$$index-esql-stored-fields-sequential-proportion$$$

`index.esql.stored_fields_sequential_proportion`
: Tuning parameter for deciding when {{esql}} will load [Stored fields](/reference/elasticsearch/rest-apis/retrieve-selected-fields.md#stored-fields) using a strategy tuned for loading dense sequences of documents. Accepts values between 0.0 and 1.0 and defaults to 0.2. Indices with documents smaller than 10kb may see speed improvements loading `text` fields by setting this lower.
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ public String describe() {
*/
public record FieldInfo(String name, ElementType type, IntFunction<BlockLoader> blockLoader) {}

public record ShardContext(IndexReader reader, Supplier<SourceLoader> newSourceLoader) {}
public record ShardContext(IndexReader reader, Supplier<SourceLoader> newSourceLoader, double storedFieldsSequentialProportion) {}

private final FieldWork[] fields;
private final List<ShardContext> shardContexts;
Expand Down Expand Up @@ -247,8 +247,9 @@ private void loadFromSingleLeaf(Block[] blocks, int shard, int segment, BlockLoa
}

SourceLoader sourceLoader = null;
ShardContext shardContext = shardContexts.get(shard);
if (storedFieldsSpec.requiresSource()) {
sourceLoader = shardContexts.get(shard).newSourceLoader.get();
sourceLoader = shardContext.newSourceLoader.get();
storedFieldsSpec = storedFieldsSpec.merge(new StoredFieldsSpec(true, false, sourceLoader.requiredStoredFields()));
}

Expand All @@ -261,7 +262,7 @@ private void loadFromSingleLeaf(Block[] blocks, int shard, int segment, BlockLoa
);
}
StoredFieldLoader storedFieldLoader;
if (useSequentialStoredFieldsReader(docs)) {
if (useSequentialStoredFieldsReader(docs, shardContext.storedFieldsSequentialProportion())) {
storedFieldLoader = StoredFieldLoader.fromSpecSequential(storedFieldsSpec);
trackStoredFields(storedFieldsSpec, true);
} else {
Expand Down Expand Up @@ -438,9 +439,13 @@ public void close() {
* Is it more efficient to use a sequential stored field reader
* when reading stored fields for the documents contained in {@code docs}?
*/
private boolean useSequentialStoredFieldsReader(BlockLoader.Docs docs) {
private boolean useSequentialStoredFieldsReader(BlockLoader.Docs docs, double storedFieldsSequentialProportion) {
int count = docs.count();
return count >= SEQUENTIAL_BOUNDARY && docs.get(count - 1) - docs.get(0) == count - 1;
if (count < SEQUENTIAL_BOUNDARY) {
return false;
}
int range = docs.get(count - 1) - docs.get(0);
return range * storedFieldsSequentialProportion <= count;
}

private void trackStoredFields(StoredFieldsSpec spec, boolean sequential) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -198,7 +198,7 @@ public String toString() {
operators.add(
new OrdinalsGroupingOperator(
shardIdx -> new KeywordFieldMapper.KeywordFieldType("g").blockLoader(mockBlContext()),
List.of(new ValuesSourceReaderOperator.ShardContext(reader, () -> SourceLoader.FROM_STORED_SOURCE)),
List.of(new ValuesSourceReaderOperator.ShardContext(reader, () -> SourceLoader.FROM_STORED_SOURCE, 0.2)),
ElementType.BYTES_REF,
0,
gField,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,7 @@ private List<Page> runQuery(Set<String> values, Query query, boolean shuffleDocs
),
List.of(new ValuesSourceReaderOperator.ShardContext(reader, () -> {
throw new UnsupportedOperationException();
})),
}, 0.2)),
0
)
);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -200,7 +200,7 @@ private MapperService mapperService(String indexKey) {
private List<ValuesSourceReaderOperator.ShardContext> initShardContexts() {
return INDICES.keySet()
.stream()
.map(index -> new ValuesSourceReaderOperator.ShardContext(reader(index), () -> SourceLoader.FROM_STORED_SOURCE))
.map(index -> new ValuesSourceReaderOperator.ShardContext(reader(index), () -> SourceLoader.FROM_STORED_SOURCE, 0.2))
.toList();
}

Expand Down Expand Up @@ -1297,7 +1297,7 @@ public void testWithNulls() throws IOException {
LuceneOperator.NO_LIMIT,
false // no scoring
);
var vsShardContext = new ValuesSourceReaderOperator.ShardContext(reader(indexKey), () -> SourceLoader.FROM_STORED_SOURCE);
var vsShardContext = new ValuesSourceReaderOperator.ShardContext(reader(indexKey), () -> SourceLoader.FROM_STORED_SOURCE, 0.2);
try (
Driver driver = TestDriverFactory.create(
driverContext,
Expand Down Expand Up @@ -1415,7 +1415,7 @@ public void testDescriptionOfMany() throws IOException {

ValuesSourceReaderOperator.Factory factory = new ValuesSourceReaderOperator.Factory(
cases.stream().map(c -> c.info).toList(),
List.of(new ValuesSourceReaderOperator.ShardContext(reader(indexKey), () -> SourceLoader.FROM_STORED_SOURCE)),
List.of(new ValuesSourceReaderOperator.ShardContext(reader(indexKey), () -> SourceLoader.FROM_STORED_SOURCE, 0.2)),
0
);
assertThat(factory.describe(), equalTo("ValuesSourceReaderOperator[fields = [" + cases.size() + " fields]]"));
Expand Down Expand Up @@ -1443,7 +1443,9 @@ public void testManyShards() throws IOException {
List<ValuesSourceReaderOperator.ShardContext> readerShardContexts = new ArrayList<>();
for (int s = 0; s < shardCount; s++) {
contexts.add(new LuceneSourceOperatorTests.MockShardContext(readers[s], s));
readerShardContexts.add(new ValuesSourceReaderOperator.ShardContext(readers[s], () -> SourceLoader.FROM_STORED_SOURCE));
readerShardContexts.add(
new ValuesSourceReaderOperator.ShardContext(readers[s], () -> SourceLoader.FROM_STORED_SOURCE, 0.2)
);
}
var luceneFactory = new LuceneSourceOperator.Factory(
contexts,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,8 @@ public class ValuesSourceReaderOperatorTests extends OperatorTestCase {
{ false, true, true },
{ false, false, true, true } };

static final double STORED_FIELDS_SEQUENTIAL_PROPORTIONS = 0.2;

private Directory directory = newDirectory();
private MapperService mapperService;
private IndexReader reader;
Expand Down Expand Up @@ -147,7 +149,16 @@ static Operator.OperatorFactory factory(IndexReader reader, String name, Element
fail("unexpected shardIdx [" + shardIdx + "]");
}
return loader;
})), List.of(new ValuesSourceReaderOperator.ShardContext(reader, () -> SourceLoader.FROM_STORED_SOURCE)), 0);
})),
List.of(
new ValuesSourceReaderOperator.ShardContext(
reader,
() -> SourceLoader.FROM_STORED_SOURCE,
STORED_FIELDS_SEQUENTIAL_PROPORTIONS
)
),
0
);
}

@Override
Expand Down Expand Up @@ -443,7 +454,13 @@ public void testManySingleDocPages() {
operators.add(
new ValuesSourceReaderOperator.Factory(
List.of(testCase.info, fieldInfo(mapperService.fieldType("key"), ElementType.INT)),
List.of(new ValuesSourceReaderOperator.ShardContext(reader, () -> SourceLoader.FROM_STORED_SOURCE)),
List.of(
new ValuesSourceReaderOperator.ShardContext(
reader,
() -> SourceLoader.FROM_STORED_SOURCE,
STORED_FIELDS_SEQUENTIAL_PROPORTIONS
)
),
0
).get(driverContext)
);
Expand Down Expand Up @@ -549,7 +566,13 @@ private void loadSimpleAndAssert(
operators.add(
new ValuesSourceReaderOperator.Factory(
List.of(fieldInfo(mapperService.fieldType("key"), ElementType.INT)),
List.of(new ValuesSourceReaderOperator.ShardContext(reader, () -> SourceLoader.FROM_STORED_SOURCE)),
List.of(
new ValuesSourceReaderOperator.ShardContext(
reader,
() -> SourceLoader.FROM_STORED_SOURCE,
STORED_FIELDS_SEQUENTIAL_PROPORTIONS
)
),
0
).get(driverContext)
);
Expand All @@ -561,7 +584,13 @@ private void loadSimpleAndAssert(
operators.add(
new ValuesSourceReaderOperator.Factory(
b.stream().map(i -> i.info).toList(),
List.of(new ValuesSourceReaderOperator.ShardContext(reader, () -> SourceLoader.FROM_STORED_SOURCE)),
List.of(
new ValuesSourceReaderOperator.ShardContext(
reader,
() -> SourceLoader.FROM_STORED_SOURCE,
STORED_FIELDS_SEQUENTIAL_PROPORTIONS
)
),
0
).get(driverContext)
);
Expand Down Expand Up @@ -651,7 +680,13 @@ private void testLoadAllStatus(boolean allInOnePage) {
.map(
i -> new ValuesSourceReaderOperator.Factory(
List.of(i.info),
List.of(new ValuesSourceReaderOperator.ShardContext(reader, () -> SourceLoader.FROM_STORED_SOURCE)),
List.of(
new ValuesSourceReaderOperator.ShardContext(
reader,
() -> SourceLoader.FROM_STORED_SOURCE,
STORED_FIELDS_SEQUENTIAL_PROPORTIONS
)
),
0
).get(driverContext)
)
Expand Down Expand Up @@ -1417,7 +1452,13 @@ public void testNullsShared() {
new ValuesSourceReaderOperator.FieldInfo("null1", ElementType.NULL, shardIdx -> BlockLoader.CONSTANT_NULLS),
new ValuesSourceReaderOperator.FieldInfo("null2", ElementType.NULL, shardIdx -> BlockLoader.CONSTANT_NULLS)
),
List.of(new ValuesSourceReaderOperator.ShardContext(reader, () -> SourceLoader.FROM_STORED_SOURCE)),
List.of(
new ValuesSourceReaderOperator.ShardContext(
reader,
() -> SourceLoader.FROM_STORED_SOURCE,
STORED_FIELDS_SEQUENTIAL_PROPORTIONS
)
),
0
).get(driverContext)
),
Expand Down Expand Up @@ -1462,7 +1503,13 @@ private void testSequentialStoredFields(boolean sequential, int docCount) throws
fieldInfo(mapperService.fieldType("key"), ElementType.INT),
fieldInfo(storedTextField("stored_text"), ElementType.BYTES_REF)
),
List.of(new ValuesSourceReaderOperator.ShardContext(reader, () -> SourceLoader.FROM_STORED_SOURCE)),
List.of(
new ValuesSourceReaderOperator.ShardContext(
reader,
() -> SourceLoader.FROM_STORED_SOURCE,
STORED_FIELDS_SEQUENTIAL_PROPORTIONS
)
),
0
).get(driverContext);
List<Page> results = drive(op, source.iterator(), driverContext);
Expand Down Expand Up @@ -1490,7 +1537,13 @@ public void testDescriptionOfMany() throws IOException {

ValuesSourceReaderOperator.Factory factory = new ValuesSourceReaderOperator.Factory(
cases.stream().map(c -> c.info).toList(),
List.of(new ValuesSourceReaderOperator.ShardContext(reader, () -> SourceLoader.FROM_STORED_SOURCE)),
List.of(
new ValuesSourceReaderOperator.ShardContext(
reader,
() -> SourceLoader.FROM_STORED_SOURCE,
STORED_FIELDS_SEQUENTIAL_PROPORTIONS
)
),
0
);
assertThat(factory.describe(), equalTo("ValuesSourceReaderOperator[fields = [" + cases.size() + " fields]]"));
Expand All @@ -1517,7 +1570,13 @@ public void testManyShards() throws IOException {
List<ValuesSourceReaderOperator.ShardContext> readerShardContexts = new ArrayList<>();
for (int s = 0; s < shardCount; s++) {
contexts.add(new LuceneSourceOperatorTests.MockShardContext(readers[s], s));
readerShardContexts.add(new ValuesSourceReaderOperator.ShardContext(readers[s], () -> SourceLoader.FROM_STORED_SOURCE));
readerShardContexts.add(
new ValuesSourceReaderOperator.ShardContext(
readers[s],
() -> SourceLoader.FROM_STORED_SOURCE,
STORED_FIELDS_SEQUENTIAL_PROPORTIONS
)
);
}
var luceneFactory = new LuceneSourceOperator.Factory(
contexts,
Expand Down
Loading
Loading