
Commit f532e95

Improve reading ignored source in esql
* Parse sub-field names once in IgnoredSourceRowStrideReader instead of for each doc that gets read.
* Introduce a dedicated StoredFieldLoader for ignored source (IgnoredSourceFieldLoader), which optimizes loading stored fields just for ignored source by avoiding the relatively expensive set lookups (see CustomFieldsVisitor) and by aborting stored field loading (`Status.STOP`) once ignored source has been read.
1 parent 96315ec commit f532e95
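
For context, here is a minimal usage sketch of the new fast path. It is not part of this commit; the helper below is hypothetical and assumes the StoredFieldsSpec record takes requiresSource, requiresMetadata and the set of required stored field names, matching the supports() check in the new loader.

import org.apache.lucene.index.LeafReaderContext;
import org.elasticsearch.index.fieldvisitor.LeafStoredFieldLoader;
import org.elasticsearch.index.fieldvisitor.StoredFieldLoader;
import org.elasticsearch.index.mapper.IgnoredSourceFieldMapper;
import org.elasticsearch.search.fetch.StoredFieldsSpec;

import java.io.IOException;
import java.util.List;
import java.util.Set;

class IgnoredSourceLoadingSketch {
    // Hypothetical helper: load the raw _ignored_source entries for one document.
    static List<Object> loadIgnoredSource(LeafReaderContext ctx, int docId) throws IOException {
        // A spec that needs neither _source nor metadata and asks for exactly one stored
        // field (_ignored_source) satisfies IgnoredSourceFieldLoader.supports(), so
        // fromSpec() routes it to the dedicated loader instead of the generic one.
        StoredFieldsSpec spec = new StoredFieldsSpec(false, false, Set.of(IgnoredSourceFieldMapper.NAME));
        StoredFieldLoader loader = StoredFieldLoader.fromSpec(spec);
        LeafStoredFieldLoader leafLoader = loader.getLoader(ctx, null);
        leafLoader.advanceTo(docId);
        return leafLoader.storedFields().get(IgnoredSourceFieldMapper.NAME);
    }
}

The same supports() check is added to both fromSpec and fromSpecSequential (see the StoredFieldLoader diff below), so row-stride and sequential reads of _ignored_source both take the dedicated loader.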

File tree: 3 files changed (+195 −45 lines)

server/src/main/java/org/elasticsearch/index/fieldvisitor/IgnoredSourceFieldLoader.java

Lines changed: 118 additions & 0 deletions
@@ -0,0 +1,118 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the "Elastic License
 * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
 * Public License v 1"; you may not use this file except in compliance with, at
 * your election, the "Elastic License 2.0", the "GNU Affero General Public
 * License v3.0 only", or the "Server Side Public License, v 1".
 */

package org.elasticsearch.index.fieldvisitor;

import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.StoredFieldVisitor;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.CheckedBiConsumer;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.lucene.index.SequentialStoredFieldsLeafReader;
import org.elasticsearch.index.mapper.IgnoredSourceFieldMapper;
import org.elasticsearch.search.fetch.StoredFieldsSpec;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;

class IgnoredSourceFieldLoader extends StoredFieldLoader {

    @Override
    public LeafStoredFieldLoader getLoader(LeafReaderContext ctx, int[] docs) throws IOException {
        var reader = sequentialReader(ctx);
        var visitor = new SFV();
        return new LeafStoredFieldLoader() {

            private int doc = -1;

            @Override
            public void advanceTo(int doc) throws IOException {
                if (doc != this.doc) {
                    visitor.reset();
                    reader.accept(doc, visitor);
                    this.doc = doc;
                }
            }

            @Override
            public BytesReference source() {
                return null;
            }

            @Override
            public String id() {
                return null;
            }

            @Override
            public String routing() {
                return null;
            }

            @Override
            public Map<String, List<Object>> storedFields() {
                return Map.of(IgnoredSourceFieldMapper.NAME, visitor.values);
            }
        };
    }

    @Override
    public List<String> fieldsToLoad() {
        return List.of(IgnoredSourceFieldMapper.NAME);
    }

    static class SFV extends StoredFieldVisitor {

        boolean processing;
        final List<Object> values = new ArrayList<>();

        @Override
        public Status needsField(FieldInfo fieldInfo) throws IOException {
            if (IgnoredSourceFieldMapper.NAME.equals(fieldInfo.name)) {
                processing = true;
                return Status.YES;
            } else if (processing) {
                return Status.STOP;
            }

            return Status.NO;
        }

        @Override
        public void binaryField(FieldInfo fieldInfo, byte[] value) throws IOException {
            values.add(new BytesRef(value));
        }

        void reset() {
            values.clear();
            processing = false;
        }
    }

    static boolean supports(StoredFieldsSpec spec) {
        return spec.requiresSource() == false
            && spec.requiresMetadata() == false
            && spec.requiredStoredFields().size() == 1
            && spec.requiredStoredFields().contains(IgnoredSourceFieldMapper.NAME);
    }

    // TODO: use provided one
    private static CheckedBiConsumer<Integer, StoredFieldVisitor, IOException> sequentialReader(LeafReaderContext ctx) throws IOException {
        LeafReader leafReader = ctx.reader();
        if (leafReader instanceof SequentialStoredFieldsLeafReader lf) {
            return lf.getSequentialStoredFieldsReader()::document;
        }
        return leafReader.storedFields()::document;
    }
}

server/src/main/java/org/elasticsearch/index/fieldvisitor/StoredFieldLoader.java

Lines changed: 7 additions & 0 deletions
@@ -50,6 +50,9 @@ public static StoredFieldLoader fromSpec(StoredFieldsSpec spec) {
         if (spec.noRequirements()) {
             return StoredFieldLoader.empty();
         }
+        if (IgnoredSourceFieldLoader.supports(spec)) {
+            return new IgnoredSourceFieldLoader();
+        }
         return create(spec.requiresSource(), spec.requiredStoredFields());
     }

@@ -91,6 +94,10 @@ public static StoredFieldLoader fromSpecSequential(StoredFieldsSpec spec) {
         if (spec.noRequirements()) {
             return StoredFieldLoader.empty();
         }
+        if (IgnoredSourceFieldLoader.supports(spec)) {
+            return new IgnoredSourceFieldLoader();
+        }
+
         List<String> fieldsToLoad = fieldsToLoad(spec.requiresSource(), spec.requiredStoredFields());
         return new StoredFieldLoader() {
             @Override

server/src/main/java/org/elasticsearch/index/mapper/FallbackSyntheticSourceBlockLoader.java

Lines changed: 70 additions & 45 deletions
@@ -21,6 +21,7 @@
 import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
+import java.util.Objects;
 import java.util.Optional;
 import java.util.Set;
 import java.util.Stack;

@@ -70,24 +71,20 @@ public SortedSetDocValues ordinals(LeafReaderContext context) throws IOException
         throw new UnsupportedOperationException();
     }

-    private record IgnoredSourceRowStrideReader<T>(String fieldName, Reader<T> reader) implements RowStrideReader {
-        @Override
-        public void read(int docId, StoredFields storedFields, Builder builder) throws IOException {
-            var ignoredSource = storedFields.storedFields().get(IgnoredSourceFieldMapper.NAME);
-            if (ignoredSource == null) {
-                builder.appendNull();
-                return;
-            }
-
-            Map<String, List<IgnoredSourceFieldMapper.NameValue>> valuesForFieldAndParents = new HashMap<>();
+    private static final class IgnoredSourceRowStrideReader<T> implements RowStrideReader {
+        private final String fieldName;
+        private final Reader<T> reader;
+        private final Set<String> fieldNames;

+        private IgnoredSourceRowStrideReader(String fieldName, Reader<T> reader) {
+            this.fieldName = fieldName;
+            this.reader = reader;
             // Contains name of the field and all its parents
-            Set<String> fieldNames = new HashSet<>() {
+            this.fieldNames = new HashSet<>() {
                 {
                     add("_doc");
                 }
             };
-
             var current = new StringBuilder();
             for (String part : fieldName.split("\\.")) {
                 if (current.isEmpty() == false) {

@@ -96,11 +93,21 @@ public void read(int docId, StoredFields storedFields, Builder builder) throws I
                 current.append(part);
                 fieldNames.add(current.toString());
             }
+        }

+        @Override
+        public void read(int docId, StoredFields storedFields, Builder builder) throws IOException {
+            var ignoredSource = storedFields.storedFields().get(IgnoredSourceFieldMapper.NAME);
+            if (ignoredSource == null) {
+                builder.appendNull();
+                return;
+            }
+
+            Map<String, IgnoredSourceFieldMapper.NameValue> valuesForFieldAndParents = new HashMap<>();
             for (Object value : ignoredSource) {
                 IgnoredSourceFieldMapper.NameValue nameValue = IgnoredSourceFieldMapper.decode(value);
                 if (fieldNames.contains(nameValue.name())) {
-                    valuesForFieldAndParents.computeIfAbsent(nameValue.name(), k -> new ArrayList<>()).add(nameValue);
+                    valuesForFieldAndParents.putIfAbsent(nameValue.name(), nameValue);
                 }
             }

@@ -130,41 +137,35 @@ public void read(int docId, StoredFields storedFields, Builder builder) throws I
             }
         }

-        private void readFromFieldValue(List<IgnoredSourceFieldMapper.NameValue> nameValues, List<T> blockValues) throws IOException {
-            if (nameValues.isEmpty()) {
+        private void readFromFieldValue(IgnoredSourceFieldMapper.NameValue nameValue, List<T> blockValues) throws IOException {
+            // Leaf field is stored directly (not as a part of a parent object), let's try to decode it.
+            Optional<Object> singleValue = XContentDataHelper.decode(nameValue.value());
+            if (singleValue.isPresent()) {
+                reader.convertValue(singleValue.get(), blockValues);
                 return;
             }

-            for (var nameValue : nameValues) {
-                // Leaf field is stored directly (not as a part of a parent object), let's try to decode it.
-                Optional<Object> singleValue = XContentDataHelper.decode(nameValue.value());
-                if (singleValue.isPresent()) {
-                    reader.convertValue(singleValue.get(), blockValues);
-                    continue;
-                }
+            // We have a value for this field but it's an array or an object
+            var type = XContentDataHelper.decodeType(nameValue.value());
+            assert type.isPresent();

-                // We have a value for this field but it's an array or an object
-                var type = XContentDataHelper.decodeType(nameValue.value());
-                assert type.isPresent();
-
-                try (
-                    XContentParser parser = type.get()
-                        .xContent()
-                        .createParser(
-                            XContentParserConfiguration.EMPTY,
-                            nameValue.value().bytes,
-                            nameValue.value().offset + 1,
-                            nameValue.value().length - 1
-                        )
-                ) {
-                    parser.nextToken();
-                    parseWithReader(parser, blockValues);
-                }
+            try (
+                XContentParser parser = type.get()
+                    .xContent()
+                    .createParser(
+                        XContentParserConfiguration.EMPTY,
+                        nameValue.value().bytes,
+                        nameValue.value().offset + 1,
+                        nameValue.value().length - 1
+                    )
+            ) {
+                parser.nextToken();
+                parseWithReader(parser, blockValues);
             }
         }

         private void readFromParentValue(
-            Map<String, List<IgnoredSourceFieldMapper.NameValue>> valuesForFieldAndParents,
+            Map<String, IgnoredSourceFieldMapper.NameValue> valuesForFieldAndParents,
             List<T> blockValues
         ) throws IOException {
             if (valuesForFieldAndParents.isEmpty()) {

@@ -174,11 +175,8 @@ private void readFromParentValue(
             // If a parent object is stored at a particular level its children won't be stored.
             // So we should only ever have one parent here.
             assert valuesForFieldAndParents.size() == 1 : "_ignored_source field contains multiple levels of the same object";
-            var parentValues = valuesForFieldAndParents.values().iterator().next();
-
-            for (var nameValue : parentValues) {
-                parseFieldFromParent(nameValue, blockValues);
-            }
+            var parentValue = valuesForFieldAndParents.values().iterator().next();
+            parseFieldFromParent(parentValue, blockValues);
         }

         private void parseFieldFromParent(IgnoredSourceFieldMapper.NameValue nameValue, List<T> blockValues) throws IOException {

@@ -243,6 +241,33 @@ private void parseWithReader(XContentParser parser, List<T> blockValues) throws
         public boolean canReuse(int startingDocID) {
             return true;
         }
+
+        public String fieldName() {
+            return fieldName;
+        }
+
+        public Reader<T> reader() {
+            return reader;
+        }
+
+        @Override
+        public boolean equals(Object obj) {
+            if (obj == this) return true;
+            if (obj == null || obj.getClass() != this.getClass()) return false;
+            var that = (IgnoredSourceRowStrideReader) obj;
+            return Objects.equals(this.fieldName, that.fieldName) && Objects.equals(this.reader, that.reader);
+        }
+
+        @Override
+        public int hashCode() {
+            return Objects.hash(fieldName, reader);
+        }
+
+        @Override
+        public String toString() {
+            return "IgnoredSourceRowStrideReader[" + "fieldName=" + fieldName + ", " + "reader=" + reader + ']';
+        }
+
     }

     /**
