Skip to content

Commit 08f59c3

Browse files
committed
Improve reading ignored source in esql
* Compute the field paths (the field name and all of its parent paths) once in IgnoredSourceRowStrideReader instead of once for each doc that gets read. * Introduce a dedicated StoredFieldLoader for ignored source (IgnoredSourceFieldLoader), which optimizes reading stored fields just for ignored source by avoiding relatively expensive set operations (see CustomFieldsVisitor) and by aborting the loading of stored fields (`Status.STOP`) once the ignored source has been read.
1 parent 96315ec commit 08f59c3

File tree

3 files changed

+207
-54
lines changed

3 files changed

+207
-54
lines changed
Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the "Elastic License
4+
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
5+
* Public License v 1"; you may not use this file except in compliance with, at
6+
* your election, the "Elastic License 2.0", the "GNU Affero General Public
7+
* License v3.0 only", or the "Server Side Public License, v 1".
8+
*/
9+
10+
package org.elasticsearch.index.fieldvisitor;
11+
12+
import org.apache.lucene.index.FieldInfo;
13+
import org.apache.lucene.index.LeafReader;
14+
import org.apache.lucene.index.LeafReaderContext;
15+
import org.apache.lucene.index.StoredFieldVisitor;
16+
import org.apache.lucene.util.BytesRef;
17+
import org.elasticsearch.common.CheckedBiConsumer;
18+
import org.elasticsearch.common.bytes.BytesReference;
19+
import org.elasticsearch.common.lucene.index.SequentialStoredFieldsLeafReader;
20+
import org.elasticsearch.index.mapper.IgnoredSourceFieldMapper;
21+
import org.elasticsearch.search.fetch.StoredFieldsSpec;
22+
23+
import java.io.IOException;
24+
import java.util.ArrayList;
25+
import java.util.List;
26+
import java.util.Map;
27+
28+
class IgnoredSourceFieldLoader extends StoredFieldLoader {
29+
30+
@Override
31+
public LeafStoredFieldLoader getLoader(LeafReaderContext ctx, int[] docs) throws IOException {
32+
var reader = sequentialReader(ctx);
33+
var visitor = new SFV();
34+
return new LeafStoredFieldLoader() {
35+
36+
private int doc = -1;
37+
38+
@Override
39+
public void advanceTo(int doc) throws IOException {
40+
if (doc != this.doc) {
41+
visitor.reset();
42+
reader.accept(doc, visitor);
43+
this.doc = doc;
44+
}
45+
}
46+
47+
@Override
48+
public BytesReference source() {
49+
return null;
50+
}
51+
52+
@Override
53+
public String id() {
54+
return null;
55+
}
56+
57+
@Override
58+
public String routing() {
59+
return null;
60+
}
61+
62+
@Override
63+
public Map<String, List<Object>> storedFields() {
64+
return Map.of(IgnoredSourceFieldMapper.NAME, visitor.values);
65+
}
66+
};
67+
}
68+
69+
@Override
70+
public List<String> fieldsToLoad() {
71+
return List.of(IgnoredSourceFieldMapper.NAME);
72+
}
73+
74+
static class SFV extends StoredFieldVisitor {
75+
76+
boolean processing;
77+
final List<Object> values = new ArrayList<>();
78+
79+
@Override
80+
public Status needsField(FieldInfo fieldInfo) throws IOException {
81+
if (IgnoredSourceFieldMapper.NAME.equals(fieldInfo.name)) {
82+
processing = true;
83+
return Status.YES;
84+
} else if (processing) {
85+
return Status.STOP;
86+
}
87+
88+
return Status.NO;
89+
}
90+
91+
@Override
92+
public void binaryField(FieldInfo fieldInfo, byte[] value) throws IOException {
93+
values.add(new BytesRef(value));
94+
}
95+
96+
void reset() {
97+
values.clear();
98+
processing = false;
99+
}
100+
101+
}
102+
103+
static boolean supports(StoredFieldsSpec spec) {
104+
return spec.requiresSource() == false
105+
&& spec.requiresMetadata() == false
106+
&& spec.requiredStoredFields().size() == 1
107+
&& spec.requiredStoredFields().contains(IgnoredSourceFieldMapper.NAME);
108+
}
109+
110+
// TODO: use provided one
111+
private static CheckedBiConsumer<Integer, StoredFieldVisitor, IOException> sequentialReader(LeafReaderContext ctx) throws IOException {
112+
LeafReader leafReader = ctx.reader();
113+
if (leafReader instanceof SequentialStoredFieldsLeafReader lf) {
114+
return lf.getSequentialStoredFieldsReader()::document;
115+
}
116+
return leafReader.storedFields()::document;
117+
}
118+
}

server/src/main/java/org/elasticsearch/index/fieldvisitor/StoredFieldLoader.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,9 @@ public static StoredFieldLoader fromSpec(StoredFieldsSpec spec) {
5050
if (spec.noRequirements()) {
5151
return StoredFieldLoader.empty();
5252
}
53+
if (IgnoredSourceFieldLoader.supports(spec)) {
54+
return new IgnoredSourceFieldLoader();
55+
}
5356
return create(spec.requiresSource(), spec.requiredStoredFields());
5457
}
5558

@@ -91,6 +94,10 @@ public static StoredFieldLoader fromSpecSequential(StoredFieldsSpec spec) {
9194
if (spec.noRequirements()) {
9295
return StoredFieldLoader.empty();
9396
}
97+
if (IgnoredSourceFieldLoader.supports(spec)) {
98+
return new IgnoredSourceFieldLoader();
99+
}
100+
94101
List<String> fieldsToLoad = fieldsToLoad(spec.requiresSource(), spec.requiredStoredFields());
95102
return new StoredFieldLoader() {
96103
@Override

server/src/main/java/org/elasticsearch/index/mapper/FallbackSyntheticSourceBlockLoader.java

Lines changed: 82 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
import java.util.HashSet;
2222
import java.util.List;
2323
import java.util.Map;
24+
import java.util.Objects;
2425
import java.util.Optional;
2526
import java.util.Set;
2627
import java.util.Stack;
@@ -39,10 +40,12 @@
3940
public abstract class FallbackSyntheticSourceBlockLoader implements BlockLoader {
4041
private final Reader<?> reader;
4142
private final String fieldName;
43+
private final Set<String> fieldPaths;
4244

4345
protected FallbackSyntheticSourceBlockLoader(Reader<?> reader, String fieldName) {
4446
this.reader = reader;
4547
this.fieldName = fieldName;
48+
this.fieldPaths = splitIntoFieldPaths(fieldName);
4649
}
4750

4851
@Override
@@ -52,7 +55,7 @@ public ColumnAtATimeReader columnAtATimeReader(LeafReaderContext context) throws
5255

5356
@Override
5457
public RowStrideReader rowStrideReader(LeafReaderContext context) throws IOException {
55-
return new IgnoredSourceRowStrideReader<>(fieldName, reader);
58+
return new IgnoredSourceRowStrideReader<>(fieldName, reader, fieldPaths);
5659
}
5760

5861
@Override
@@ -70,7 +73,31 @@ public SortedSetDocValues ordinals(LeafReaderContext context) throws IOException
7073
throw new UnsupportedOperationException();
7174
}
7275

73-
private record IgnoredSourceRowStrideReader<T>(String fieldName, Reader<T> reader) implements RowStrideReader {
76+
static Set<String> splitIntoFieldPaths(String fieldName) {
77+
var paths = new HashSet<String>();
78+
paths.add("_doc");
79+
var current = new StringBuilder();
80+
for (var part : fieldName.split("\\.")) {
81+
if (current.isEmpty() == false) {
82+
current.append('.');
83+
}
84+
current.append(part);
85+
paths.add(current.toString());
86+
}
87+
return paths;
88+
}
89+
90+
private static final class IgnoredSourceRowStrideReader<T> implements RowStrideReader {
91+
private final String fieldName;
92+
private final Reader<T> reader;
93+
private final Set<String> fieldPaths;
94+
95+
private IgnoredSourceRowStrideReader(String fieldName, Reader<T> reader, Set<String> fieldPaths) {
96+
this.fieldName = fieldName;
97+
this.reader = reader;
98+
this.fieldPaths = fieldPaths;
99+
}
100+
74101
@Override
75102
public void read(int docId, StoredFields storedFields, Builder builder) throws IOException {
76103
var ignoredSource = storedFields.storedFields().get(IgnoredSourceFieldMapper.NAME);
@@ -79,28 +106,11 @@ public void read(int docId, StoredFields storedFields, Builder builder) throws I
79106
return;
80107
}
81108

82-
Map<String, List<IgnoredSourceFieldMapper.NameValue>> valuesForFieldAndParents = new HashMap<>();
83-
84-
// Contains name of the field and all its parents
85-
Set<String> fieldNames = new HashSet<>() {
86-
{
87-
add("_doc");
88-
}
89-
};
90-
91-
var current = new StringBuilder();
92-
for (String part : fieldName.split("\\.")) {
93-
if (current.isEmpty() == false) {
94-
current.append('.');
95-
}
96-
current.append(part);
97-
fieldNames.add(current.toString());
98-
}
99-
109+
Map<String, IgnoredSourceFieldMapper.NameValue> valuesForFieldAndParents = new HashMap<>();
100110
for (Object value : ignoredSource) {
101111
IgnoredSourceFieldMapper.NameValue nameValue = IgnoredSourceFieldMapper.decode(value);
102-
if (fieldNames.contains(nameValue.name())) {
103-
valuesForFieldAndParents.computeIfAbsent(nameValue.name(), k -> new ArrayList<>()).add(nameValue);
112+
if (fieldPaths.contains(nameValue.name())) {
113+
valuesForFieldAndParents.putIfAbsent(nameValue.name(), nameValue);
104114
}
105115
}
106116

@@ -130,41 +140,35 @@ public void read(int docId, StoredFields storedFields, Builder builder) throws I
130140
}
131141
}
132142

133-
private void readFromFieldValue(List<IgnoredSourceFieldMapper.NameValue> nameValues, List<T> blockValues) throws IOException {
134-
if (nameValues.isEmpty()) {
143+
private void readFromFieldValue(IgnoredSourceFieldMapper.NameValue nameValue, List<T> blockValues) throws IOException {
144+
// Leaf field is stored directly (not as a part of a parent object), let's try to decode it.
145+
Optional<Object> singleValue = XContentDataHelper.decode(nameValue.value());
146+
if (singleValue.isPresent()) {
147+
reader.convertValue(singleValue.get(), blockValues);
135148
return;
136149
}
137150

138-
for (var nameValue : nameValues) {
139-
// Leaf field is stored directly (not as a part of a parent object), let's try to decode it.
140-
Optional<Object> singleValue = XContentDataHelper.decode(nameValue.value());
141-
if (singleValue.isPresent()) {
142-
reader.convertValue(singleValue.get(), blockValues);
143-
continue;
144-
}
151+
// We have a value for this field but it's an array or an object
152+
var type = XContentDataHelper.decodeType(nameValue.value());
153+
assert type.isPresent();
145154

146-
// We have a value for this field but it's an array or an object
147-
var type = XContentDataHelper.decodeType(nameValue.value());
148-
assert type.isPresent();
149-
150-
try (
151-
XContentParser parser = type.get()
152-
.xContent()
153-
.createParser(
154-
XContentParserConfiguration.EMPTY,
155-
nameValue.value().bytes,
156-
nameValue.value().offset + 1,
157-
nameValue.value().length - 1
158-
)
159-
) {
160-
parser.nextToken();
161-
parseWithReader(parser, blockValues);
162-
}
155+
try (
156+
XContentParser parser = type.get()
157+
.xContent()
158+
.createParser(
159+
XContentParserConfiguration.EMPTY,
160+
nameValue.value().bytes,
161+
nameValue.value().offset + 1,
162+
nameValue.value().length - 1
163+
)
164+
) {
165+
parser.nextToken();
166+
parseWithReader(parser, blockValues);
163167
}
164168
}
165169

166170
private void readFromParentValue(
167-
Map<String, List<IgnoredSourceFieldMapper.NameValue>> valuesForFieldAndParents,
171+
Map<String, IgnoredSourceFieldMapper.NameValue> valuesForFieldAndParents,
168172
List<T> blockValues
169173
) throws IOException {
170174
if (valuesForFieldAndParents.isEmpty()) {
@@ -174,11 +178,8 @@ private void readFromParentValue(
174178
// If a parent object is stored at a particular level its children won't be stored.
175179
// So we should only ever have one parent here.
176180
assert valuesForFieldAndParents.size() == 1 : "_ignored_source field contains multiple levels of the same object";
177-
var parentValues = valuesForFieldAndParents.values().iterator().next();
178-
179-
for (var nameValue : parentValues) {
180-
parseFieldFromParent(nameValue, blockValues);
181-
}
181+
var parentValue = valuesForFieldAndParents.values().iterator().next();
182+
parseFieldFromParent(parentValue, blockValues);
182183
}
183184

184185
private void parseFieldFromParent(IgnoredSourceFieldMapper.NameValue nameValue, List<T> blockValues) throws IOException {
@@ -243,6 +244,33 @@ private void parseWithReader(XContentParser parser, List<T> blockValues) throws
243244
public boolean canReuse(int startingDocID) {
244245
return true;
245246
}
247+
248+
public String fieldName() {
249+
return fieldName;
250+
}
251+
252+
public Reader<T> reader() {
253+
return reader;
254+
}
255+
256+
@Override
257+
public boolean equals(Object obj) {
258+
if (obj == this) return true;
259+
if (obj == null || obj.getClass() != this.getClass()) return false;
260+
var that = (IgnoredSourceRowStrideReader) obj;
261+
return Objects.equals(this.fieldName, that.fieldName) && Objects.equals(this.reader, that.reader);
262+
}
263+
264+
@Override
265+
public int hashCode() {
266+
return Objects.hash(fieldName, reader);
267+
}
268+
269+
@Override
270+
public String toString() {
271+
return "IgnoredSourceRowStrideReader[" + "fieldName=" + fieldName + ", " + "reader=" + reader + ']';
272+
}
273+
246274
}
247275

248276
/**

0 commit comments

Comments
 (0)