Skip to content

Commit 84470e9

Browse files
Alternate approach to speed up ignored_source access (#133839)
In #132142 and #132428 we split up ignored_source entries into distinct lucene fields, then added an optimized field visitor to speed up retrieving unmapped values for INSIST_🐔. However, since this approach creates a unique lucene field for every ignored_source entry, we can very quickly have a lot of lucene fields if there are a lot of unique unmapped fields per document. This can cause significant slowdowns in indexing throughput and merge time. This PR addresses those limitations by reverting back to keeping all ignored_source entries under the same lucene field. However, we still keep some of the speedups from that prior work by continuing to coalesce multiple ignored_source entries for the same field into a single entry, allowing the field visitor to exit early. Unfortunately, we do lose some time compared to the original optimizations because now the field visitor cannot look at the fieldInfo to decide whether or not to visit a field, and it instead needs to actually visit and materialize each ignored_source entry before it can decide whether or not to keep it.
1 parent 89d919d commit 84470e9

File tree

18 files changed

+575
-482
lines changed

18 files changed

+575
-482
lines changed

server/src/main/java/org/elasticsearch/index/fieldvisitor/IgnoredSourceFieldLoader.java

Lines changed: 21 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -14,11 +14,13 @@
1414
import org.apache.lucene.index.StoredFieldVisitor;
1515
import org.apache.lucene.util.BytesRef;
1616
import org.elasticsearch.common.bytes.BytesReference;
17+
import org.elasticsearch.index.mapper.FallbackSyntheticSourceBlockLoader;
1718
import org.elasticsearch.index.mapper.IgnoredSourceFieldMapper;
1819
import org.elasticsearch.search.fetch.StoredFieldsSpec;
1920

2021
import java.io.IOException;
2122
import java.util.ArrayList;
23+
import java.util.Collections;
2224
import java.util.HashMap;
2325
import java.util.HashSet;
2426
import java.util.List;
@@ -38,8 +40,8 @@ class IgnoredSourceFieldLoader extends StoredFieldLoader {
3840

3941
HashMap<String, Set<String>> potentialFieldsInIgnoreSource = new HashMap<>();
4042
for (String requiredIgnoredField : spec.ignoredFieldsSpec().requiredIgnoredFields()) {
41-
for (String potentialStoredField : spec.ignoredFieldsSpec().format().requiredStoredFields(requiredIgnoredField)) {
42-
potentialFieldsInIgnoreSource.computeIfAbsent(potentialStoredField, k -> new HashSet<>()).add(requiredIgnoredField);
43+
for (String potentialIgnoredField : FallbackSyntheticSourceBlockLoader.splitIntoFieldPaths(requiredIgnoredField)) {
44+
potentialFieldsInIgnoreSource.computeIfAbsent(potentialIgnoredField, k -> new HashSet<>()).add(requiredIgnoredField);
4345
}
4446
}
4547
this.potentialFieldsInIgnoreSource = potentialFieldsInIgnoreSource;
@@ -82,7 +84,7 @@ public String routing() {
8284

8385
@Override
8486
public Map<String, List<Object>> storedFields() {
85-
return visitor.values;
87+
return Map.of(IgnoredSourceFieldMapper.NAME, Collections.unmodifiableList(visitor.values));
8688
}
8789
};
8890
}
@@ -93,7 +95,7 @@ public List<String> fieldsToLoad() {
9395
}
9496

9597
static class SFV extends StoredFieldVisitor {
96-
final Map<String, List<Object>> values = new HashMap<>();
98+
List<List<IgnoredSourceFieldMapper.NameValue>> values = new ArrayList<>();
9799
final Set<String> fieldNames;
98100
private final Set<String> unvisitedFields;
99101
final Map<String, Set<String>> potentialFieldsInIgnoreSource;
@@ -110,18 +112,26 @@ public Status needsField(FieldInfo fieldInfo) throws IOException {
110112
return Status.STOP;
111113
}
112114

113-
Set<String> foundFields = potentialFieldsInIgnoreSource.get(fieldInfo.name);
114-
if (foundFields == null) {
115-
return Status.NO;
115+
if (fieldInfo.name.equals(IgnoredSourceFieldMapper.NAME)) {
116+
return Status.YES;
116117
}
117118

118-
unvisitedFields.removeAll(foundFields);
119-
return Status.YES;
119+
return Status.NO;
120120
}
121121

122122
@Override
123123
public void binaryField(FieldInfo fieldInfo, byte[] value) {
124-
values.computeIfAbsent(fieldInfo.name, k -> new ArrayList<>()).add(new BytesRef(value));
124+
var nameValues = IgnoredSourceFieldMapper.CoalescedIgnoredSourceEncoding.decode(new BytesRef(value));
125+
assert nameValues.isEmpty() == false;
126+
String fieldPath = nameValues.getFirst().name();
127+
128+
Set<String> foundValues = potentialFieldsInIgnoreSource.get(fieldPath);
129+
if (foundValues == null) {
130+
return;
131+
}
132+
133+
unvisitedFields.removeAll(foundValues);
134+
values.add(nameValues);
125135
}
126136

127137
void reset() {
@@ -133,6 +143,6 @@ void reset() {
133143

134144
static boolean supports(StoredFieldsSpec spec) {
135145
return spec.onlyRequiresIgnoredFields()
136-
&& spec.ignoredFieldsSpec().format() == IgnoredSourceFieldMapper.IgnoredSourceFormat.PER_FIELD_IGNORED_SOURCE;
146+
&& spec.ignoredFieldsSpec().format() == IgnoredSourceFieldMapper.IgnoredSourceFormat.COALESCED_SINGLE_IGNORED_SOURCE;
137147
}
138148
}

server/src/main/java/org/elasticsearch/index/fieldvisitor/StoredFieldLoader.java

Lines changed: 1 addition & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -9,15 +9,13 @@
99

1010
package org.elasticsearch.index.fieldvisitor;
1111

12-
import org.apache.lucene.index.FieldInfo;
1312
import org.apache.lucene.index.LeafReader;
1413
import org.apache.lucene.index.LeafReaderContext;
1514
import org.apache.lucene.index.StoredFieldVisitor;
1615
import org.apache.lucene.index.StoredFields;
1716
import org.elasticsearch.common.CheckedBiConsumer;
1817
import org.elasticsearch.common.bytes.BytesReference;
1918
import org.elasticsearch.common.lucene.index.SequentialStoredFieldsLeafReader;
20-
import org.elasticsearch.index.mapper.IgnoredSourceFieldMapper;
2119
import org.elasticsearch.search.fetch.StoredFieldsSpec;
2220

2321
import java.io.IOException;
@@ -202,29 +200,13 @@ private static class ReaderStoredFieldLoader implements LeafStoredFieldLoader {
202200
private final CustomFieldsVisitor visitor;
203201
private int doc = -1;
204202

205-
private static CustomFieldsVisitor getFieldsVisitor(Set<String> fields, boolean loadSource) {
206-
if (fields.contains(IgnoredSourceFieldMapper.NAME)) {
207-
return new CustomFieldsVisitor(fields, loadSource) {
208-
@Override
209-
public Status needsField(FieldInfo fieldInfo) {
210-
if (fieldInfo.name.startsWith(IgnoredSourceFieldMapper.NAME)) {
211-
return Status.YES;
212-
}
213-
return super.needsField(fieldInfo);
214-
}
215-
};
216-
}
217-
218-
return new CustomFieldsVisitor(fields, loadSource);
219-
}
220-
221203
ReaderStoredFieldLoader(
222204
CheckedBiConsumer<Integer, StoredFieldVisitor, IOException> reader,
223205
boolean loadSource,
224206
Set<String> fields
225207
) {
226208
this.reader = reader;
227-
this.visitor = getFieldsVisitor(fields, loadSource);
209+
this.visitor = new CustomFieldsVisitor(fields, loadSource);
228210
}
229211

230212
@Override

server/src/main/java/org/elasticsearch/index/get/GetResult.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -244,7 +244,7 @@ public XContentBuilder toXContentEmbedded(XContentBuilder builder, Params params
244244

245245
for (DocumentField field : metaFields.values()) {
246246
// TODO: can we avoid having an exception here?
247-
if (field.getName().equals(IgnoredFieldMapper.NAME) || field.getName().startsWith(IgnoredSourceFieldMapper.NAME)) {
247+
if (field.getName().equals(IgnoredFieldMapper.NAME) || field.getName().equals(IgnoredSourceFieldMapper.NAME)) {
248248
builder.field(field.getName(), field.getValues());
249249
} else {
250250
builder.field(field.getName(), field.<Object>getValue());

server/src/main/java/org/elasticsearch/index/mapper/IgnoredFieldsSpec.java

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@
1313

1414
import java.util.HashSet;
1515
import java.util.Set;
16-
import java.util.stream.Collectors;
1716

1817
/**
1918
* Defines which fields need to be loaded from _ignored_source during a fetch.
@@ -54,7 +53,6 @@ public IgnoredFieldsSpec merge(IgnoredFieldsSpec other) {
5453
* Get the set of stored fields required to load the specified fields from _ignored_source.
5554
*/
5655
public Set<String> requiredStoredFields() {
57-
return requiredIgnoredFields.stream().flatMap(field -> format.requiredStoredFields(field).stream()).collect(Collectors.toSet());
58-
56+
return Set.of(IgnoredSourceFieldMapper.NAME);
5957
}
6058
}

0 commit comments

Comments
 (0)