Skip to content

Commit 1075553

Browse files
Store ignored source in unique stored fields per entry (elastic#132142)
This PR does the following:
* Stores each _ignored_source entry in a unique stored field called _ignored_source.<field_name>.
* Coalesces multiple entries for the same field name into a single Lucene stored field.
* Adds the WildcardFieldMaskingReader so that, when running synthetic source roundtrip tests, we can ignore differences in fields that match the pattern _ignored_source.*.

For now, these changes are disabled by default behind a feature flag.
1 parent 8ca0947 commit 1075553

File tree

33 files changed

+881
-167
lines changed

33 files changed

+881
-167
lines changed

modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/ScaledFloatFieldMapper.java

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939
import org.elasticsearch.index.mapper.FallbackSyntheticSourceBlockLoader;
4040
import org.elasticsearch.index.mapper.FieldMapper;
4141
import org.elasticsearch.index.mapper.IgnoreMalformedStoredValues;
42+
import org.elasticsearch.index.mapper.IgnoredSourceFieldMapper;
4243
import org.elasticsearch.index.mapper.MapperBuilderContext;
4344
import org.elasticsearch.index.mapper.NumberFieldMapper;
4445
import org.elasticsearch.index.mapper.SimpleMappedFieldType;
@@ -380,7 +381,11 @@ public BlockLoader blockLoader(BlockLoaderContext blContext) {
380381
}
381382
// Multi fields don't have fallback synthetic source.
382383
if (isSyntheticSource && blContext.parentField(name()) == null) {
383-
return new FallbackSyntheticSourceBlockLoader(fallbackSyntheticSourceBlockLoaderReader(), name()) {
384+
return new FallbackSyntheticSourceBlockLoader(
385+
fallbackSyntheticSourceBlockLoaderReader(),
386+
name(),
387+
IgnoredSourceFieldMapper.ignoredSourceFormat(blContext.indexSettings().getIndexVersionCreated())
388+
) {
384389
@Override
385390
public Builder builder(BlockFactory factory, int expectedCount) {
386391
return factory.doubles(expectedCount);

server/src/main/java/org/elasticsearch/index/IndexVersions.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -180,6 +180,7 @@ private static Version parseUnchecked(String version) {
180180
public static final IndexVersion SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_SUPPORT = def(9_031_0_00, Version.LUCENE_10_2_2);
181181
public static final IndexVersion DEFAULT_DENSE_VECTOR_TO_BBQ_HNSW = def(9_032_0_00, Version.LUCENE_10_2_2);
182182
public static final IndexVersion MATCH_ONLY_TEXT_STORED_AS_BYTES = def(9_033_0_00, Version.LUCENE_10_2_2);
183+
public static final IndexVersion IGNORED_SOURCE_FIELDS_PER_ENTRY_WITH_FF = def(9_034_0_00, Version.LUCENE_10_2_2);
183184

184185
/*
185186
* STOP! READ THIS FIRST! No, really,

server/src/main/java/org/elasticsearch/index/fieldvisitor/StoredFieldLoader.java

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,12 +9,14 @@
99

1010
package org.elasticsearch.index.fieldvisitor;
1111

12+
import org.apache.lucene.index.FieldInfo;
1213
import org.apache.lucene.index.LeafReader;
1314
import org.apache.lucene.index.LeafReaderContext;
1415
import org.apache.lucene.index.StoredFields;
1516
import org.elasticsearch.common.CheckedBiConsumer;
1617
import org.elasticsearch.common.bytes.BytesReference;
1718
import org.elasticsearch.common.lucene.index.SequentialStoredFieldsLeafReader;
19+
import org.elasticsearch.index.mapper.IgnoredSourceFieldMapper;
1820
import org.elasticsearch.search.fetch.StoredFieldsSpec;
1921

2022
import java.io.IOException;
@@ -203,9 +205,25 @@ private static class ReaderStoredFieldLoader implements LeafStoredFieldLoader {
203205
private final CustomFieldsVisitor visitor;
204206
private int doc = -1;
205207

208+
private static CustomFieldsVisitor getFieldsVisitor(Set<String> fields, boolean loadSource) {
209+
if (fields.contains(IgnoredSourceFieldMapper.NAME)) {
210+
return new CustomFieldsVisitor(fields, loadSource) {
211+
@Override
212+
public Status needsField(FieldInfo fieldInfo) {
213+
if (fieldInfo.name.startsWith(IgnoredSourceFieldMapper.NAME)) {
214+
return Status.YES;
215+
}
216+
return super.needsField(fieldInfo);
217+
}
218+
};
219+
}
220+
221+
return new CustomFieldsVisitor(fields, loadSource);
222+
}
223+
206224
ReaderStoredFieldLoader(CheckedBiConsumer<Integer, FieldsVisitor, IOException> reader, boolean loadSource, Set<String> fields) {
207225
this.reader = reader;
208-
this.visitor = new CustomFieldsVisitor(fields, loadSource);
226+
this.visitor = getFieldsVisitor(fields, loadSource);
209227
}
210228

211229
@Override

server/src/main/java/org/elasticsearch/index/get/GetResult.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -244,7 +244,7 @@ public XContentBuilder toXContentEmbedded(XContentBuilder builder, Params params
244244

245245
for (DocumentField field : metaFields.values()) {
246246
// TODO: can we avoid having an exception here?
247-
if (field.getName().equals(IgnoredFieldMapper.NAME) || field.getName().equals(IgnoredSourceFieldMapper.NAME)) {
247+
if (field.getName().equals(IgnoredFieldMapper.NAME) || field.getName().startsWith(IgnoredSourceFieldMapper.NAME)) {
248248
builder.field(field.getName(), field.getValues());
249249
} else {
250250
builder.field(field.getName(), field.<Object>getValue());

server/src/main/java/org/elasticsearch/index/get/ShardGetService.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -322,7 +322,8 @@ private GetResult innerGetFetch(
322322
? new SourceLoader.Synthetic(
323323
sourceFilter,
324324
() -> mappingLookup.getMapping().syntheticFieldLoader(sourceFilter),
325-
mapperMetrics.sourceFieldMetrics()
325+
mapperMetrics.sourceFieldMetrics(),
326+
mappingLookup.getMapping().ignoredSourceFormat()
326327
)
327328
: mappingLookup.newSourceLoader(sourceFilter, mapperMetrics.sourceFieldMetrics());
328329
StoredFieldLoader storedFieldLoader = buildStoredFieldLoader(storedFieldSet, fetchSourceContext, loader);

server/src/main/java/org/elasticsearch/index/mapper/AbstractGeometryFieldMapper.java

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -194,7 +194,11 @@ protected BlockLoader blockLoaderFromSource(BlockLoaderContext blContext) {
194194
protected abstract Object nullValueAsSource(T nullValue);
195195

196196
protected BlockLoader blockLoaderFromFallbackSyntheticSource(BlockLoaderContext blContext) {
197-
return new FallbackSyntheticSourceBlockLoader(new GeometriesFallbackSyntheticSourceReader(), name()) {
197+
return new FallbackSyntheticSourceBlockLoader(
198+
new GeometriesFallbackSyntheticSourceReader(),
199+
name(),
200+
IgnoredSourceFieldMapper.ignoredSourceFormat(blContext.indexSettings().getIndexVersionCreated())
201+
) {
198202
@Override
199203
public Builder builder(BlockFactory factory, int expectedCount) {
200204
return factory.bytesRefs(expectedCount);

server/src/main/java/org/elasticsearch/index/mapper/BooleanFieldMapper.java

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -352,7 +352,11 @@ public BlockLoader blockLoader(BlockLoaderContext blContext) {
352352

353353
// Multi fields don't have fallback synthetic source.
354354
if (isSyntheticSource && blContext.parentField(name()) == null) {
355-
return new FallbackSyntheticSourceBlockLoader(fallbackSyntheticSourceBlockLoaderReader(), name()) {
355+
return new FallbackSyntheticSourceBlockLoader(
356+
fallbackSyntheticSourceBlockLoaderReader(),
357+
name(),
358+
IgnoredSourceFieldMapper.ignoredSourceFormat(blContext.indexSettings().getIndexVersionCreated())
359+
) {
356360
@Override
357361
public Builder builder(BlockFactory factory, int expectedCount) {
358362
return factory.booleans(expectedCount);

server/src/main/java/org/elasticsearch/index/mapper/DateFieldMapper.java

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1018,7 +1018,11 @@ public BlockLoader blockLoader(BlockLoaderContext blContext) {
10181018

10191019
// Multi fields don't have fallback synthetic source.
10201020
if (isSyntheticSource && blContext.parentField(name()) == null) {
1021-
return new FallbackSyntheticSourceBlockLoader(fallbackSyntheticSourceBlockLoaderReader(), name()) {
1021+
return new FallbackSyntheticSourceBlockLoader(
1022+
fallbackSyntheticSourceBlockLoaderReader(),
1023+
name(),
1024+
IgnoredSourceFieldMapper.ignoredSourceFormat(blContext.indexSettings().getIndexVersionCreated())
1025+
) {
10221026
@Override
10231027
public Builder builder(BlockFactory factory, int expectedCount) {
10241028
return factory.longs(expectedCount);

server/src/main/java/org/elasticsearch/index/mapper/FallbackSyntheticSourceBlockLoader.java

Lines changed: 53 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -17,13 +17,13 @@
1717

1818
import java.io.IOException;
1919
import java.util.ArrayList;
20-
import java.util.HashMap;
2120
import java.util.HashSet;
2221
import java.util.List;
2322
import java.util.Map;
2423
import java.util.Optional;
2524
import java.util.Set;
2625
import java.util.Stack;
26+
import java.util.stream.Collectors;
2727

2828
/**
2929
* Block loader for fields that use fallback synthetic source implementation.
@@ -39,10 +39,19 @@
3939
public abstract class FallbackSyntheticSourceBlockLoader implements BlockLoader {
4040
private final Reader<?> reader;
4141
private final String fieldName;
42-
43-
protected FallbackSyntheticSourceBlockLoader(Reader<?> reader, String fieldName) {
42+
private final Set<String> fieldPaths;
43+
private final IgnoredSourceFieldMapper.IgnoredSourceFormat ignoredSourceFormat;
44+
45+
protected FallbackSyntheticSourceBlockLoader(
46+
Reader<?> reader,
47+
String fieldName,
48+
IgnoredSourceFieldMapper.IgnoredSourceFormat ignoredSourceFormat
49+
) {
50+
assert ignoredSourceFormat != IgnoredSourceFieldMapper.IgnoredSourceFormat.NO_IGNORED_SOURCE;
4451
this.reader = reader;
4552
this.fieldName = fieldName;
53+
this.ignoredSourceFormat = ignoredSourceFormat;
54+
this.fieldPaths = splitIntoFieldPaths(fieldName);
4655
}
4756

4857
@Override
@@ -52,12 +61,19 @@ public ColumnAtATimeReader columnAtATimeReader(LeafReaderContext context) throws
5261

5362
@Override
5463
public RowStrideReader rowStrideReader(LeafReaderContext context) throws IOException {
55-
return new IgnoredSourceRowStrideReader<>(fieldName, reader);
64+
return new IgnoredSourceRowStrideReader<>(fieldName, fieldPaths, reader, ignoredSourceFormat);
5665
}
5766

5867
@Override
5968
public StoredFieldsSpec rowStrideStoredFieldSpec() {
60-
return new StoredFieldsSpec(false, false, Set.of(IgnoredSourceFieldMapper.NAME));
69+
Set<String> ignoredFieldNames;
70+
if (ignoredSourceFormat == IgnoredSourceFieldMapper.IgnoredSourceFormat.PER_FIELD_IGNORED_SOURCE) {
71+
ignoredFieldNames = fieldPaths.stream().map(IgnoredSourceFieldMapper::ignoredFieldName).collect(Collectors.toSet());
72+
} else {
73+
ignoredFieldNames = Set.of(IgnoredSourceFieldMapper.NAME);
74+
}
75+
76+
return new StoredFieldsSpec(false, false, ignoredFieldNames);
6177
}
6278

6379
@Override
@@ -70,49 +86,51 @@ public SortedSetDocValues ordinals(LeafReaderContext context) throws IOException
7086
throw new UnsupportedOperationException();
7187
}
7288

89+
public static Set<String> splitIntoFieldPaths(String fieldName) {
90+
var paths = new HashSet<String>();
91+
paths.add("_doc");
92+
var current = new StringBuilder();
93+
for (var part : fieldName.split("\\.")) {
94+
if (current.isEmpty() == false) {
95+
current.append('.');
96+
}
97+
current.append(part);
98+
paths.add(current.toString());
99+
}
100+
return paths;
101+
}
102+
73103
private static class IgnoredSourceRowStrideReader<T> implements RowStrideReader {
74-
// Contains name of the field and all its parents
75-
private final Set<String> fieldNames;
76104
private final String fieldName;
105+
// Contains name of the field and all its parents
106+
private final Set<String> fieldPaths;
77107
private final Reader<T> reader;
78-
79-
IgnoredSourceRowStrideReader(String fieldName, Reader<T> reader) {
108+
private final IgnoredSourceFieldMapper.IgnoredSourceFormat ignoredSourceFormat;
109+
110+
IgnoredSourceRowStrideReader(
111+
String fieldName,
112+
Set<String> fieldPaths,
113+
Reader<T> reader,
114+
IgnoredSourceFieldMapper.IgnoredSourceFormat ignoredSourceFormat
115+
) {
80116
this.fieldName = fieldName;
117+
this.fieldPaths = fieldPaths;
81118
this.reader = reader;
82-
this.fieldNames = new HashSet<>() {
83-
{
84-
add("_doc");
85-
}
86-
};
87-
88-
var current = new StringBuilder();
89-
for (String part : fieldName.split("\\.")) {
90-
if (current.isEmpty() == false) {
91-
current.append('.');
92-
}
93-
current.append(part);
94-
fieldNames.add(current.toString());
95-
}
96-
119+
this.ignoredSourceFormat = ignoredSourceFormat;
97120
}
98121

99122
@Override
100123
public void read(int docId, StoredFields storedFields, Builder builder) throws IOException {
101-
var ignoredSource = storedFields.storedFields().get(IgnoredSourceFieldMapper.NAME);
102-
if (ignoredSource == null) {
124+
Map<String, List<IgnoredSourceFieldMapper.NameValue>> valuesForFieldAndParents = ignoredSourceFormat.loadSingleIgnoredField(
125+
fieldPaths,
126+
storedFields.storedFields()
127+
);
128+
129+
if (valuesForFieldAndParents.isEmpty()) {
103130
builder.appendNull();
104131
return;
105132
}
106133

107-
Map<String, List<IgnoredSourceFieldMapper.NameValue>> valuesForFieldAndParents = new HashMap<>();
108-
109-
for (Object value : ignoredSource) {
110-
IgnoredSourceFieldMapper.NameValue nameValue = IgnoredSourceFieldMapper.decode(value);
111-
if (fieldNames.contains(nameValue.name())) {
112-
valuesForFieldAndParents.computeIfAbsent(nameValue.name(), k -> new ArrayList<>()).add(nameValue);
113-
}
114-
}
115-
116134
// TODO figure out how to handle XContentDataHelper#voidValue()
117135

118136
var blockValues = new ArrayList<T>();

0 commit comments

Comments
 (0)