Skip to content

Commit 05173f2

Browse files
authored
Add option to skip using _ignored_source field for synthetic source (#112963) (#113065)
(cherry picked from commit 36b3549)

# Conflicts:
#	server/src/main/java/org/elasticsearch/common/settings/IndexScopedSettings.java
#	server/src/main/java/org/elasticsearch/index/IndexSettings.java
#	server/src/main/java/org/elasticsearch/index/mapper/DocumentParserContext.java
#	server/src/main/java/org/elasticsearch/index/mapper/NestedObjectMapper.java
#	x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java
1 parent b80b018 commit 05173f2

File tree

15 files changed

+501
-44
lines changed

15 files changed

+501
-44
lines changed

server/src/main/java/org/elasticsearch/common/settings/IndexScopedSettings.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
import org.elasticsearch.index.engine.EngineConfig;
3333
import org.elasticsearch.index.fielddata.IndexFieldDataService;
3434
import org.elasticsearch.index.mapper.FieldMapper;
35+
import org.elasticsearch.index.mapper.IgnoredSourceFieldMapper;
3536
import org.elasticsearch.index.mapper.MapperService;
3637
import org.elasticsearch.index.similarity.SimilarityService;
3738
import org.elasticsearch.index.store.FsDirectoryFactory;
@@ -181,6 +182,8 @@ public final class IndexScopedSettings extends AbstractScopedSettings {
181182
IndexSettings.TIME_SERIES_ES87TSDB_CODEC_ENABLED_SETTING,
182183
IndexSettings.PREFER_ILM_SETTING,
183184
DataStreamFailureStoreDefinition.FAILURE_STORE_DEFINITION_VERSION_SETTING,
185+
IgnoredSourceFieldMapper.SKIP_IGNORED_SOURCE_WRITE_SETTING,
186+
IgnoredSourceFieldMapper.SKIP_IGNORED_SOURCE_READ_SETTING,
184187

185188
// validate that built-in similarities don't get redefined
186189
Setting.groupSetting("index.similarity.", (s) -> {

server/src/main/java/org/elasticsearch/index/IndexSettings.java

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
import org.elasticsearch.common.unit.ByteSizeUnit;
2525
import org.elasticsearch.common.unit.ByteSizeValue;
2626
import org.elasticsearch.core.TimeValue;
27+
import org.elasticsearch.index.mapper.IgnoredSourceFieldMapper;
2728
import org.elasticsearch.index.translog.Translog;
2829
import org.elasticsearch.ingest.IngestService;
2930
import org.elasticsearch.node.Node;
@@ -776,6 +777,8 @@ private void setRetentionLeaseMillis(final TimeValue retentionLease) {
776777
private volatile long mappingDepthLimit;
777778
private volatile long mappingFieldNameLengthLimit;
778779
private volatile long mappingDimensionFieldsLimit;
780+
private volatile boolean skipIgnoredSourceWrite;
781+
private volatile boolean skipIgnoredSourceRead;
779782

780783
/**
781784
* The maximum number of refresh listeners allowed on this shard.
@@ -923,6 +926,8 @@ public IndexSettings(final IndexMetadata indexMetadata, final Settings nodeSetti
923926
mappingDimensionFieldsLimit = scopedSettings.get(INDEX_MAPPING_DIMENSION_FIELDS_LIMIT_SETTING);
924927
indexRouting = IndexRouting.fromIndexMetadata(indexMetadata);
925928
es87TSDBCodecEnabled = scopedSettings.get(TIME_SERIES_ES87TSDB_CODEC_ENABLED_SETTING);
929+
skipIgnoredSourceWrite = scopedSettings.get(IgnoredSourceFieldMapper.SKIP_IGNORED_SOURCE_WRITE_SETTING);
930+
skipIgnoredSourceRead = scopedSettings.get(IgnoredSourceFieldMapper.SKIP_IGNORED_SOURCE_READ_SETTING);
926931

927932
scopedSettings.addSettingsUpdateConsumer(
928933
MergePolicyConfig.INDEX_COMPOUND_FORMAT_SETTING,
@@ -1005,6 +1010,11 @@ public IndexSettings(final IndexMetadata indexMetadata, final Settings nodeSetti
10051010
scopedSettings.addSettingsUpdateConsumer(INDEX_MAPPING_DEPTH_LIMIT_SETTING, this::setMappingDepthLimit);
10061011
scopedSettings.addSettingsUpdateConsumer(INDEX_MAPPING_FIELD_NAME_LENGTH_LIMIT_SETTING, this::setMappingFieldNameLengthLimit);
10071012
scopedSettings.addSettingsUpdateConsumer(INDEX_MAPPING_DIMENSION_FIELDS_LIMIT_SETTING, this::setMappingDimensionFieldsLimit);
1013+
scopedSettings.addSettingsUpdateConsumer(
1014+
IgnoredSourceFieldMapper.SKIP_IGNORED_SOURCE_WRITE_SETTING,
1015+
this::setSkipIgnoredSourceWrite
1016+
);
1017+
scopedSettings.addSettingsUpdateConsumer(IgnoredSourceFieldMapper.SKIP_IGNORED_SOURCE_READ_SETTING, this::setSkipIgnoredSourceRead);
10081018
}
10091019

10101020
private void setSearchIdleAfter(TimeValue searchIdleAfter) {
@@ -1581,6 +1591,22 @@ private void setMappingDimensionFieldsLimit(long value) {
15811591
this.mappingDimensionFieldsLimit = value;
15821592
}
15831593

1594+
public boolean getSkipIgnoredSourceWrite() {
1595+
return skipIgnoredSourceWrite;
1596+
}
1597+
1598+
private void setSkipIgnoredSourceWrite(boolean value) {
1599+
this.skipIgnoredSourceWrite = value;
1600+
}
1601+
1602+
public boolean getSkipIgnoredSourceRead() {
1603+
return skipIgnoredSourceRead;
1604+
}
1605+
1606+
private void setSkipIgnoredSourceRead(boolean value) {
1607+
this.skipIgnoredSourceRead = value;
1608+
}
1609+
15841610
/**
15851611
* The bounds for {@code @timestamp} on this index or
15861612
* {@code null} if there are no bounds.

server/src/main/java/org/elasticsearch/index/mapper/DocumentParserContext.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -326,7 +326,7 @@ final boolean getClonedSource() {
326326
}
327327

328328
public final boolean canAddIgnoredField() {
329-
return mappingLookup.isSourceSynthetic() && clonedSource == false;
329+
return mappingLookup.isSourceSynthetic() && clonedSource == false && indexSettings().getSkipIgnoredSourceWrite() == false;
330330
}
331331

332332
/**

server/src/main/java/org/elasticsearch/index/mapper/IgnoredSourceFieldMapper.java

Lines changed: 85 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -9,12 +9,15 @@
99
package org.elasticsearch.index.mapper;
1010

1111
import org.apache.lucene.document.StoredField;
12+
import org.apache.lucene.index.LeafReader;
1213
import org.apache.lucene.util.BytesRef;
1314
import org.elasticsearch.common.bytes.BytesReference;
15+
import org.elasticsearch.common.settings.Setting;
1416
import org.elasticsearch.common.util.ByteUtils;
1517
import org.elasticsearch.common.xcontent.XContentHelper;
1618
import org.elasticsearch.core.Tuple;
1719
import org.elasticsearch.features.NodeFeature;
20+
import org.elasticsearch.index.IndexSettings;
1821
import org.elasticsearch.index.query.SearchExecutionContext;
1922
import org.elasticsearch.xcontent.XContentBuilder;
2023
import org.elasticsearch.xcontent.XContentType;
@@ -26,6 +29,8 @@
2629
import java.util.Comparator;
2730
import java.util.List;
2831
import java.util.Map;
32+
import java.util.Set;
33+
import java.util.stream.Stream;
2934

3035
/**
3136
@@ -39,6 +44,7 @@
3944
* if we can replace it for all use cases to avoid duplication, assuming that the storage tradeoff is favorable.
4045
*/
4146
public class IgnoredSourceFieldMapper extends MetadataFieldMapper {
47+
private final IndexSettings indexSettings;
4248

4349
// This factor is used to combine two offsets within the same integer:
4450
// - the offset of the end of the parent field within the field name (N / PARENT_OFFSET_IN_NAME_OFFSET)
@@ -48,12 +54,32 @@ public class IgnoredSourceFieldMapper extends MetadataFieldMapper {
4854

4955
public static final String NAME = "_ignored_source";
5056

51-
public static final IgnoredSourceFieldMapper INSTANCE = new IgnoredSourceFieldMapper();
52-
53-
public static final TypeParser PARSER = new FixedTypeParser(context -> INSTANCE);
57+
public static final TypeParser PARSER = new FixedTypeParser(context -> new IgnoredSourceFieldMapper(context.getIndexSettings()));
5458

5559
static final NodeFeature TRACK_IGNORED_SOURCE = new NodeFeature("mapper.track_ignored_source");
5660

61+
/*
62+
Setting to disable encoding and writing values for this field.
63+
This is needed to unblock index functionality in case there is a bug on this code path.
64+
*/
65+
public static final Setting<Boolean> SKIP_IGNORED_SOURCE_WRITE_SETTING = Setting.boolSetting(
66+
"index.mapping.synthetic_source.skip_ignored_source_write",
67+
false,
68+
Setting.Property.Dynamic,
69+
Setting.Property.IndexScope
70+
);
71+
72+
/*
73+
Setting to disable reading and decoding values stored in this field.
74+
This is needed to unblock search functionality in case there is a bug on this code path.
75+
*/
76+
public static final Setting<Boolean> SKIP_IGNORED_SOURCE_READ_SETTING = Setting.boolSetting(
77+
"index.mapping.synthetic_source.skip_ignored_source_read",
78+
false,
79+
Setting.Property.Dynamic,
80+
Setting.Property.IndexScope
81+
);
82+
5783
/*
5884
* Container for the ignored field data:
5985
* - the full name
@@ -107,8 +133,9 @@ public ValueFetcher valueFetcher(SearchExecutionContext context, String format)
107133
}
108134
}
109135

110-
private IgnoredSourceFieldMapper() {
136+
private IgnoredSourceFieldMapper(IndexSettings indexSettings) {
111137
super(IgnoredValuesFieldMapperType.INSTANCE);
138+
this.indexSettings = indexSettings;
112139
}
113140

114141
@Override
@@ -150,6 +177,18 @@ static NameValue decode(Object field) {
150177
return new NameValue(name, parentOffset, value, null);
151178
}
152179

180+
// In rare cases, decoding values stored in this field can fail, leading to the entire source
181+
// not being available.
182+
// We would like to have an option to lose some values in synthetic source
183+
// but have search not fail.
184+
public static Set<String> ensureLoaded(Set<String> fieldsToLoadForSyntheticSource, IndexSettings indexSettings) {
185+
if (indexSettings.getSkipIgnoredSourceRead() == false) {
186+
fieldsToLoadForSyntheticSource.add(NAME);
187+
}
188+
189+
return fieldsToLoadForSyntheticSource;
190+
}
191+
153192
public record MappedNameValue(NameValue nameValue, XContentType type, Map<String, Object> map) {}
154193

155194
/**
@@ -200,11 +239,49 @@ public static byte[] encodeFromMap(MappedNameValue mappedNameValue, Map<String,
200239
return IgnoredSourceFieldMapper.encode(filteredNameValue);
201240
}
202241

203-
// This mapper doesn't contribute to source directly as it has no access to the object structure. Instead, its contents
204-
// are loaded by SourceLoader and passed to object mappers that, in turn, write their ignore fields at the appropriate level.
242+
// This loader controls whether this field is loaded as part of synthetic source construction.
243+
// In rare cases, decoding values stored in this field can fail, leading to the entire source
244+
// not being available.
245+
// We would like to have an option to lose some values in synthetic source
246+
// but have search not fail.
205247
@Override
206248
public SourceLoader.SyntheticFieldLoader syntheticFieldLoader() {
207-
return SourceLoader.SyntheticFieldLoader.NOTHING;
249+
return new SourceLoader.SyntheticFieldLoader() {
250+
@Override
251+
public Stream<Map.Entry<String, StoredFieldLoader>> storedFieldLoaders() {
252+
if (indexSettings.getSkipIgnoredSourceRead()) {
253+
return Stream.empty();
254+
}
255+
256+
// Values are handled in `SourceLoader`.
257+
return Stream.of(Map.entry(NAME, (v) -> {}));
258+
}
259+
260+
@Override
261+
public DocValuesLoader docValuesLoader(LeafReader leafReader, int[] docIdsInLeaf) throws IOException {
262+
return null;
263+
}
264+
265+
@Override
266+
public boolean hasValue() {
267+
return false;
268+
}
269+
270+
@Override
271+
public void write(XContentBuilder b) throws IOException {
272+
273+
}
274+
275+
@Override
276+
public String fieldName() {
277+
// Does not really matter.
278+
return NAME;
279+
}
280+
281+
@Override
282+
public void reset() {
283+
284+
}
285+
};
208286
}
209-
210287
}

server/src/main/java/org/elasticsearch/index/mapper/NestedObjectMapper.java

Lines changed: 25 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
import org.elasticsearch.common.Explicit;
1919
import org.elasticsearch.common.lucene.search.Queries;
2020
import org.elasticsearch.common.xcontent.support.XContentMapValues;
21+
import org.elasticsearch.index.IndexSettings;
2122
import org.elasticsearch.index.IndexVersion;
2223
import org.elasticsearch.index.IndexVersions;
2324
import org.elasticsearch.index.fieldvisitor.LeafStoredFieldLoader;
@@ -46,11 +47,18 @@ public static class Builder extends ObjectMapper.Builder {
4647
private Explicit<Boolean> includeInParent = Explicit.IMPLICIT_FALSE;
4748
private final IndexVersion indexCreatedVersion;
4849
private final Function<Query, BitSetProducer> bitSetProducer;
50+
private final IndexSettings indexSettings;
4951

50-
public Builder(String name, IndexVersion indexCreatedVersion, Function<Query, BitSetProducer> bitSetProducer) {
52+
public Builder(
53+
String name,
54+
IndexVersion indexCreatedVersion,
55+
Function<Query, BitSetProducer> bitSetProducer,
56+
IndexSettings indexSettings
57+
) {
5158
super(name, Explicit.IMPLICIT_TRUE);
5259
this.indexCreatedVersion = indexCreatedVersion;
5360
this.bitSetProducer = bitSetProducer;
61+
this.indexSettings = indexSettings;
5462
}
5563

5664
Builder includeInRoot(boolean includeInRoot) {
@@ -111,7 +119,8 @@ public NestedObjectMapper build(MapperBuilderContext context) {
111119
parentTypeFilter,
112120
nestedTypePath,
113121
nestedTypeFilter,
114-
bitSetProducer
122+
bitSetProducer,
123+
indexSettings
115124
);
116125
}
117126
}
@@ -126,7 +135,8 @@ public Mapper.Builder parse(String name, Map<String, Object> node, MappingParser
126135
NestedObjectMapper.Builder builder = new NestedObjectMapper.Builder(
127136
name,
128137
parserContext.indexVersionCreated(),
129-
parserContext::bitSetProducer
138+
parserContext::bitSetProducer,
139+
parserContext.getIndexSettings()
130140
);
131141
parseNested(name, node, builder);
132142
parseObjectFields(node, parserContext, builder);
@@ -193,6 +203,7 @@ public MapperBuilderContext createChildContext(String name, Dynamic dynamic) {
193203
private final Query nestedTypeFilter;
194204
// Function to create a bitset for identifying parent documents
195205
private final Function<Query, BitSetProducer> bitsetProducer;
206+
private final IndexSettings indexSettings;
196207

197208
NestedObjectMapper(
198209
String name,
@@ -206,7 +217,8 @@ public MapperBuilderContext createChildContext(String name, Dynamic dynamic) {
206217
Query parentTypeFilter,
207218
String nestedTypePath,
208219
Query nestedTypeFilter,
209-
Function<Query, BitSetProducer> bitsetProducer
220+
Function<Query, BitSetProducer> bitsetProducer,
221+
IndexSettings indexSettings
210222
) {
211223
super(name, fullPath, enabled, Explicit.IMPLICIT_TRUE, storeArraySource, dynamic, mappers);
212224
this.parentTypeFilter = parentTypeFilter;
@@ -215,6 +227,7 @@ public MapperBuilderContext createChildContext(String name, Dynamic dynamic) {
215227
this.includeInParent = includeInParent;
216228
this.includeInRoot = includeInRoot;
217229
this.bitsetProducer = bitsetProducer;
230+
this.indexSettings = indexSettings;
218231
}
219232

220233
public Query parentTypeFilter() {
@@ -252,7 +265,7 @@ public Map<String, Mapper> getChildren() {
252265

253266
@Override
254267
public ObjectMapper.Builder newBuilder(IndexVersion indexVersionCreated) {
255-
NestedObjectMapper.Builder builder = new NestedObjectMapper.Builder(leafName(), indexVersionCreated, bitsetProducer);
268+
NestedObjectMapper.Builder builder = new NestedObjectMapper.Builder(leafName(), indexVersionCreated, bitsetProducer, indexSettings);
256269
builder.enabled = enabled;
257270
builder.dynamic = dynamic;
258271
builder.includeInRoot = includeInRoot;
@@ -274,7 +287,8 @@ NestedObjectMapper withoutMappers() {
274287
parentTypeFilter,
275288
nestedTypePath,
276289
nestedTypeFilter,
277-
bitsetProducer
290+
bitsetProducer,
291+
indexSettings
278292
);
279293
}
280294

@@ -349,7 +363,8 @@ public ObjectMapper merge(Mapper mergeWith, MapperMergeContext parentMergeContex
349363
parentTypeFilter,
350364
nestedTypePath,
351365
nestedTypeFilter,
352-
bitsetProducer
366+
bitsetProducer,
367+
indexSettings
353368
);
354369
}
355370

@@ -382,7 +397,9 @@ public SourceLoader.SyntheticFieldLoader syntheticFieldLoader() {
382397
}
383398

384399
SourceLoader sourceLoader = new SourceLoader.Synthetic(() -> super.syntheticFieldLoader(mappers.values().stream(), true), NOOP);
385-
var storedFieldLoader = org.elasticsearch.index.fieldvisitor.StoredFieldLoader.create(false, sourceLoader.requiredStoredFields());
400+
// Some synthetic source use cases require using _ignored_source field
401+
var requiredStoredFields = IgnoredSourceFieldMapper.ensureLoaded(sourceLoader.requiredStoredFields(), indexSettings);
402+
var storedFieldLoader = org.elasticsearch.index.fieldvisitor.StoredFieldLoader.create(false, requiredStoredFields);
386403
return new NestedSyntheticFieldLoader(
387404
storedFieldLoader,
388405
sourceLoader,

server/src/main/java/org/elasticsearch/index/mapper/SourceLoader.java

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,6 @@ public Synthetic(Supplier<SyntheticFieldLoader> fieldLoaderSupplier, SourceField
117117
.storedFieldLoaders()
118118
.map(Map.Entry::getKey)
119119
.collect(Collectors.toSet());
120-
this.requiredStoredFields.add(IgnoredSourceFieldMapper.NAME);
121120
this.metrics = metrics;
122121
}
123122

server/src/test/java/org/elasticsearch/index/mapper/FieldAliasMapperValidationTests.java

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -184,9 +184,8 @@ private static ObjectMapper createObjectMapper(String name) {
184184
}
185185

186186
private static NestedObjectMapper createNestedObjectMapper(String name) {
187-
return new NestedObjectMapper.Builder(name, IndexVersion.current(), query -> { throw new UnsupportedOperationException(); }).build(
188-
MapperBuilderContext.root(false, false)
189-
);
187+
return new NestedObjectMapper.Builder(name, IndexVersion.current(), query -> { throw new UnsupportedOperationException(); }, null)
188+
.build(MapperBuilderContext.root(false, false));
190189
}
191190

192191
private static MappingLookup createMappingLookup(

0 commit comments

Comments
 (0)