Skip to content

Commit e80a641

Browse files
authored
[8.16] Track source for objects and fields with [synthetic_source_keep:arrays] in arrays as ignored (#116065) (#116226)
* Track source for objects and fields with [synthetic_source_keep:arrays] in arrays as ignored (#116065)
* Track source for objects and fields with [synthetic_source_keep:arrays] in arrays as ignored
* Update TransportResumeFollowActionTests.java
* rest compat fixes
* rest compat fixes
* update test

(cherry picked from commit 6cf4536)

# Conflicts:
#   rest-api-spec/build.gradle
#   rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.create/21_synthetic_source_stored.yml
#   server/src/main/java/org/elasticsearch/index/mapper/DocumentParser.java
#   server/src/main/java/org/elasticsearch/index/mapper/DocumentParserContext.java
#   server/src/test/java/org/elasticsearch/index/mapper/IgnoredSourceFieldMapperTests.java

* Update DocumentParserContext.java
* fixes
1 parent 9f98d23 commit e80a641

File tree

7 files changed

+133
-258
lines changed

7 files changed

+133
-258
lines changed

rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.create/21_synthetic_source_stored.yml

Lines changed: 6 additions & 53 deletions
Original file line number | Diff line number | Diff line change
@@ -356,8 +356,8 @@ object param - nested object with stored array:
356356
sort: name
357357
- match: { hits.total.value: 2 }
358358
- match: { hits.hits.0._source.name: A }
359-
- match: { hits.hits.0._source.nested_array_regular.0.b.c: [ 10, 100] }
360-
- match: { hits.hits.0._source.nested_array_regular.1.b.c: [ 20, 200] }
359+
- match: { hits.hits.0._source.nested_array_regular.0.b.c: [ 10, 100 ] }
360+
- match: { hits.hits.0._source.nested_array_regular.1.b.c: [ 20, 200 ] }
361361
- match: { hits.hits.1._source.name: B }
362362
- match: { hits.hits.1._source.nested_array_stored.0.b.0.c: 10 }
363363
- match: { hits.hits.1._source.nested_array_stored.0.b.1.c: 100 }
@@ -411,55 +411,6 @@ index param - nested array within array:
411411
- match: { hits.hits.0._source.path.to.some.3.id: [ 1000, 2000 ] }
412412

413413

414-
---
415-
index param - nested array within array - disabled second pass:
416-
- requires:
417-
cluster_features: ["mapper.synthetic_source_keep"]
418-
reason: requires tracking ignored source
419-
420-
- do:
421-
indices.create:
422-
index: test
423-
body:
424-
settings:
425-
index:
426-
synthetic_source:
427-
enable_second_doc_parsing_pass: false
428-
mappings:
429-
_source:
430-
mode: synthetic
431-
properties:
432-
name:
433-
type: keyword
434-
path:
435-
properties:
436-
to:
437-
properties:
438-
some:
439-
synthetic_source_keep: arrays
440-
properties:
441-
id:
442-
type: integer
443-
444-
- do:
445-
bulk:
446-
index: test
447-
refresh: true
448-
body:
449-
- '{ "create": { } }'
450-
- '{ "name": "A", "path": [ { "to": [ { "some" : [ { "id": 10 }, { "id": [1, 3, 2] } ] }, { "some": { "id": 100 } } ] }, { "to": { "some": { "id": [1000, 2000] } } } ] }'
451-
- match: { errors: false }
452-
453-
- do:
454-
search:
455-
index: test
456-
sort: name
457-
- match: { hits.hits.0._source.name: A }
458-
- length: { hits.hits.0._source.path.to.some: 2}
459-
- match: { hits.hits.0._source.path.to.some.0.id: 10 }
460-
- match: { hits.hits.0._source.path.to.some.1.id: [ 1, 3, 2] }
461-
462-
463414
---
464415
# 112156
465416
stored field under object with store_array_source:
@@ -925,8 +876,10 @@ index param - root arrays:
925876
- match: { hits.hits.1._source.obj.1.span.id: "2" }
926877

927878
- match: { hits.hits.2._source.id: 3 }
928-
- match: { hits.hits.2._source.obj_default.trace.id: [aa, bb] }
929-
- match: { hits.hits.2._source.obj_default.span.id: "2" }
879+
- match: { hits.hits.2._source.obj_default.trace.0.id: bb }
880+
- match: { hits.hits.2._source.obj_default.trace.1.id: aa }
881+
- match: { hits.hits.2._source.obj_default.span.0.id: "2" }
882+
- match: { hits.hits.2._source.obj_default.span.1.id: "2" }
930883

931884

932885
---

server/src/main/java/org/elasticsearch/common/settings/IndexScopedSettings.java

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -188,7 +188,6 @@ public final class IndexScopedSettings extends AbstractScopedSettings {
188188
FieldMapper.SYNTHETIC_SOURCE_KEEP_INDEX_SETTING,
189189
IgnoredSourceFieldMapper.SKIP_IGNORED_SOURCE_WRITE_SETTING,
190190
IgnoredSourceFieldMapper.SKIP_IGNORED_SOURCE_READ_SETTING,
191-
IndexSettings.SYNTHETIC_SOURCE_SECOND_DOC_PARSING_PASS_SETTING,
192191
SourceFieldMapper.INDEX_MAPPER_SOURCE_MODE_SETTING,
193192

194193
// validate that built-in similarities don't get redefined

server/src/main/java/org/elasticsearch/index/IndexSettings.java

Lines changed: 0 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -656,13 +656,6 @@ public Iterator<Setting<?>> settings() {
656656
Property.Final
657657
);
658658

659-
public static final Setting<Boolean> SYNTHETIC_SOURCE_SECOND_DOC_PARSING_PASS_SETTING = Setting.boolSetting(
660-
"index.synthetic_source.enable_second_doc_parsing_pass",
661-
true,
662-
Property.IndexScope,
663-
Property.Dynamic
664-
);
665-
666659
/**
667660
* Returns <code>true</code> if TSDB encoding is enabled. The default is <code>true</code>
668661
*/
@@ -832,7 +825,6 @@ private void setRetentionLeaseMillis(final TimeValue retentionLease) {
832825
private volatile long mappingDimensionFieldsLimit;
833826
private volatile boolean skipIgnoredSourceWrite;
834827
private volatile boolean skipIgnoredSourceRead;
835-
private volatile boolean syntheticSourceSecondDocParsingPassEnabled;
836828
private final SourceFieldMapper.Mode indexMappingSourceMode;
837829
private final boolean recoverySourceEnabled;
838830

@@ -995,7 +987,6 @@ public IndexSettings(final IndexMetadata indexMetadata, final Settings nodeSetti
995987
es87TSDBCodecEnabled = scopedSettings.get(TIME_SERIES_ES87TSDB_CODEC_ENABLED_SETTING);
996988
skipIgnoredSourceWrite = scopedSettings.get(IgnoredSourceFieldMapper.SKIP_IGNORED_SOURCE_WRITE_SETTING);
997989
skipIgnoredSourceRead = scopedSettings.get(IgnoredSourceFieldMapper.SKIP_IGNORED_SOURCE_READ_SETTING);
998-
syntheticSourceSecondDocParsingPassEnabled = scopedSettings.get(SYNTHETIC_SOURCE_SECOND_DOC_PARSING_PASS_SETTING);
999990
indexMappingSourceMode = scopedSettings.get(SourceFieldMapper.INDEX_MAPPER_SOURCE_MODE_SETTING);
1000991
recoverySourceEnabled = RecoverySettings.INDICES_RECOVERY_SOURCE_ENABLED_SETTING.get(nodeSettings);
1001992

@@ -1085,10 +1076,6 @@ public IndexSettings(final IndexMetadata indexMetadata, final Settings nodeSetti
10851076
this::setSkipIgnoredSourceWrite
10861077
);
10871078
scopedSettings.addSettingsUpdateConsumer(IgnoredSourceFieldMapper.SKIP_IGNORED_SOURCE_READ_SETTING, this::setSkipIgnoredSourceRead);
1088-
scopedSettings.addSettingsUpdateConsumer(
1089-
SYNTHETIC_SOURCE_SECOND_DOC_PARSING_PASS_SETTING,
1090-
this::setSyntheticSourceSecondDocParsingPassEnabled
1091-
);
10921079
}
10931080

10941081
private void setSearchIdleAfter(TimeValue searchIdleAfter) {
@@ -1681,14 +1668,6 @@ private void setSkipIgnoredSourceRead(boolean value) {
16811668
this.skipIgnoredSourceRead = value;
16821669
}
16831670

1684-
private void setSyntheticSourceSecondDocParsingPassEnabled(boolean syntheticSourceSecondDocParsingPassEnabled) {
1685-
this.syntheticSourceSecondDocParsingPassEnabled = syntheticSourceSecondDocParsingPassEnabled;
1686-
}
1687-
1688-
public boolean isSyntheticSourceSecondDocParsingPassEnabled() {
1689-
return syntheticSourceSecondDocParsingPassEnabled;
1690-
}
1691-
16921671
public SourceFieldMapper.Mode getIndexMappingSourceMode() {
16931672
return indexMappingSourceMode;
16941673
}

server/src/main/java/org/elasticsearch/index/mapper/DocumentParser.java

Lines changed: 9 additions & 132 deletions
Original file line number | Diff line number | Diff line change
@@ -35,16 +35,13 @@
3535

3636
import java.io.IOException;
3737
import java.util.ArrayList;
38-
import java.util.Collection;
3938
import java.util.Collections;
4039
import java.util.HashMap;
41-
import java.util.HashSet;
4240
import java.util.Iterator;
4341
import java.util.LinkedList;
4442
import java.util.List;
4543
import java.util.Map;
4644
import java.util.Optional;
47-
import java.util.Set;
4845
import java.util.function.Consumer;
4946

5047
import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.MAX_DIMS_COUNT;
@@ -148,9 +145,6 @@ private void internalParseDocument(MetadataFieldMapper[] metadataFieldsMappers,
148145

149146
executeIndexTimeScripts(context);
150147

151-
// Record additional entries for {@link IgnoredSourceFieldMapper} before calling #postParse, so that they get stored.
152-
addIgnoredSourceMissingValues(context);
153-
154148
for (MetadataFieldMapper metadataMapper : metadataFieldsMappers) {
155149
metadataMapper.postParse(context);
156150
}
@@ -159,128 +153,6 @@ private void internalParseDocument(MetadataFieldMapper[] metadataFieldsMappers,
159153
}
160154
}
161155

162-
private void addIgnoredSourceMissingValues(DocumentParserContext context) throws IOException {
163-
Collection<IgnoredSourceFieldMapper.NameValue> ignoredFieldsMissingValues = context.getIgnoredFieldsMissingValues();
164-
if (ignoredFieldsMissingValues.isEmpty()) {
165-
return;
166-
}
167-
168-
// Clean up any conflicting ignored values, to avoid double-printing them as array elements in synthetic source.
169-
Map<String, IgnoredSourceFieldMapper.NameValue> fields = new HashMap<>(ignoredFieldsMissingValues.size());
170-
for (var field : ignoredFieldsMissingValues) {
171-
fields.put(field.name(), field);
172-
}
173-
context.deduplicateIgnoredFieldValues(fields.keySet());
174-
175-
assert context.mappingLookup().isSourceSynthetic();
176-
try (
177-
XContentParser parser = XContentHelper.createParser(
178-
parserConfiguration,
179-
context.sourceToParse().source(),
180-
context.sourceToParse().getXContentType()
181-
)
182-
) {
183-
DocumentParserContext newContext = new RootDocumentParserContext(
184-
context.mappingLookup(),
185-
mappingParserContext,
186-
context.sourceToParse(),
187-
parser
188-
);
189-
var nameValues = parseDocForMissingValues(newContext, fields);
190-
for (var nameValue : nameValues) {
191-
context.addIgnoredField(nameValue);
192-
}
193-
}
194-
}
195-
196-
/**
197-
* Simplified parsing version for retrieving the source of a given set of fields.
198-
*/
199-
private static List<IgnoredSourceFieldMapper.NameValue> parseDocForMissingValues(
200-
DocumentParserContext context,
201-
Map<String, IgnoredSourceFieldMapper.NameValue> fields
202-
) throws IOException {
203-
// Generate all possible parent names for the given fields.
204-
// This is used to skip processing objects that can't generate missing values.
205-
Set<String> parentNames = getPossibleParentNames(fields.keySet());
206-
List<IgnoredSourceFieldMapper.NameValue> result = new ArrayList<>();
207-
208-
XContentParser parser = context.parser();
209-
XContentParser.Token currentToken = parser.nextToken();
210-
List<String> path = new ArrayList<>();
211-
List<Boolean> isObjectInPath = new ArrayList<>(); // Tracks if path components correspond to an object or an array.
212-
String fieldName = null;
213-
while (currentToken != null) {
214-
while (currentToken != XContentParser.Token.FIELD_NAME) {
215-
if (fieldName != null
216-
&& (currentToken == XContentParser.Token.START_OBJECT || currentToken == XContentParser.Token.START_ARRAY)) {
217-
if (parentNames.contains(getCurrentPath(path, fieldName)) == false) {
218-
// No missing value under this parsing subtree, skip it.
219-
parser.skipChildren();
220-
} else {
221-
path.add(fieldName);
222-
isObjectInPath.add(currentToken == XContentParser.Token.START_OBJECT);
223-
}
224-
fieldName = null;
225-
} else if (currentToken == XContentParser.Token.END_OBJECT || currentToken == XContentParser.Token.END_ARRAY) {
226-
// Remove the path, if the scope type matches the one when the path was added.
227-
if (isObjectInPath.isEmpty() == false
228-
&& (isObjectInPath.get(isObjectInPath.size() - 1) && currentToken == XContentParser.Token.END_OBJECT
229-
|| isObjectInPath.get(isObjectInPath.size() - 1) == false && currentToken == XContentParser.Token.END_ARRAY)) {
230-
path.remove(path.size() - 1);
231-
isObjectInPath.remove(isObjectInPath.size() - 1);
232-
}
233-
fieldName = null;
234-
}
235-
currentToken = parser.nextToken();
236-
if (currentToken == null) {
237-
return result;
238-
}
239-
}
240-
fieldName = parser.currentName();
241-
String fullName = getCurrentPath(path, fieldName);
242-
var leaf = fields.get(fullName); // There may be multiple matches for array elements, don't use #remove.
243-
if (leaf != null) {
244-
parser.nextToken(); // Advance the parser to the value to be read.
245-
result.add(leaf.cloneWithValue(context.encodeFlattenedToken()));
246-
fieldName = null;
247-
}
248-
currentToken = parser.nextToken();
249-
}
250-
return result;
251-
}
252-
253-
private static String getCurrentPath(List<String> path, String fieldName) {
254-
assert fieldName != null;
255-
return path.isEmpty() ? fieldName : String.join(".", path) + "." + fieldName;
256-
}
257-
258-
/**
259-
* Generates all possible parent object names for the given full names.
260-
* For instance, for input ['path.to.foo', 'another.path.to.bar'], it returns:
261-
* [ 'path', 'path.to', 'another', 'another.path', 'another.path.to' ]
262-
*/
263-
private static Set<String> getPossibleParentNames(Set<String> fullPaths) {
264-
if (fullPaths.isEmpty()) {
265-
return Collections.emptySet();
266-
}
267-
Set<String> paths = new HashSet<>();
268-
for (String fullPath : fullPaths) {
269-
String[] split = fullPath.split("\\.");
270-
if (split.length < 2) {
271-
continue;
272-
}
273-
StringBuilder builder = new StringBuilder(split[0]);
274-
paths.add(builder.toString());
275-
for (int i = 1; i < split.length - 1; i++) {
276-
builder.append(".");
277-
builder.append(split[i]);
278-
paths.add(builder.toString());
279-
}
280-
}
281-
return paths;
282-
}
283-
284156
private static void executeIndexTimeScripts(DocumentParserContext context) {
285157
List<FieldMapper> indexTimeScriptMappers = context.mappingLookup().indexTimeScriptMappers();
286158
if (indexTimeScriptMappers.isEmpty()) {
@@ -426,7 +298,10 @@ static void parseObjectOrNested(DocumentParserContext context) throws IOExceptio
426298
throwOnConcreteValue(context.parent(), currentFieldName, context);
427299
}
428300

429-
if (context.canAddIgnoredField() && getSourceKeepMode(context, context.parent().sourceKeepMode()) == Mapper.SourceKeepMode.ALL) {
301+
var sourceKeepMode = getSourceKeepMode(context, context.parent().sourceKeepMode());
302+
if (context.canAddIgnoredField()
303+
&& (sourceKeepMode == Mapper.SourceKeepMode.ALL
304+
|| (sourceKeepMode == Mapper.SourceKeepMode.ARRAYS && context.inArrayScope()))) {
430305
context = context.addIgnoredFieldFromContext(
431306
new IgnoredSourceFieldMapper.NameValue(
432307
context.parent().fullPath(),
@@ -571,9 +446,11 @@ static void parseObjectOrField(DocumentParserContext context, Mapper mapper) thr
571446
parseObjectOrNested(context.createFlattenContext(currentFieldName));
572447
context.path().add(currentFieldName);
573448
} else {
449+
var sourceKeepMode = getSourceKeepMode(context, fieldMapper.sourceKeepMode());
574450
if (context.canAddIgnoredField()
575451
&& (fieldMapper.syntheticSourceMode() == FieldMapper.SyntheticSourceMode.FALLBACK
576-
|| getSourceKeepMode(context, fieldMapper.sourceKeepMode()) == Mapper.SourceKeepMode.ALL
452+
|| sourceKeepMode == Mapper.SourceKeepMode.ALL
453+
|| (sourceKeepMode == Mapper.SourceKeepMode.ARRAYS && context.inArrayScope())
577454
|| (context.isWithinCopyTo() == false && context.isCopyToDestinationField(mapper.fullPath())))) {
578455
context = context.addIgnoredFieldFromContext(
579456
IgnoredSourceFieldMapper.NameValue.fromContext(context, fieldMapper.fullPath(), null)
@@ -810,8 +687,8 @@ private static void parseNonDynamicArray(
810687
boolean objectWithFallbackSyntheticSource = false;
811688
if (mapper instanceof ObjectMapper objectMapper) {
812689
mode = getSourceKeepMode(context, objectMapper.sourceKeepMode());
813-
objectWithFallbackSyntheticSource = (mode == Mapper.SourceKeepMode.ALL
814-
|| (mode == Mapper.SourceKeepMode.ARRAYS && objectMapper instanceof NestedObjectMapper == false));
690+
objectWithFallbackSyntheticSource = mode == Mapper.SourceKeepMode.ALL
691+
|| (mode == Mapper.SourceKeepMode.ARRAYS && objectMapper instanceof NestedObjectMapper == false);
815692
}
816693
boolean fieldWithFallbackSyntheticSource = false;
817694
boolean fieldWithStoredArraySource = false;

0 commit comments

Comments
 (0)