Skip to content

Commit f95f292

Browse files
authored
[8.x] Correctly identify parent of copy_to destination field for synthetic source purposes (#113153) (#113307)
* Correctly identify parent of copy_to destination field for synthetic source purposes (#113153) (cherry picked from commit b9855b8) # Conflicts: # server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java * Comma
1 parent 0aebbb5 commit f95f292

File tree

11 files changed

+657
-147
lines changed

11 files changed

+657
-147
lines changed

rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.create/20_synthetic_source.yml

Lines changed: 404 additions & 6 deletions
Large diffs are not rendered by default.

server/src/main/java/org/elasticsearch/index/mapper/DocumentParserContext.java

Lines changed: 5 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,7 @@ public int get() {
123123
private Field version;
124124
private final SeqNoFieldMapper.SequenceIDFields seqID;
125125
private final Set<String> fieldsAppliedFromTemplates;
126+
126127
/**
127128
* Fields that are copied from values of other fields via copy_to.
128129
* This per-document state is needed since it is possible
@@ -453,26 +454,16 @@ public boolean isFieldAppliedFromTemplate(String name) {
453454

454455
public void markFieldAsCopyTo(String fieldName) {
455456
copyToFields.add(fieldName);
456-
if (mappingLookup.isSourceSynthetic() && indexSettings().getSkipIgnoredSourceWrite() == false) {
457-
/*
458-
Mark this field as containing copied data meaning it should not be present
459-
in synthetic _source (to be consistent with stored _source).
460-
Ignored source values take precedence over standard synthetic source implementation
461-
so by adding this nothing entry we "disable" field in synthetic source.
462-
Otherwise, it would be constructed f.e. from doc_values which leads to duplicate values
463-
in copied field after reindexing.
464-
465-
Note that this applies to fields that are copied from fields using ignored source themselves
466-
and therefore we don't check for canAddIgnoredField().
467-
*/
468-
ignoredFieldValues.add(IgnoredSourceFieldMapper.NameValue.fromContext(this, fieldName, XContentDataHelper.nothing()));
469-
}
470457
}
471458

472459
public boolean isCopyToDestinationField(String name) {
473460
return copyToFields.contains(name);
474461
}
475462

463+
public Set<String> getCopyToFields() {
464+
return copyToFields;
465+
}
466+
476467
/**
477468
* Add a new mapper dynamically created while parsing.
478469
*

server/src/main/java/org/elasticsearch/index/mapper/IgnoredSourceFieldMapper.java

Lines changed: 37 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -26,9 +26,8 @@
2626
import java.io.IOException;
2727
import java.nio.charset.StandardCharsets;
2828
import java.util.ArrayList;
29+
import java.util.Collection;
2930
import java.util.Collections;
30-
import java.util.Comparator;
31-
import java.util.List;
3231
import java.util.Map;
3332
import java.util.Set;
3433
import java.util.stream.Stream;
@@ -113,6 +112,10 @@ NameValue cloneWithValue(BytesRef value) {
113112
assert value() == null;
114113
return new NameValue(name, parentOffset, value, doc);
115114
}
115+
116+
boolean hasValue() {
117+
return XContentDataHelper.isDataPresent(value);
118+
}
116119
}
117120

118121
static final class IgnoredValuesFieldMapperType extends StringFieldType {
@@ -147,11 +150,38 @@ protected String contentType() {
147150
@Override
148151
public void postParse(DocumentParserContext context) {
149152
// Ignored values are only expected in synthetic mode.
150-
assert context.getIgnoredFieldValues().isEmpty() || context.mappingLookup().isSourceSynthetic();
151-
List<NameValue> ignoredFieldValues = new ArrayList<>(context.getIgnoredFieldValues());
152-
// ensure consistent ordering when retrieving synthetic source
153-
Collections.sort(ignoredFieldValues, Comparator.comparing(NameValue::name));
154-
for (NameValue nameValue : ignoredFieldValues) {
153+
if (context.mappingLookup().isSourceSynthetic() == false) {
154+
assert context.getIgnoredFieldValues().isEmpty();
155+
return;
156+
}
157+
158+
Collection<NameValue> ignoredValuesToWrite = context.getIgnoredFieldValues();
159+
if (context.getCopyToFields().isEmpty() == false && indexSettings.getSkipIgnoredSourceWrite() == false) {
160+
/*
161+
Mark fields as containing copied data meaning they should not be present
162+
in synthetic _source (to be consistent with stored _source).
163+
Ignored source values take precedence over standard synthetic source implementation
164+
so by adding the `XContentDataHelper.voidValue()` entry we disable the field in synthetic source.
165+
Otherwise, it would be constructed f.e. from doc_values which leads to duplicate values
166+
in copied field after reindexing.
167+
*/
168+
var mutableList = new ArrayList<>(ignoredValuesToWrite);
169+
for (String copyToField : context.getCopyToFields()) {
170+
ObjectMapper parent = context.parent().findParentMapper(copyToField);
171+
if (parent == null) {
172+
// There are scenarios when this can happen:
173+
// 1. all values of the field that is the source of copy_to are null
174+
// 2. copy_to points at a field inside a disabled object
175+
// 3. copy_to points at dynamic field which is not yet applied to mapping, we will process it properly on re-parse.
176+
continue;
177+
}
178+
int offset = parent.isRoot() ? 0 : parent.fullPath().length() + 1;
179+
mutableList.add(new IgnoredSourceFieldMapper.NameValue(copyToField, offset, XContentDataHelper.voidValue(), context.doc()));
180+
}
181+
ignoredValuesToWrite = mutableList;
182+
}
183+
184+
for (NameValue nameValue : ignoredValuesToWrite) {
155185
nameValue.doc().add(new StoredField(NAME, encode(nameValue)));
156186
}
157187
}

server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,8 @@ public Set<NodeFeature> getFeatures() {
4040
Mapper.SYNTHETIC_SOURCE_KEEP_FEATURE,
4141
SourceFieldMapper.SYNTHETIC_SOURCE_WITH_COPY_TO_AND_DOC_VALUES_FALSE_SUPPORT,
4242
SourceFieldMapper.SYNTHETIC_SOURCE_COPY_TO_FIX,
43-
FlattenedFieldMapper.IGNORE_ABOVE_SUPPORT
43+
FlattenedFieldMapper.IGNORE_ABOVE_SUPPORT,
44+
SourceFieldMapper.SYNTHETIC_SOURCE_COPY_TO_INSIDE_OBJECTS_FIX
4445
);
4546
}
4647
}

server/src/main/java/org/elasticsearch/index/mapper/ObjectMapper.java

Lines changed: 119 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -808,6 +808,42 @@ protected void doXContent(XContentBuilder builder, Params params) throws IOExcep
808808

809809
}
810810

811+
ObjectMapper findParentMapper(String leafFieldPath) {
812+
var pathComponents = leafFieldPath.split("\\.");
813+
int startPathComponent = 0;
814+
815+
ObjectMapper current = this;
816+
String pathInCurrent = leafFieldPath;
817+
818+
while (current != null) {
819+
if (current.mappers.containsKey(pathInCurrent)) {
820+
return current;
821+
}
822+
823+
// Go one level down if possible
824+
var parent = current;
825+
current = null;
826+
827+
var childMapperName = new StringBuilder();
828+
for (int i = startPathComponent; i < pathComponents.length - 1; i++) {
829+
if (childMapperName.isEmpty() == false) {
830+
childMapperName.append(".");
831+
}
832+
childMapperName.append(pathComponents[i]);
833+
834+
var childMapper = parent.mappers.get(childMapperName.toString());
835+
if (childMapper instanceof ObjectMapper objectMapper) {
836+
current = objectMapper;
837+
startPathComponent = i + 1;
838+
pathInCurrent = pathInCurrent.substring(childMapperName.length() + 1);
839+
break;
840+
}
841+
}
842+
}
843+
844+
return null;
845+
}
846+
811847
protected SourceLoader.SyntheticFieldLoader syntheticFieldLoader(Stream<Mapper> mappers, boolean isFragment) {
812848
var fields = mappers.sorted(Comparator.comparing(Mapper::fullPath))
813849
.map(Mapper::syntheticFieldLoader)
@@ -828,10 +864,18 @@ public SourceLoader.SyntheticFieldLoader syntheticFieldLoader() {
828864
private class SyntheticSourceFieldLoader implements SourceLoader.SyntheticFieldLoader {
829865
private final List<SourceLoader.SyntheticFieldLoader> fields;
830866
private final boolean isFragment;
867+
831868
private boolean storedFieldLoadersHaveValues;
832869
private boolean docValuesLoadersHaveValues;
833870
private boolean ignoredValuesPresent;
834871
private List<IgnoredSourceFieldMapper.NameValue> ignoredValues;
872+
// If this loader has anything to write.
873+
// In special cases this can be false even if doc values loaders or stored field loaders
874+
// have values.
875+
// F.e. objects that only contain fields that are destinations of copy_to.
876+
private boolean writersHaveValues;
877+
// Use an ordered map between field names and writers to order writing by field name.
878+
private TreeMap<String, FieldWriter> currentWriters;
835879

836880
private SyntheticSourceFieldLoader(List<SourceLoader.SyntheticFieldLoader> fields, boolean isFragment) {
837881
this.fields = fields;
@@ -882,22 +926,69 @@ public boolean advanceToDoc(int docId) throws IOException {
882926
}
883927
}
884928

929+
@Override
930+
public void prepare() {
931+
if ((storedFieldLoadersHaveValues || docValuesLoadersHaveValues || ignoredValuesPresent) == false) {
932+
writersHaveValues = false;
933+
return;
934+
}
935+
936+
for (var loader : fields) {
937+
// Currently this logic is only relevant for object loaders.
938+
if (loader instanceof ObjectMapper.SyntheticSourceFieldLoader objectSyntheticFieldLoader) {
939+
objectSyntheticFieldLoader.prepare();
940+
}
941+
}
942+
943+
currentWriters = new TreeMap<>();
944+
945+
if (ignoredValues != null && ignoredValues.isEmpty() == false) {
946+
for (IgnoredSourceFieldMapper.NameValue value : ignoredValues) {
947+
if (value.hasValue()) {
948+
writersHaveValues |= true;
949+
}
950+
951+
var existing = currentWriters.get(value.name());
952+
if (existing == null) {
953+
currentWriters.put(value.name(), new FieldWriter.IgnoredSource(value));
954+
} else if (existing instanceof FieldWriter.IgnoredSource isw) {
955+
isw.mergeWith(value);
956+
}
957+
}
958+
}
959+
960+
for (SourceLoader.SyntheticFieldLoader field : fields) {
961+
if (field.hasValue()) {
962+
if (currentWriters.containsKey(field.fieldName()) == false) {
963+
writersHaveValues |= true;
964+
currentWriters.put(field.fieldName(), new FieldWriter.FieldLoader(field));
965+
} else {
966+
// Skip if the field source is stored separately, to avoid double-printing.
967+
// Make sure to reset the state of loader so that values stored inside will not
968+
// be used after this document is finished.
969+
field.reset();
970+
}
971+
}
972+
}
973+
}
974+
885975
@Override
886976
public boolean hasValue() {
887-
return storedFieldLoadersHaveValues || docValuesLoadersHaveValues || ignoredValuesPresent;
977+
return writersHaveValues;
888978
}
889979

890980
@Override
891981
public void write(XContentBuilder b) throws IOException {
892982
if (hasValue() == false) {
893983
return;
894984
}
985+
895986
if (isRoot() && isEnabled() == false) {
896987
// If the root object mapper is disabled, it is expected to contain
897988
// the source encapsulated within a single ignored source value.
898989
assert ignoredValues.size() == 1 : ignoredValues.size();
899990
XContentDataHelper.decodeAndWrite(b, ignoredValues.get(0).value());
900-
ignoredValues = null;
991+
softReset();
901992
return;
902993
}
903994

@@ -907,41 +998,12 @@ public void write(XContentBuilder b) throws IOException {
907998
b.startObject(leafName());
908999
}
9091000

910-
if (ignoredValues != null && ignoredValues.isEmpty() == false) {
911-
// Use an ordered map between field names and writer functions, to order writing by field name.
912-
Map<String, FieldWriter> orderedFields = new TreeMap<>();
913-
for (IgnoredSourceFieldMapper.NameValue value : ignoredValues) {
914-
var existing = orderedFields.get(value.name());
915-
if (existing == null) {
916-
orderedFields.put(value.name(), new FieldWriter.IgnoredSource(value));
917-
} else if (existing instanceof FieldWriter.IgnoredSource isw) {
918-
isw.mergeWith(value);
919-
}
920-
}
921-
for (SourceLoader.SyntheticFieldLoader field : fields) {
922-
if (field.hasValue()) {
923-
if (orderedFields.containsKey(field.fieldName()) == false) {
924-
orderedFields.put(field.fieldName(), new FieldWriter.FieldLoader(field));
925-
} else {
926-
// Skip if the field source is stored separately, to avoid double-printing.
927-
// Make sure to reset the state of loader so that values stored inside will not
928-
// be used after this document is finished.
929-
field.reset();
930-
}
931-
}
932-
}
933-
934-
for (var writer : orderedFields.values()) {
1001+
for (var writer : currentWriters.values()) {
1002+
if (writer.hasValue()) {
9351003
writer.writeTo(b);
9361004
}
937-
ignoredValues = null;
938-
} else {
939-
for (SourceLoader.SyntheticFieldLoader field : fields) {
940-
if (field.hasValue()) {
941-
field.write(b);
942-
}
943-
}
9441005
}
1006+
9451007
b.endObject();
9461008
softReset();
9471009
}
@@ -957,6 +1019,8 @@ private void softReset() {
9571019
storedFieldLoadersHaveValues = false;
9581020
docValuesLoadersHaveValues = false;
9591021
ignoredValuesPresent = false;
1022+
ignoredValues = null;
1023+
writersHaveValues = false;
9601024
}
9611025

9621026
@Override
@@ -986,34 +1050,49 @@ public String fieldName() {
9861050
interface FieldWriter {
9871051
void writeTo(XContentBuilder builder) throws IOException;
9881052

1053+
boolean hasValue();
1054+
9891055
record FieldLoader(SourceLoader.SyntheticFieldLoader loader) implements FieldWriter {
9901056
@Override
9911057
public void writeTo(XContentBuilder builder) throws IOException {
9921058
loader.write(builder);
9931059
}
1060+
1061+
@Override
1062+
public boolean hasValue() {
1063+
return loader.hasValue();
1064+
}
9941065
}
9951066

9961067
class IgnoredSource implements FieldWriter {
9971068
private final String fieldName;
9981069
private final String leafName;
999-
private final List<BytesRef> values;
1070+
private final List<BytesRef> encodedValues;
10001071

10011072
IgnoredSource(IgnoredSourceFieldMapper.NameValue initialValue) {
10021073
this.fieldName = initialValue.name();
10031074
this.leafName = initialValue.getFieldName();
1004-
this.values = new ArrayList<>();
1005-
this.values.add(initialValue.value());
1075+
this.encodedValues = new ArrayList<>();
1076+
if (initialValue.hasValue()) {
1077+
this.encodedValues.add(initialValue.value());
1078+
}
10061079
}
10071080

10081081
@Override
10091082
public void writeTo(XContentBuilder builder) throws IOException {
1010-
XContentDataHelper.writeMerged(builder, leafName, values);
1083+
XContentDataHelper.writeMerged(builder, leafName, encodedValues);
1084+
}
1085+
1086+
@Override
1087+
public boolean hasValue() {
1088+
return encodedValues.isEmpty() == false;
10111089
}
10121090

10131091
public FieldWriter mergeWith(IgnoredSourceFieldMapper.NameValue nameValue) {
10141092
assert Objects.equals(nameValue.name(), fieldName) : "IgnoredSource is merged with wrong field data";
1015-
1016-
values.add(nameValue.value());
1093+
if (nameValue.hasValue()) {
1094+
encodedValues.add(nameValue.value());
1095+
}
10171096
return this;
10181097
}
10191098
}

server/src/main/java/org/elasticsearch/index/mapper/SourceFieldMapper.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,9 @@ public class SourceFieldMapper extends MetadataFieldMapper {
4848
"mapper.source.synthetic_source_with_copy_to_and_doc_values_false"
4949
);
5050
public static final NodeFeature SYNTHETIC_SOURCE_COPY_TO_FIX = new NodeFeature("mapper.source.synthetic_source_copy_to_fix");
51+
public static final NodeFeature SYNTHETIC_SOURCE_COPY_TO_INSIDE_OBJECTS_FIX = new NodeFeature(
52+
"mapper.source.synthetic_source_copy_to_inside_objects_fix"
53+
);
5154

5255
public static final String NAME = "_source";
5356
public static final String RECOVERY_SOURCE_NAME = "_recovery_source";

server/src/main/java/org/elasticsearch/index/mapper/SourceLoader.java

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -209,6 +209,9 @@ public void write(LeafStoredFieldLoader storedFieldLoader, int docId, XContentBu
209209
if (docValuesLoader != null) {
210210
docValuesLoader.advanceToDoc(docId);
211211
}
212+
213+
loader.prepare();
214+
212215
// TODO accept a requested xcontent type
213216
if (loader.hasValue()) {
214217
loader.write(b);
@@ -299,6 +302,16 @@ public String fieldName() {
299302
*/
300303
DocValuesLoader docValuesLoader(LeafReader leafReader, int[] docIdsInLeaf) throws IOException;
301304

305+
/**
306+
Perform any preprocessing needed before producing synthetic source
307+
and deduce whether this mapper (and its children, if any) have values to write.
308+
The expectation is for this method to be called before {@link SyntheticFieldLoader#hasValue()}
309+
and {@link SyntheticFieldLoader#write(XContentBuilder)} are used.
310+
*/
311+
default void prepare() {
312+
// Noop
313+
}
314+
302315
/**
303316
* Has this field loaded any values for this document?
304317
*/

0 commit comments

Comments
 (0)