Skip to content

Commit 214ffb0

Browse files
authored
Tweak copy_to handling in synthetic _source to account for nested objects (#120974) (#121097)
1 parent 730ec3e commit 214ffb0

File tree

4 files changed

+95
-46
lines changed

4 files changed

+95
-46
lines changed

docs/changelog/120974.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
pr: 120974
2+
summary: Tweak `copy_to` handling in synthetic `_source` to account for nested objects
3+
area: Mapping
4+
type: bug
5+
issues:
6+
- 120831

rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.create/20_synthetic_source.yml

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1695,6 +1695,74 @@ synthetic_source with copy_to pointing inside object:
16951695
hits.hits.2.fields:
16961696
c.copy: [ "100", "hello", "zap" ]
16971697

1698+
---
1699+
synthetic_source with copy_to inside nested object:
1700+
- do:
1701+
indices.create:
1702+
index: test
1703+
body:
1704+
settings:
1705+
index:
1706+
mapping.source.mode: synthetic
1707+
mappings:
1708+
properties:
1709+
name:
1710+
type: keyword
1711+
my_values:
1712+
type: nested
1713+
properties:
1714+
k:
1715+
type: keyword
1716+
copy_to: my_values.copy
1717+
second_level:
1718+
type: nested
1719+
properties:
1720+
k2:
1721+
type: keyword
1722+
copy_to: my_values.copy
1723+
copy:
1724+
type: keyword
1725+
dummy:
1726+
type: keyword
1727+
1728+
- do:
1729+
index:
1730+
index: test
1731+
id: 1
1732+
refresh: true
1733+
body:
1734+
name: "A"
1735+
my_values:
1736+
k: "hello"
1737+
1738+
- do:
1739+
index:
1740+
index: test
1741+
id: 2
1742+
refresh: true
1743+
body:
1744+
name: "B"
1745+
my_values:
1746+
second_level:
1747+
k2: "hello"
1748+
1749+
- do:
1750+
search:
1751+
index: test
1752+
sort: name
1753+
1754+
- match:
1755+
hits.hits.0._source:
1756+
name: "A"
1757+
my_values:
1758+
k: "hello"
1759+
- match:
1760+
hits.hits.1._source:
1761+
name: "B"
1762+
my_values:
1763+
second_level:
1764+
k2: "hello"
1765+
16981766
---
16991767
synthetic_source with copy_to pointing to ambiguous field:
17001768
- requires:

server/src/main/java/org/elasticsearch/index/mapper/DocumentParserContext.java

Lines changed: 20 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -367,19 +367,6 @@ public final DocumentParserContext maybeCloneForArray(Mapper mapper) throws IOEx
367367
return this;
368368
}
369369

370-
/**
371-
* Creates a sub-context from the current {@link DocumentParserContext} to indicate that the source for the sub-context has been
372-
* recorded and avoid duplicate recording for parts of the sub-context. Applies to synthetic source only.
373-
*/
374-
public final DocumentParserContext cloneWithRecordedSource() throws IOException {
375-
if (canAddIgnoredField()) {
376-
DocumentParserContext subcontext = createChildContext(parent());
377-
subcontext.setRecordedSource(); // Avoids double-storing parts of the source for the same parser subtree.
378-
return subcontext;
379-
}
380-
return this;
381-
}
382-
383370
/**
384371
* Add the given {@code field} to the _field_names field
385372
*
@@ -466,10 +453,6 @@ public boolean isCopyToDestinationField(String name) {
466453
return copyToFields.contains(name);
467454
}
468455

469-
public Set<String> getCopyToFields() {
470-
return copyToFields;
471-
}
472-
473456
/**
474457
* Add a new mapper dynamically created while parsing.
475458
*
@@ -706,6 +689,26 @@ public LuceneDocument doc() {
706689
* @param doc the document to target
707690
*/
708691
public final DocumentParserContext createCopyToContext(String copyToField, LuceneDocument doc) throws IOException {
692+
/*
693+
Mark the field as containing copied data, meaning it should not be present
694+
in synthetic _source (to be consistent with stored _source).
695+
Ignored source values take precedence over standard synthetic source implementation
696+
so by adding the `XContentDataHelper.voidValue()` entry we disable the field in synthetic source.
697+
Otherwise, it would be constructed e.g. from doc_values, which leads to duplicate values
698+
in copied field after reindexing.
699+
*/
700+
if (mappingLookup.isSourceSynthetic() && indexSettings().getSkipIgnoredSourceWrite() == false) {
701+
ObjectMapper parent = root().findParentMapper(copyToField);
702+
// There are scenarios when this is false:
703+
// 1. all values of the field that is the source of copy_to are null
704+
// 2. copy_to points at a field inside a disabled object
705+
// 3. copy_to points at a dynamic field that is not yet applied to the mapping; we will process it properly after the dynamic update
706+
if (parent != null) {
707+
int offset = parent.isRoot() ? 0 : parent.fullPath().length() + 1;
708+
ignoredFieldValues.add(new IgnoredSourceFieldMapper.NameValue(copyToField, offset, XContentDataHelper.voidValue(), doc));
709+
}
710+
}
711+
709712
ContentPath path = new ContentPath();
710713
XContentParser parser = DotExpandingXContentParser.expandDots(new CopyToParser(copyToField, parser()), path);
711714
return new Wrapper(root(), this) {

server/src/main/java/org/elasticsearch/index/mapper/IgnoredSourceFieldMapper.java

Lines changed: 1 addition & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,6 @@
2525

2626
import java.io.IOException;
2727
import java.nio.charset.StandardCharsets;
28-
import java.util.ArrayList;
29-
import java.util.Collection;
3028
import java.util.Collections;
3129
import java.util.Map;
3230
import java.util.Set;
@@ -159,33 +157,7 @@ public void postParse(DocumentParserContext context) {
159157
return;
160158
}
161159

162-
Collection<NameValue> ignoredValuesToWrite = context.getIgnoredFieldValues();
163-
if (context.getCopyToFields().isEmpty() == false && indexSettings.getSkipIgnoredSourceWrite() == false) {
164-
/*
165-
Mark fields as containing copied data meaning they should not be present
166-
in synthetic _source (to be consistent with stored _source).
167-
Ignored source values take precedence over standard synthetic source implementation
168-
so by adding the `XContentDataHelper.voidValue()` entry we disable the field in synthetic source.
169-
Otherwise, it would be constructed e.g. from doc_values, which leads to duplicate values
170-
in copied field after reindexing.
171-
*/
172-
var mutableList = new ArrayList<>(ignoredValuesToWrite);
173-
for (String copyToField : context.getCopyToFields()) {
174-
ObjectMapper parent = context.parent().findParentMapper(copyToField);
175-
if (parent == null) {
176-
// There are scenarios when this can happen:
177-
// 1. all values of the field that is the source of copy_to are null
178-
// 2. copy_to points at a field inside a disabled object
179-
// 3. copy_to points at dynamic field which is not yet applied to mapping, we will process it properly on re-parse.
180-
continue;
181-
}
182-
int offset = parent.isRoot() ? 0 : parent.fullPath().length() + 1;
183-
mutableList.add(new IgnoredSourceFieldMapper.NameValue(copyToField, offset, XContentDataHelper.voidValue(), context.doc()));
184-
}
185-
ignoredValuesToWrite = mutableList;
186-
}
187-
188-
for (NameValue nameValue : ignoredValuesToWrite) {
160+
for (NameValue nameValue : context.getIgnoredFieldValues()) {
189161
nameValue.doc().add(new StoredField(NAME, encode(nameValue)));
190162
}
191163
}

0 commit comments

Comments
 (0)