Skip to content

Commit 8d057d8

Browse files
authored
Tweak copy_to handling in synthetic _source to account for nested objects (#120974)
1 parent 8654d6a commit 8d057d8

File tree

4 files changed

+95
-46
lines changed

4 files changed

+95
-46
lines changed

docs/changelog/120974.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
pr: 120974
2+
summary: Tweak `copy_to` handling in synthetic `_source` to account for nested objects
3+
area: Mapping
4+
type: bug
5+
issues:
6+
- 120831

rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.create/20_synthetic_source.yml

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1602,6 +1602,74 @@ synthetic_source with copy_to pointing inside object:
16021602
hits.hits.2.fields:
16031603
c.copy: [ "100", "hello", "zap" ]
16041604

1605+
---
1606+
synthetic_source with copy_to inside nested object:
1607+
- do:
1608+
indices.create:
1609+
index: test
1610+
body:
1611+
settings:
1612+
index:
1613+
mapping.source.mode: synthetic
1614+
mappings:
1615+
properties:
1616+
name:
1617+
type: keyword
1618+
my_values:
1619+
type: nested
1620+
properties:
1621+
k:
1622+
type: keyword
1623+
copy_to: my_values.copy
1624+
second_level:
1625+
type: nested
1626+
properties:
1627+
k2:
1628+
type: keyword
1629+
copy_to: my_values.copy
1630+
copy:
1631+
type: keyword
1632+
dummy:
1633+
type: keyword
1634+
1635+
- do:
1636+
index:
1637+
index: test
1638+
id: 1
1639+
refresh: true
1640+
body:
1641+
name: "A"
1642+
my_values:
1643+
k: "hello"
1644+
1645+
- do:
1646+
index:
1647+
index: test
1648+
id: 2
1649+
refresh: true
1650+
body:
1651+
name: "B"
1652+
my_values:
1653+
second_level:
1654+
k2: "hello"
1655+
1656+
- do:
1657+
search:
1658+
index: test
1659+
sort: name
1660+
1661+
- match:
1662+
hits.hits.0._source:
1663+
name: "A"
1664+
my_values:
1665+
k: "hello"
1666+
- match:
1667+
hits.hits.1._source:
1668+
name: "B"
1669+
my_values:
1670+
second_level:
1671+
k2: "hello"
1672+
16051673
---
16061674
synthetic_source with copy_to pointing to ambiguous field:
16071675
- do:

server/src/main/java/org/elasticsearch/index/mapper/DocumentParserContext.java

Lines changed: 20 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -367,19 +367,6 @@ public final DocumentParserContext maybeCloneForArray(Mapper mapper) throws IOEx
367367
return this;
368368
}
369369

370-
/**
371-
* Creates a sub-context from the current {@link DocumentParserContext} to indicate that the source for the sub-context has been
372-
* recorded and avoid duplicate recording for parts of the sub-context. Applies to synthetic source only.
373-
*/
374-
public final DocumentParserContext cloneWithRecordedSource() throws IOException {
375-
if (canAddIgnoredField()) {
376-
DocumentParserContext subcontext = createChildContext(parent());
377-
subcontext.setRecordedSource(); // Avoids double-storing parts of the source for the same parser subtree.
378-
return subcontext;
379-
}
380-
return this;
381-
}
382-
383370
/**
384371
* Add the given {@code field} to the _field_names field
385372
*
@@ -466,10 +453,6 @@ public boolean isCopyToDestinationField(String name) {
466453
return copyToFields.contains(name);
467454
}
468455

469-
public Set<String> getCopyToFields() {
470-
return copyToFields;
471-
}
472-
473456
/**
474457
* Add a new mapper dynamically created while parsing.
475458
*
@@ -706,6 +689,26 @@ public LuceneDocument doc() {
706689
* @param doc the document to target
707690
*/
708691
public final DocumentParserContext createCopyToContext(String copyToField, LuceneDocument doc) throws IOException {
692+
/*
693+
Mark the field as containing copied data, meaning it should not be present
694+
in synthetic _source (to be consistent with stored _source).
695+
Ignored source values take precedence over standard synthetic source implementation
696+
so by adding the `XContentDataHelper.voidValue()` entry we disable the field in synthetic source.
697+
Otherwise, it would be constructed, e.g., from doc_values, which leads to duplicate values
698+
in copied field after reindexing.
699+
*/
700+
if (mappingLookup.isSourceSynthetic() && indexSettings().getSkipIgnoredSourceWrite() == false) {
701+
ObjectMapper parent = root().findParentMapper(copyToField);
702+
// There are scenarios when no parent mapper is found (i.e. the `parent != null` check below is false):
703+
// 1. all values of the field that is the source of copy_to are null
704+
// 2. copy_to points at a field inside a disabled object
705+
// 3. copy_to points at a dynamic field which is not yet applied to the mapping; we will process it properly after the dynamic update
706+
if (parent != null) {
707+
int offset = parent.isRoot() ? 0 : parent.fullPath().length() + 1;
708+
ignoredFieldValues.add(new IgnoredSourceFieldMapper.NameValue(copyToField, offset, XContentDataHelper.voidValue(), doc));
709+
}
710+
}
711+
709712
ContentPath path = new ContentPath();
710713
XContentParser parser = DotExpandingXContentParser.expandDots(new CopyToParser(copyToField, parser()), path);
711714
return new Wrapper(root(), this) {

server/src/main/java/org/elasticsearch/index/mapper/IgnoredSourceFieldMapper.java

Lines changed: 1 addition & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,6 @@
2525

2626
import java.io.IOException;
2727
import java.nio.charset.StandardCharsets;
28-
import java.util.ArrayList;
29-
import java.util.Collection;
3028
import java.util.Collections;
3129
import java.util.Map;
3230
import java.util.Set;
@@ -161,33 +159,7 @@ public void postParse(DocumentParserContext context) {
161159
return;
162160
}
163161

164-
Collection<NameValue> ignoredValuesToWrite = context.getIgnoredFieldValues();
165-
if (context.getCopyToFields().isEmpty() == false && indexSettings.getSkipIgnoredSourceWrite() == false) {
166-
/*
167-
Mark fields as containing copied data meaning they should not be present
168-
in synthetic _source (to be consistent with stored _source).
169-
Ignored source values take precedence over standard synthetic source implementation
170-
so by adding the `XContentDataHelper.voidValue()` entry we disable the field in synthetic source.
171-
Otherwise, it would be constructed f.e. from doc_values which leads to duplicate values
172-
in copied field after reindexing.
173-
*/
174-
var mutableList = new ArrayList<>(ignoredValuesToWrite);
175-
for (String copyToField : context.getCopyToFields()) {
176-
ObjectMapper parent = context.parent().findParentMapper(copyToField);
177-
if (parent == null) {
178-
// There are scenarios when this can happen:
179-
// 1. all values of the field that is the source of copy_to are null
180-
// 2. copy_to points at a field inside a disabled object
181-
// 3. copy_to points at dynamic field which is not yet applied to mapping, we will process it properly on re-parse.
182-
continue;
183-
}
184-
int offset = parent.isRoot() ? 0 : parent.fullPath().length() + 1;
185-
mutableList.add(new IgnoredSourceFieldMapper.NameValue(copyToField, offset, XContentDataHelper.voidValue(), context.doc()));
186-
}
187-
ignoredValuesToWrite = mutableList;
188-
}
189-
190-
for (NameValue nameValue : ignoredValuesToWrite) {
162+
for (NameValue nameValue : context.getIgnoredFieldValues()) {
191163
nameValue.doc().add(new StoredField(NAME, encode(nameValue)));
192164
}
193165
}

0 commit comments

Comments
 (0)