Skip to content

Commit 5791538

Browse files
authored
Fix synthetic source for flattened field when used with ignore_above (elastic#113499) (elastic#113568)
(cherry picked from commit 35fbbec) # Conflicts: # rest-api-spec/build.gradle
1 parent 7870e2d commit 5791538

File tree

11 files changed

+396
-102
lines changed

11 files changed

+396
-102
lines changed

docs/changelog/113499.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
pr: 113499
2+
summary: Fix synthetic source for flattened field when used with `ignore_above`
3+
area: Logs
4+
type: bug
5+
issues:
6+
- 112044

rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/get/100_synthetic_source.yml

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1050,6 +1050,63 @@ flattened field with ignore_above:
10501050

10511051
- is_false: fields
10521052

1053+
1054+
---
1055+
flattened field with ignore_above and arrays:
1056+
- requires:
1057+
cluster_features: ["mapper.flattened.ignore_above_with_arrays_support"]
1058+
reason: requires support of ignore_above synthetic source with arrays
1059+
1060+
- do:
1061+
indices.create:
1062+
index: test
1063+
body:
1064+
mappings:
1065+
_source:
1066+
mode: synthetic
1067+
properties:
1068+
field:
1069+
type: flattened
1070+
ignore_above: 10
1071+
1072+
- do:
1073+
index:
1074+
index: test
1075+
id: 1
1076+
body: |
1077+
{
1078+
"field": [
1079+
{ "key1": { "key2": "key2", "key3": "key3_ignored" }, "key4": "key4_ignored", "key5": { "key6": "key6_ignored" }, "key7": "key7" },
1080+
{ "key1": { "key2": "key12", "key13": "key13_ignored" }, "key4": "key14_ignored", "key15": { "key16": "key16_ignored" }, "key17": [ "key17", "key18" ] }
1081+
]
1082+
}
1083+
1084+
- do:
1085+
get:
1086+
index: test
1087+
id: 1
1088+
1089+
- match: { _index: "test" }
1090+
- match: { _id: "1" }
1091+
- match: { _version: 1 }
1092+
- match: { found: true }
1093+
- match:
1094+
_source:
1095+
field:
1096+
key1:
1097+
key2: [ "key12", "key2" ]
1098+
key3: "key3_ignored"
1099+
key13: "key13_ignored"
1100+
key4: [ "key14_ignored", "key4_ignored" ]
1101+
key5:
1102+
key6: "key6_ignored"
1103+
key7: "key7"
1104+
key15:
1105+
key16: "key16_ignored"
1106+
key17: [ "key17", "key18" ]
1107+
1108+
- is_false: fields
1109+
10531110
---
10541111
completion:
10551112
- requires:

rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/540_ignore_above_synthetic_source.yml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -44,8 +44,8 @@ ignore_above mapping level setting:
4444
---
4545
ignore_above mapping level setting on arrays:
4646
- requires:
47-
cluster_features: [ "mapper.ignore_above_index_level_setting" ]
48-
reason: introduce ignore_above index level setting
47+
cluster_features: [ "mapper.flattened.ignore_above_with_arrays_support" ]
48+
reason: requires support of ignore_above with arrays for flattened fields
4949
- do:
5050
indices.create:
5151
index: test
@@ -80,9 +80,9 @@ ignore_above mapping level setting on arrays:
8080
match_all: {}
8181

8282
- length: { hits.hits: 1 }
83-
#TODO: synthetic source field reconstruction bug (TBD: add link to the issue here)
83+
#TODO: synthetic source field reconstruction bug (TBD: add link to the issue here)
8484
#- match: { hits.hits.0._source.keyword: ["foo bar", "the quick brown fox"] }
85-
- match: { hits.hits.0._source.flattened.value: ["the quick brown fox", "jumps over"] }
85+
- match: { hits.hits.0._source.flattened.value: [ "jumps over", "the quick brown fox" ] }
8686
- match: { hits.hits.0.fields.keyword.0: "foo bar" }
8787
- match: { hits.hits.0.fields.flattened.0.value: "jumps over" }
8888

server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,8 @@ public Set<NodeFeature> getFeatures() {
4444
FlattenedFieldMapper.IGNORE_ABOVE_SUPPORT,
4545
IndexSettings.IGNORE_ABOVE_INDEX_LEVEL_SETTING,
4646
SourceFieldMapper.SYNTHETIC_SOURCE_COPY_TO_INSIDE_OBJECTS_FIX,
47-
TimeSeriesRoutingHashFieldMapper.TS_ROUTING_HASH_FIELD_PARSES_BYTES_REF
47+
TimeSeriesRoutingHashFieldMapper.TS_ROUTING_HASH_FIELD_PARSES_BYTES_REF,
48+
FlattenedFieldMapper.IGNORE_ABOVE_WITH_ARRAYS_SUPPORT
4849
);
4950
}
5051
}

server/src/main/java/org/elasticsearch/index/mapper/flattened/FlattenedFieldMapper.java

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -112,9 +112,11 @@
112112
public final class FlattenedFieldMapper extends FieldMapper {
113113

114114
public static final NodeFeature IGNORE_ABOVE_SUPPORT = new NodeFeature("flattened.ignore_above_support");
115+
public static final NodeFeature IGNORE_ABOVE_WITH_ARRAYS_SUPPORT = new NodeFeature("mapper.flattened.ignore_above_with_arrays_support");
115116

116117
public static final String CONTENT_TYPE = "flattened";
117118
public static final String KEYED_FIELD_SUFFIX = "._keyed";
119+
public static final String KEYED_IGNORED_VALUES_FIELD_SUFFIX = "._keyed._ignored";
118120
public static final String TIME_SERIES_DIMENSIONS_ARRAY_PARAM = "time_series_dimensions";
119121

120122
private static class Defaults {
@@ -835,6 +837,7 @@ private FlattenedFieldMapper(
835837
this.fieldParser = new FlattenedFieldParser(
836838
mappedFieldType.name(),
837839
mappedFieldType.name() + KEYED_FIELD_SUFFIX,
840+
mappedFieldType.name() + KEYED_IGNORED_VALUES_FIELD_SUFFIX,
838841
mappedFieldType,
839842
builder.depthLimit.get(),
840843
builder.ignoreAbove.get(),
@@ -903,7 +906,12 @@ public FieldMapper.Builder getMergeBuilder() {
903906
@Override
904907
protected SyntheticSourceSupport syntheticSourceSupport() {
905908
if (fieldType().hasDocValues()) {
906-
var loader = new FlattenedSortedSetDocValuesSyntheticFieldLoader(fullPath(), fullPath() + "._keyed", leafName());
909+
var loader = new FlattenedSortedSetDocValuesSyntheticFieldLoader(
910+
fullPath(),
911+
fullPath() + KEYED_FIELD_SUFFIX,
912+
ignoreAbove() < Integer.MAX_VALUE ? fullPath() + KEYED_IGNORED_VALUES_FIELD_SUFFIX : null,
913+
leafName()
914+
);
907915

908916
return new SyntheticSourceSupport.Native(loader);
909917
}

server/src/main/java/org/elasticsearch/index/mapper/flattened/FlattenedFieldParser.java

Lines changed: 19 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -11,18 +11,15 @@
1111

1212
import org.apache.lucene.document.Field;
1313
import org.apache.lucene.document.SortedSetDocValuesField;
14+
import org.apache.lucene.document.StoredField;
1415
import org.apache.lucene.document.StringField;
1516
import org.apache.lucene.index.IndexWriter;
1617
import org.apache.lucene.index.IndexableField;
1718
import org.apache.lucene.util.BytesRef;
1819
import org.elasticsearch.common.xcontent.XContentParserUtils;
1920
import org.elasticsearch.index.mapper.ContentPath;
2021
import org.elasticsearch.index.mapper.DocumentParserContext;
21-
import org.elasticsearch.index.mapper.IgnoredSourceFieldMapper;
2222
import org.elasticsearch.index.mapper.MappedFieldType;
23-
import org.elasticsearch.index.mapper.XContentDataHelper;
24-
import org.elasticsearch.xcontent.CopyingXContentParser;
25-
import org.elasticsearch.xcontent.XContentBuilder;
2623
import org.elasticsearch.xcontent.XContentParser;
2724

2825
import java.io.IOException;
@@ -39,6 +36,7 @@ class FlattenedFieldParser {
3936

4037
private final String rootFieldFullPath;
4138
private final String keyedFieldFullPath;
39+
private final String keyedIgnoredValuesFieldFullPath;
4240

4341
private final MappedFieldType fieldType;
4442
private final int depthLimit;
@@ -48,13 +46,15 @@ class FlattenedFieldParser {
4846
FlattenedFieldParser(
4947
String rootFieldFullPath,
5048
String keyedFieldFullPath,
49+
String keyedIgnoredValuesFieldFullPath,
5150
MappedFieldType fieldType,
5251
int depthLimit,
5352
int ignoreAbove,
5453
String nullValue
5554
) {
5655
this.rootFieldFullPath = rootFieldFullPath;
5756
this.keyedFieldFullPath = keyedFieldFullPath;
57+
this.keyedIgnoredValuesFieldFullPath = keyedIgnoredValuesFieldFullPath;
5858
this.fieldType = fieldType;
5959
this.depthLimit = depthLimit;
6060
this.ignoreAbove = ignoreAbove;
@@ -65,36 +65,18 @@ public List<IndexableField> parse(final DocumentParserContext documentParserCont
6565
XContentParser parser = documentParserContext.parser();
6666
XContentParserUtils.ensureExpectedToken(XContentParser.Token.START_OBJECT, parser.currentToken(), parser);
6767

68-
XContentBuilder rawDataForSyntheticSource = null;
69-
if (documentParserContext.canAddIgnoredField() && ignoreAbove < Integer.MAX_VALUE) {
70-
var copyingParser = new CopyingXContentParser(parser);
71-
rawDataForSyntheticSource = copyingParser.getBuilder();
72-
parser = copyingParser;
73-
}
74-
7568
ContentPath path = new ContentPath();
7669
List<IndexableField> fields = new ArrayList<>();
7770

7871
var context = new Context(parser, documentParserContext);
7972
parseObject(context, path, fields);
80-
if (rawDataForSyntheticSource != null && context.isIgnoredValueEncountered()) {
81-
// One or more inner fields are ignored due to `ignore_above`.
82-
// Because of that we will store whole object as is in order to generate synthetic source.
83-
documentParserContext.addIgnoredField(
84-
IgnoredSourceFieldMapper.NameValue.fromContext(
85-
documentParserContext,
86-
rootFieldFullPath,
87-
XContentDataHelper.encodeXContentBuilder(rawDataForSyntheticSource)
88-
)
89-
);
90-
}
9173

9274
return fields;
9375
}
9476

9577
private void parseObject(Context context, ContentPath path, List<IndexableField> fields) throws IOException {
9678
String currentName = null;
97-
XContentParser parser = context.getParser();
79+
XContentParser parser = context.parser();
9880
while (true) {
9981
XContentParser.Token token = parser.nextToken();
10082
if (token == XContentParser.Token.END_OBJECT) {
@@ -111,7 +93,7 @@ private void parseObject(Context context, ContentPath path, List<IndexableField>
11193
}
11294

11395
private void parseArray(Context context, ContentPath path, String currentName, List<IndexableField> fields) throws IOException {
114-
XContentParser parser = context.getParser();
96+
XContentParser parser = context.parser();
11597
while (true) {
11698
XContentParser.Token token = parser.nextToken();
11799
if (token == XContentParser.Token.END_ARRAY) {
@@ -128,7 +110,7 @@ private void parseFieldValue(
128110
String currentName,
129111
List<IndexableField> fields
130112
) throws IOException {
131-
XContentParser parser = context.getParser();
113+
XContentParser parser = context.parser();
132114
if (token == XContentParser.Token.START_OBJECT) {
133115
path.add(currentName);
134116
validateDepthLimit(path);
@@ -151,19 +133,23 @@ private void parseFieldValue(
151133
}
152134

153135
private void addField(Context context, ContentPath path, String currentName, String value, List<IndexableField> fields) {
154-
if (value.length() > ignoreAbove) {
155-
context.onIgnoredValue();
156-
return;
157-
}
158-
159136
String key = path.pathAsText(currentName);
160137
if (key.contains(SEPARATOR)) {
161138
throw new IllegalArgumentException(
162139
"Keys in [flattened] fields cannot contain the reserved character \\0. Offending key: [" + key + "]."
163140
);
164141
}
142+
165143
String keyedValue = createKeyedValue(key, value);
166144
BytesRef bytesKeyedValue = new BytesRef(keyedValue);
145+
146+
if (value.length() > ignoreAbove) {
147+
if (context.documentParserContext().mappingLookup().isSourceSynthetic()) {
148+
fields.add(new StoredField(keyedIgnoredValuesFieldFullPath, bytesKeyedValue));
149+
}
150+
return;
151+
}
152+
167153
// check that the keyed value doesn't exceed the IndexWriter.MAX_TERM_LENGTH limit enforced by Lucene at index time;
168154
// in that case we can already throw a more user-friendly exception here that includes the offending field's key and value lengths
169155
if (bytesKeyedValue.length > IndexWriter.MAX_TERM_LENGTH) {
@@ -198,10 +184,10 @@ private void addField(Context context, ContentPath path, String currentName, Str
198184
final String keyedFieldName = FlattenedFieldParser.extractKey(bytesKeyedValue).utf8ToString();
199185
if (fieldType.isDimension() && fieldType.dimensions().contains(keyedFieldName)) {
200186
final BytesRef keyedFieldValue = FlattenedFieldParser.extractValue(bytesKeyedValue);
201-
context.getDocumentParserContext()
187+
context.documentParserContext()
202188
.getDimensions()
203189
.addString(rootFieldFullPath + "." + keyedFieldName, keyedFieldValue)
204-
.validate(context.getDocumentParserContext().indexSettings());
190+
.validate(context.documentParserContext().indexSettings());
205191
}
206192
}
207193
}
@@ -239,32 +225,5 @@ static BytesRef extractValue(BytesRef keyedValue) {
239225
return new BytesRef(keyedValue.bytes, valueStart, keyedValue.length - valueStart);
240226
}
241227

242-
private static class Context {
243-
private final XContentParser parser;
244-
private final DocumentParserContext documentParserContext;
245-
246-
private boolean ignoredValueEncountered;
247-
248-
private Context(XContentParser parser, DocumentParserContext documentParserContext) {
249-
this.parser = parser;
250-
this.documentParserContext = documentParserContext;
251-
this.ignoredValueEncountered = false;
252-
}
253-
254-
public XContentParser getParser() {
255-
return parser;
256-
}
257-
258-
public DocumentParserContext getDocumentParserContext() {
259-
return documentParserContext;
260-
}
261-
262-
public void onIgnoredValue() {
263-
this.ignoredValueEncountered = true;
264-
}
265-
266-
public boolean isIgnoredValueEncountered() {
267-
return ignoredValueEncountered;
268-
}
269-
}
228+
private record Context(XContentParser parser, DocumentParserContext documentParserContext) {}
270229
}

server/src/main/java/org/elasticsearch/index/mapper/flattened/FlattenedFieldSyntheticWriterHelper.java

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@
99

1010
package org.elasticsearch.index.mapper.flattened;
1111

12-
import org.apache.lucene.index.SortedSetDocValues;
1312
import org.apache.lucene.util.BytesRef;
1413
import org.elasticsearch.xcontent.XContentBuilder;
1514

@@ -226,19 +225,23 @@ public boolean equals(Object obj) {
226225
}
227226
}
228227

229-
private final SortedSetDocValues dv;
228+
interface SortedKeyedValues {
229+
BytesRef next() throws IOException;
230+
}
231+
232+
private final SortedKeyedValues sortedKeyedValues;
230233

231-
FlattenedFieldSyntheticWriterHelper(final SortedSetDocValues dv) {
232-
this.dv = dv;
234+
FlattenedFieldSyntheticWriterHelper(final SortedKeyedValues sortedKeyedValues) {
235+
this.sortedKeyedValues = sortedKeyedValues;
233236
}
234237

235238
void write(final XContentBuilder b) throws IOException {
236-
KeyValue curr = new KeyValue(dv.lookupOrd(dv.nextOrd()));
239+
KeyValue curr = new KeyValue(sortedKeyedValues.next());
237240
KeyValue prev = KeyValue.EMPTY;
238241
final List<String> values = new ArrayList<>();
239242
values.add(curr.value());
240-
for (int i = 1; i < dv.docValueCount(); i++) {
241-
KeyValue next = new KeyValue(dv.lookupOrd(dv.nextOrd()));
243+
for (BytesRef nextValue = sortedKeyedValues.next(); nextValue != null; nextValue = sortedKeyedValues.next()) {
244+
KeyValue next = new KeyValue(nextValue);
242245
writeObject(b, curr, next, curr.start(prev), curr.end(next), values);
243246
values.add(next.value());
244247
prev = curr;

0 commit comments

Comments
 (0)