Skip to content

Commit 09f5349

Browse files
Delegate synthetic source to keyword multi-fields when skip_store_original_value (#137229)
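With this change, a text field delegates synthetic source storage to a normalized keyword multi-field when the `normalizer_skip_store_original_value` parameter is enabled on that keyword field. In that case the synthetic `_source` returns the normalized value (for example, `"apache lucene powers elastic"` for an indexed `"Apache LUCENE powers ELASTIC"` with a `lowercase` normalizer) rather than the original text.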
1 parent 61025ff commit 09f5349

File tree

5 files changed

+245
-2
lines changed

5 files changed

+245
-2
lines changed

rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/mapping/20_synthetic_source.yml

Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -229,3 +229,128 @@ synthetic_source text with ignored multi-field and multiple values in the same d
229229
- match: { hits.total.value: 1 }
230230
- match: { hits.hits.0._source.foo.0: "This value is short" }
231231
- match: { hits.hits.0._source.foo.1: "This value is too long and will be ignored" }
232+
233+
---
234+
synthetic_source text with normalized keyword with store original value:
235+
- requires:
236+
cluster_features: [ "mapper.source.mode_from_index_setting" ]
237+
reason: "Source mode configured through index setting"
238+
239+
- do:
240+
indices.create:
241+
index: synthetic_source_test
242+
body:
243+
settings:
244+
index:
245+
mapping.source.mode: synthetic
246+
mappings:
247+
properties:
248+
foo:
249+
type: text
250+
fields:
251+
raw:
252+
type: keyword
253+
normalizer: lowercase
254+
normalizer_skip_store_original_value: false
255+
256+
- do:
257+
index:
258+
index: synthetic_source_test
259+
id: "1"
260+
refresh: true
261+
body:
262+
foo: "Apache LUCENE powers ELASTIC"
263+
264+
- do:
265+
search:
266+
index: synthetic_source_test
267+
body:
268+
query:
269+
match_phrase:
270+
foo: lucene
271+
272+
- match: { hits.total.value: 1 }
273+
- match: { hits.hits.0._source.foo: "Apache LUCENE powers ELASTIC" }
274+
275+
---
276+
synthetic_source text with normalized keyword with skip store original value:
277+
- requires:
278+
cluster_features: [ "mapper.source.mode_from_index_setting" ]
279+
reason: "Source mode configured through index setting"
280+
281+
- do:
282+
indices.create:
283+
index: synthetic_source_test
284+
body:
285+
settings:
286+
index:
287+
mapping.source.mode: synthetic
288+
mappings:
289+
properties:
290+
foo:
291+
type: text
292+
fields:
293+
raw:
294+
type: keyword
295+
normalizer: lowercase
296+
normalizer_skip_store_original_value: true
297+
298+
- do:
299+
index:
300+
index: synthetic_source_test
301+
id: "1"
302+
refresh: true
303+
body:
304+
foo: "Apache LUCENE powers ELASTIC"
305+
306+
- do:
307+
search:
308+
index: synthetic_source_test
309+
body:
310+
query:
311+
match_phrase:
312+
foo: lucene
313+
314+
- match: { hits.total.value: 1 }
315+
- match: { hits.hits.0._source.foo: "apache lucene powers elastic" }
316+
317+
---
318+
synthetic_source text with normalized keyword with default skip store original value:
319+
- requires:
320+
cluster_features: [ "mapper.source.mode_from_index_setting" ]
321+
reason: "Source mode configured through index setting"
322+
323+
- do:
324+
indices.create:
325+
index: synthetic_source_test
326+
body:
327+
settings:
328+
index:
329+
mapping.source.mode: synthetic
330+
mappings:
331+
properties:
332+
foo:
333+
type: text
334+
fields:
335+
raw:
336+
type: keyword
337+
normalizer: lowercase
338+
339+
- do:
340+
index:
341+
index: synthetic_source_test
342+
id: "1"
343+
refresh: true
344+
body:
345+
foo: "Apache LUCENE powers ELASTIC"
346+
347+
- do:
348+
search:
349+
index: synthetic_source_test
350+
body:
351+
query:
352+
match_phrase:
353+
foo: lucene
354+
355+
- match: { hits.total.value: 1 }
356+
- match: { hits.hits.0._source.foo: "apache lucene powers elastic" }

server/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -616,7 +616,8 @@ public Builder add(FieldMapper.Builder builder) {
616616
mapperBuilders.put(builder.leafName(), builder::build);
617617

618618
if (builder instanceof KeywordFieldMapper.Builder kwd) {
619-
if (kwd.hasNormalizer() == false && (kwd.hasDocValues() || kwd.isStored())) {
619+
if ((kwd.hasNormalizer() == false || kwd.isNormalizerSkipStoreOriginalValue())
620+
&& (kwd.hasDocValues() || kwd.isStored())) {
620621
hasSyntheticSourceCompatibleKeywordField = true;
621622
}
622623
}

server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -360,6 +360,10 @@ public boolean hasNormalizer() {
360360
return this.normalizer.get() != null;
361361
}
362362

363+
public boolean isNormalizerSkipStoreOriginalValue() {
364+
return this.normalizerSkipStoreOriginalValue.getValue();
365+
}
366+
363367
Builder nullValue(String nullValue) {
364368
this.nullValue.setValue(nullValue);
365369
return this;

server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1748,7 +1748,8 @@ public static KeywordFieldMapper getKeywordFieldMapperForSyntheticSource(Iterabl
17481748
*/
17491749
private static boolean keywordFieldSupportsSyntheticSource(final KeywordFieldMapper keyword) {
17501750
// the field must be stored in some way, whether that be via store or doc values
1751-
return keyword.hasNormalizer() == false && (keyword.fieldType().hasDocValues()) || keyword.fieldType().isStored();
1751+
return (keyword.hasNormalizer() == false || keyword.isNormalizerSkipStoreOriginalValue())
1752+
&& (keyword.fieldType().hasDocValues() || keyword.fieldType().isStored());
17521753
}
17531754
}
17541755
}

server/src/test/java/org/elasticsearch/index/mapper/TextFieldMapperTests.java

Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,10 +88,14 @@
8888
import java.util.List;
8989
import java.util.Map;
9090
import java.util.Set;
91+
import java.util.stream.Collectors;
9192

9293
import static org.hamcrest.Matchers.containsString;
9394
import static org.hamcrest.Matchers.equalTo;
95+
import static org.hamcrest.Matchers.in;
9496
import static org.hamcrest.Matchers.instanceOf;
97+
import static org.hamcrest.Matchers.not;
98+
import static org.hamcrest.Matchers.nullValue;
9599
import static org.hamcrest.core.Is.is;
96100

97101
public class TextFieldMapperTests extends MapperTestCase {
@@ -451,6 +455,114 @@ public void testStoreParameterDefaultsSyntheticSourceTextFieldIsMultiFieldBwc()
451455
assertThat(fieldType.stored(), is(true));
452456
}
453457

458+
public void testDelegatesSyntheticSourceToKeywordMultiField() throws IOException {
459+
var indexSettingsBuilder = getIndexSettingsBuilder();
460+
indexSettingsBuilder.put(IndexSettings.INDEX_MAPPER_SOURCE_MODE_SETTING.getKey(), "synthetic");
461+
var indexSettings = indexSettingsBuilder.build();
462+
463+
var mapping = mapping(b -> {
464+
b.startObject("name");
465+
b.field("type", "text");
466+
b.field("store", false);
467+
b.startObject("fields");
468+
b.startObject("keyword");
469+
b.field("type", "keyword");
470+
b.endObject();
471+
b.endObject();
472+
b.endObject();
473+
});
474+
DocumentMapper mapper = createMapperService(indexSettings, mapping).documentMapper();
475+
476+
var source = source(b -> b.field("name", "QUICK Brown fox"));
477+
ParsedDocument doc = mapper.parse(source);
478+
IndexableField textFallbackField = doc.rootDoc().getField("name._original");
479+
assertThat(textFallbackField, nullValue());
480+
IndexableFieldType keywordFieldType = doc.rootDoc().getField("name.keyword").fieldType();
481+
assertThat(keywordFieldType.docValuesType(), not(is(DocValuesType.NONE)));
482+
483+
Set<String> ignoredFields = doc.rootDoc()
484+
.getFields(IgnoredSourceFieldMapper.NAME)
485+
.stream()
486+
.flatMap(field -> IgnoredSourceFieldMapper.CoalescedIgnoredSourceEncoding.decode(field.binaryValue()).stream())
487+
.map(IgnoredSourceFieldMapper.NameValue::name)
488+
.collect(Collectors.toSet());
489+
assertThat("name", not(in(ignoredFields)));
490+
491+
assertThat(syntheticSource(mapper, b -> b.field("name", "QUICK Brown fox")), equalTo("{\"name\":\"QUICK Brown fox\"}"));
492+
}
493+
494+
public void testDoesNotDelegateSyntheticSourceForNormalizedKeywordMultiFieldWhenStoreOriginalValue() throws IOException {
495+
var indexSettingsBuilder = getIndexSettingsBuilder();
496+
indexSettingsBuilder.put(IndexSettings.INDEX_MAPPER_SOURCE_MODE_SETTING.getKey(), "synthetic");
497+
var indexSettings = indexSettingsBuilder.build();
498+
499+
var mapping = mapping(b -> {
500+
b.startObject("name");
501+
b.field("type", "text");
502+
b.field("store", false);
503+
b.startObject("fields");
504+
b.startObject("keyword");
505+
b.field("type", "keyword");
506+
b.field("normalizer", "lowercase");
507+
b.field("normalizer_skip_store_original_value", false);
508+
b.endObject();
509+
b.endObject();
510+
b.endObject();
511+
});
512+
DocumentMapper mapper = createMapperService(indexSettings, mapping).documentMapper();
513+
514+
var source = source(b -> b.field("name", "QUICK Brown fox"));
515+
ParsedDocument doc = mapper.parse(source);
516+
IndexableField textFallbackField = doc.rootDoc().getField("name._original");
517+
assertThat(textFallbackField, nullValue());
518+
519+
Set<String> ignoredFields = doc.rootDoc()
520+
.getFields(IgnoredSourceFieldMapper.NAME)
521+
.stream()
522+
.flatMap(field -> IgnoredSourceFieldMapper.CoalescedIgnoredSourceEncoding.decode(field.binaryValue()).stream())
523+
.map(IgnoredSourceFieldMapper.NameValue::name)
524+
.collect(Collectors.toSet());
525+
assertThat("name", in(ignoredFields));
526+
527+
assertThat(syntheticSource(mapper, b -> b.field("name", "QUICK Brown fox")), equalTo("{\"name\":\"QUICK Brown fox\"}"));
528+
}
529+
530+
public void testDelegatesSyntheticSourceForNormalizedKeywordMultiFieldWhenSkipStoreOriginalValue() throws IOException {
531+
var indexSettingsBuilder = getIndexSettingsBuilder();
532+
indexSettingsBuilder.put(IndexSettings.INDEX_MAPPER_SOURCE_MODE_SETTING.getKey(), "synthetic");
533+
var indexSettings = indexSettingsBuilder.build();
534+
535+
var mapping = mapping(b -> {
536+
b.startObject("name");
537+
b.field("type", "text");
538+
b.field("store", false);
539+
b.startObject("fields");
540+
b.startObject("keyword");
541+
b.field("type", "keyword");
542+
b.field("normalizer", "lowercase");
543+
b.field("normalizer_skip_store_original_value", true);
544+
b.endObject();
545+
b.endObject();
546+
b.endObject();
547+
});
548+
DocumentMapper mapper = createMapperService(indexSettings, mapping).documentMapper();
549+
550+
var source = source(b -> b.field("name", "QUICK Brown fox"));
551+
ParsedDocument doc = mapper.parse(source);
552+
IndexableField textFallbackField = doc.rootDoc().getField("name._original");
553+
assertThat(textFallbackField, nullValue());
554+
555+
Set<String> ignoredFields = doc.rootDoc()
556+
.getFields(IgnoredSourceFieldMapper.NAME)
557+
.stream()
558+
.flatMap(field -> IgnoredSourceFieldMapper.CoalescedIgnoredSourceEncoding.decode(field.binaryValue()).stream())
559+
.map(IgnoredSourceFieldMapper.NameValue::name)
560+
.collect(Collectors.toSet());
561+
assertThat("name", not(in(ignoredFields)));
562+
563+
assertThat(syntheticSource(mapper, b -> b.field("name", "QUICK Brown fox")), equalTo("{\"name\":\"quick brown fox\"}"));
564+
}
565+
454566
public void testBWCSerialization() throws IOException {
455567
MapperService mapperService = createMapperService(fieldMapping(b -> {
456568
b.field("type", "text");

0 commit comments

Comments
 (0)