diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/mapping/20_synthetic_source.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/mapping/20_synthetic_source.yml
index 0c35df319293e..cd94633a46c8c 100644
--- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/mapping/20_synthetic_source.yml
+++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/mapping/20_synthetic_source.yml
@@ -229,3 +229,128 @@ synthetic_source text with ignored multi-field and multiple values in the same d
   - match: { hits.total.value: 1 }
   - match: { hits.hits.0._source.foo.0: "This value is short" }
   - match: { hits.hits.0._source.foo.1: "This value is too long and will be ignored" }
+
+---
+synthetic_source text with normalized keyword with store original value:
+  - requires:
+      cluster_features: [ "mapper.source.mode_from_index_setting" ]
+      reason: "Source mode configured through index setting"
+
+  - do:
+      indices.create:
+        index: synthetic_source_test
+        body:
+          settings:
+            index:
+              mapping.source.mode: synthetic
+          mappings:
+            properties:
+              foo:
+                type: text
+                fields:
+                  raw:
+                    type: keyword
+                    normalizer: lowercase
+                    normalizer_skip_store_original_value: false
+
+  - do:
+      index:
+        index: synthetic_source_test
+        id: "1"
+        refresh: true
+        body:
+          foo: "Apache LUCENE powers ELASTIC"
+
+  - do:
+      search:
+        index: synthetic_source_test
+        body:
+          query:
+            match_phrase:
+              foo: lucene
+
+  - match: { hits.total.value: 1 }
+  - match: { hits.hits.0._source.foo: "Apache LUCENE powers ELASTIC" }
+
+---
+synthetic_source text with normalized keyword with skip store original value:
+  - requires:
+      cluster_features: [ "mapper.source.mode_from_index_setting" ]
+      reason: "Source mode configured through index setting"
+
+  - do:
+      indices.create:
+        index: synthetic_source_test
+        body:
+          settings:
+            index:
+              mapping.source.mode: synthetic
+          mappings:
+            properties:
+              foo:
+                type: text
+                fields:
+                  raw:
+                    type: keyword
+                    normalizer: lowercase
+                    normalizer_skip_store_original_value: true
+
+  - do:
+      index:
+        index: synthetic_source_test
+        id: "1"
+        refresh: true
+        body:
+          foo: "Apache LUCENE powers ELASTIC"
+
+  - do:
+      search:
+        index: synthetic_source_test
+        body:
+          query:
+            match_phrase:
+              foo: lucene
+
+  - match: { hits.total.value: 1 }
+  - match: { hits.hits.0._source.foo: "apache lucene powers elastic" }
+
+---
+synthetic_source text with normalized keyword with default skip store original value:
+  - requires:
+      cluster_features: [ "mapper.source.mode_from_index_setting" ]
+      reason: "Source mode configured through index setting"
+
+  - do:
+      indices.create:
+        index: synthetic_source_test
+        body:
+          settings:
+            index:
+              mapping.source.mode: synthetic
+          mappings:
+            properties:
+              foo:
+                type: text
+                fields:
+                  raw:
+                    type: keyword
+                    normalizer: lowercase
+
+  - do:
+      index:
+        index: synthetic_source_test
+        id: "1"
+        refresh: true
+        body:
+          foo: "Apache LUCENE powers ELASTIC"
+
+  - do:
+      search:
+        index: synthetic_source_test
+        body:
+          query:
+            match_phrase:
+              foo: lucene
+
+  - match: { hits.total.value: 1 }
+  - match: { hits.hits.0._source.foo: "apache lucene powers elastic" }
diff --git a/server/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java
index 6317acaa65bd0..dd35576299a0e 100644
--- a/server/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java
+++ b/server/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java
@@ -616,7 +616,9 @@ public Builder add(FieldMapper.Builder builder) {
                 mapperBuilders.put(builder.leafName(), builder::build);
 
                 if (builder instanceof KeywordFieldMapper.Builder kwd) {
-                    if (kwd.hasNormalizer() == false && (kwd.hasDocValues() || kwd.isStored())) {
+                    // a keyword with a normalizer only qualifies when isNormalizerSkipStoreOriginalValue() is true
+                    if ((kwd.hasNormalizer() == false || kwd.isNormalizerSkipStoreOriginalValue())
+                        && (kwd.hasDocValues() || kwd.isStored())) {
                         hasSyntheticSourceCompatibleKeywordField = true;
                     }
                 }
diff --git a/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java
index 5ddb98699808b..dc307d157bbaa 100644
--- a/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java
+++ b/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java
@@ -360,6 +360,13 @@ public boolean hasNormalizer() {
             return this.normalizer.get() != null;
         }
 
+        /**
+         * Returns the current value of this builder's {@code normalizerSkipStoreOriginalValue} parameter.
+         */
+        public boolean isNormalizerSkipStoreOriginalValue() {
+            return this.normalizerSkipStoreOriginalValue.getValue();
+        }
+
         Builder nullValue(String nullValue) {
             this.nullValue.setValue(nullValue);
             return this;
diff --git a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java
index a94825392cc79..c1f1d262c185b 100644
--- a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java
+++ b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java
@@ -1748,7 +1748,9 @@ public static KeywordFieldMapper getKeywordFieldMapperForSyntheticSource(Iterabl
          */
         private static boolean keywordFieldSupportsSyntheticSource(final KeywordFieldMapper keyword) {
             // the field must be stored in some way, whether that be via store or doc values
-            return keyword.hasNormalizer() == false && (keyword.fieldType().hasDocValues()) || keyword.fieldType().isStored();
+            // a keyword with a normalizer only qualifies when isNormalizerSkipStoreOriginalValue() is true
+            return (keyword.hasNormalizer() == false || keyword.isNormalizerSkipStoreOriginalValue())
+                && (keyword.fieldType().hasDocValues() || keyword.fieldType().isStored());
         }
     }
 }
diff --git a/server/src/test/java/org/elasticsearch/index/mapper/TextFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/TextFieldMapperTests.java
index cee149071134f..970dd8886b93f 100644
--- a/server/src/test/java/org/elasticsearch/index/mapper/TextFieldMapperTests.java
+++ b/server/src/test/java/org/elasticsearch/index/mapper/TextFieldMapperTests.java
@@ -88,10 +88,14 @@
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
+import java.util.stream.Collectors;
 
 import static org.hamcrest.Matchers.containsString;
 import static org.hamcrest.Matchers.equalTo;
+import static org.hamcrest.Matchers.in;
 import static org.hamcrest.Matchers.instanceOf;
+import static org.hamcrest.Matchers.not;
+import static org.hamcrest.Matchers.nullValue;
 import static org.hamcrest.core.Is.is;
 
 public class TextFieldMapperTests extends MapperTestCase {
@@ -451,6 +455,117 @@ public void testStoreParameterDefaultsSyntheticSourceTextFieldIsMultiFieldBwc()
         assertThat(fieldType.stored(), is(true));
     }
 
+    // Keyword multi-field without a normalizer: no "name._original" fallback field and "name" is not in ignored source.
+    public void testDelegatesSyntheticSourceToKeywordMultiField() throws IOException {
+        var indexSettingsBuilder = getIndexSettingsBuilder();
+        indexSettingsBuilder.put(IndexSettings.INDEX_MAPPER_SOURCE_MODE_SETTING.getKey(), "synthetic");
+        var indexSettings = indexSettingsBuilder.build();
+
+        var mapping = mapping(b -> {
+            b.startObject("name");
+            b.field("type", "text");
+            b.field("store", false);
+            b.startObject("fields");
+            b.startObject("keyword");
+            b.field("type", "keyword");
+            b.endObject();
+            b.endObject();
+            b.endObject();
+        });
+        DocumentMapper mapper = createMapperService(indexSettings, mapping).documentMapper();
+
+        var source = source(b -> b.field("name", "QUICK Brown fox"));
+        ParsedDocument doc = mapper.parse(source);
+        IndexableField textFallbackField = doc.rootDoc().getField("name._original");
+        assertThat(textFallbackField, nullValue());
+        IndexableFieldType keywordFieldType = doc.rootDoc().getField("name.keyword").fieldType();
+        assertThat(keywordFieldType.docValuesType(), not(is(DocValuesType.NONE)));
+
+        Set<String> ignoredFields = doc.rootDoc()
+            .getFields(IgnoredSourceFieldMapper.NAME)
+            .stream()
+            .flatMap(field -> IgnoredSourceFieldMapper.CoalescedIgnoredSourceEncoding.decode(field.binaryValue()).stream())
+            .map(IgnoredSourceFieldMapper.NameValue::name)
+            .collect(Collectors.toSet());
+        assertThat("name", not(in(ignoredFields)));
+
+        assertThat(syntheticSource(mapper, b -> b.field("name", "QUICK Brown fox")), equalTo("{\"name\":\"QUICK Brown fox\"}"));
+    }
+
+    // Normalizer with normalizer_skip_store_original_value=false: "name" is in ignored source and the original text is returned.
+    public void testDoesNotDelegateSyntheticSourceForNormalizedKeywordMultiFieldWhenStoreOriginalValue() throws IOException {
+        var indexSettingsBuilder = getIndexSettingsBuilder();
+        indexSettingsBuilder.put(IndexSettings.INDEX_MAPPER_SOURCE_MODE_SETTING.getKey(), "synthetic");
+        var indexSettings = indexSettingsBuilder.build();
+
+        var mapping = mapping(b -> {
+            b.startObject("name");
+            b.field("type", "text");
+            b.field("store", false);
+            b.startObject("fields");
+            b.startObject("keyword");
+            b.field("type", "keyword");
+            b.field("normalizer", "lowercase");
+            b.field("normalizer_skip_store_original_value", false);
+            b.endObject();
+            b.endObject();
+            b.endObject();
+        });
+        DocumentMapper mapper = createMapperService(indexSettings, mapping).documentMapper();
+
+        var source = source(b -> b.field("name", "QUICK Brown fox"));
+        ParsedDocument doc = mapper.parse(source);
+        IndexableField textFallbackField = doc.rootDoc().getField("name._original");
+        assertThat(textFallbackField, nullValue());
+
+        Set<String> ignoredFields = doc.rootDoc()
+            .getFields(IgnoredSourceFieldMapper.NAME)
+            .stream()
+            .flatMap(field -> IgnoredSourceFieldMapper.CoalescedIgnoredSourceEncoding.decode(field.binaryValue()).stream())
+            .map(IgnoredSourceFieldMapper.NameValue::name)
+            .collect(Collectors.toSet());
+        assertThat("name", in(ignoredFields));
+
+        assertThat(syntheticSource(mapper, b -> b.field("name", "QUICK Brown fox")), equalTo("{\"name\":\"QUICK Brown fox\"}"));
+    }
+
+    // Normalizer with normalizer_skip_store_original_value=true: "name" is not in ignored source; source comes back lowercased.
+    public void testDelegatesSyntheticSourceForNormalizedKeywordMultiFieldWhenSkipStoreOriginalValue() throws IOException {
+        var indexSettingsBuilder = getIndexSettingsBuilder();
+        indexSettingsBuilder.put(IndexSettings.INDEX_MAPPER_SOURCE_MODE_SETTING.getKey(), "synthetic");
+        var indexSettings = indexSettingsBuilder.build();
+
+        var mapping = mapping(b -> {
+            b.startObject("name");
+            b.field("type", "text");
+            b.field("store", false);
+            b.startObject("fields");
+            b.startObject("keyword");
+            b.field("type", "keyword");
+            b.field("normalizer", "lowercase");
+            b.field("normalizer_skip_store_original_value", true);
+            b.endObject();
+            b.endObject();
+            b.endObject();
+        });
+        DocumentMapper mapper = createMapperService(indexSettings, mapping).documentMapper();
+
+        var source = source(b -> b.field("name", "QUICK Brown fox"));
+        ParsedDocument doc = mapper.parse(source);
+        IndexableField textFallbackField = doc.rootDoc().getField("name._original");
+        assertThat(textFallbackField, nullValue());
+
+        Set<String> ignoredFields = doc.rootDoc()
+            .getFields(IgnoredSourceFieldMapper.NAME)
+            .stream()
+            .flatMap(field -> IgnoredSourceFieldMapper.CoalescedIgnoredSourceEncoding.decode(field.binaryValue()).stream())
+            .map(IgnoredSourceFieldMapper.NameValue::name)
+            .collect(Collectors.toSet());
+        assertThat("name", not(in(ignoredFields)));
+
+        assertThat(syntheticSource(mapper, b -> b.field("name", "QUICK Brown fox")), equalTo("{\"name\":\"quick brown fox\"}"));
+    }
+
     public void testBWCSerialization() throws IOException {
         MapperService mapperService = createMapperService(fieldMapping(b -> {
             b.field("type", "text");