diff --git a/docs/changelog/112151.yaml b/docs/changelog/112151.yaml new file mode 100644 index 0000000000000..f5cbfd8da07c2 --- /dev/null +++ b/docs/changelog/112151.yaml @@ -0,0 +1,5 @@ +pr: 112151 +summary: Store original source for keywords using a normalizer +area: Logs +type: enhancement +issues: [] diff --git a/docs/reference/mapping/types/date_nanos.asciidoc b/docs/reference/mapping/types/date_nanos.asciidoc index 1a3b390b1690c..3799426c6881b 100644 --- a/docs/reference/mapping/types/date_nanos.asciidoc +++ b/docs/reference/mapping/types/date_nanos.asciidoc @@ -152,8 +152,7 @@ of official GA features. `date_nanos` fields support <<synthetic-source,synthetic `_source`>> in their default configuration. Synthetic `_source` cannot be used together with -<<copy-to,`copy_to`>>, <<ignore-malformed,`ignore_malformed`>> set to true -or with <<doc-values,`doc_values`>> disabled. +<<copy-to,`copy_to`>> or with <<doc-values,`doc_values`>> disabled. Synthetic source always sorts `date_nanos` fields. For example: [source,console,id=synthetic-source-date-nanos-example] diff --git a/docs/reference/mapping/types/keyword.asciidoc b/docs/reference/mapping/types/keyword.asciidoc index 59d307c4df0ad..7f5ba1b20481f 100644 --- a/docs/reference/mapping/types/keyword.asciidoc +++ b/docs/reference/mapping/types/keyword.asciidoc @@ -181,7 +181,7 @@ of official GA features. `keyword` fields support <<synthetic-source,synthetic `_source`>> in their default configuration. Synthetic `_source` cannot be used together with -a <<normalizer,`normalizer`>> or <<copy-to,`copy_to`>>. +<<copy-to,`copy_to`>>. By default, synthetic source sorts `keyword` fields and removes duplicates. 
For example: diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/mget/90_synthetic_source.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/mget/90_synthetic_source.yml index 2935c0c1c41b5..ff17a92ed0fcc 100644 --- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/mget/90_synthetic_source.yml +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/mget/90_synthetic_source.yml @@ -46,6 +46,94 @@ keyword: docs.1._source: kwd: bar +--- +keyword with normalizer: + - requires: + cluster_features: [ "mapper.keyword_normalizer_synthetic_source" ] + reason: support for normalizer on keyword fields + - do: + indices.create: + index: test-keyword-with-normalizer + body: + settings: + analysis: + normalizer: + lowercase: + type: custom + filter: + - lowercase + mappings: + _source: + mode: synthetic + properties: + keyword: + type: keyword + normalizer: lowercase + keyword_with_ignore_above: + type: keyword + normalizer: lowercase + ignore_above: 10 + keyword_without_doc_values: + type: keyword + normalizer: lowercase + doc_values: false + + - do: + index: + index: test-keyword-with-normalizer + id: 1 + body: + keyword: "the Quick Brown Fox jumps over the lazy Dog" + keyword_with_ignore_above: "the Quick Brown Fox jumps over the lazy Dog" + keyword_without_doc_values: "the Quick Brown Fox jumps over the lazy Dog" + + - do: + index: + index: test-keyword-with-normalizer + id: 2 + body: + keyword: "The five BOXING wizards jump Quickly" + keyword_with_ignore_above: "The five BOXING wizards jump Quickly" + keyword_without_doc_values: "The five BOXING wizards jump Quickly" + + - do: + index: + index: test-keyword-with-normalizer + id: 3 + body: + keyword: [ "May the FORCE be with You!", "Do or Do Not, There is no Try" ] + keyword_with_ignore_above: [ "May the FORCE be with You!", "Do or Do Not, There is no Try" ] + keyword_without_doc_values: [ "May the FORCE be with You!", "Do or Do Not, There is no Try" ] + + - do: + 
mget: + index: test-keyword-with-normalizer + body: + ids: [ 1, 2, 3 ] + - match: { docs.0._index: "test-keyword-with-normalizer" } + - match: { docs.0._id: "1" } + - match: + docs.0._source: + keyword: "the Quick Brown Fox jumps over the lazy Dog" + keyword_with_ignore_above: "the Quick Brown Fox jumps over the lazy Dog" + keyword_without_doc_values: "the Quick Brown Fox jumps over the lazy Dog" + + - match: { docs.1._index: "test-keyword-with-normalizer" } + - match: { docs.1._id: "2" } + - match: + docs.1._source: + keyword: "The five BOXING wizards jump Quickly" + keyword_with_ignore_above: "The five BOXING wizards jump Quickly" + keyword_without_doc_values: "The five BOXING wizards jump Quickly" + + - match: { docs.2._index: "test-keyword-with-normalizer" } + - match: { docs.2._id: "3" } + - match: + docs.2._source: + keyword: [ "May the FORCE be with You!", "Do or Do Not, There is no Try" ] + keyword_with_ignore_above: [ "May the FORCE be with You!", "Do or Do Not, There is no Try" ] + keyword_without_doc_values: [ "May the FORCE be with You!", "Do or Do Not, There is no Try" ] + --- stored text: - requires: diff --git a/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java index 9645b4397df4f..d130f37c3e8eb 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java @@ -89,6 +89,7 @@ public final class KeywordFieldMapper extends FieldMapper { public static final String CONTENT_TYPE = "keyword"; static final NodeFeature KEYWORD_DIMENSION_IGNORE_ABOVE = new NodeFeature("mapper.keyword_dimension_ignore_above"); + static final NodeFeature KEYWORD_NORMALIZER_SYNTHETIC_SOURCE = new NodeFeature("mapper.keyword_normalizer_synthetic_source"); public static class Defaults { public static final FieldType FIELD_TYPE; @@ -856,7 +857,7 @@ public boolean 
hasNormalizer() { private final Script script; private final ScriptCompiler scriptCompiler; private final IndexVersion indexCreatedVersion; - private final boolean storeIgnored; + private final boolean isSyntheticSource; private final IndexAnalyzers indexAnalyzers; @@ -866,7 +867,7 @@ private KeywordFieldMapper( KeywordFieldType mappedFieldType, MultiFields multiFields, CopyTo copyTo, - boolean storeIgnored, + boolean isSyntheticSource, Builder builder ) { super(simpleName, mappedFieldType, multiFields, copyTo, builder.script.get() != null, builder.onScriptError.getValue()); @@ -881,7 +882,7 @@ private KeywordFieldMapper( this.indexAnalyzers = builder.indexAnalyzers; this.scriptCompiler = builder.scriptCompiler; this.indexCreatedVersion = builder.indexCreatedVersion; - this.storeIgnored = storeIgnored; + this.isSyntheticSource = isSyntheticSource; } @Override @@ -916,7 +917,7 @@ private void indexValue(DocumentParserContext context, String value) { if (value.length() > fieldType().ignoreAbove()) { context.addIgnoredField(fullPath()); - if (storeIgnored) { + if (isSyntheticSource) { // Save a copy of the field so synthetic source can load it context.doc().add(new StoredField(originalName(), new BytesRef(value))); } @@ -1026,6 +1027,11 @@ private String originalName() { @Override protected SyntheticSourceMode syntheticSourceMode() { + if (hasNormalizer()) { + // NOTE: no matter if we have doc values or not we use a stored field to reconstruct the original value + // whose doc values would be altered by the normalizer + return SyntheticSourceMode.FALLBACK; + } if (fieldType.stored() || hasDocValues) { return SyntheticSourceMode.NATIVE; } @@ -1047,11 +1053,6 @@ public SourceLoader.SyntheticFieldLoader syntheticFieldLoader(String simpleName) "field [" + fullPath() + "] of type [" + typeName() + "] doesn't support synthetic source because it declares copy_to" ); } - if (hasNormalizer()) { - throw new IllegalArgumentException( - "field [" + fullPath() + "] of type [" + 
typeName() + "] doesn't support synthetic source because it declares a normalizer" - ); - } if (syntheticSourceMode() != SyntheticSourceMode.NATIVE) { return super.syntheticFieldLoader(); diff --git a/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java b/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java index 6dce9d6c7b86e..63bbef061c61f 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java @@ -33,6 +33,7 @@ public Set getFeatures() { NodeMappingStats.SEGMENT_LEVEL_FIELDS_STATS, BooleanFieldMapper.BOOLEAN_DIMENSION, ObjectMapper.SUBOBJECTS_AUTO, + KeywordFieldMapper.KEYWORD_NORMALIZER_SYNTHETIC_SOURCE, SourceFieldMapper.SYNTHETIC_SOURCE_STORED_FIELDS_ADVANCE_FIX ); } diff --git a/test/framework/src/main/java/org/elasticsearch/index/mapper/KeywordFieldSyntheticSourceSupport.java b/test/framework/src/main/java/org/elasticsearch/index/mapper/KeywordFieldSyntheticSourceSupport.java index 6abe923851318..2f452161b10ca 100644 --- a/test/framework/src/main/java/org/elasticsearch/index/mapper/KeywordFieldSyntheticSourceSupport.java +++ b/test/framework/src/main/java/org/elasticsearch/index/mapper/KeywordFieldSyntheticSourceSupport.java @@ -21,8 +21,6 @@ import java.util.stream.Collectors; import java.util.stream.Stream; -import static org.hamcrest.Matchers.equalTo; - public class KeywordFieldSyntheticSourceSupport implements MapperTestCase.SyntheticSourceSupport { private final Integer ignoreAbove; private final boolean allIgnored; @@ -128,11 +126,6 @@ private void mapping(XContentBuilder b) throws IOException { @Override public List invalidExample() throws IOException { - return List.of( - new MapperTestCase.SyntheticSourceInvalidExample( - equalTo("field [field] of type [keyword] doesn't support synthetic source because it declares a normalizer"), - b -> b.field("type", "keyword").field("normalizer", "lowercase") - ) - ); + 
return List.of(); } }