diff --git a/docs/reference/elasticsearch/mapping-reference/keyword.md b/docs/reference/elasticsearch/mapping-reference/keyword.md index c392a71b59499..51441abcd5213 100644 --- a/docs/reference/elasticsearch/mapping-reference/keyword.md +++ b/docs/reference/elasticsearch/mapping-reference/keyword.md @@ -232,6 +232,42 @@ Will become: } ``` +If `null_value` is configured, `null` values are replaced with the `null_value` in synthetic source: + +$$$synthetic-source-keyword-example-null-value$$$ + +```console +PUT idx +{ + "settings": { + "index": { + "mapping": { + "source": { + "mode": "synthetic" + } + } + } + }, + "mappings": { + "properties": { + "kwd": { "type": "keyword", "null_value": "NA" } + } + } +} +PUT idx/_doc/1 +{ + "kwd": ["foo", null, "bar"] +} +``` + +Will become: + +```console-result +{ + "kwd": ["bar", "foo", "NA"] +} +``` + ## Constant keyword field type [constant-keyword-field-type] diff --git a/docs/reference/elasticsearch/mapping-reference/text.md b/docs/reference/elasticsearch/mapping-reference/text.md index 7295f2233d777..ff7d698c4bf61 100644 --- a/docs/reference/elasticsearch/mapping-reference/text.md +++ b/docs/reference/elasticsearch/mapping-reference/text.md @@ -104,11 +104,17 @@ Synthetic `_source` is Generally Available only for TSDB indices (indices that h :::: -`text` fields support [synthetic `_source`](/reference/elasticsearch/mapping-reference/mapping-source-field.md#synthetic-source) if they have a [`keyword`](/reference/elasticsearch/mapping-reference/keyword.md#keyword-synthetic-source) sub-field that supports synthetic `_source` or if the `text` field sets `store` to `true`. Either way, it may not have [`copy_to`](/reference/elasticsearch/mapping-reference/copy-to.md). +`text` fields can use a [`keyword`](/reference/elasticsearch/mapping-reference/keyword.md#keyword-synthetic-source) sub-field to support [synthetic `_source`](/reference/elasticsearch/mapping-reference/mapping-source-field.md#synthetic-source) without storing values of the text field itself. -If using a sub-`keyword` field, then the values are sorted in the same way as a `keyword` field’s values are sorted. By default, that means sorted with duplicates removed. So: +In this case, the synthetic source of the `text` field will have the same [modifications](/reference/elasticsearch/mapping-reference/mapping-source-field.md#synthetic-source) as a `keyword` field. -$$$synthetic-source-text-example-default$$$ +These modifications can impact usage of `text` fields: +* Reordering text fields can have an effect on [phrase](/reference/query-languages/query-dsl/query-dsl-match-query-phrase.md) and [span](/reference/query-languages/query-dsl/span-queries.md) queries. See the discussion about [`position_increment_gap`](/reference/elasticsearch/mapping-reference/position-increment-gap.md) for more details. You can avoid this by making sure the `slop` parameter on the phrase queries is lower than the `position_increment_gap`. This is the default. +* Handling of `null` values is different. `text` fields ignore `null` values, but `keyword` fields support replacing `null` values with a value specified in the `null_value` parameter. This replacement is represented in synthetic source. + +For example: + +$$$synthetic-source-text-example-multi-field$$$ ```console PUT idx @@ -127,8 +133,9 @@ PUT idx "text": { "type": "text", "fields": { - "raw": { - "type": "keyword" + "kwd": { + "type": "keyword", + "null_value": "NA" } } } @@ -138,9 +145,10 @@ PUT idx PUT idx/_doc/1 { "text": [ + null, "the quick brown fox", "the quick brown fox", - "jumped over the lazy dog" + "jumped over the lazy dog", ] } ``` @@ -150,18 +158,15 @@ Will become: ```console-result { "text": [ - "jumped over the lazy dog", + "jumped over the lazy dog" + "NA", "the quick brown fox" ] } ``` -::::{note} -Reordering text fields can have an effect on [phrase](/reference/query-languages/query-dsl/query-dsl-match-query-phrase.md) and [span](/reference/query-languages/query-dsl/span-queries.md) queries. See the discussion about [`position_increment_gap`](/reference/elasticsearch/mapping-reference/position-increment-gap.md) for more detail. You can avoid this by making sure the `slop` parameter on the phrase queries is lower than the `position_increment_gap`. This is the default. -:::: - -If the `text` field sets `store` to true then order and duplicates are preserved. +If the `text` field sets `store` to `true` then the sub-field is not used and no modifications are applied. For example: $$$synthetic-source-text-example-stored$$$ @@ -179,7 +184,15 @@ PUT idx }, "mappings": { "properties": { - "text": { "type": "text", "store": true } + "text": { + "type": "text", + "store": true, + "fields": { + "raw": { + "type": "keyword" + } + } + } } } } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java index d77817915f387..08cef586e1438 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java @@ -1087,6 +1087,13 @@ public Builder builder(BlockFactory factory, int expectedCount) { * using whatever */ private BlockSourceReader.LeafIteratorLookup blockReaderDisiLookup(BlockLoaderContext blContext) { + if (isSyntheticSource && syntheticSourceDelegate != null) { + // Since we are using synthetic source and a delegate, we can't use this field + // to determine if the delegate has values in the document (f.e. handling of `null` is different + // between text and keyword). + return BlockSourceReader.lookupMatchingAll(); + } + if (isIndexed()) { if (getTextSearchInfo().hasNorms()) { return BlockSourceReader.lookupFromNorms(name()); diff --git a/server/src/test/java/org/elasticsearch/index/mapper/blockloader/TextFieldBlockLoaderTests.java b/server/src/test/java/org/elasticsearch/index/mapper/blockloader/TextFieldBlockLoaderTests.java index 5c9acaf18a45d..77c42740451ee 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/blockloader/TextFieldBlockLoaderTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/blockloader/TextFieldBlockLoaderTests.java @@ -62,15 +62,8 @@ public static Object expectedValue(Map fieldMapping, Object valu if (params.syntheticSource() && testContext.forceFallbackSyntheticSource() == false && usingSyntheticSourceDelegate) { var nullValue = (String) keywordMultiFieldMapping.get("null_value"); - // Due to how TextFieldMapper#blockReaderDisiLookup works this is complicated. - // If we are using lookupMatchingAll() then we'll see all docs, generate synthetic source using syntheticSourceDelegate, - // parse it and see null_value inside. - // But if we are using lookupFromNorms() we will skip the document (since the text field itself does not exist). - // Same goes for lookupFromFieldNames(). - boolean textFieldIndexed = (boolean) fieldMapping.getOrDefault("index", true); - if (value == null) { - if (textFieldIndexed == false && nullValue != null && nullValue.length() <= (int) ignoreAbove) { + if (nullValue != null && nullValue.length() <= (int) ignoreAbove) { return new BytesRef(nullValue); } @@ -82,12 +75,6 @@ public static Object expectedValue(Map fieldMapping, Object valu } var values = (List) value; - - // See note above about TextFieldMapper#blockReaderDisiLookup. - if (textFieldIndexed && values.stream().allMatch(Objects::isNull)) { - return null; - } - var indexed = values.stream() .map(s -> s == null ? nullValue : s) .filter(Objects::nonNull)