Skip to content

Commit dbbf4bb

Browse files
authored
[8.19] Text field block loader properly handles null values from delegate (elastic#127525) (elastic#127780)
* Text field block loader properly handles null values from delegate (elastic#127525) (cherry picked from commit 0df9d1c) # Conflicts: # docs/reference/elasticsearch/mapping-reference/keyword.md # docs/reference/elasticsearch/mapping-reference/text.md * fix * fix * fix
1 parent 024bf59 commit dbbf4bb

File tree

4 files changed

+76
-32
lines changed

4 files changed

+76
-32
lines changed

docs/reference/mapping/types/keyword.asciidoc

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -293,6 +293,43 @@ Will become:
293293
----
294294
// TEST[s/^/{"_source":/ s/\n$/}/]
295295

296+
If `null_value` is configured, `null` values are replaced with the `null_value` in synthetic source.
297+
For example:
298+
[source,console,id=synthetic-source-keyword-example-null-values]
299+
----
300+
PUT idx
301+
{
302+
"settings": {
303+
"index": {
304+
"mapping": {
305+
"source": {
306+
"mode": "synthetic"
307+
}
308+
}
309+
}
310+
},
311+
"mappings": {
312+
"properties": {
313+
"kwd": { "type": "keyword", "null_value": "NA" }
314+
}
315+
}
316+
}
317+
PUT idx/_doc/1
318+
{
319+
"kwd": ["foo", null, "bar"]
320+
}
321+
----
322+
// TEST[s/$/\nGET idx\/_doc\/1?filter_path=_source\n/]
323+
324+
Will become:
325+
326+
[source,console-result]
327+
----
328+
{
329+
"kwd": ["NA", "bar", "foo"]
330+
}
331+
----
332+
// TEST[s/^/{"_source":/ s/\n$/}/]
296333

297334
include::constant-keyword.asciidoc[]
298335

docs/reference/mapping/types/text.asciidoc

Lines changed: 31 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -168,15 +168,23 @@ be changed or removed in a future release. Elastic will work to fix
168168
any issues, but features in technical preview are not subject to the support SLA
169169
of official GA features.
170170

171-
`text` fields support <<synthetic-source,synthetic `_source`>> if they have
172-
a <<keyword-synthetic-source, `keyword`>> sub-field that supports synthetic
173-
`_source` or if the `text` field sets `store` to `true`. Either way, it may
174-
not have <<copy-to,`copy_to`>>.
175-
176-
If using a sub-`keyword` field, then the values are sorted in the same way as
177-
a `keyword` field's values are sorted. By default, that means sorted with
178-
duplicates removed. So:
179-
[source,console,id=synthetic-source-text-example-default]
171+
`text` fields can use a <<keyword-synthetic-source, `keyword`>> sub-field to support
172+
<<synthetic-source,synthetic `_source`>> without storing values of the text field itself.
173+
174+
In this case, the synthetic source of the `text` field will have the same <<synthetic-source-modifications,modifications>> as a `keyword` field.
175+
176+
These modifications can impact usage of `text` fields:
177+
* Reordering text fields can have an effect on <<query-dsl-match-query-phrase,phrase>>
178+
and <<span-queries,span>> queries. See the discussion about
179+
<<position-increment-gap,`position_increment_gap`>> for more detail. You
180+
can avoid this by making sure the `slop` parameter on the phrase queries
181+
is lower than the `position_increment_gap`. This is the default.
182+
* Handling of `null` values is different. `text` fields ignore `null` values,
183+
but `keyword` fields support replacing `null` values with a value specified in the `null_value` parameter.
184+
This replacement is represented in synthetic source.
185+
186+
For example:
187+
[source,console,id=synthetic-source-text-example-multi-field]
180188
----
181189
PUT idx
182190
{
@@ -194,8 +202,9 @@ PUT idx
194202
"text": {
195203
"type": "text",
196204
"fields": {
197-
"raw": {
198-
"type": "keyword"
205+
"kwd": {
206+
"type": "keyword",
207+
"null_value": "NA"
199208
}
200209
}
201210
}
@@ -205,6 +214,7 @@ PUT idx
205214
PUT idx/_doc/1
206215
{
207216
"text": [
217+
null,
208218
"the quick brown fox",
209219
"the quick brown fox",
210220
"jumped over the lazy dog"
@@ -218,19 +228,14 @@ Will become:
218228
----
219229
{
220230
"text": [
231+
"NA",
221232
"jumped over the lazy dog",
222233
"the quick brown fox"
223234
]
224235
}
225236
----
226237
// TEST[s/^/{"_source":/ s/\n$/}/]
227238

228-
NOTE: Reordering text fields can have an effect on <<query-dsl-match-query-phrase,phrase>>
229-
and <<span-queries,span>> queries. See the discussion about
230-
<<position-increment-gap,`position_increment_gap`>> for more detail. You
231-
can avoid this by making sure the `slop` parameter on the phrase queries
232-
is lower than the `position_increment_gap`. This is the default.
233-
234239
If the `text` field sets `store` to true then order and duplicates
235240
are preserved.
236241
[source,console,id=synthetic-source-text-example-stored]
@@ -248,7 +253,15 @@ PUT idx
248253
},
249254
"mappings": {
250255
"properties": {
251-
"text": { "type": "text", "store": true }
256+
"text": {
257+
"type": "text",
258+
"store": true,
259+
"fields": {
260+
"raw": {
261+
"type": "keyword"
262+
}
263+
}
264+
}
252265
}
253266
}
254267
}

server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1075,6 +1075,13 @@ public Builder builder(BlockFactory factory, int expectedCount) {
10751075
* using whatever
10761076
*/
10771077
private BlockSourceReader.LeafIteratorLookup blockReaderDisiLookup(BlockLoaderContext blContext) {
1078+
if (isSyntheticSource && syntheticSourceDelegate != null) {
1079+
// Since we are using synthetic source and a delegate, we can't use this field
1080+
// to determine if the delegate has values in the document (f.e. handling of `null` is different
1081+
// between text and keyword).
1082+
return BlockSourceReader.lookupMatchingAll();
1083+
}
1084+
10781085
if (isIndexed()) {
10791086
if (getTextSearchInfo().hasNorms()) {
10801087
return BlockSourceReader.lookupFromNorms(name());

server/src/test/java/org/elasticsearch/index/mapper/blockloader/TextFieldBlockLoaderTests.java

Lines changed: 1 addition & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -62,15 +62,8 @@ public static Object expectedValue(Map<String, Object> fieldMapping, Object valu
6262
if (params.syntheticSource() && testContext.forceFallbackSyntheticSource() == false && usingSyntheticSourceDelegate) {
6363
var nullValue = (String) keywordMultiFieldMapping.get("null_value");
6464

65-
// Due to how TextFieldMapper#blockReaderDisiLookup works this is complicated.
66-
// If we are using lookupMatchingAll() then we'll see all docs, generate synthetic source using syntheticSourceDelegate,
67-
// parse it and see null_value inside.
68-
// But if we are using lookupFromNorms() we will skip the document (since the text field itself does not exist).
69-
// Same goes for lookupFromFieldNames().
70-
boolean textFieldIndexed = (boolean) fieldMapping.getOrDefault("index", true);
71-
7265
if (value == null) {
73-
if (textFieldIndexed == false && nullValue != null && nullValue.length() <= (int) ignoreAbove) {
66+
if (nullValue != null && nullValue.length() <= (int) ignoreAbove) {
7467
return new BytesRef(nullValue);
7568
}
7669

@@ -82,12 +75,6 @@ public static Object expectedValue(Map<String, Object> fieldMapping, Object valu
8275
}
8376

8477
var values = (List<String>) value;
85-
86-
// See note above about TextFieldMapper#blockReaderDisiLookup.
87-
if (textFieldIndexed && values.stream().allMatch(Objects::isNull)) {
88-
return null;
89-
}
90-
9178
var indexed = values.stream()
9279
.map(s -> s == null ? nullValue : s)
9380
.filter(Objects::nonNull)

0 commit comments

Comments
 (0)