From f0b206a8c7b4c56dcb28cc4719ebf3be63a00763 Mon Sep 17 00:00:00 2001 From: gmarouli Date: Tue, 16 Sep 2025 12:15:16 +0300 Subject: [PATCH 1/6] Bug fix: Facilitate second retrieval of the same value (#134770) --- .../xcontent/provider/json/ESUTF8StreamJsonParser.java | 8 ++++++++ .../provider/json/ESUTF8StreamJsonParserTests.java | 4 ++++ 2 files changed, 12 insertions(+) diff --git a/libs/x-content/impl/src/main/java/org/elasticsearch/xcontent/provider/json/ESUTF8StreamJsonParser.java b/libs/x-content/impl/src/main/java/org/elasticsearch/xcontent/provider/json/ESUTF8StreamJsonParser.java index bf33f2b3ae663..fc10a5cda8072 100644 --- a/libs/x-content/impl/src/main/java/org/elasticsearch/xcontent/provider/json/ESUTF8StreamJsonParser.java +++ b/libs/x-content/impl/src/main/java/org/elasticsearch/xcontent/provider/json/ESUTF8StreamJsonParser.java @@ -28,6 +28,7 @@ public class ESUTF8StreamJsonParser extends UTF8StreamJsonParser implements OptimizedTextCapable { protected int stringEnd = -1; protected int stringLength; + protected byte[] lastOptimisedValue; private final List backslashes = new ArrayList<>(); @@ -53,6 +54,9 @@ public ESUTF8StreamJsonParser( @Override public Text getValueAsText() throws IOException { if (_currToken == JsonToken.VALUE_STRING && _tokenIncomplete) { + if (lastOptimisedValue != null) { + return new Text(new XContentString.UTF8Bytes(lastOptimisedValue), stringLength); + } if (stringEnd > 0) { final int len = stringEnd - 1 - _inputPtr; return new Text(new XContentString.UTF8Bytes(_inputBuffer, _inputPtr, len), stringLength); @@ -137,6 +141,7 @@ protected Text _finishAndReturnText() throws IOException { copyPtr = backslash + 1; } System.arraycopy(inputBuffer, copyPtr, buff, destPtr, ptr - copyPtr); + lastOptimisedValue = buff; return new Text(new XContentString.UTF8Bytes(buff), stringLength); } } @@ -146,6 +151,7 @@ public JsonToken nextToken() throws IOException { if (_currToken == JsonToken.VALUE_STRING && _tokenIncomplete && stringEnd > 0) { _inputPtr = stringEnd; _tokenIncomplete = false; + lastOptimisedValue = null; } stringEnd = -1; return super.nextToken(); @@ -156,6 +162,7 @@ public boolean nextFieldName(SerializableString str) throws IOException { if (_currToken == JsonToken.VALUE_STRING && _tokenIncomplete && stringEnd > 0) { _inputPtr = stringEnd; _tokenIncomplete = false; + lastOptimisedValue = null; } stringEnd = -1; return super.nextFieldName(str); @@ -166,6 +173,7 @@ public String nextFieldName() throws IOException { if (_currToken == JsonToken.VALUE_STRING && _tokenIncomplete && stringEnd > 0) { _inputPtr = stringEnd; _tokenIncomplete = false; + lastOptimisedValue = null; } stringEnd = -1; return super.nextFieldName(); diff --git a/libs/x-content/impl/src/test/java/org/elasticsearch/xcontent/provider/json/ESUTF8StreamJsonParserTests.java b/libs/x-content/impl/src/test/java/org/elasticsearch/xcontent/provider/json/ESUTF8StreamJsonParserTests.java index de74def939723..a6cd1fb8d9686 100644 --- a/libs/x-content/impl/src/test/java/org/elasticsearch/xcontent/provider/json/ESUTF8StreamJsonParserTests.java +++ b/libs/x-content/impl/src/test/java/org/elasticsearch/xcontent/provider/json/ESUTF8StreamJsonParserTests.java @@ -65,6 +65,10 @@ public void testGetValueAsText() throws IOException { var text = parser.getValueAsText(); assertThat(text, Matchers.notNullValue()); assertTextRef(text.bytes(), "bar\"baz\""); + // Retrieve the value for a second time to ensure the last value is available + text = parser.getValueAsText(); + assertThat(text, Matchers.notNullValue()); + assertTextRef(text.bytes(), "bar\"baz\""); }); testParseJson("{\"foo\": \"b\\u00e5r\"}", parser -> { From 87f9dc9a3cb86b13a0efc8e72d2077c210a254ed Mon Sep 17 00:00:00 2001 From: Mary Gouseti Date: Tue, 16 Sep 2025 12:27:02 +0300 Subject: [PATCH 2/6] Update docs/changelog/134790.yaml --- docs/changelog/134790.yaml | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 docs/changelog/134790.yaml diff --git a/docs/changelog/134790.yaml b/docs/changelog/134790.yaml new file mode 100644 index 0000000000000..d59f0acb4bc06 --- /dev/null +++ b/docs/changelog/134790.yaml @@ -0,0 +1,6 @@ +pr: 134790 +summary: "Bug fix: Facilitate second retrieval of the same value" +area: Infra/Core +type: bug +issues: + - 134770 From 8f9533064237a54dee2609a37bc872dfd29a6f44 Mon Sep 17 00:00:00 2001 From: gmarouli Date: Tue, 16 Sep 2025 14:55:16 +0300 Subject: [PATCH 3/6] Add yaml test to capture the failing use case --- .../test/mapping/30_multi_field_keyword.yml | 45 +++++++++++++++++++ 1 file changed, 45 insertions(+) create mode 100644 rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/mapping/30_multi_field_keyword.yml diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/mapping/30_multi_field_keyword.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/mapping/30_multi_field_keyword.yml new file mode 100644 index 0000000000000..18da1faaedbad --- /dev/null +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/mapping/30_multi_field_keyword.yml @@ -0,0 +1,45 @@ +--- +Keyword with escaped characters as multi-field: + - do: + indices.create: + index: test + body: + mappings: + properties: + foo: + type: keyword + fields: + bar: + type: keyword + + - do: + index: + index: test + id: "1" + refresh: true + body: + foo: "c:\\windows\\system32\\svchost.exe" + + - do: + search: + index: test + body: + query: + term: + foo: "c:\\windows\\system32\\svchost.exe" + + - match: { "hits.total.value": 1 } + - match: + hits.hits.0._source.foo: "c:\\windows\\system32\\svchost.exe" + + - do: + search: + index: test + body: + query: + term: + foo.bar: "c:\\windows\\system32\\svchost.exe" + + - match: { "hits.total.value": 1 } + - match: + hits.hits.0._source.foo: "c:\\windows\\system32\\svchost.exe" From 7812af62f9043bce2e41e7a087d6e58c2ccec31c Mon Sep 17 00:00:00 2001 From: gmarouli Date: Wed, 17 Sep 2025 12:57:55 +0300 Subject: [PATCH 4/6] Add test cluster feature --- .../rest-api-spec/test/mapping/30_multi_field_keyword.yml | 3 +++ .../java/org/elasticsearch/index/mapper/MapperFeatures.java | 6 +++++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/mapping/30_multi_field_keyword.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/mapping/30_multi_field_keyword.yml index 18da1faaedbad..e8a9f79a4bf95 100644 --- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/mapping/30_multi_field_keyword.yml +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/mapping/30_multi_field_keyword.yml @@ -1,5 +1,8 @@ --- Keyword with escaped characters as multi-field: + - requires: + cluster_features: [ "mapper.multi_field.unicode_optimisation_fix" ] + reason: "requires a fix (#134770)" - do: indices.create: index: test diff --git a/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java b/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java index 284a2de238f1e..64290c9978758 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java @@ -48,6 +48,9 @@ public class MapperFeatures implements FeatureSpecification { static final NodeFeature SEARCH_LOAD_PER_SHARD = new NodeFeature("mapper.search_load_per_shard"); static final NodeFeature PATTERNED_TEXT = new NodeFeature("mapper.patterned_text"); static final NodeFeature IGNORED_SOURCE_FIELDS_PER_ENTRY = new NodeFeature("mapper.ignored_source_fields_per_entry"); + public static final NodeFeature MULTI_FIELD_UNICODE_OPTIMISATION_FIX = new NodeFeature( + "mapper.multi_field.unicode_optimisation_fix" + ); @Override public Set getTestFeatures() { @@ -82,7 +85,8 @@ public Set getTestFeatures() { SEARCH_LOAD_PER_SHARD, SPARSE_VECTOR_INDEX_OPTIONS_FEATURE, PATTERNED_TEXT, - IGNORED_SOURCE_FIELDS_PER_ENTRY + IGNORED_SOURCE_FIELDS_PER_ENTRY, + MULTI_FIELD_UNICODE_OPTIMISATION_FIX ); } } From 466536d08c3d28315e9cbc9a1d324ed84d51850e Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Wed, 17 Sep 2025 10:11:53 +0000 Subject: [PATCH 5/6] [CI] Auto commit changes from spotless --- .../java/org/elasticsearch/index/mapper/MapperFeatures.java | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java b/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java index 64290c9978758..4397f399222ea 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java @@ -48,9 +48,7 @@ public class MapperFeatures implements FeatureSpecification { static final NodeFeature SEARCH_LOAD_PER_SHARD = new NodeFeature("mapper.search_load_per_shard"); static final NodeFeature PATTERNED_TEXT = new NodeFeature("mapper.patterned_text"); static final NodeFeature IGNORED_SOURCE_FIELDS_PER_ENTRY = new NodeFeature("mapper.ignored_source_fields_per_entry"); - public static final NodeFeature MULTI_FIELD_UNICODE_OPTIMISATION_FIX = new NodeFeature( - "mapper.multi_field.unicode_optimisation_fix" - ); + public static final NodeFeature MULTI_FIELD_UNICODE_OPTIMISATION_FIX = new NodeFeature("mapper.multi_field.unicode_optimisation_fix"); @Override public Set getTestFeatures() { From 93260ec488d51e5a4458636f8dc5042b578d0581 Mon Sep 17 00:00:00 2001 From: gmarouli Date: Wed, 17 Sep 2025 14:06:51 +0300 Subject: [PATCH 6/6] Incorporate review feedback --- .../provider/json/ESUTF8StreamJsonParser.java | 23 ++++++------- .../json/ESUTF8StreamJsonParserTests.java | 34 +++++++++++++++---- .../test/mapping/30_multi_field_keyword.yml | 1 + 3 files changed, 39 insertions(+), 19 deletions(-) diff --git a/libs/x-content/impl/src/main/java/org/elasticsearch/xcontent/provider/json/ESUTF8StreamJsonParser.java b/libs/x-content/impl/src/main/java/org/elasticsearch/xcontent/provider/json/ESUTF8StreamJsonParser.java index fc10a5cda8072..c5c5f51488520 100644 --- a/libs/x-content/impl/src/main/java/org/elasticsearch/xcontent/provider/json/ESUTF8StreamJsonParser.java +++ b/libs/x-content/impl/src/main/java/org/elasticsearch/xcontent/provider/json/ESUTF8StreamJsonParser.java @@ -148,34 +148,33 @@ protected Text _finishAndReturnText() throws IOException { @Override public JsonToken nextToken() throws IOException { - if (_currToken == JsonToken.VALUE_STRING && _tokenIncomplete && stringEnd > 0) { - _inputPtr = stringEnd; - _tokenIncomplete = false; - lastOptimisedValue = null; - } + maybeResetCurrentTokenState(); stringEnd = -1; return super.nextToken(); } @Override public boolean nextFieldName(SerializableString str) throws IOException { - if (_currToken == JsonToken.VALUE_STRING && _tokenIncomplete && stringEnd > 0) { - _inputPtr = stringEnd; - _tokenIncomplete = false; - lastOptimisedValue = null; - } + maybeResetCurrentTokenState(); stringEnd = -1; return super.nextFieldName(str); } @Override public String nextFieldName() throws IOException { + maybeResetCurrentTokenState(); + stringEnd = -1; + return super.nextFieldName(); + } + + /** + * Resets the current token state before moving to the next. + */ + private void maybeResetCurrentTokenState() { if (_currToken == JsonToken.VALUE_STRING && _tokenIncomplete && stringEnd > 0) { _inputPtr = stringEnd; _tokenIncomplete = false; lastOptimisedValue = null; } - stringEnd = -1; - return super.nextFieldName(); } } diff --git a/libs/x-content/impl/src/test/java/org/elasticsearch/xcontent/provider/json/ESUTF8StreamJsonParserTests.java b/libs/x-content/impl/src/test/java/org/elasticsearch/xcontent/provider/json/ESUTF8StreamJsonParserTests.java index a6cd1fb8d9686..5fdf000630bdd 100644 --- a/libs/x-content/impl/src/test/java/org/elasticsearch/xcontent/provider/json/ESUTF8StreamJsonParserTests.java +++ b/libs/x-content/impl/src/test/java/org/elasticsearch/xcontent/provider/json/ESUTF8StreamJsonParserTests.java @@ -57,18 +57,30 @@ public void testGetValueAsText() throws IOException { assertThat(parser.nextToken(), Matchers.equalTo(JsonToken.END_OBJECT)); }); - testParseJson("{\"foo\": \"bar\\\"baz\\\"\"}", parser -> { + testParseJson("{\"foo\": [\"bar\\\"baz\\\"\", \"foobar\"]}", parser -> { assertThat(parser.nextToken(), Matchers.equalTo(JsonToken.START_OBJECT)); assertThat(parser.nextFieldName(), Matchers.equalTo("foo")); + + assertThat(parser.nextValue(), Matchers.equalTo(JsonToken.START_ARRAY)); assertThat(parser.nextValue(), Matchers.equalTo(JsonToken.VALUE_STRING)); - var text = parser.getValueAsText(); - assertThat(text, Matchers.notNullValue()); - assertTextRef(text.bytes(), "bar\"baz\""); + var firstText = parser.getValueAsText(); + assertThat(firstText, Matchers.notNullValue()); + assertTextRef(firstText.bytes(), "bar\"baz\""); // Retrieve the value for a second time to ensure the last value is available - text = parser.getValueAsText(); - assertThat(text, Matchers.notNullValue()); - assertTextRef(text.bytes(), "bar\"baz\""); + firstText = parser.getValueAsText(); + assertThat(firstText, Matchers.notNullValue()); + assertTextRef(firstText.bytes(), "bar\"baz\""); + + // Ensure values lastOptimisedValue is reset + assertThat(parser.nextValue(), Matchers.equalTo(JsonToken.VALUE_STRING)); + var secondTest = parser.getValueAsText(); + assertThat(secondTest, Matchers.notNullValue()); + assertTextRef(secondTest.bytes(), "foobar"); + secondTest = parser.getValueAsText(); + assertThat(secondTest, Matchers.notNullValue()); + assertTextRef(secondTest.bytes(), "foobar"); + assertThat(parser.nextValue(), Matchers.equalTo(JsonToken.END_ARRAY)); }); testParseJson("{\"foo\": \"b\\u00e5r\"}", parser -> { @@ -260,9 +272,17 @@ public void testGetValueRandomized() throws IOException { var text = parser.getValueAsText(); assertTextRef(text.bytes(), currVal); assertThat(text.stringLength(), Matchers.equalTo(currVal.length())); + + // Retrieve it twice to ensure it works as expected + text = parser.getValueAsText(); + assertTextRef(text.bytes(), currVal); + assertThat(text.stringLength(), Matchers.equalTo(currVal.length())); } else { assertThat(parser.getValueAsText(), Matchers.nullValue()); assertThat(parser.getValueAsString(), Matchers.equalTo(currVal)); + // Retrieve it twice to ensure it works as expected + assertThat(parser.getValueAsText(), Matchers.nullValue()); + assertThat(parser.getValueAsString(), Matchers.equalTo(currVal)); } } }); diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/mapping/30_multi_field_keyword.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/mapping/30_multi_field_keyword.yml index e8a9f79a4bf95..bcd772b84bb39 100644 --- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/mapping/30_multi_field_keyword.yml +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/mapping/30_multi_field_keyword.yml @@ -35,6 +35,7 @@ Keyword with escaped characters as multi-field: - match: hits.hits.0._source.foo: "c:\\windows\\system32\\svchost.exe" + # Test that optimisation works the same for the multi-fields as well. - do: search: index: test