From 5e7027f7a66657d72d986a0bfbbd3f102778dc1e Mon Sep 17 00:00:00 2001 From: Mary Gouseti Date: Wed, 17 Sep 2025 17:36:01 +0300 Subject: [PATCH] [8.19] Reduce the number of fields per document (#132322) --- docs/changelog/134790.yaml | 6 +++ .../provider/json/ESUTF8StreamJsonParser.java | 27 ++++++---- .../json/ESUTF8StreamJsonParserTests.java | 32 ++++++++++-- .../test/mapping/30_multi_field_keyword.yml | 49 +++++++++++++++++++ .../index/mapper/MapperFeatures.java | 4 +- 5 files changed, 103 insertions(+), 15 deletions(-) create mode 100644 docs/changelog/134790.yaml create mode 100644 rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/mapping/30_multi_field_keyword.yml diff --git a/docs/changelog/134790.yaml b/docs/changelog/134790.yaml new file mode 100644 index 0000000000000..d59f0acb4bc06 --- /dev/null +++ b/docs/changelog/134790.yaml @@ -0,0 +1,6 @@ +pr: 134790 +summary: "Bug fix: Facilitate second retrieval of the same value" +area: Infra/Core +type: bug +issues: + - 134770 diff --git a/libs/x-content/impl/src/main/java/org/elasticsearch/xcontent/provider/json/ESUTF8StreamJsonParser.java b/libs/x-content/impl/src/main/java/org/elasticsearch/xcontent/provider/json/ESUTF8StreamJsonParser.java index c4a9823d68ecf..168dcd84fb6ee 100644 --- a/libs/x-content/impl/src/main/java/org/elasticsearch/xcontent/provider/json/ESUTF8StreamJsonParser.java +++ b/libs/x-content/impl/src/main/java/org/elasticsearch/xcontent/provider/json/ESUTF8StreamJsonParser.java @@ -27,6 +27,7 @@ public class ESUTF8StreamJsonParser extends UTF8StreamJsonParser { protected int stringEnd = -1; protected int stringLength; + protected byte[] lastOptimisedValue; private final List backslashes = new ArrayList<>(); @@ -51,6 +52,9 @@ public ESUTF8StreamJsonParser( */ public Text getValueAsText() throws IOException { if (_currToken == JsonToken.VALUE_STRING && _tokenIncomplete) { + if (lastOptimisedValue != null) { + return new Text(new XContentString.UTF8Bytes(lastOptimisedValue), stringLength); + } if (stringEnd > 0) { final int len = stringEnd - 1 - _inputPtr; return new Text(new XContentString.UTF8Bytes(_inputBuffer, _inputPtr, len), stringLength); @@ -135,37 +139,40 @@ protected Text _finishAndReturnText() throws IOException { copyPtr = backslash + 1; } System.arraycopy(inputBuffer, copyPtr, buff, destPtr, ptr - copyPtr); + lastOptimisedValue = buff; return new Text(new XContentString.UTF8Bytes(buff), stringLength); } } @Override public JsonToken nextToken() throws IOException { - if (_currToken == JsonToken.VALUE_STRING && _tokenIncomplete && stringEnd > 0) { - _inputPtr = stringEnd; - _tokenIncomplete = false; - } + maybeResetCurrentTokenState(); stringEnd = -1; return super.nextToken(); } @Override public boolean nextFieldName(SerializableString str) throws IOException { - if (_currToken == JsonToken.VALUE_STRING && _tokenIncomplete && stringEnd > 0) { - _inputPtr = stringEnd; - _tokenIncomplete = false; - } + maybeResetCurrentTokenState(); stringEnd = -1; return super.nextFieldName(str); } @Override public String nextFieldName() throws IOException { + maybeResetCurrentTokenState(); + stringEnd = -1; + return super.nextFieldName(); + } + + /** + * Resets the current token state before moving to the next. + */ + private void maybeResetCurrentTokenState() { if (_currToken == JsonToken.VALUE_STRING && _tokenIncomplete && stringEnd > 0) { _inputPtr = stringEnd; _tokenIncomplete = false; + lastOptimisedValue = null; } - stringEnd = -1; - return super.nextFieldName(); } } diff --git a/libs/x-content/impl/src/test/java/org/elasticsearch/xcontent/provider/json/ESUTF8StreamJsonParserTests.java b/libs/x-content/impl/src/test/java/org/elasticsearch/xcontent/provider/json/ESUTF8StreamJsonParserTests.java index de74def939723..5fdf000630bdd 100644 --- a/libs/x-content/impl/src/test/java/org/elasticsearch/xcontent/provider/json/ESUTF8StreamJsonParserTests.java +++ b/libs/x-content/impl/src/test/java/org/elasticsearch/xcontent/provider/json/ESUTF8StreamJsonParserTests.java @@ -57,14 +57,30 @@ public void testGetValueAsText() throws IOException { assertThat(parser.nextToken(), Matchers.equalTo(JsonToken.END_OBJECT)); }); - testParseJson("{\"foo\": \"bar\\\"baz\\\"\"}", parser -> { + testParseJson("{\"foo\": [\"bar\\\"baz\\\"\", \"foobar\"]}", parser -> { assertThat(parser.nextToken(), Matchers.equalTo(JsonToken.START_OBJECT)); assertThat(parser.nextFieldName(), Matchers.equalTo("foo")); + + assertThat(parser.nextValue(), Matchers.equalTo(JsonToken.START_ARRAY)); assertThat(parser.nextValue(), Matchers.equalTo(JsonToken.VALUE_STRING)); - var text = parser.getValueAsText(); - assertThat(text, Matchers.notNullValue()); - assertTextRef(text.bytes(), "bar\"baz\""); + var firstText = parser.getValueAsText(); + assertThat(firstText, Matchers.notNullValue()); + assertTextRef(firstText.bytes(), "bar\"baz\""); + // Retrieve the value for a second time to ensure the last value is available + firstText = parser.getValueAsText(); + assertThat(firstText, Matchers.notNullValue()); + assertTextRef(firstText.bytes(), "bar\"baz\""); + + // Ensure values lastOptimisedValue is reset + assertThat(parser.nextValue(), Matchers.equalTo(JsonToken.VALUE_STRING)); + var secondTest = parser.getValueAsText(); + assertThat(secondTest, Matchers.notNullValue()); + assertTextRef(secondTest.bytes(), "foobar"); + secondTest = parser.getValueAsText(); + assertThat(secondTest, Matchers.notNullValue()); + assertTextRef(secondTest.bytes(), "foobar"); + assertThat(parser.nextValue(), Matchers.equalTo(JsonToken.END_ARRAY)); }); testParseJson("{\"foo\": \"b\\u00e5r\"}", parser -> { @@ -256,9 +272,17 @@ public void testGetValueRandomized() throws IOException { var text = parser.getValueAsText(); assertTextRef(text.bytes(), currVal); assertThat(text.stringLength(), Matchers.equalTo(currVal.length())); + + // Retrieve it twice to ensure it works as expected + text = parser.getValueAsText(); + assertTextRef(text.bytes(), currVal); + assertThat(text.stringLength(), Matchers.equalTo(currVal.length())); } else { assertThat(parser.getValueAsText(), Matchers.nullValue()); assertThat(parser.getValueAsString(), Matchers.equalTo(currVal)); + // Retrieve it twice to ensure it works as expected + assertThat(parser.getValueAsText(), Matchers.nullValue()); + assertThat(parser.getValueAsString(), Matchers.equalTo(currVal)); } } }); diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/mapping/30_multi_field_keyword.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/mapping/30_multi_field_keyword.yml new file mode 100644 index 0000000000000..bcd772b84bb39 --- /dev/null +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/mapping/30_multi_field_keyword.yml @@ -0,0 +1,49 @@ +--- +Keyword with escaped characters as multi-field: + - requires: + cluster_features: [ "mapper.multi_field.unicode_optimisation_fix" ] + reason: "requires a fix (#134770)" + - do: + indices.create: + index: test + body: + mappings: + properties: + foo: + type: keyword + fields: + bar: + type: keyword + + - do: + index: + index: test + id: "1" + refresh: true + body: + foo: "c:\\windows\\system32\\svchost.exe" + + - do: + search: + index: test + body: + query: + term: + foo: "c:\\windows\\system32\\svchost.exe" + + - match: { "hits.total.value": 1 } + - match: + hits.hits.0._source.foo: "c:\\windows\\system32\\svchost.exe" + + # Test that optimisation works the same for the multi-fields as well. + - do: + search: + index: test + body: + query: + term: + foo.bar: "c:\\windows\\system32\\svchost.exe" + + - match: { "hits.total.value": 1 } + - match: + hits.hits.0._source.foo: "c:\\windows\\system32\\svchost.exe" diff --git a/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java b/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java index a0d2c9c9d7b6e..205a15889cf4b 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java @@ -73,6 +73,7 @@ public Set getFeatures() { "mapper.unknown_field_mapping_update_error_message" ); static final NodeFeature NPE_ON_DIMS_UPDATE_FIX = new NodeFeature("mapper.npe_on_dims_update_fix"); + public static final NodeFeature MULTI_FIELD_UNICODE_OPTIMISATION_FIX = new NodeFeature("mapper.multi_field.unicode_optimisation_fix"); @Override public Set getTestFeatures() { @@ -99,7 +100,8 @@ public Set getTestFeatures() { RESCORE_VECTOR_QUANTIZED_VECTOR_MAPPING, RESCORE_ZERO_VECTOR_QUANTIZED_VECTOR_MAPPING, USE_DEFAULT_OVERSAMPLE_VALUE_FOR_BBQ, - SPARSE_VECTOR_INDEX_OPTIONS_FEATURE + SPARSE_VECTOR_INDEX_OPTIONS_FEATURE, + MULTI_FIELD_UNICODE_OPTIMISATION_FIX ); } }