diff --git a/docs/changelog/134790.yaml b/docs/changelog/134790.yaml new file mode 100644 index 0000000000000..d59f0acb4bc06 --- /dev/null +++ b/docs/changelog/134790.yaml @@ -0,0 +1,6 @@ +pr: 134790 +summary: "Bug fix: Facilitate second retrieval of the same value" +area: Infra/Core +type: bug +issues: + - 134770 diff --git a/libs/x-content/impl/src/main/java/org/elasticsearch/xcontent/provider/json/ESUTF8StreamJsonParser.java b/libs/x-content/impl/src/main/java/org/elasticsearch/xcontent/provider/json/ESUTF8StreamJsonParser.java index c4a9823d68ecf..168dcd84fb6ee 100644 --- a/libs/x-content/impl/src/main/java/org/elasticsearch/xcontent/provider/json/ESUTF8StreamJsonParser.java +++ b/libs/x-content/impl/src/main/java/org/elasticsearch/xcontent/provider/json/ESUTF8StreamJsonParser.java @@ -27,6 +27,7 @@ public class ESUTF8StreamJsonParser extends UTF8StreamJsonParser { protected int stringEnd = -1; protected int stringLength; + protected byte[] lastOptimisedValue; private final List backslashes = new ArrayList<>(); @@ -51,6 +52,9 @@ public ESUTF8StreamJsonParser( */ public Text getValueAsText() throws IOException { if (_currToken == JsonToken.VALUE_STRING && _tokenIncomplete) { + if (lastOptimisedValue != null) { + return new Text(new XContentString.UTF8Bytes(lastOptimisedValue), stringLength); + } if (stringEnd > 0) { final int len = stringEnd - 1 - _inputPtr; return new Text(new XContentString.UTF8Bytes(_inputBuffer, _inputPtr, len), stringLength); @@ -135,37 +139,40 @@ protected Text _finishAndReturnText() throws IOException { copyPtr = backslash + 1; } System.arraycopy(inputBuffer, copyPtr, buff, destPtr, ptr - copyPtr); + lastOptimisedValue = buff; return new Text(new XContentString.UTF8Bytes(buff), stringLength); } } @Override public JsonToken nextToken() throws IOException { - if (_currToken == JsonToken.VALUE_STRING && _tokenIncomplete && stringEnd > 0) { - _inputPtr = stringEnd; - _tokenIncomplete = false; - } + maybeResetCurrentTokenState(); stringEnd = -1; return super.nextToken(); } @Override public boolean nextFieldName(SerializableString str) throws IOException { - if (_currToken == JsonToken.VALUE_STRING && _tokenIncomplete && stringEnd > 0) { - _inputPtr = stringEnd; - _tokenIncomplete = false; - } + maybeResetCurrentTokenState(); stringEnd = -1; return super.nextFieldName(str); } @Override public String nextFieldName() throws IOException { + maybeResetCurrentTokenState(); + stringEnd = -1; + return super.nextFieldName(); + } + + /** + * Resets the current token state before moving to the next. + */ + private void maybeResetCurrentTokenState() { if (_currToken == JsonToken.VALUE_STRING && _tokenIncomplete && stringEnd > 0) { _inputPtr = stringEnd; _tokenIncomplete = false; + lastOptimisedValue = null; } - stringEnd = -1; - return super.nextFieldName(); } } diff --git a/libs/x-content/impl/src/test/java/org/elasticsearch/xcontent/provider/json/ESUTF8StreamJsonParserTests.java b/libs/x-content/impl/src/test/java/org/elasticsearch/xcontent/provider/json/ESUTF8StreamJsonParserTests.java index de74def939723..5fdf000630bdd 100644 --- a/libs/x-content/impl/src/test/java/org/elasticsearch/xcontent/provider/json/ESUTF8StreamJsonParserTests.java +++ b/libs/x-content/impl/src/test/java/org/elasticsearch/xcontent/provider/json/ESUTF8StreamJsonParserTests.java @@ -57,14 +57,30 @@ public void testGetValueAsText() throws IOException { assertThat(parser.nextToken(), Matchers.equalTo(JsonToken.END_OBJECT)); }); - testParseJson("{\"foo\": \"bar\\\"baz\\\"\"}", parser -> { + testParseJson("{\"foo\": [\"bar\\\"baz\\\"\", \"foobar\"]}", parser -> { assertThat(parser.nextToken(), Matchers.equalTo(JsonToken.START_OBJECT)); assertThat(parser.nextFieldName(), Matchers.equalTo("foo")); + + assertThat(parser.nextValue(), Matchers.equalTo(JsonToken.START_ARRAY)); assertThat(parser.nextValue(), Matchers.equalTo(JsonToken.VALUE_STRING)); - var text = parser.getValueAsText(); - assertThat(text, Matchers.notNullValue()); - assertTextRef(text.bytes(), "bar\"baz\""); + var firstText = parser.getValueAsText(); + assertThat(firstText, Matchers.notNullValue()); + assertTextRef(firstText.bytes(), "bar\"baz\""); + // Retrieve the value for a second time to ensure the last value is available + firstText = parser.getValueAsText(); + assertThat(firstText, Matchers.notNullValue()); + assertTextRef(firstText.bytes(), "bar\"baz\""); + + // Ensure values lastOptimisedValue is reset + assertThat(parser.nextValue(), Matchers.equalTo(JsonToken.VALUE_STRING)); + var secondTest = parser.getValueAsText(); + assertThat(secondTest, Matchers.notNullValue()); + assertTextRef(secondTest.bytes(), "foobar"); + secondTest = parser.getValueAsText(); + assertThat(secondTest, Matchers.notNullValue()); + assertTextRef(secondTest.bytes(), "foobar"); + assertThat(parser.nextValue(), Matchers.equalTo(JsonToken.END_ARRAY)); }); testParseJson("{\"foo\": \"b\\u00e5r\"}", parser -> { @@ -256,9 +272,17 @@ public void testGetValueRandomized() throws IOException { var text = parser.getValueAsText(); assertTextRef(text.bytes(), currVal); assertThat(text.stringLength(), Matchers.equalTo(currVal.length())); + + // Retrieve it twice to ensure it works as expected + text = parser.getValueAsText(); + assertTextRef(text.bytes(), currVal); + assertThat(text.stringLength(), Matchers.equalTo(currVal.length())); } else { assertThat(parser.getValueAsText(), Matchers.nullValue()); assertThat(parser.getValueAsString(), Matchers.equalTo(currVal)); + // Retrieve it twice to ensure it works as expected + assertThat(parser.getValueAsText(), Matchers.nullValue()); + assertThat(parser.getValueAsString(), Matchers.equalTo(currVal)); } } }); diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/mapping/30_multi_field_keyword.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/mapping/30_multi_field_keyword.yml new file mode 100644 index 0000000000000..bcd772b84bb39 --- /dev/null +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/mapping/30_multi_field_keyword.yml @@ -0,0 +1,49 @@ +--- +Keyword with escaped characters as multi-field: + - requires: + cluster_features: [ "mapper.multi_field.unicode_optimisation_fix" ] + reason: "requires a fix (#134770)" + - do: + indices.create: + index: test + body: + mappings: + properties: + foo: + type: keyword + fields: + bar: + type: keyword + + - do: + index: + index: test + id: "1" + refresh: true + body: + foo: "c:\\windows\\system32\\svchost.exe" + + - do: + search: + index: test + body: + query: + term: + foo: "c:\\windows\\system32\\svchost.exe" + + - match: { "hits.total.value": 1 } + - match: + hits.hits.0._source.foo: "c:\\windows\\system32\\svchost.exe" + + # Test that optimisation works the same for the multi-fields as well. + - do: + search: + index: test + body: + query: + term: + foo.bar: "c:\\windows\\system32\\svchost.exe" + + - match: { "hits.total.value": 1 } + - match: + hits.hits.0._source.foo: "c:\\windows\\system32\\svchost.exe" diff --git a/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java b/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java index 0c6a3dbd00e6e..2912bf4e86921 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java @@ -46,6 +46,7 @@ public class MapperFeatures implements FeatureSpecification { static final NodeFeature IVF_NESTED_SUPPORT = new NodeFeature("mapper.ivf_nested_support"); static final NodeFeature SEARCH_LOAD_PER_SHARD = new NodeFeature("mapper.search_load_per_shard"); static final NodeFeature PATTERNED_TEXT = new NodeFeature("mapper.patterned_text"); + public static final NodeFeature MULTI_FIELD_UNICODE_OPTIMISATION_FIX = new NodeFeature("mapper.multi_field.unicode_optimisation_fix"); @Override public Set getTestFeatures() { @@ -78,7 +79,8 @@ public Set getTestFeatures() { IVF_NESTED_SUPPORT, SEARCH_LOAD_PER_SHARD, SPARSE_VECTOR_INDEX_OPTIONS_FEATURE, - PATTERNED_TEXT + PATTERNED_TEXT, + MULTI_FIELD_UNICODE_OPTIMISATION_FIX ); } }