Skip to content

Commit 5420780

Browse files
authored
Bug fix, last optimised value in ESUTF8StreamJsonParser kept old value. (#135184)
In #134790 we introduced a bug that was caught by our tests.; The problem manifested itself when a multi field mapper would call `getText()` which would set `_tokenIncomplete` to `false` after the `getOptimisedText()`. This would evaluate the condition `_currToken == JsonToken.VALUE_STRING && _tokenIncomplete && stringEnd > 0` to false and the `lastOptimisedValue` would not be reset. We changed the code to always reset the `lastOptimisedValue` when a `next*()` method is called. Furthermore, we introduced a randomised unit test that creates two `XContentParsers` and runs one as a baseline using none of the optimised code and the other one is accessing both optimised and non optimised in a random pattern. This test was able to catch both #134770 & #135256. Fixes #135256
1 parent 651314e commit 5420780

File tree

6 files changed

+156
-15
lines changed

6 files changed

+156
-15
lines changed

libs/x-content/impl/src/main/java/org/elasticsearch/xcontent/provider/json/ESUTF8StreamJsonParser.java

Lines changed: 16 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,12 @@
2525
import java.util.ArrayList;
2626
import java.util.List;
2727

28+
/**
29+
* Provides the method getValueAsText that is a best-effort optimization for UTF8 fields. If the
30+
* {@link UTF8StreamJsonParser} has already parsed the text, then the caller should fall back to getText.
31+
* This is sufficient because when we call getText, jackson stores the parsed UTF-16 value for future calls.
32+
* Once we've already got the parsed UTF-16 value, the optimization isn't necessary.
33+
*/
2834
public class ESUTF8StreamJsonParser extends UTF8StreamJsonParser implements OptimizedTextCapable {
2935
protected int stringEnd = -1;
3036
protected int stringLength;
@@ -53,6 +59,7 @@ public ESUTF8StreamJsonParser(
5359
*/
5460
@Override
5561
public Text getValueAsText() throws IOException {
62+
// _tokenIncomplete is true when UTF8StreamJsonParser has already processed this value.
5663
if (_currToken == JsonToken.VALUE_STRING && _tokenIncomplete) {
5764
if (lastOptimisedValue != null) {
5865
return new Text(new XContentString.UTF8Bytes(lastOptimisedValue), stringLength);
@@ -148,33 +155,33 @@ protected Text _finishAndReturnText() throws IOException {
148155

149156
@Override
150157
public JsonToken nextToken() throws IOException {
151-
maybeResetCurrentTokenState();
152-
stringEnd = -1;
158+
resetCurrentTokenState();
153159
return super.nextToken();
154160
}
155161

156162
@Override
157163
public boolean nextFieldName(SerializableString str) throws IOException {
158-
maybeResetCurrentTokenState();
159-
stringEnd = -1;
164+
resetCurrentTokenState();
160165
return super.nextFieldName(str);
161166
}
162167

163168
@Override
164169
public String nextFieldName() throws IOException {
165-
maybeResetCurrentTokenState();
166-
stringEnd = -1;
170+
resetCurrentTokenState();
167171
return super.nextFieldName();
168172
}
169173

170174
/**
171-
* Resets the current token state before moving to the next.
175+
* Resets the current token state before moving to the next. It resets the _inputPtr and the
176+
* _tokenIncomplete only if {@link UTF8StreamJsonParser#getText()} or {@link UTF8StreamJsonParser#getValueAsString()}
177+
* hasn't run yet.
172178
*/
173-
private void maybeResetCurrentTokenState() {
179+
private void resetCurrentTokenState() {
174180
if (_currToken == JsonToken.VALUE_STRING && _tokenIncomplete && stringEnd > 0) {
175181
_inputPtr = stringEnd;
176182
_tokenIncomplete = false;
177-
lastOptimisedValue = null;
178183
}
184+
lastOptimisedValue = null;
185+
stringEnd = -1;
179186
}
180187
}

libs/x-content/impl/src/main/java/org/elasticsearch/xcontent/provider/json/JsonXContentParser.java

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
import com.fasterxml.jackson.core.JsonToken;
1717
import com.fasterxml.jackson.core.exc.InputCoercionException;
1818
import com.fasterxml.jackson.core.exc.StreamConstraintsException;
19+
import com.fasterxml.jackson.core.filter.FilteringParserDelegate;
1920
import com.fasterxml.jackson.core.io.JsonEOFException;
2021

2122
import org.elasticsearch.core.IOUtils;
@@ -26,6 +27,7 @@
2627
import org.elasticsearch.xcontent.XContentParserConfiguration;
2728
import org.elasticsearch.xcontent.XContentString;
2829
import org.elasticsearch.xcontent.XContentType;
30+
import org.elasticsearch.xcontent.provider.OptimizedTextCapable;
2931
import org.elasticsearch.xcontent.provider.XContentParserConfigurationImpl;
3032
import org.elasticsearch.xcontent.support.AbstractXContentParser;
3133

@@ -143,7 +145,19 @@ public String text() throws IOException {
143145

144146
@Override
145147
public XContentString optimizedText() throws IOException {
146-
// TODO: enable utf-8 parsing optimization once verified it is completely safe
148+
if (currentToken().isValue() == false) {
149+
throwOnNoText();
150+
}
151+
var parser = this.parser;
152+
if (parser instanceof FilteringParserDelegate delegate) {
153+
parser = delegate.delegate();
154+
}
155+
if (parser instanceof OptimizedTextCapable optimizedTextCapableParser) {
156+
var bytesRef = optimizedTextCapableParser.getValueAsText();
157+
if (bytesRef != null) {
158+
return bytesRef;
159+
}
160+
}
147161
return new Text(text());
148162
}
149163

libs/x-content/impl/src/test/java/org/elasticsearch/xcontent/provider/cbor/ESCborParserTests.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
import java.util.Map;
2424

2525
import static org.hamcrest.Matchers.equalTo;
26+
import static org.hamcrest.Matchers.nullValue;
2627

2728
public class ESCborParserTests extends ESTestCase {
2829

@@ -54,8 +55,15 @@ private void testStringValue(String expected) throws IOException {
5455
assertThat(text.hasBytes(), equalTo(true));
5556
assertThat(text.stringLength(), equalTo(expected.length()));
5657
assertThat(text.string(), equalTo(expected));
58+
// Retrieve twice
5759
assertThat(parser.getValueAsText().string(), equalTo(expected));
5860
assertThat(parser.getValueAsString(), equalTo(expected));
61+
// Use the getText() to ensure _tokenIncomplete works
62+
assertThat(parser.getText(), equalTo(expected));
63+
// The optimisation is not used after the getText()
64+
assertThat(parser.getValueAsText(), nullValue());
65+
// Original CBOR getValueAsString works.
66+
assertThat(parser.getValueAsString(), equalTo(expected));
5967
assertThat(parser.nextToken(), equalTo(JsonToken.END_OBJECT));
6068
}
6169
}

libs/x-content/impl/src/test/java/org/elasticsearch/xcontent/provider/json/ESUTF8StreamJsonParserTests.java

Lines changed: 109 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,14 +13,23 @@
1313
import com.fasterxml.jackson.core.JsonParser;
1414
import com.fasterxml.jackson.core.JsonToken;
1515

16+
import org.elasticsearch.common.bytes.BytesArray;
17+
import org.elasticsearch.common.xcontent.XContentHelper;
1618
import org.elasticsearch.core.CheckedConsumer;
1719
import org.elasticsearch.test.ESTestCase;
20+
import org.elasticsearch.xcontent.FilterXContentParserWrapper;
21+
import org.elasticsearch.xcontent.XContentParser;
22+
import org.elasticsearch.xcontent.XContentParserConfiguration;
1823
import org.elasticsearch.xcontent.XContentString;
24+
import org.elasticsearch.xcontent.XContentType;
25+
import org.elasticsearch.xcontent.provider.XContentParserConfigurationImpl;
1926
import org.hamcrest.Matchers;
2027

2128
import java.io.IOException;
2229
import java.nio.charset.StandardCharsets;
2330
import java.util.Locale;
31+
import java.util.stream.Collectors;
32+
import java.util.stream.IntStream;
2433

2534
public class ESUTF8StreamJsonParserTests extends ESTestCase {
2635

@@ -272,12 +281,17 @@ public void testGetValueRandomized() throws IOException {
272281
var text = parser.getValueAsText();
273282
assertTextRef(text.bytes(), currVal);
274283
assertThat(text.stringLength(), Matchers.equalTo(currVal.length()));
275-
276-
// Retrieve it twice to ensure it works as expected
284+
// Retrieve it a second time
277285
text = parser.getValueAsText();
286+
assertThat(text, Matchers.notNullValue());
278287
assertTextRef(text.bytes(), currVal);
279288
assertThat(text.stringLength(), Matchers.equalTo(currVal.length()));
289+
// Use getText()
290+
assertThat(parser.getText(), Matchers.equalTo(text.string()));
291+
// After retrieving it with getText() we do not use the optimised value anymore.
292+
assertThat(parser.getValueAsText(), Matchers.nullValue());
280293
} else {
294+
assertThat(parser.getText(), Matchers.notNullValue());
281295
assertThat(parser.getValueAsText(), Matchers.nullValue());
282296
assertThat(parser.getValueAsString(), Matchers.equalTo(currVal));
283297
// Retrieve it twice to ensure it works as expected
@@ -288,4 +302,97 @@ public void testGetValueRandomized() throws IOException {
288302
});
289303
}
290304

305+
/**
306+
* This test compares the retrieval of an optimised text against the baseline.
307+
*/
308+
public void testOptimisedParser() throws Exception {
309+
for (int i = 0; i < 200; i++) {
310+
String json = randomJsonInput(randomIntBetween(1, 6));
311+
try (
312+
var baselineParser = XContentHelper.createParser(
313+
XContentParserConfiguration.EMPTY,
314+
new BytesArray(json),
315+
XContentType.JSON
316+
);
317+
var optimisedParser = TestXContentParser.create(json)
318+
) {
319+
var expected = baselineParser.mapOrdered();
320+
var actual = optimisedParser.mapOrdered();
321+
assertThat(expected, Matchers.equalTo(actual));
322+
}
323+
}
324+
}
325+
326+
private String randomJsonInput(int depth) {
327+
StringBuilder sb = new StringBuilder();
328+
sb.append('{');
329+
int numberOfFields = randomIntBetween(1, 10);
330+
for (int i = 0; i < numberOfFields; i++) {
331+
sb.append("\"k-").append(randomAlphanumericOfLength(10)).append("\":");
332+
if (depth == 0 || randomBoolean()) {
333+
if (randomIntBetween(0, 9) == 0) {
334+
sb.append(
335+
IntStream.range(0, randomIntBetween(1, 10))
336+
.mapToObj(ignored -> randomUTF8Value())
337+
.collect(Collectors.joining(",", "[", "]"))
338+
);
339+
} else {
340+
sb.append(randomUTF8Value());
341+
}
342+
} else {
343+
sb.append(randomJsonInput(depth - 1));
344+
}
345+
if (i < numberOfFields - 1) {
346+
sb.append(',');
347+
}
348+
}
349+
sb.append("}");
350+
return sb.toString();
351+
}
352+
353+
private String randomUTF8Value() {
354+
return "\"" + buildRandomInput(randomIntBetween(10, 50)).input + "\"";
355+
}
356+
357+
/**
358+
* This XContentParser introduces a random mix of getText() and getOptimisedText()
359+
* to simulate different access patterns for optimised fields.
360+
*/
361+
private static class TestXContentParser extends FilterXContentParserWrapper {
362+
363+
TestXContentParser(XContentParser delegate) {
364+
super(delegate);
365+
}
366+
367+
static TestXContentParser create(String input) throws IOException {
368+
JsonFactory factory = new ESJsonFactoryBuilder().build();
369+
assertThat(factory, Matchers.instanceOf(ESJsonFactory.class));
370+
371+
JsonParser parser = factory.createParser(StandardCharsets.UTF_8.encode(input).array());
372+
assertThat(parser, Matchers.instanceOf(ESUTF8StreamJsonParser.class));
373+
return new TestXContentParser(new JsonXContentParser(XContentParserConfigurationImpl.EMPTY, parser));
374+
}
375+
376+
@Override
377+
public String text() throws IOException {
378+
if (randomIntBetween(0, 9) < 8) {
379+
return super.text();
380+
} else {
381+
return super.optimizedText().string();
382+
}
383+
}
384+
385+
@Override
386+
public XContentString optimizedText() throws IOException {
387+
int extraCalls = randomIntBetween(0, 5);
388+
for (int i = 0; i < extraCalls; i++) {
389+
if (randomBoolean()) {
390+
super.optimizedText();
391+
} else {
392+
super.text();
393+
}
394+
}
395+
return super.optimizedText();
396+
}
397+
}
291398
}

rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/mapping/30_multi_field_keyword.yml

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
Keyword with escaped characters as multi-field:
33
- requires:
44
cluster_features: [ "mapper.multi_field.unicode_optimisation_fix" ]
5-
reason: "requires a fix (#134770)"
5+
reason: "Captures the scenarios in #134770 & 135256"
66
- do:
77
indices.create:
88
index: test
@@ -14,14 +14,20 @@ Keyword with escaped characters as multi-field:
1414
fields:
1515
bar:
1616
type: keyword
17+
bar-text:
18+
type: text
19+
my-ip:
20+
type: ip
1721

22+
# Ensure the IP is correctly parsed after a multi-field mapping that combines optimised and non-optimised fields
1823
- do:
1924
index:
2025
index: test
2126
id: "1"
2227
refresh: true
2328
body:
2429
foo: "c:\\windows\\system32\\svchost.exe"
30+
my-ip: "127.0.0.1"
2531

2632
- do:
2733
search:

server/src/test/java/org/elasticsearch/common/xcontent/json/JsonXContentTests.java

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -59,8 +59,7 @@ public void testOptimizedTextHasBytes() throws Exception {
5959
assertSame(XContentParser.Token.FIELD_NAME, parser.nextToken());
6060
assertTrue(parser.nextToken().isValue());
6161
Text text = (Text) parser.optimizedText();
62-
// TODO: uncomment after utf8 optimized parsing has been enabled again:
63-
// assertTrue(text.hasBytes());
62+
assertTrue(text.hasBytes());
6463
assertThat(text.string(), equalTo("foo"));
6564
}
6665
}

0 commit comments

Comments
 (0)