Skip to content

Commit 86f9bd2

Browse files
committed
Extend json parser randomized testing to include escape sequences
1 parent 1ff4df4 commit 86f9bd2

File tree

2 files changed

+85
-23
lines changed

2 files changed

+85
-23
lines changed

libs/x-content/impl/src/main/java/org/elasticsearch/xcontent/provider/json/ESUTF8StreamJsonParser.java

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,6 @@ public ESUTF8StreamJsonParser(
4646
/**
4747
* Method that will try to get underlying UTF-8 encoded bytes of the current string token.
4848
* This is only a best-effort attempt; if there is some reason the bytes cannot be retrieved, this method will return null.
49-
* Currently, this is only implemented for ascii-only strings that do not contain escaped characters.
5049
*/
5150
public Text getValueAsText() throws IOException {
5251
if (_currToken == JsonToken.VALUE_STRING && _tokenIncomplete) {

libs/x-content/impl/src/test/java/org/elasticsearch/xcontent/provider/json/ESUTF8StreamJsonParserTests.java

Lines changed: 85 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -13,16 +13,14 @@
1313
import com.fasterxml.jackson.core.JsonParser;
1414
import com.fasterxml.jackson.core.JsonToken;
1515

16-
import org.elasticsearch.common.Strings;
1716
import org.elasticsearch.core.CheckedConsumer;
1817
import org.elasticsearch.test.ESTestCase;
19-
import org.elasticsearch.xcontent.XContentBuilder;
2018
import org.elasticsearch.xcontent.XContentString;
21-
import org.elasticsearch.xcontent.json.JsonXContent;
2219
import org.hamcrest.Matchers;
2320

2421
import java.io.IOException;
2522
import java.nio.charset.StandardCharsets;
23+
import java.util.Locale;
2624

2725
public class ESUTF8StreamJsonParserTests extends ESTestCase {
2826

@@ -62,8 +60,18 @@ public void testGetValueAsText() throws IOException {
6260
assertThat(parser.nextFieldName(), Matchers.equalTo("foo"));
6361
assertThat(parser.nextValue(), Matchers.equalTo(JsonToken.VALUE_STRING));
6462

63+
var textRef = parser.getValueAsText();
64+
assertThat(textRef, Matchers.notNullValue());
65+
assertTextRef(textRef.bytes(), "bar\"baz\"");
66+
});
67+
68+
testParseJson("{\"foo\": \"b\\u00e5r\"}", parser -> {
69+
assertThat(parser.nextToken(), Matchers.equalTo(JsonToken.START_OBJECT));
70+
assertThat(parser.nextFieldName(), Matchers.equalTo("foo"));
71+
assertThat(parser.nextValue(), Matchers.equalTo(JsonToken.VALUE_STRING));
72+
6573
assertThat(parser.getValueAsText(), Matchers.nullValue());
66-
assertThat(parser.getValueAsString(), Matchers.equalTo("bar\"baz\""));
74+
assertThat(parser.getValueAsString(), Matchers.equalTo("bår"));
6775
});
6876

6977
testParseJson("{\"foo\": \"bår\"}", parser -> {
@@ -112,43 +120,98 @@ public void testGetValueAsText() throws IOException {
112120
});
113121
}
114122

115-
private boolean validForTextRef(String value) {
116-
for (char c : value.toCharArray()) {
117-
if (c == '"') {
118-
return false;
123+
// Test fixture for one JSON string value:
//   input             - the raw text written between the quotes of the JSON string (may contain escape sequences)
//   result            - the decoded string the test compares the parsed value against
//   supportsOptimized - whether the test expects getValueAsText() to return a non-null value for this input
private record TestInput(String input, String result, boolean supportsOptimized) {}
124+
125+
// Two-character JSON escape sequences used by the randomized test. For each entry the first string is
// the escape as it appears in the JSON text (a backslash followed by one character), the second is the
// single character it decodes to, and the flag says whether the test expects getValueAsText() to still
// return a non-null value for a string containing that escape. In this table only the quote, solidus and
// backslash escapes are marked true; the control-character escapes are marked false.
private static final TestInput[] ESCAPE_SEQUENCES = {
126+
new TestInput("\\b", "\b", false),
127+
new TestInput("\\t", "\t", false),
128+
new TestInput("\\n", "\n", false),
129+
new TestInput("\\f", "\f", false),
130+
new TestInput("\\r", "\r", false),
131+
new TestInput("\\\"", "\"", true),
132+
new TestInput("\\/", "/", true),
133+
new TestInput("\\\\", "\\", true) };
134+
135+
// Returns a random value that fits in a single Java char and is not a surrogate; when includeAscii is
// false, values up to 0x7f are rejected as well. The loop keeps drawing until a candidate passes both checks.
// Despite the name, supplementary code points (above 0xFFFF) are never produced.
// NOTE(review): presumably randomInt(0xFFFF) is inclusive of both 0 and 0xFFFF - confirm against ESTestCase.
private int randomCodepoint(boolean includeAscii) {
136+
while (true) {
137+
// The argument never exceeds 0xFFFF, so toChars yields a one-element array holding that value as a char.
char val = Character.toChars(randomInt(0xFFFF))[0];
138+
// Reject ASCII candidates unless the caller asked for them.
if (val <= 0x7f && includeAscii == false) {
139+
continue;
119140
}
120-
if (c == '\\') {
121-
return false;
141+
// A lone surrogate is not a valid character on its own, so draw again.
if (val >= Character.MIN_SURROGATE && val <= Character.MAX_SURROGATE) {
142+
continue;
122143
}
123-
if ((int) c < 32 || (int) c >= 128) {
124-
return false;
144+
return val;
145+
}
146+
}
147+
148+
// Builds a random JSON string body together with its expected decoded form.
// Runs `length` iterations; each iteration appends exactly one decoded character to `result`, while
// `input` may receive several characters for it (a two-character escape or a six-character \\uXXXX escape).
// The returned TestInput's supportsOptimized flag is true only if nothing appended along the way cleared it.
private TestInput buildRandomInput(int length) {
149+
StringBuilder input = new StringBuilder(length);
150+
StringBuilder result = new StringBuilder(length);
151+
// When true, every iteration takes the alphanumeric branch below, so the flag stays true.
boolean forceSupportOptimized = randomBoolean();
152+
boolean doesSupportOptimized = true;
153+
for (int i = 0; i < length; ++i) {
154+
if (forceSupportOptimized == false && randomBoolean()) {
155+
// NOTE(review): presumably randomInt(9) is inclusive of 0 and 9, making the default branch the most likely - confirm.
switch (randomInt(9)) {
156+
case 0 -> {
157+
// One of the predefined two-character escapes; the flag survives only if that escape allows it.
var escape = randomFrom(ESCAPE_SEQUENCES);
158+
input.append(escape.input());
159+
result.append(escape.result());
160+
doesSupportOptimized = doesSupportOptimized && escape.supportsOptimized();
161+
}
162+
case 1 -> {
163+
// A \\uXXXX escape (four lowercase hex digits) for a random char, ASCII included.
int value = randomCodepoint(true);
164+
input.append(String.format(Locale.ENGLISH, "\\u%04x", value));
165+
result.append(Character.toChars(value));
166+
doesSupportOptimized = false;
167+
}
168+
default -> {
169+
// A raw, unescaped non-ASCII char written identically to both input and expected result.
var value = Character.toChars(randomCodepoint(false));
170+
input.append(value);
171+
result.append(value);
172+
doesSupportOptimized = false;
173+
}
174+
}
175+
} else {
176+
// A single alphanumeric character; leaves the flag untouched.
var value = randomAlphanumericOfLength(1);
177+
input.append(value);
178+
result.append(value);
125179
}
126180
}
127-
return true;
181+
return new TestInput(input.toString(), result.toString(), doesSupportOptimized);
128182
}
129183

130184
public void testGetValueRandomized() throws IOException {
131-
XContentBuilder jsonBuilder = JsonXContent.contentBuilder().startObject();
185+
StringBuilder inputBuilder = new StringBuilder();
186+
inputBuilder.append('{');
187+
132188
final int numKeys = 128;
133189
String[] keys = new String[numKeys];
134-
String[] values = new String[numKeys];
190+
TestInput[] inputs = new TestInput[numKeys];
135191
for (int i = 0; i < numKeys; i++) {
136192
String currKey = randomAlphanumericOfLength(6);
137-
String currVal = randomUnicodeOfLengthBetween(0, 512);
138-
jsonBuilder.field(currKey, currVal);
193+
var currVal = buildRandomInput(randomInt(512));
194+
inputBuilder.append('"');
195+
inputBuilder.append(currKey);
196+
inputBuilder.append("\":\"");
197+
inputBuilder.append(currVal.input());
198+
inputBuilder.append('"');
199+
if (i < numKeys - 1) {
200+
inputBuilder.append(',');
201+
}
139202
keys[i] = currKey;
140-
values[i] = currVal;
203+
inputs[i] = currVal;
141204
}
142205

143-
jsonBuilder.endObject();
144-
testParseJson(Strings.toString(jsonBuilder), parser -> {
206+
inputBuilder.append('}');
207+
testParseJson(inputBuilder.toString(), parser -> {
145208
assertThat(parser.nextToken(), Matchers.equalTo(JsonToken.START_OBJECT));
146209
for (int i = 0; i < numKeys; i++) {
147210
assertThat(parser.nextFieldName(), Matchers.equalTo(keys[i]));
148211
assertThat(parser.nextValue(), Matchers.equalTo(JsonToken.VALUE_STRING));
149212

150-
String currVal = values[i];
151-
if (validForTextRef(currVal)) {
213+
String currVal = inputs[i].result();
214+
if (inputs[i].supportsOptimized()) {
152215
assertTextRef(parser.getValueAsText().bytes(), currVal);
153216
} else {
154217
assertThat(parser.getValueAsText(), Matchers.nullValue());

0 commit comments

Comments
 (0)