Skip to content

Commit fd1bd79

Browse files
authored
WIP (#121463)
Under very unfortunate conditions tests that check xContent objects roundtrip parsing (like i.e. [SearchHitsTests testFromXContent](#97716) can fail when we happen to randomly pick YAML xContent type and create random (realistic)Unicode character sequences that may contain the character U+0085 (133) from the [Latin1 code page](https://de.wikipedia.org/wiki/Unicodeblock_Lateinisch-1,_Erg%C3%A4nzung). That specific character doesn't get parsed back to its original form for YAML xContent, which can lead to [rare but hard to diagnose test failures](#97716 (comment)). This change adds logic to AbstractXContentTestCase#test() which lies at the core of most of our xContent roundtrip tests that disallows test instances containing that particular character when using YAML xContent type. Closes #97716
1 parent a589e1f commit fd1bd79

File tree

2 files changed

+57
-2
lines changed

2 files changed

+57
-2
lines changed

test/framework/src/main/java/org/elasticsearch/test/AbstractXContentTestCase.java

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -145,8 +145,21 @@ private XContentTester(
145145
public void test() throws IOException {
146146
for (int runs = 0; runs < numberOfTestRuns; runs++) {
147147
XContentType xContentType = randomFrom(XContentType.values()).canonical();
148-
T testInstance = instanceSupplier.apply(xContentType);
148+
T testInstance = null;
149149
try {
150+
if (xContentType.equals(XContentType.YAML)) {
151+
testInstance = randomValueOtherThanMany(instance -> {
152+
// unicode character U+0085 (NEXT LINE (NEL)) doesn't survive YAML round trip tests (see #97716)
153+
// get a new random instance if we detect this character in the xContent output
154+
try {
155+
return toXContent.apply(instance, xContentType).utf8ToString().contains("\u0085");
156+
} catch (IOException e) {
157+
throw new RuntimeException(e);
158+
}
159+
}, () -> instanceSupplier.apply(xContentType));
160+
} else {
161+
testInstance = instanceSupplier.apply(xContentType);
162+
}
150163
BytesReference originalXContent = toXContent.apply(testInstance, xContentType);
151164
BytesReference shuffledContent = insertRandomFieldsAndShuffle(
152165
originalXContent,
@@ -173,7 +186,9 @@ public void test() throws IOException {
173186
dispose.accept(parsed);
174187
}
175188
} finally {
176-
dispose.accept(testInstance);
189+
if (testInstance != null) {
190+
dispose.accept(testInstance);
191+
}
177192
}
178193
}
179194
}

test/framework/src/test/java/org/elasticsearch/test/AbstractXContentTestCaseTests.java

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,11 +12,13 @@
1212
import com.carrotsearch.randomizedtesting.RandomizedContext;
1313

1414
import org.elasticsearch.common.bytes.BytesReference;
15+
import org.elasticsearch.xcontent.ToXContentFragment;
1516
import org.elasticsearch.xcontent.XContentBuilder;
1617
import org.elasticsearch.xcontent.XContentFactory;
1718
import org.elasticsearch.xcontent.XContentParser;
1819
import org.elasticsearch.xcontent.XContentType;
1920

21+
import java.io.IOException;
2022
import java.util.Map;
2123

2224
import static org.hamcrest.Matchers.equalTo;
@@ -49,4 +51,42 @@ public void testInsertRandomFieldsAndShuffle() throws Exception {
4951
assertThat(mapOrdered.keySet().iterator().next(), not(equalTo("field")));
5052
}
5153
}
54+
55+
private record TestToXContent(String field, String value) implements ToXContentFragment {
56+
57+
@Override
58+
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
59+
return builder.field(field, value);
60+
}
61+
}
62+
63+
public void testYamlXContentRoundtripSanitization() throws Exception {
64+
var test = new AbstractXContentTestCase<TestToXContent>() {
65+
66+
@Override
67+
protected TestToXContent createTestInstance() {
68+
// we need to randomly create both a "problematic" and an okay version in order to ensure that the sanitization code
69+
// can draw at least one okay version if polled often enough
70+
return randomBoolean() ? new TestToXContent("a\u0085b", "def") : new TestToXContent("a b", "def");
71+
}
72+
73+
@Override
74+
protected TestToXContent doParseInstance(XContentParser parser) throws IOException {
75+
assertEquals(XContentParser.Token.START_OBJECT, parser.nextToken());
76+
assertEquals(XContentParser.Token.FIELD_NAME, parser.nextToken());
77+
String name = parser.currentName();
78+
assertEquals(XContentParser.Token.VALUE_STRING, parser.nextToken());
79+
String value = parser.text();
80+
assertEquals(XContentParser.Token.END_OBJECT, parser.nextToken());
81+
return new TestToXContent(name, value);
82+
};
83+
84+
@Override
85+
protected boolean supportsUnknownFields() {
86+
return false;
87+
}
88+
};
89+
// testFromXContent runs 20 repetitions, enough to hit a YAML xcontent version very likely
90+
test.testFromXContent();
91+
}
5292
}

0 commit comments

Comments
 (0)