Skip to content

Commit 4b8f36c

Browse files
Make Html5libTest handle double-escaped tests
This change makes Html5libTest correctly handle tests in the html5lib-tests suite which have cases with so-called “double-escaped” “input” and “output” values — for example, values that contain the literals “\\u0000” and “\\uFFFD” rather than “\u0000” and “\uFFFD”.
1 parent 99d4827 commit 4b8f36c

File tree

1 file changed

+12
-1
lines changed

1 file changed

+12
-1
lines changed

test-src/nu/validator/htmlparser/test/Html5libTest.java

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,10 @@
2222

2323
package nu.validator.htmlparser.test;
2424

25+
import java.io.ByteArrayInputStream;
2526
import java.io.IOException;
2627
import java.net.URISyntaxException;
28+
import java.nio.charset.StandardCharsets;
2729
import java.nio.file.FileVisitResult;
2830
import java.nio.file.Files;
2931
import java.nio.file.Path;
@@ -52,7 +54,7 @@ public void testEncoding() throws Exception {
5254
public void testTokenizer() throws Exception {
5355
Files.walkFileTree(testDir.resolve("tokenizer"),
5456
new TestVisitor(true, ".test", file -> //
55-
new TokenizerTester(Files.newInputStream(file)).runTests()));
57+
new TokenizerTester(getDoubleEscapedInput(file)).runTests()));
5658
if (TokenizerTester.exitStatus != 0) {
5759
assert false : "Tokenizer test failed";
5860
}
@@ -67,6 +69,15 @@ public void testTree() throws Exception {
6769
}
6870
}
6971

72+
/**
 * Loads a test file into memory and collapses "double-escaped" Unicode
 * escape prefixes before the content is handed to the tokenizer tester.
 *
 * <p>The whole file is read, decoded as UTF-8, and every occurrence of two
 * consecutive backslashes followed by the letter 'u' is replaced with a
 * single backslash followed by 'u'. The result is re-encoded as UTF-8 and
 * exposed as an in-memory stream. The replacement is applied to the entire
 * file text, not only to individual test cases.
 *
 * @param file path of the test file to read
 * @return an in-memory stream over the rewritten UTF-8 bytes
 * @throws IOException if the file cannot be read
 */
private ByteArrayInputStream getDoubleEscapedInput(Path file)
73+
throws IOException {
74+
byte[] fileBytes = Files.readAllBytes(file);
75+
String fileContent = new String(fileBytes, StandardCharsets.UTF_8);
76+
// The first literal denotes three characters (backslash, backslash, 'u');
// the second denotes two (backslash, 'u'). String.replace is a plain,
// non-regex substitution applied to every occurrence.
String unescapedContent = fileContent.replace("\\\\u", "\\u");
77+
byte[] newBytes = unescapedContent.getBytes(StandardCharsets.UTF_8);
78+
return new ByteArrayInputStream(newBytes);
79+
}
80+
7081
private interface TestConsumer extends Consumer<Path> {
7182

7283
@Override

0 commit comments

Comments
 (0)