Skip to content

Commit 7f0ae26

Browse files
sideshowbarker authored and hsivonen committed
Make TokenizerTester handle double-escaped tests
This change makes TokenizerTester correctly handle tests in the html5lib-tests suite whose “input” and “output” values are so-called “double-escaped” — for example, values that contain the literals “\\u0000” and “\\uFFFD” rather than “\u0000” and “\uFFFD”.
1 parent 23a8b13 commit 7f0ae26

File tree

1 file changed

+10
-2
lines changed

1 file changed

+10
-2
lines changed

test-src/nu/validator/htmlparser/test/TokenizerTester.java

Lines changed: 10 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,7 @@
2222

2323
package nu.validator.htmlparser.test;
2424

25+
import java.io.ByteArrayInputStream;
2526
import java.io.FileInputStream;
2627
import java.io.IOException;
2728
import java.io.InputStream;
@@ -31,6 +32,9 @@
3132
import java.io.StringReader;
3233
import java.io.UnsupportedEncodingException;
3334
import java.io.Writer;
35+
import java.nio.charset.StandardCharsets;
36+
import java.nio.file.Files;
37+
import java.nio.file.Paths;
3438

3539
import nu.validator.htmlparser.common.XmlViolationPolicy;
3640
import nu.validator.htmlparser.impl.ErrorReportingTokenizer;
@@ -217,8 +221,12 @@ private void runTestInner(String inputString, JSONArray expectedTokens,
217221
public static void main(String[] args) throws TokenStreamException,
218222
RecognitionException, SAXException, IOException {
219223
for (int i = 0; i < args.length; i++) {
220-
TokenizerTester tester = new TokenizerTester(new FileInputStream(
221-
args[i]));
224+
byte[] fileBytes = Files.readAllBytes(Paths.get(args[i]));
225+
String fileContent = new String(fileBytes, StandardCharsets.UTF_8);
226+
String unescapedContent = fileContent.replace("\\\\u", "\\u");
227+
byte[] newBytes = unescapedContent.getBytes(StandardCharsets.UTF_8);
228+
ByteArrayInputStream bais = new ByteArrayInputStream(newBytes);
229+
TokenizerTester tester = new TokenizerTester(bais);
222230
tester.runTests();
223231
}
224232
}

0 commit comments

Comments
 (0)