Skip to content

Commit 41b60f7

Browse files
Make EncodingTester usable in testing parsed state
This change updates EncodingTester so that it also tests the result in cases where the expected character encoding cannot be determined by checking only the first 1024 bytes of the input stream. Without this change, EncodingTester is only useful for testing the output of the meta prescan. This change also allows EncodingTester to be given a directory name rather than a list of files (or a pathname with a shell wildcard). When given a directory name, it recurses into the directory looking for *.dat files and then runs the tests from those files. Without that change, we can’t easily run EncodingTester from AntRun in Maven — because we can’t use shell wildcards in the “arg” value for the Ant “java” task, and any list of files we otherwise construct within Maven ends up being put into the java arg value as a single string (a single argument), including the spaces between filenames.
1 parent d3745b8 commit 41b60f7

File tree

1 file changed

+45
-8
lines changed

1 file changed

+45
-8
lines changed

test-src/nu/validator/htmlparser/test/EncodingTester.java

Lines changed: 45 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*
22
* Copyright (c) 2007 Henri Sivonen
3-
* Copyright (c) 2008 Mozilla Foundation
3+
* Copyright (c) 2008-2020 Mozilla Foundation
44
*
55
* Permission is hereby granted, free of charge, to any person obtaining a
66
* copy of this software and associated documentation files (the "Software"),
@@ -23,6 +23,7 @@
2323

2424
package nu.validator.htmlparser.test;
2525

26+
import java.io.File;
2627
import java.io.FileInputStream;
2728
import java.io.IOException;
2829
import java.io.InputStream;
@@ -51,6 +52,13 @@ public EncodingTester(InputStream aggregateStream) {
5152
this.aggregateStream = aggregateStream;
5253
}
5354

55+
/**
56+
* Creates a tester with no aggregate stream (the field is set to null).
57+
*/
58+
public EncodingTester() {
59+
this.aggregateStream = null;
60+
}
61+
5462
private void runTests() throws IOException, SAXException {
5563
while (runTest()) {
5664
// spin
@@ -63,7 +71,7 @@ private boolean runTest() throws IOException, SAXException {
6371
}
6472
UntilHashInputStream stream = new UntilHashInputStream(aggregateStream);
6573
HtmlInputStreamReader reader = new HtmlInputStreamReader(stream, null,
66-
null, null, Heuristics.NONE);
74+
null, null, Heuristics.NONE, SNIFFING_LIMIT);
6775
Charset charset = reader.getCharset();
6876
stream.close();
6977
if (skipLabel()) {
@@ -113,16 +121,45 @@ private boolean skipLabel() throws IOException {
113121
}
114122
}
115123

124+
private void recurseDirectory(File directory) throws Throwable {
125+
if ("scripted".equals(directory.getName())) {
126+
return;
127+
}
128+
if (directory.canRead()) {
129+
File[] files = directory.listFiles();
130+
for (File file : files) {
131+
if (file.isDirectory()) {
132+
recurseDirectory(file);
133+
} else {
134+
if (!file.getName().endsWith(".dat")) {
135+
continue;
136+
}
137+
EncodingTester tester = new EncodingTester(
138+
new FileInputStream(file.getPath().toString()));
139+
tester.runTests();
140+
}
141+
}
142+
}
143+
}
144+
116145
/**
117146
* @param args
118-
* @throws SAXException
119-
* @throws IOException
147+
* @throws Throwable
120148
*/
121-
public static void main(String[] args) throws IOException, SAXException {
149+
public static void main(String[] args) throws Throwable {
122150
for (int i = 0; i < args.length; i++) {
123-
EncodingTester tester = new EncodingTester(new FileInputStream(
124-
args[i]));
125-
tester.runTests();
151+
File file = new File(args[i]);
152+
if (file.isDirectory()) {
153+
EncodingTester tester = new EncodingTester();
154+
tester.recurseDirectory(file);
155+
} else {
156+
if (!file.getName().endsWith(".dat")) {
157+
return;
158+
}
159+
EncodingTester tester = new EncodingTester(
160+
new FileInputStream(file.getPath().toString()));
161+
tester.runTests();
162+
}
126163
}
127164
System.exit(exitStatus);
128165
}

0 commit comments

Comments
 (0)