Skip to content

Commit edfaeff

Browse files
Drop UTF-32 & “UTF-16”; use UTF-16BE and UTF-16LE
This change drops all handling for UTF-32 (which is a completely invalid/unsupported encoding per the Encoding spec) and replaces the handling for “UTF-16” (which also isn’t a valid/supported encoding) with handling for the valid/supported encodings UTF-16BE and UTF-16LE.
1 parent 7fa4d80 commit edfaeff

File tree

3 files changed

+5
-10
lines changed

3 files changed

+5
-10
lines changed

src/nu/validator/htmlparser/io/Driver.java

Lines changed: 4 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -354,9 +354,8 @@ public boolean internalEncodingDeclaration(String internalCharset)
354354
throws SAXException {
355355
try {
356356
internalCharset = internalCharset.toLowerCase();
357-
Encoding cs;
358-
if ("utf-16".equals(internalCharset)
359-
|| "utf-16be".equals(internalCharset)
357+
Encoding cs = Encoding.forName(internalCharset);
358+
if ("utf-16be".equals(internalCharset)
360359
|| "utf-16le".equals(internalCharset)) {
361360
tokenizer.errTreeBuilder("Internal encoding declaration specified \u201C"
362361
+ internalCharset
@@ -448,8 +447,8 @@ protected Encoding encodingFromExternalDeclaration(String encoding)
448447
encoding = encoding.toLowerCase();
449448
try {
450449
Encoding cs = Encoding.forName(encoding);
451-
if ("utf-16".equals(cs.getCanonName())
452-
|| "utf-32".equals(cs.getCanonName())) {
450+
if ("utf-16be".equals(cs.getCanonName())
451+
|| "utf-16le".equals(cs.getCanonName())) {
453452
swallowBom = false;
454453
}
455454
return whineAboutEncodingAndReturnCanonical(encoding, cs);

src/nu/validator/htmlparser/io/Encoding.java

Lines changed: 0 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -44,8 +44,6 @@ public class Encoding {
4444

4545
public static final Encoding UTF8;
4646

47-
public static final Encoding UTF16;
48-
4947
public static final Encoding UTF16LE;
5048

5149
public static final Encoding UTF16BE;
@@ -391,7 +389,6 @@ private static void createEncoding(String name, String[] labels) {
391389

392390
static {
393391
UTF8 = forName("utf-8");
394-
UTF16 = forName("utf-16");
395392
UTF16BE = forName("utf-16be");
396393
UTF16LE = forName("utf-16le");
397394
WINDOWS1252 = forName("windows-1252");

src/nu/validator/htmlparser/io/MetaSniffer.java

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -161,8 +161,7 @@ public String getEncoding() {
161161
protected boolean tryCharset(String encoding) throws SAXException {
162162
encoding = encoding.toLowerCase();
163163
try {
164-
// XXX spec says only UTF-16
165-
if ("utf-16".equals(encoding) || "utf-16be".equals(encoding) || "utf-16le".equals(encoding) || "utf-32".equals(encoding) || "utf-32be".equals(encoding) || "utf-32le".equals(encoding)) {
164+
if ("utf-16be".equals(encoding) || "utf-16le".equals(encoding)) {
166165
this.characterEncoding = Encoding.UTF8;
167166
err("The internal character encoding declaration specified \u201C" + encoding + "\u201D which is not a rough superset of ASCII. Using \u201CUTF-8\u201D instead.");
168167
return true;

0 commit comments

Comments
 (0)