Skip to content

Commit 2453891

Browse files
Drop superseded encoding-checking code
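This change removes encoding checks that no longer correspond to any current requirement in the Encoding spec: the Charmod C022–C024 errors (unregistered encoding names and non-preferred names) and the warnings about "should not use", EBCDIC-based, and not-widely-supported encodings, along with the flags, lookup tables, and helper methods in Encoding that supported them.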
1 parent ff4258b commit 2453891

File tree

3 files changed

+0
-121
lines changed

3 files changed

+0
-121
lines changed

src/nu/validator/htmlparser/io/Driver.java

Lines changed: 0 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -470,33 +470,6 @@ protected Encoding encodingFromExternalDeclaration(String encoding)
470470
protected Encoding whineAboutEncodingAndReturnActual(String encoding,
471471
Encoding cs) throws SAXException {
472472
String canonName = cs.getCanonName();
473-
if (!cs.isRegistered()) {
474-
if (encoding.startsWith("x-")) {
475-
tokenizer.err("The encoding \u201C"
476-
+ encoding
477-
+ "\u201D is not an IANA-registered encoding. (Charmod C022)");
478-
} else {
479-
tokenizer.err("The encoding \u201C"
480-
+ encoding
481-
+ "\u201D is not an IANA-registered encoding and did not use the \u201Cx-\u201D prefix. (Charmod C023)");
482-
}
483-
} else if (!canonName.equals(encoding)) {
484-
tokenizer.err("The encoding \u201C"
485-
+ encoding
486-
+ "\u201D is not the preferred name of the character encoding in use. The preferred name is \u201C"
487-
+ canonName + "\u201D. (Charmod C024)");
488-
}
489-
if (cs.isShouldNot()) {
490-
tokenizer.warn("Authors should not use the character encoding \u201C"
491-
+ encoding
492-
+ "\u201D. It is recommended to use \u201CUTF-8\u201D.");
493-
} else if (cs.isLikelyEbcdic()) {
494-
tokenizer.warn("Authors should not use EBCDIC-based encodings. It is recommended to use \u201CUTF-8\u201D.");
495-
} else if (cs.isObscure()) {
496-
tokenizer.warn("The character encoding \u201C"
497-
+ encoding
498-
+ "\u201D is not widely supported. Better interoperability may be achieved by using \u201CUTF-8\u201D.");
499-
}
500473
if (!canonName.equals(encoding)) {
501474
tokenizer.err(Encoding.msgNotPreferredName(encoding, canonName));
502475
}

src/nu/validator/htmlparser/io/Encoding.java

Lines changed: 0 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -52,17 +52,6 @@ public class Encoding {
5252

5353
public static final Encoding WINDOWS1252;
5454

55-
private static String[] SHOULD_NOT = { "jisx02121990", "xjis0208" };
56-
57-
private static String[] BANNED = { "bocu1", "cesu8", "compoundtext",
58-
"iscii91", "macarabic", "maccentraleurroman", "maccroatian",
59-
"maccyrillic", "macdevanagari", "macfarsi", "macgreek",
60-
"macgujarati", "macgurmukhi", "machebrew", "macicelandic",
61-
"macroman", "macromanian", "macthai", "macturkish", "macukranian",
62-
"scsu", "utf32", "utf32be", "utf32le", "utf7", "ximapmailboxname",
63-
"xjisautodetect", "xutf16bebom", "xutf16lebom", "xutf32bebom",
64-
"xutf32lebom", "xutf16oppositeendian", "xutf16platformendian",
65-
"xutf32oppositeendian", "xutf32platformendian" };
6655
private static Map<String, Encoding> encodingByLabel =
6756
new HashMap<String, Encoding>();
6857

@@ -304,12 +293,6 @@ public class Encoding {
304293

305294
private final Charset charset;
306295

307-
private final boolean obscure;
308-
309-
private final boolean shouldNot;
310-
311-
private final boolean likelyEbcdic;
312-
313296
static {
314297
Set<Encoding> encodings = new HashSet<Encoding>();
315298

@@ -345,30 +328,6 @@ asciiSuperset, isObscure(name),
345328
WINDOWS1252 = forName("windows-1252");
346329
}
347330

348-
private static boolean isObscure(String lowerCasePreferredIanaName) {
349-
return !(Arrays.binarySearch(NOT_OBSCURE, lowerCasePreferredIanaName) > -1);
350-
}
351-
352-
private static boolean isBanned(String lowerCasePreferredIanaName) {
353-
if (lowerCasePreferredIanaName.startsWith("xibm")) {
354-
return true;
355-
}
356-
return (Arrays.binarySearch(BANNED, lowerCasePreferredIanaName) > -1);
357-
}
358-
359-
private static boolean isShouldNot(String lowerCasePreferredIanaName) {
360-
return (Arrays.binarySearch(SHOULD_NOT, lowerCasePreferredIanaName) > -1);
361-
}
362-
363-
private static boolean isLikelyEbcdic(String canonName,
364-
boolean asciiSuperset) {
365-
if (!asciiSuperset) {
366-
return (canonName.startsWith("cp") || canonName.startsWith("ibm") || canonName.startsWith("xibm"));
367-
} else {
368-
return false;
369-
}
370-
}
371-
372331
public static Encoding forName(String name) {
373332
Encoding rv = encodingByLabel.get(toNameKey(name));
374333
if (rv == null) {
@@ -454,37 +413,6 @@ public String getCanonName() {
454413
return canonName;
455414
}
456415

457-
/**
458-
* Returns the likelyEbcdic.
459-
*
460-
* @return the likelyEbcdic
461-
*/
462-
public boolean isLikelyEbcdic() {
463-
return likelyEbcdic;
464-
}
465-
466-
/**
467-
* Returns the obscure.
468-
*
469-
* @return the obscure
470-
*/
471-
public boolean isObscure() {
472-
return obscure;
473-
}
474-
475-
/**
476-
* Returns the shouldNot.
477-
*
478-
* @return the shouldNot
479-
*/
480-
public boolean isShouldNot() {
481-
return shouldNot;
482-
}
483-
484-
public boolean isRegistered() {
485-
return !canonName.startsWith("x-");
486-
}
487-
488416
/**
489417
* @return
490418
* @see java.nio.charset.Charset#canEncode()

src/nu/validator/htmlparser/io/MetaSniffer.java

Lines changed: 0 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -169,28 +169,6 @@ protected boolean tryCharset(String encoding) throws SAXException {
169169
} else {
170170
Encoding cs = Encoding.forName(encoding);
171171
String canonName = cs.getCanonName();
172-
if (!cs.isRegistered()) {
173-
if (encoding.startsWith("x-")) {
174-
err("The encoding \u201C"
175-
+ encoding
176-
+ "\u201D is not an IANA-registered encoding. (Charmod C022)");
177-
} else {
178-
err("The encoding \u201C"
179-
+ encoding
180-
+ "\u201D is not an IANA-registered encoding and did not use the \u201Cx-\u201D prefix. (Charmod C023)");
181-
}
182-
} else if (!cs.getCanonName().equals(encoding)) {
183-
err("The encoding \u201C" + encoding
184-
+ "\u201D is not the preferred name of the character encoding in use. The preferred name is \u201C"
185-
+ canonName + "\u201D. (Charmod C024)");
186-
}
187-
if (cs.isShouldNot()) {
188-
warn("Authors should not use the character encoding \u201C"
189-
+ encoding
190-
+ "\u201D. It is recommended to use \u201CUTF-8\u201D.");
191-
} else if (cs.isObscure()) {
192-
warn("The character encoding \u201C" + encoding + "\u201D is not widely supported. Better interoperability may be achieved by using \u201CUTF-8\u201D.");
193-
}
194172
if (!cs.getCanonName().equals(encoding)) {
195173
err(Encoding.msgNotCanonicalName(encoding, canonName));
196174
this.characterEncoding = cs;

0 commit comments

Comments
 (0)