Skip to content

Commit 5cfabbe

Browse files
Drop superseded encoding-checking code
This change drops code that performs various encoding checks no longer corresponding to any current requirements in the Encoding spec.
1 parent 343d86f commit 5cfabbe

File tree

3 files changed

+0
-130
lines changed

3 files changed

+0
-130
lines changed

src/nu/validator/htmlparser/io/Driver.java

Lines changed: 0 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -453,33 +453,6 @@ protected Encoding encodingFromExternalDeclaration(String encoding)
453453
protected Encoding whineAboutEncodingAndReturnActual(String encoding,
454454
Encoding cs) throws SAXException {
455455
String canonName = cs.getCanonName();
456-
if (!cs.isRegistered()) {
457-
if (encoding.startsWith("x-")) {
458-
tokenizer.err("The encoding \u201C"
459-
+ encoding
460-
+ "\u201D is not an IANA-registered encoding. (Charmod C022)");
461-
} else {
462-
tokenizer.err("The encoding \u201C"
463-
+ encoding
464-
+ "\u201D is not an IANA-registered encoding and did not use the \u201Cx-\u201D prefix. (Charmod C023)");
465-
}
466-
} else if (!canonName.equals(encoding)) {
467-
tokenizer.err("The encoding \u201C"
468-
+ encoding
469-
+ "\u201D is not the preferred name of the character encoding in use. The preferred name is \u201C"
470-
+ canonName + "\u201D. (Charmod C024)");
471-
}
472-
if (cs.isShouldNot()) {
473-
tokenizer.warn("Authors should not use the character encoding \u201C"
474-
+ encoding
475-
+ "\u201D. It is recommended to use \u201CUTF-8\u201D.");
476-
} else if (cs.isLikelyEbcdic()) {
477-
tokenizer.warn("Authors should not use EBCDIC-based encodings. It is recommended to use \u201CUTF-8\u201D.");
478-
} else if (cs.isObscure()) {
479-
tokenizer.warn("The character encoding \u201C"
480-
+ encoding
481-
+ "\u201D is not widely supported. Better interoperability may be achieved by using \u201CUTF-8\u201D.");
482-
}
483456
if (!canonName.equals(encoding)) {
484457
tokenizer.err(Encoding.msgNotPreferredName(encoding, canonName));
485458
}

src/nu/validator/htmlparser/io/Encoding.java

Lines changed: 0 additions & 81 deletions
Original file line numberDiff line numberDiff line change
@@ -52,39 +52,13 @@ public class Encoding {
5252

5353
public static final Encoding WINDOWS1252;
5454

55-
private static String[] SHOULD_NOT = { "jisx02121990", "xjis0208" };
56-
57-
private static String[] BANNED = { "bocu1", "cesu8", "compoundtext",
58-
"iscii91", "macarabic", "maccentraleurroman", "maccroatian",
59-
"maccyrillic", "macdevanagari", "macfarsi", "macgreek",
60-
"macgujarati", "macgurmukhi", "machebrew", "macicelandic",
61-
"macroman", "macromanian", "macthai", "macturkish", "macukranian",
62-
"scsu", "utf32", "utf32be", "utf32le", "utf7", "ximapmailboxname",
63-
"xjisautodetect", "xutf16bebom", "xutf16lebom", "xutf32bebom",
64-
"xutf32lebom", "xutf16oppositeendian", "xutf16platformendian",
65-
"xutf32oppositeendian", "xutf32platformendian" };
6655
private static Map<String, Encoding> encodingByLabel =
6756
new HashMap<String, Encoding>();
6857

69-
private static String[] NOT_OBSCURE = { "big5", "big5hkscs", "eucjp",
70-
"euckr", "gb18030", "gbk", "iso2022jp", "iso2022kr", "iso88591",
71-
"iso885913", "iso885915", "iso88592", "iso88593", "iso88594",
72-
"iso88595", "iso88596", "iso88597", "iso88598", "iso88599",
73-
"koi8r", "shiftjis", "tis620", "usascii", "utf16", "utf16be",
74-
"utf16le", "utf8", "windows1250", "windows1251", "windows1252",
75-
"windows1253", "windows1254", "windows1255", "windows1256",
76-
"windows1257", "windows1258" };
77-
7858
private final String canonName;
7959

8060
private final Charset charset;
8161

82-
private final boolean obscure;
83-
84-
private final boolean shouldNot;
85-
86-
private final boolean likelyEbcdic;
87-
8862
static {
8963
Set<Encoding> encodings = new HashSet<Encoding>();
9064

@@ -119,30 +93,6 @@ asciiSuperset, isObscure(name), isShouldNot(name),
11993
WINDOWS1252 = forName("windows-1252");
12094
}
12195

122-
private static boolean isObscure(String lowerCasePreferredIanaName) {
123-
return !(Arrays.binarySearch(NOT_OBSCURE, lowerCasePreferredIanaName) > -1);
124-
}
125-
126-
private static boolean isBanned(String lowerCasePreferredIanaName) {
127-
if (lowerCasePreferredIanaName.startsWith("xibm")) {
128-
return true;
129-
}
130-
return (Arrays.binarySearch(BANNED, lowerCasePreferredIanaName) > -1);
131-
}
132-
133-
private static boolean isShouldNot(String lowerCasePreferredIanaName) {
134-
return (Arrays.binarySearch(SHOULD_NOT, lowerCasePreferredIanaName) > -1);
135-
}
136-
137-
private static boolean isLikelyEbcdic(String canonName,
138-
boolean asciiSuperset) {
139-
if (!asciiSuperset) {
140-
return (canonName.startsWith("cp") || canonName.startsWith("ibm") || canonName.startsWith("xibm"));
141-
} else {
142-
return false;
143-
}
144-
}
145-
14696
public static Encoding forName(String name) {
14797
Encoding rv = encodingByLabel.get(toNameKey(name));
14898
if (rv == null) {
@@ -216,37 +166,6 @@ public String getCanonName() {
216166
return canonName;
217167
}
218168

219-
/**
220-
* Returns the likelyEbcdic.
221-
*
222-
* @return the likelyEbcdic
223-
*/
224-
public boolean isLikelyEbcdic() {
225-
return likelyEbcdic;
226-
}
227-
228-
/**
229-
* Returns the obscure.
230-
*
231-
* @return the obscure
232-
*/
233-
public boolean isObscure() {
234-
return obscure;
235-
}
236-
237-
/**
238-
* Returns the shouldNot.
239-
*
240-
* @return the shouldNot
241-
*/
242-
public boolean isShouldNot() {
243-
return shouldNot;
244-
}
245-
246-
public boolean isRegistered() {
247-
return !canonName.startsWith("x-");
248-
}
249-
250169
/**
251170
* @return
252171
* @see java.nio.charset.Charset#canEncode()

src/nu/validator/htmlparser/io/MetaSniffer.java

Lines changed: 0 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -169,28 +169,6 @@ protected boolean tryCharset(String encoding) throws SAXException {
169169
} else {
170170
Encoding cs = Encoding.forName(encoding);
171171
String canonName = cs.getCanonName();
172-
if (!cs.isRegistered()) {
173-
if (encoding.startsWith("x-")) {
174-
err("The encoding \u201C"
175-
+ encoding
176-
+ "\u201D is not an IANA-registered encoding. (Charmod C022)");
177-
} else {
178-
err("The encoding \u201C"
179-
+ encoding
180-
+ "\u201D is not an IANA-registered encoding and did not use the \u201Cx-\u201D prefix. (Charmod C023)");
181-
}
182-
} else if (!cs.getCanonName().equals(encoding)) {
183-
err("The encoding \u201C" + encoding
184-
+ "\u201D is not the preferred name of the character encoding in use. The preferred name is \u201C"
185-
+ canonName + "\u201D. (Charmod C024)");
186-
}
187-
if (cs.isShouldNot()) {
188-
warn("Authors should not use the character encoding \u201C"
189-
+ encoding
190-
+ "\u201D. It is recommended to use \u201CUTF-8\u201D.");
191-
} else if (cs.isObscure()) {
192-
warn("The character encoding \u201C" + encoding + "\u201D is not widely supported. Better interoperability may be achieved by using \u201CUTF-8\u201D.");
193-
}
194172
if (!cs.getCanonName().equals(encoding)) {
195173
err(Encoding.msgNotCanonicalName(encoding, canonName));
196174
this.characterEncoding = cs;

0 commit comments

Comments
 (0)