Drop superseded encoding-checking code

sideshowbarker · sideshowbarker · commit 5cfabbee4425 · 2020-09-13T10:02:18.000+09:00
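This change drops the encoding checks that no longer correspond to any
current requirement in the Encoding spec: the Charmod C022/C023/C024
errors and the "should not use", EBCDIC, and "not widely supported"
warnings, along with the Encoding fields and helpers that backed them.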
diff --git a/src/nu/validator/htmlparser/io/Driver.java b/src/nu/validator/htmlparser/io/Driver.java
@@ -453,33 +453,6 @@ protected Encoding encodingFromExternalDeclaration(String encoding)
     protected Encoding whineAboutEncodingAndReturnActual(String encoding,
             Encoding cs) throws SAXException {
         String canonName = cs.getCanonName();
-        if (!cs.isRegistered()) {
-            if (encoding.startsWith("x-")) {
-                tokenizer.err("The encoding \u201C"
-                        + encoding
-                        + "\u201D is not an IANA-registered encoding. (Charmod C022)");
-            } else {
-                tokenizer.err("The encoding \u201C"
-                        + encoding
-                        + "\u201D is not an IANA-registered encoding and did not use the \u201Cx-\u201D prefix. (Charmod C023)");
-            }
-        } else if (!canonName.equals(encoding)) {
-            tokenizer.err("The encoding \u201C"
-                    + encoding
-                    + "\u201D is not the preferred name of the character encoding in use. The preferred name is \u201C"
-                    + canonName + "\u201D. (Charmod C024)");
-        }
-        if (cs.isShouldNot()) {
-            tokenizer.warn("Authors should not use the character encoding \u201C"
-                    + encoding
-                    + "\u201D. It is recommended to use \u201CUTF-8\u201D.");
-        } else if (cs.isLikelyEbcdic()) {
-            tokenizer.warn("Authors should not use EBCDIC-based encodings. It is recommended to use \u201CUTF-8\u201D.");
-        } else if (cs.isObscure()) {
-            tokenizer.warn("The character encoding \u201C"
-                    + encoding
-                    + "\u201D is not widely supported. Better interoperability may be achieved by using \u201CUTF-8\u201D.");
-        }
         if (!canonName.equals(encoding)) {
             tokenizer.err(Encoding.msgNotPreferredName(encoding, canonName));
         }
diff --git a/src/nu/validator/htmlparser/io/Encoding.java b/src/nu/validator/htmlparser/io/Encoding.java
@@ -52,39 +52,13 @@ public class Encoding {
 
     public static final Encoding WINDOWS1252;
 
-    private static String[] SHOULD_NOT = { "jisx02121990", "xjis0208" };
-
-    private static String[] BANNED = { "bocu1", "cesu8", "compoundtext",
-            "iscii91", "macarabic", "maccentraleurroman", "maccroatian",
-            "maccyrillic", "macdevanagari", "macfarsi", "macgreek",
-            "macgujarati", "macgurmukhi", "machebrew", "macicelandic",
-            "macroman", "macromanian", "macthai", "macturkish", "macukranian",
-            "scsu", "utf32", "utf32be", "utf32le", "utf7", "ximapmailboxname",
-            "xjisautodetect", "xutf16bebom", "xutf16lebom", "xutf32bebom",
-            "xutf32lebom", "xutf16oppositeendian", "xutf16platformendian",
-            "xutf32oppositeendian", "xutf32platformendian" };
     private static Map<String, Encoding> encodingByLabel =
         new HashMap<String, Encoding>();
 
-    private static String[] NOT_OBSCURE = { "big5", "big5hkscs", "eucjp",
-            "euckr", "gb18030", "gbk", "iso2022jp", "iso2022kr", "iso88591",
-            "iso885913", "iso885915", "iso88592", "iso88593", "iso88594",
-            "iso88595", "iso88596", "iso88597", "iso88598", "iso88599",
-            "koi8r", "shiftjis", "tis620", "usascii", "utf16", "utf16be",
-            "utf16le", "utf8", "windows1250", "windows1251", "windows1252",
-            "windows1253", "windows1254", "windows1255", "windows1256",
-            "windows1257", "windows1258" };
-
     private final String canonName;
 
     private final Charset charset;
 
-    private final boolean obscure;
-
-    private final boolean shouldNot;
-
-    private final boolean likelyEbcdic;
-
     static {
         Set<Encoding> encodings = new HashSet<Encoding>();
 
@@ -119,30 +93,6 @@ asciiSuperset, isObscure(name), isShouldNot(name),
         WINDOWS1252 = forName("windows-1252");
     }
 
-    private static boolean isObscure(String lowerCasePreferredIanaName) {
-        return !(Arrays.binarySearch(NOT_OBSCURE, lowerCasePreferredIanaName) > -1);
-    }
-
-    private static boolean isBanned(String lowerCasePreferredIanaName) {
-        if (lowerCasePreferredIanaName.startsWith("xibm")) {
-            return true;
-        }
-        return (Arrays.binarySearch(BANNED, lowerCasePreferredIanaName) > -1);
-    }
-
-    private static boolean isShouldNot(String lowerCasePreferredIanaName) {
-        return (Arrays.binarySearch(SHOULD_NOT, lowerCasePreferredIanaName) > -1);
-    }
-
-    private static boolean isLikelyEbcdic(String canonName,
-            boolean asciiSuperset) {
-        if (!asciiSuperset) {
-            return (canonName.startsWith("cp") || canonName.startsWith("ibm") || canonName.startsWith("xibm"));
-        } else {
-            return false;
-        }
-    }
-
     public static Encoding forName(String name) {
         Encoding rv = encodingByLabel.get(toNameKey(name));
         if (rv == null) {
@@ -216,37 +166,6 @@ public String getCanonName() {
         return canonName;
     }
 
-    /**
-     * Returns the likelyEbcdic.
-     * 
-     * @return the likelyEbcdic
-     */
-    public boolean isLikelyEbcdic() {
-        return likelyEbcdic;
-    }
-
-    /**
-     * Returns the obscure.
-     * 
-     * @return the obscure
-     */
-    public boolean isObscure() {
-        return obscure;
-    }
-
-    /**
-     * Returns the shouldNot.
-     * 
-     * @return the shouldNot
-     */
-    public boolean isShouldNot() {
-        return shouldNot;
-    }
-
-    public boolean isRegistered() {
-        return !canonName.startsWith("x-");
-    }
-
     /**
      * @return
      * @see java.nio.charset.Charset#canEncode()
diff --git a/src/nu/validator/htmlparser/io/MetaSniffer.java b/src/nu/validator/htmlparser/io/MetaSniffer.java
@@ -169,28 +169,6 @@ protected boolean tryCharset(String encoding) throws SAXException {
             } else {
                 Encoding cs = Encoding.forName(encoding);
                 String canonName = cs.getCanonName();
-                if (!cs.isRegistered()) {
-                    if (encoding.startsWith("x-")) {
-                        err("The encoding \u201C"
-                                + encoding
-                                + "\u201D is not an IANA-registered encoding. (Charmod C022)");                    
-                    } else {
-                        err("The encoding \u201C"
-                                + encoding
-                                + "\u201D is not an IANA-registered encoding and did not use the \u201Cx-\u201D prefix. (Charmod C023)");
-                    }
-                } else if (!cs.getCanonName().equals(encoding)) {
-                    err("The encoding \u201C" + encoding
-                            + "\u201D is not the preferred name of the character encoding in use. The preferred name is \u201C"
-                            + canonName + "\u201D. (Charmod C024)");
-                }
-                if (cs.isShouldNot()) {
-                    warn("Authors should not use the character encoding \u201C"
-                            + encoding
-                            + "\u201D. It is recommended to use \u201CUTF-8\u201D.");                
-                } else if (cs.isObscure()) {
-                    warn("The character encoding \u201C" + encoding + "\u201D is not widely supported. Better interoperability may be achieved by using \u201CUTF-8\u201D.");
-                }
                 if (!cs.getCanonName().equals(encoding)) {
                     err(Encoding.msgNotCanonicalName(encoding, canonName));
                     this.characterEncoding = cs;