diff --git a/src/nu/validator/htmlparser/extra/ChardetSniffer.java b/src/nu/validator/htmlparser/extra/ChardetSniffer.java
index a7575039..4f6c5826 100644
--- a/src/nu/validator/htmlparser/extra/ChardetSniffer.java
+++ b/src/nu/validator/htmlparser/extra/ChardetSniffer.java
@@ -54,7 +54,9 @@ public Encoding sniff() throws IOException {
         detector.Init(this);
         detector.DoIt(source, length, false);
         detector.DataEnd();
-        if (returnValue != null && returnValue != Encoding.WINDOWS1252 && returnValue.isAsciiSuperset()) {
+        if (returnValue != null && returnValue != Encoding.WINDOWS1252
+                && returnValue != Encoding.UTF16BE // a windows-1252 or UTF-16
+                && returnValue != Encoding.UTF16LE) { // guess yields null
             return returnValue;
         } else {
             return null;
@@ -72,10 +74,6 @@ public static void main(String[] args) {
     public void Notify(String charsetName) {
         try {
             Encoding enc = Encoding.forName(charsetName);
-            Encoding actual = enc.getActualHtmlEncoding();
-            if (actual != null) {
-                enc = actual;
-            }
             returnValue = enc;
         } catch (UnsupportedCharsetException e) {
             returnValue = null;
diff --git a/src/nu/validator/htmlparser/extra/IcuDetectorSniffer.java b/src/nu/validator/htmlparser/extra/IcuDetectorSniffer.java
index f3caab5c..7aa0dde0 100644
--- a/src/nu/validator/htmlparser/extra/IcuDetectorSniffer.java
+++ b/src/nu/validator/htmlparser/extra/IcuDetectorSniffer.java
@@ -53,11 +53,8 @@ public Encoding sniff() throws IOException {
             detector.setText(this);
             CharsetMatch match = detector.detect();
             Encoding enc = Encoding.forName(match.getName());
-            Encoding actual = enc.getActualHtmlEncoding();
-            if (actual != null) {
-                enc = actual;
-            }
-            if (enc != Encoding.WINDOWS1252 && enc.isAsciiSuperset()) {
+            if (enc != Encoding.WINDOWS1252 // windows-1252 or UTF-16 guess: null
+                    && enc != Encoding.UTF16BE && enc != Encoding.UTF16LE) {
                 return enc;
             } else {
                 return null;
diff --git a/src/nu/validator/htmlparser/io/Driver.java b/src/nu/validator/htmlparser/io/Driver.java
index a8dd387c..b5df79b6 100644
--- a/src/nu/validator/htmlparser/io/Driver.java
+++ b/src/nu/validator/htmlparser/io/Driver.java
@@ -21,6 +21,14 @@
  * DEALINGS IN THE SOFTWARE.
  */
 
+/*
+ * The comments following this one that use the same comment syntax as this
+ * comment are quotes from the HTML Standard at https://html.spec.whatwg.org/
+ * as of 10 September 2020. That document came with this statement:
+ * Copyright © WHATWG (Apple, Google, Mozilla, Microsoft). This work is
+ * licensed under a Creative Commons Attribution 4.0 International License.
+ */
+
 package nu.validator.htmlparser.io;
 
 import java.io.IOException;
@@ -214,9 +222,8 @@ public void tokenize(InputSource is, int bufferSize)
                         tokenizer.getErrorHandler(), tokenizer, this, heuristics);
             } else {
                 if (this.characterEncoding != Encoding.UTF8) {
-                    errorWithoutLocation("Legacy encoding \u201C"
-                            + this.characterEncoding.getCanonName()
-                            + "\u201D used. Documents must use UTF-8.");
+                    errorWithoutLocation(Encoding.msgLegacyEncoding( // any non-UTF-8
+                            this.characterEncoding.getCanonName()));
                 }
                 becomeConfident();
                 this.reader = new HtmlInputStreamReader(inputStream,
@@ -350,57 +357,125 @@ public void setEncoding(Encoding encoding, Confidence confidence) {
         }
     }
 
+    /**
+     * Reports a tree-builder error when an internal encoding declaration
+     * names something other than the encoding actually in use.
+     *
+     * @param internalCharset the charset named by the declaration, as written
+     * @param actual the canonical name of the encoding in use
+     * @throws SAXException propagated from the tokenizer's error reporting
+     */
+    private void errInternalActualDiffer(String internalCharset, String actual)
+            throws SAXException {
+        // Declarations may be written in any letter case (e.g. "UTF-8") while
+        // canonical names are lower case, so case alone is not a disagreement.
+        if (!internalCharset.equalsIgnoreCase(actual)) {
+            tokenizer.errTreeBuilder(
+                    "Ignoring internal encoding declaration \u201C"
+                            + internalCharset + "\u201D, which disagrees with"
+                            + " the actual encoding of the document (\u201C"
+                            + actual + "\u201D).");
+        }
+    }
+
+    /**
+     * Handles an internal encoding declaration encountered while parsing.
+     * Depending on the current confidence and encoding this reports a
+     * disagreement, calls <code>becomeConfident()</code> when the declaration
+     * matches the encoding in use, or switches <code>characterEncoding</code>
+     * and aborts the pass by throwing a <code>ReparseException</code>.
+     *
+     * @param internalCharset the charset named by the declaration
+     * @return <code>false</code> only when the declared charset is
+     *         unsupported; <code>true</code> on every other normal return
+     * @throws SAXException propagated from error reporting, or the
+     *             <code>ReparseException</code> that requests a reparse
+     */
     public boolean internalEncodingDeclaration(String internalCharset)
             throws SAXException {
+        // characterEncoding is null when parsing from a Reader (see the
+        // "Reader case" check below), so it must not be dereferenced
+        // unconditionally here.
+        String actual = characterEncoding == null ? null
+                : characterEncoding.getCanonName();
+        if (actual != null && confidence == Confidence.CERTAIN) {
+            errInternalActualDiffer(internalCharset, actual);
+            return true;
+        }
+        /* https://html.spec.whatwg.org/#changing-the-encoding-while-parsing */
         try {
-            internalCharset = Encoding.toAsciiLowerCase(internalCharset);
-            Encoding cs;
-            if ("utf-16".equals(internalCharset)
-                    || "utf-16be".equals(internalCharset)
+            if ("utf-16be".equals(actual) || "utf-16le".equals(actual)) {
+                errInternalActualDiffer(internalCharset, actual);
+                /*
+                 * 1. If the encoding that is already being used to interpret
+                 * the input stream is a UTF-16 encoding, then set the
+                 * confidence to certain and return. The new encoding is ignored
+                 * becomeConfident();
+                 */
+                return true;
+            }
+            // Locale.ROOT keeps label matching independent of the default
+            // locale (tr/az would lower-case 'I' to a dotless i).
+            internalCharset = internalCharset.toLowerCase(
+                    java.util.Locale.ROOT);
+            Encoding cs = Encoding.forName(internalCharset);
+            // Test the resolved encoding, not the label: labels other than
+            // "utf-16be"/"utf-16le" (e.g. "utf-16") may also resolve to UTF-16.
+            String declaredCanon = cs.getCanonName();
+            if ("utf-16be".equals(declaredCanon)
+                    || "utf-16le".equals(declaredCanon)) {
-                    || "utf-16le".equals(internalCharset)) {
-                tokenizer.errTreeBuilder("Internal encoding declaration specified \u201C"
-                        + internalCharset
-                        + "\u201D which is not an ASCII superset. Continuing as if the encoding had been \u201Cutf-8\u201D.");
+                /*
+                 * 2. If the new encoding is a UTF-16 encoding, then change it
+                 * to UTF-8.
+                 */
+                tokenizer.errTreeBuilder(
+                        Encoding.msgIgnoredCharset(internalCharset, "utf-8"));
                 cs = Encoding.UTF8;
                 internalCharset = "utf-8";
-            } else {
-                cs = Encoding.forName(internalCharset);
-            }
-            Encoding actual = cs.getActualHtmlEncoding();
-            if (actual == null) {
-                actual = cs;
-            }
-            if (!actual.isAsciiSuperset()) {
-                tokenizer.errTreeBuilder("Internal encoding declaration specified \u201C"
-                        + internalCharset
-                        + "\u201D which is not an ASCII superset. Not changing the encoding.");
-                return false;
+            } else if ("x-user-defined".equals(internalCharset)) {
+                /*
+                 * 3. If the new encoding is x-user-defined, then change it to
+                 * windows-1252.
+                 */
+                tokenizer.errTreeBuilder(Encoding.msgIgnoredCharset(
+                        "x-user-defined", "windows-1252"));
+                cs = Encoding.WINDOWS1252;
+                internalCharset = "windows-1252";
             }
             if (characterEncoding == null) {
                 // Reader case
                 return true;
             }
-            if (characterEncoding == actual) {
+            if (characterEncoding == cs) {
+                /*
+                 * 4. If the new encoding is identical or equivalent to the
+                 * encoding that is already being used to interpret the input
+                 * stream, then set the confidence to certain and return.
+                 */
                 becomeConfident();
                 return true;
             }
-            if (confidence == Confidence.CERTAIN && actual != characterEncoding) {
-                tokenizer.errTreeBuilder("Internal encoding declaration \u201C"
-                        + internalCharset
-                        + "\u201D disagrees with the actual encoding of the document (\u201C"
-                        + characterEncoding.getCanonName() + "\u201D).");
-            } else {
-                Encoding newEnc = whineAboutEncodingAndReturnActual(
-                        internalCharset, cs);
-                tokenizer.errTreeBuilder("Changing character encoding \u201C"
-                        + internalCharset + "\u201D and reparsing.");
-                characterEncoding = newEnc;
-                throw new ReparseException();
-            }
-            return true;
+            /*
+             * 6. Otherwise, navigate to the document again, with
+             * historyHandling set to "replace", and using the same source
+             * browsing context, but this time skip the encoding sniffing
+             * algorithm and instead just set the encoding to the new encoding
+             */
+            Encoding newEnc = whineAboutEncodingAndReturnCanonical(
+                    internalCharset, cs);
+            tokenizer.errTreeBuilder("Changing character encoding to \u201C"
+                    + internalCharset + "\u201D and reparsing.");
+            characterEncoding = newEnc;
+            // Note: We intentionally don’t call becomeConfident() at this
+            // point. If we did, it would end up causing the exception
+            // java.lang.IllegalStateException: rewind() after willNotRewind()
+            // to be thrown later. So we are departing here from strictly
+            // following the ordering in the corresponding spec language, which
+            // specifies setting the confidence to "certain" at this point.
+            throw new ReparseException();
         } catch (UnsupportedCharsetException e) {
-            tokenizer.errTreeBuilder("Internal encoding declaration named an unsupported chararacter encoding \u201C"
-                    + internalCharset + "\u201D.");
+            tokenizer.errTreeBuilder(
+                    Encoding.msgBadInternalCharset(internalCharset));
             return false;
         }
     }
@@ -451,17 +493,18 @@ protected Encoding encodingFromExternalDeclaration(String encoding)
         if (encoding == null) {
             return null;
         }
-        encoding = Encoding.toAsciiLowerCase(encoding);
+        // Locale.ROOT keeps label matching independent of the default locale
+        // (tr/az would lower-case 'I' to a dotless i).
+        encoding = encoding.toLowerCase(java.util.Locale.ROOT);
         try {
             Encoding cs = Encoding.forName(encoding);
-            if ("utf-16".equals(cs.getCanonName())
-                    || "utf-32".equals(cs.getCanonName())) {
+            if ("utf-16be".equals(cs.getCanonName())
+                    || "utf-16le".equals(cs.getCanonName())) {
                 swallowBom = false;
             }
-            return whineAboutEncodingAndReturnActual(encoding, cs);
+            return whineAboutEncodingAndReturnCanonical(encoding, cs);
         } catch (UnsupportedCharsetException e) {
-            tokenizer.err("Unsupported character encoding name: \u201C" + encoding
-                    + "\u201D. Will sniff.");
+            tokenizer.err(Encoding.msgBadEncoding(encoding) + " Will sniff.");
             swallowBom = true;
         }
         return null; // keep the compiler happy
@@ -473,45 +514,13 @@ protected Encoding encodingFromExternalDeclaration(String encoding)
      * @return
      * @throws SAXException
      */
-    protected Encoding whineAboutEncodingAndReturnActual(String encoding,
+    protected Encoding whineAboutEncodingAndReturnCanonical(String encoding,
             Encoding cs) throws SAXException {
         String canonName = cs.getCanonName();
-        if (!cs.isRegistered()) {
-            if (encoding.startsWith("x-")) {
-                tokenizer.err("The encoding \u201C"
-                        + encoding
-                        + "\u201D is not an IANA-registered encoding. (Charmod C022)");
-            } else {
-                tokenizer.err("The encoding \u201C"
-                        + encoding
-                        + "\u201D is not an IANA-registered encoding and did not use the \u201Cx-\u201D prefix. (Charmod C023)");
-            }
-        } else if (!canonName.equals(encoding)) {
-            tokenizer.err("The encoding \u201C"
-                    + encoding
-                    + "\u201D is not the preferred name of the character encoding in use. The preferred name is \u201C"
-                    + canonName + "\u201D. (Charmod C024)");
-        }
-        if (cs.isShouldNot()) {
-            tokenizer.warn("Authors should not use the character encoding \u201C"
-                    + encoding
-                    + "\u201D. It is recommended to use \u201CUTF-8\u201D.");
-        } else if (cs.isLikelyEbcdic()) {
-            tokenizer.warn("Authors should not use EBCDIC-based encodings. It is recommended to use \u201CUTF-8\u201D.");
-        } else if (cs.isObscure()) {
-            tokenizer.warn("The character encoding \u201C"
-                    + encoding
-                    + "\u201D is not widely supported. Better interoperability may be achieved by using \u201CUTF-8\u201D.");
-        }
-        Encoding actual = cs.getActualHtmlEncoding();
-        if (actual == null) {
-            return cs;
-        } else {
-            tokenizer.warn("Using \u201C" + actual.getCanonName()
-                    + "\u201D instead of the declared encoding \u201C"
-                    + encoding + "\u201D.");
-            return actual;
+        if (!canonName.equals(encoding)) { // declared name is not the canonical name
+            tokenizer.err(Encoding.msgNotCanonicalName(encoding, canonName));
         }
+        return cs; // the encoding is returned exactly as passed in
     }
 
     private class ReparseException extends SAXException {
diff --git a/src/nu/validator/htmlparser/io/Encoding.java b/src/nu/validator/htmlparser/io/Encoding.java
index 123465f8..ded6afb4 100644
--- a/src/nu/validator/htmlparser/io/Encoding.java
+++ b/src/nu/validator/htmlparser/io/Encoding.java
@@ -44,422 +44,358 @@ public class Encoding {
 
     public static final Encoding UTF8;
 
-    public static final Encoding UTF16;
-
     public static final Encoding UTF16LE;
 
     public static final Encoding UTF16BE;
 
     public static final Encoding WINDOWS1252;
 
-    private static String[] SHOULD_NOT = { "jisx02121990", "xjis0208" };
+    private static Map<String, Encoding> encodingByLabel =
+        new HashMap<String, Encoding>();
 
-    private static String[] BANNED = { "bocu1", "cesu8", "compoundtext",
-            "iscii91", "macarabic", "maccentraleurroman", "maccroatian",
-            "maccyrillic", "macdevanagari", "macfarsi", "macgreek",
-            "macgujarati", "macgurmukhi", "machebrew", "macicelandic",
-            "macroman", "macromanian", "macthai", "macturkish", "macukranian",
-            "scsu", "utf32", "utf32be", "utf32le", "utf7", "ximapmailboxname",
-            "xjisautodetect", "xutf16bebom", "xutf16lebom", "xutf32bebom",
-            "xutf32lebom", "xutf16oppositeendian", "xutf16platformendian",
-            "xutf32oppositeendian", "xutf32platformendian" };
+    private static void createEncoding(String name, String[] labels) {
+        if (!Charset.isSupported(name)) {
+            return; // no such charset in this JVM: the labels stay unmapped
+        }
+        String canon = name.toLowerCase(java.util.Locale.ROOT).intern(); // locale-independent
+        Encoding enc = new Encoding(canon, Charset.forName(name));
+        for (String label : labels) {
+            encodingByLabel.put(label, enc);
+        }
+    }
 
-    /* From the table at https://encoding.spec.whatwg.org/#names-and-labels,
-     * everything in the Labels column, sorted */
-    private static String[] NOT_OBSCURE = { //
-            "866", //
-            "ansi_x3.4-1968", //
-            "arabic", //
-            "ascii", //
-            "asmo-708", //
-            "big5", //
-            "big5-hkscs", //
-            "chinese", //
-            "cn-big5", //
-            "cp1250", //
-            "cp1251", //
-            "cp1252", //
-            "cp1253", //
-            "cp1254", //
-            "cp1255", //
-            "cp1256", //
-            "cp1257", //
-            "cp1258", //
-            "cp819", //
-            "cp866", //
-            "csbig5", //
-            "cseuckr", //
-            "cseucpkdfmtjapanese", //
-            "csgb2312", //
-            "csibm866", //
-            "csiso2022jp", //
-            "csiso2022kr", //
-            "csiso58gb231280", //
-            "csiso88596e", //
-            "csiso88596i", //
-            "csiso88598e", //
-            "csiso88598i", //
-            "csisolatin1", //
-            "csisolatin2", //
-            "csisolatin3", //
-            "csisolatin4", //
-            "csisolatin5", //
-            "csisolatin6", //
-            "csisolatin9", //
-            "csisolatinarabic", //
-            "csisolatincyrillic", //
-            "csisolatingreek", //
-            "csisolatinhebrew", //
-            "cskoi8r", //
-            "csksc56011987", //
-            "csmacintosh", //
-            "csshiftjis", //
-            "csunicode", //
-            "cyrillic", //
-            "dos-874", //
-            "ecma-114", //
-            "ecma-118", //
-            "elot_928", //
-            "euc-jp", //
-            "euc-kr", //
-            "gb18030", //
-            "gb2312", //
-            "gb_2312", //
-            "gb_2312-80", //
-            "gbk", //
-            "greek", //
-            "greek8", //
-            "hebrew", //
-            "hz-gb-2312", //
-            "ibm819", //
-            "ibm866", //
-            "iso-10646-ucs-2", //
-            "iso-2022-cn", //
-            "iso-2022-cn-ext", //
-            "iso-2022-jp", //
-            "iso-2022-kr", //
-            "iso-8859-1", //
-            "iso-8859-10", //
-            "iso-8859-11", //
-            "iso-8859-13", //
-            "iso-8859-14", //
-            "iso-8859-15", //
-            "iso-8859-16", //
-            "iso-8859-2", //
-            "iso-8859-3", //
-            "iso-8859-4", //
-            "iso-8859-5", //
-            "iso-8859-6", //
-            "iso-8859-6-e", //
-            "iso-8859-6-i", //
-            "iso-8859-7", //
-            "iso-8859-8", //
-            "iso-8859-8-e", //
-            "iso-8859-8-i", //
-            "iso-8859-9", //
-            "iso-ir-100", //
-            "iso-ir-101", //
-            "iso-ir-109", //
-            "iso-ir-110", //
-            "iso-ir-126", //
-            "iso-ir-127", //
-            "iso-ir-138", //
-            "iso-ir-144", //
-            "iso-ir-148", //
-            "iso-ir-149", //
-            "iso-ir-157", //
-            "iso-ir-58", //
-            "iso8859-1", //
-            "iso8859-10", //
-            "iso8859-11", //
-            "iso8859-13", //
-            "iso8859-14", //
-            "iso8859-15", //
-            "iso8859-2", //
-            "iso8859-3", //
-            "iso8859-4", //
-            "iso8859-5", //
-            "iso8859-6", //
-            "iso8859-7", //
-            "iso8859-8", //
-            "iso8859-9", //
-            "iso88591", //
-            "iso885910", //
-            "iso885911", //
-            "iso885913", //
-            "iso885914", //
-            "iso885915", //
-            "iso88592", //
-            "iso88593", //
-            "iso88594", //
-            "iso88595", //
-            "iso88596", //
-            "iso88597", //
-            "iso88598", //
-            "iso88599", //
-            "iso_8859-1", //
-            "iso_8859-15", //
-            "iso_8859-1:1987", //
-            "iso_8859-2", //
-            "iso_8859-2:1987", //
-            "iso_8859-3", //
-            "iso_8859-3:1988", //
-            "iso_8859-4", //
-            "iso_8859-4:1988", //
-            "iso_8859-5", //
-            "iso_8859-5:1988", //
-            "iso_8859-6", //
-            "iso_8859-6:1987", //
-            "iso_8859-7", //
-            "iso_8859-7:1987", //
-            "iso_8859-8", //
-            "iso_8859-8:1988", //
-            "iso_8859-9", //
-            "iso_8859-9:1989", //
-            "koi", //
-            "koi8", //
-            "koi8-r", //
-            "koi8-ru", //
-            "koi8-u", //
-            "koi8_r", //
-            "korean", //
-            "ks_c_5601-1987", //
-            "ks_c_5601-1989", //
-            "ksc5601", //
-            "ksc_5601", //
-            "l1", //
-            "l2", //
-            "l3", //
-            "l4", //
-            "l5", //
-            "l6", //
-            "l9", //
-            "latin1", //
-            "latin2", //
-            "latin3", //
-            "latin4", //
-            "latin5", //
-            "latin6", //
-            "logical", //
-            "mac", //
-            "macintosh", //
-            "ms932", //
-            "ms_kanji", //
-            "replacement", //
-            "shift-jis", //
-            "shift_jis", //
-            "sjis", //
-            "sun_eu_greek", //
-            "tis-620", //
-            "ucs-2", //
-            "unicode", //
-            "unicode-1-1-utf-8", //
-            "unicode11utf8", //
-            "unicode20utf8", //
-            "unicodefeff", //
-            "unicodefffe", //
-            "us-ascii", //
-            "utf-16", //
-            "utf-16be", //
-            "utf-16le", //
-            "utf-8", //
-            "utf8", //
-            "visual", //
-            "windows-1250", //
-            "windows-1251", //
-            "windows-1252", //
-            "windows-1253", //
-            "windows-1254", //
-            "windows-1255", //
-            "windows-1256", //
-            "windows-1257", //
-            "windows-1258", //
-            "windows-31j", //
-            "windows-874", //
-            "windows-949", //
-            "x-cp1250", //
-            "x-cp1251", //
-            "x-cp1252", //
-            "x-cp1253", //
-            "x-cp1254", //
-            "x-cp1255", //
-            "x-cp1256", //
-            "x-cp1257", //
-            "x-cp1258", //
-            "x-euc-jp", //
-            "x-gbk", //
-            "x-mac-cyrillic", //
-            "x-mac-roman", //
-            "x-mac-ukrainian", //
-            "x-sjis", //
-            "x-unicode20utf8", //
-            "x-user-defined", //
-            "x-x-big5", //
-    };
-    private static Map<String, Encoding> encodingByCookedName = new HashMap<String, Encoding>();
+    static {
+        /* See https://encoding.spec.whatwg.org/#names-and-labels */
+        createEncoding( //
+                "UTF-8", new String[] { //
+                        "unicode-1-1-utf-8", //
+                        "unicode11utf8", //
+                        "unicode20utf8", //
+                        "utf-8", //
+                        "utf8", //
+                        "x-unicode20utf8" });
+        createEncoding( //
+                "IBM866", new String[] { //
+                        "866", //
+                        "cp866", //
+                        "csibm866", //
+                        "ibm866" });
+        createEncoding( //
+                "ISO-8859-2", new String[] { //
+                        "csisolatin2", //
+                        "iso-8859-2", //
+                        "iso-ir-101", //
+                        "iso8859-2", //
+                        "iso88592", //
+                        "iso_8859-2", //
+                        "iso_8859-2:1987", //
+                        "l2", //
+                        "latin2" });
+        createEncoding( //
+                "ISO-8859-3", new String[] { //
+                        "csisolatin3", //
+                        "iso-8859-3", //
+                        "iso-ir-109", //
+                        "iso8859-3", //
+                        "iso88593", //
+                        "iso_8859-3", //
+                        "iso_8859-3:1988", //
+                        "l3", //
+                        "latin3" });
+        createEncoding( //
+                "ISO-8859-4", new String[] { //
+                        "csisolatin4", //
+                        "iso-8859-4", //
+                        "iso-ir-110", //
+                        "iso8859-4", //
+                        "iso88594", //
+                        "iso_8859-4", //
+                        "iso_8859-4:1988", //
+                        "l4", //
+                        "latin4" });
+        createEncoding( //
+                "ISO-8859-5", new String[] { //
+                        "csisolatincyrillic", //
+                        "cyrillic", //
+                        "iso-8859-5", //
+                        "iso-ir-144", //
+                        "iso8859-5", //
+                        "iso88595", //
+                        "iso_8859-5", //
+                        "iso_8859-5:1988" });
+        createEncoding( //
+                "ISO-8859-6", new String[] { //
+                        "arabic", //
+                        "asmo-708", //
+                        "csiso88596e", //
+                        "csiso88596i", //
+                        "csisolatinarabic", //
+                        "ecma-114", //
+                        "iso-8859-6", //
+                        "iso-8859-6-e", //
+                        "iso-8859-6-i", //
+                        "iso-ir-127", //
+                        "iso8859-6", //
+                        "iso88596", //
+                        "iso_8859-6", //
+                        "iso_8859-6:1987" });
+        createEncoding( //
+                "ISO-8859-7", new String[] { //
+                        "csisolatingreek", //
+                        "ecma-118", //
+                        "elot_928", //
+                        "greek", //
+                        "greek8", //
+                        "iso-8859-7", //
+                        "iso-ir-126", //
+                        "iso8859-7", //
+                        "iso88597", //
+                        "iso_8859-7", //
+                        "iso_8859-7:1987", //
+                        "sun_eu_greek" });
+        createEncoding( //
+                "ISO-8859-8", new String[] { //
+                        "csiso88598e", //
+                        "csisolatinhebrew", //
+                        "hebrew", //
+                        "iso-8859-8", //
+                        "iso-8859-8-e", //
+                        "iso-ir-138", //
+                        "iso8859-8", //
+                        "iso88598", //
+                        "iso_8859-8", //
+                        "iso_8859-8:1988", //
+                        "visual" });
+        createEncoding( //
+                // Unsupported in Java
+                "ISO-8859-8-I", new String[] { //
+                        "csiso88598i", //
+                        "iso-8859-8-i", //
+                        "logical" });
+        createEncoding( //
+                // Unsupported in Java
+                "ISO-8859-10", new String[] { //
+                        "csisolatin6", //
+                        "iso-8859-10", //
+                        "iso-ir-157", //
+                        "iso8859-10", //
+                        "iso885910", //
+                        "l6", //
+                        "latin6" });
+        createEncoding( //
+                "ISO-8859-13", new String[] { //
+                        "iso-8859-13", //
+                        "iso8859-13", //
+                        "iso885913" });
+        createEncoding( //
+                // Unsupported in Java
+                "ISO-8859-14", new String[] { //
+                        "iso-8859-14", //
+                        "iso8859-14", //
+                        "iso885914" });
+        createEncoding( //
+                "ISO-8859-15", new String[] { //
+                        "csisolatin9", //
+                        "iso-8859-15", //
+                        "iso8859-15", //
+                        "iso885915", //
+                        "iso_8859-15", //
+                        "l9" });
+        createEncoding( //
+                "ISO-8859-16", new String[] { //
+                        "iso-8859-16" });
+        createEncoding( //
+                "KOI8-R", new String[] { //
+                        "cskoi8r", //
+                        "koi", //
+                        "koi8", //
+                        "koi8-r", //
+                        "koi8_r" });
+        createEncoding( //
+                "KOI8-U", new String[] { //
+                        "koi8-ru", //
+                        "koi8-u" });
+        createEncoding( //
+                // Unsupported in Java
+                "macintosh", new String[] { //
+                        "csmacintosh", //
+                        "mac", //
+                        "macintosh", //
+                        "x-mac-roman" });
+        createEncoding( //
+                "windows-874", new String[] { //
+                        "dos-874", //
+                        "iso-8859-11", //
+                        "iso8859-11", //
+                        "iso885911", //
+                        "tis-620", //
+                        "windows-874" });
+        createEncoding( //
+                "windows-1250", new String[] { //
+                        "cp1250", //
+                        "windows-1250", //
+                        "x-cp1250" });
+        createEncoding( //
+                "windows-1251", new String[] { //
+                        "cp1251", //
+                        "windows-1251", //
+                        "x-cp1251" });
+        createEncoding( //
+                "windows-1252", new String[] { //
+                        "ansi_x3.4-1968", //
+                        "ascii", //
+                        "cp1252", //
+                        "cp819", //
+                        "csisolatin1", //
+                        "ibm819", //
+                        "iso-8859-1", //
+                        "iso-ir-100", //
+                        "iso8859-1", //
+                        "iso88591", //
+                        "iso_8859-1", //
+                        "iso_8859-1:1987", //
+                        "l1", //
+                        "latin1", //
+                        "us-ascii", //
+                        "windows-1252", //
+                        "x-cp1252" });
+        createEncoding( //
+                "windows-1253", new String[] { //
+                        "cp1253", //
+                        "windows-1253", //
+                        "x-cp1253" });
+        createEncoding( //
+                "windows-1254", new String[] { //
+                        "cp1254", //
+                        "csisolatin5", //
+                        "iso-8859-9", //
+                        "iso-ir-148", //
+                        "iso8859-9", //
+                        "iso88599", //
+                        "iso_8859-9", //
+                        "iso_8859-9:1989", //
+                        "l5", //
+                        "latin5", //
+                        "windows-1254", //
+                        "x-cp1254" });
+        createEncoding( //
+                "windows-1255", new String[] { //
+                        "cp1255", //
+                        "windows-1255", //
+                        "x-cp1255" });
+        createEncoding( //
+                "windows-1256", new String[] { //
+                        "cp1256", //
+                        "windows-1256", //
+                        "x-cp1256" });
+        createEncoding( //
+                "windows-1257", new String[] { //
+                        "cp1257", //
+                        "windows-1257", //
+                        "x-cp1257" });
+        createEncoding( //
+                "windows-1258", new String[] { //
+                        "cp1258", //
+                        "windows-1258", //
+                        "x-cp1258" });
+        createEncoding( //
+                // Unsupported in Java
+                "x-mac-cyrillic", new String[] { //
+                        "x-mac-cyrillic", //
+                        "x-mac-ukrainian" });
+        createEncoding( //
+                "GBK", new String[] { //
+                        "chinese", //
+                        "csgb2312", //
+                        "csiso58gb231280", //
+                        "gb2312", //
+                        "gb_2312", //
+                        "gb_2312-80", //
+                        "gbk", //
+                        "iso-ir-58", //
+                        "x-gbk" });
+        createEncoding( //
+                "gb18030", new String[] { //
+                        "gb18030" });
+        createEncoding( //
+                "Big5", new String[] { //
+                        "big5", //
+                        "big5-hkscs", //
+                        "cn-big5", //
+                        "csbig5", //
+                        "x-x-big5" });
+        createEncoding( //
+                "EUC-JP", new String[] { //
+                        "cseucpkdfmtjapanese", //
+                        "euc-jp", //
+                        "x-euc-jp" });
+        createEncoding( //
+                "ISO-2022-JP", new String[] { //
+                        "csiso2022jp", //
+                        "iso-2022-jp" });
+        createEncoding( //
+                "Shift_JIS", new String[] { //
+                        "csshiftjis", //
+                        "ms932", //
+                        "ms_kanji", //
+                        "shift-jis", //
+                        "shift_jis", //
+                        "sjis", //
+                        "windows-31j", //
+                        "x-sjis" });
+        createEncoding( //
+                "EUC-KR", new String[] { //
+                        "cseuckr", //
+                        "csksc56011987", //
+                        "euc-kr", //
+                        "iso-ir-149", //
+                        "korean", //
+                        "ks_c_5601-1987", //
+                        "ks_c_5601-1989", //
+                        "ksc5601", //
+                        "ksc_5601", //
+                        "windows-949" });
+        createEncoding( //
+                // Special case
+                "replacement", new String[] { //
+                        "csiso2022kr", //
+                        "hz-gb-2312", //
+                        "iso-2022-cn", //
+                        "iso-2022-cn-ext", //
+                        "iso-2022-kr", //
+                        "replacement" });
+        createEncoding( //
+                "UTF-16BE", new String[] { //
+                        "unicodefffe", //
+                        "utf-16be" });
+        createEncoding( //
+                "UTF-16LE", new String[] { //
+                        "csunicode", //
+                        "iso-10646-ucs-2", //
+                        "ucs-2", //
+                        "unicode", //
+                        "unicodefeff", //
+                        "utf-16", //
+                        "utf-16le" });
+        createEncoding( //
+                // Special case
+                "x-user-defined", new String[] { //
+                        "x-user-defined" });
+    }
 
     private final String canonName;
 
     private final Charset charset;
 
-    private final boolean asciiSuperset;
-
-    private final boolean obscure;
-
-    private final boolean shouldNot;
-
-    private final boolean likelyEbcdic;
-
-    private Encoding actualHtmlEncoding = null;
-
     static {
-        byte[] testBuf = new byte[0x7F];
-        for (int i = 0; i < 0x7F; i++) {
-            if (isAsciiSupersetnessSensitive(i)) {
-                testBuf[i] = (byte) i;
-            } else {
-                testBuf[i] = (byte) 0x20;
-            }
-        }
-
-        Set<Encoding> encodings = new HashSet<Encoding>();
-
-        SortedMap<String, Charset> charsets = Charset.availableCharsets();
-        for (Map.Entry<String, Charset> entry : charsets.entrySet()) {
-            Charset cs = entry.getValue();
-            String name = toNameKey(cs.name());
-            String canonName = toAsciiLowerCase(cs.name());
-            if (!isBanned(stripDashAndUnderscore(name))) {
-                name = name.intern();
-                boolean asciiSuperset = asciiMapsToBasicLatin(testBuf, cs);
-                Encoding enc = new Encoding(canonName.intern(), cs,
-                        asciiSuperset, isObscure(name),
-                        isShouldNot(stripDashAndUnderscore(name)),
-                        isLikelyEbcdic(name, asciiSuperset));
-                encodings.add(enc);
-                Set<String> aliases = cs.aliases();
-                for (String alias : aliases) {
-                    encodingByCookedName.put(toNameKey(alias).intern(), enc);
-                }
-            }
-        }
-        // Overwrite possible overlapping aliases with the real things--just in
-        // case
-        for (Encoding encoding : encodings) {
-            encodingByCookedName.put(toNameKey(encoding.getCanonName()),
-                    encoding);
-        }
         UTF8 = forName("utf-8");
-        UTF16 = forName("utf-16");
         UTF16BE = forName("utf-16be");
         UTF16LE = forName("utf-16le");
         WINDOWS1252 = forName("windows-1252");
-        try {
-            forName("iso-8859-1").actualHtmlEncoding = forName("windows-1252");
-        } catch (UnsupportedCharsetException e) {
-        }
-        try {
-            forName("iso-8859-9").actualHtmlEncoding = forName("windows-1254");
-        } catch (UnsupportedCharsetException e) {
-        }
-        try {
-            forName("iso-8859-11").actualHtmlEncoding = forName("windows-874");
-        } catch (UnsupportedCharsetException e) {
-        }
-        try {
-            forName("x-iso-8859-11").actualHtmlEncoding = forName("windows-874");
-        } catch (UnsupportedCharsetException e) {
-        }
-        try {
-            forName("tis-620").actualHtmlEncoding = forName("windows-874");
-        } catch (UnsupportedCharsetException e) {
-        }
-        try {
-            forName("gb_2312-80").actualHtmlEncoding = forName("gbk");
-        } catch (UnsupportedCharsetException e) {
-        }
-        try {
-            forName("gb2312").actualHtmlEncoding = forName("gbk");
-        } catch (UnsupportedCharsetException e) {
-        }
-        try {
-            encodingByCookedName.put("x-x-big5", forName("big5"));
-        } catch (UnsupportedCharsetException e) {
-        }
-        try {
-            encodingByCookedName.put("euc-kr", forName("windows-949"));
-        } catch (UnsupportedCharsetException e) {
-        }
-        try {
-            encodingByCookedName.put("ks_c_5601-1987", forName("windows-949"));
-        } catch (UnsupportedCharsetException e) {
-        }
-    }
-
-    private static boolean isAsciiSupersetnessSensitive(int c) {
-        return (c >= 0x09 && c <= 0x0D) || (c >= 0x20 && c <= 0x22)
-                || (c >= 0x26 && c <= 0x27) || (c >= 0x2C && c <= 0x3F)
-                || (c >= 0x41 && c <= 0x5A) || (c >= 0x61 && c <= 0x7A);
-    }
-
-    private static boolean isObscure(String lowerCasePreferredIanaName) {
-        return !(Arrays.binarySearch(NOT_OBSCURE, lowerCasePreferredIanaName) > -1);
-    }
-
-    private static boolean isBanned(String lowerCasePreferredIanaName) {
-        if (lowerCasePreferredIanaName.startsWith("xibm")) {
-            return true;
-        }
-        return (Arrays.binarySearch(BANNED, lowerCasePreferredIanaName) > -1);
-    }
-
-    private static boolean isShouldNot(String lowerCasePreferredIanaName) {
-        return (Arrays.binarySearch(SHOULD_NOT, lowerCasePreferredIanaName) > -1);
-    }
-
-    /**
-     * @param testBuf
-     * @param cs
-     */
-    private static boolean asciiMapsToBasicLatin(byte[] testBuf, Charset cs) {
-        CharsetDecoder dec = cs.newDecoder();
-        dec.onMalformedInput(CodingErrorAction.REPORT);
-        dec.onUnmappableCharacter(CodingErrorAction.REPORT);
-        Reader r = new InputStreamReader(new ByteArrayInputStream(testBuf), dec);
-        try {
-            for (int i = 0; i < 0x7F; i++) {
-                if (isAsciiSupersetnessSensitive(i)) {
-                    if (r.read() != i) {
-                        return false;
-                    }
-                } else {
-                    if (r.read() != 0x20) {
-                        return false;
-                    }
-                }
-            }
-        } catch (IOException e) {
-            return false;
-        } catch (Exception e) {
-            return false;
-        } catch (CoderMalfunctionError e) {
-            return false;
-        }
-
-        return true;
-    }
-
-    private static boolean isLikelyEbcdic(String canonName,
-            boolean asciiSuperset) {
-        if (!asciiSuperset) {
-            return (canonName.startsWith("cp") || canonName.startsWith("ibm") || canonName.startsWith("xibm"));
-        } else {
-            return false;
-        }
     }
 
     public static Encoding forName(String name) {
-        Encoding rv = encodingByCookedName.get(toNameKey(name));
+        Encoding rv = encodingByLabel.get(toNameKey(name));
         if (rv == null) {
             throw new UnsupportedCharsetException(name);
         } else {
@@ -486,61 +422,13 @@ public static String toNameKey(String str) {
         return new String(buf, 0, j);
     }
 
-    public static String stripDashAndUnderscore(String str) {
-        if (str == null) {
-            return null;
-        }
-        char[] buf = new char[str.length()];
-        for (int i = 0; i < str.length(); i++) {
-            char c = str.charAt(i);
-            if (c == '-' || c == '_') {
-                buf[i] = c;
-            }
-        }
-        return new String(buf);
-    }
-
-    public static String toAsciiLowerCase(String str) {
-        if (str == null) {
-            return null;
-        }
-        char[] buf = new char[str.length()];
-        for (int i = 0; i < str.length(); i++) {
-            char c = str.charAt(i);
-            if (c >= 'A' && c <= 'Z') {
-                c += 0x20;
-            }
-            buf[i] = c;
-        }
-        return new String(buf);
-    }
-
     /**
      * @param canonName
      * @param charset
-     * @param asciiSuperset
-     * @param obscure
-     * @param shouldNot
-     * @param likelyEbcdic
      */
-    private Encoding(final String canonName, final Charset charset,
-            final boolean asciiSuperset, final boolean obscure,
-            final boolean shouldNot, final boolean likelyEbcdic) {
+    private Encoding(final String canonName, final Charset charset) {
         this.canonName = canonName;
         this.charset = charset;
-        this.asciiSuperset = asciiSuperset;
-        this.obscure = obscure;
-        this.shouldNot = shouldNot;
-        this.likelyEbcdic = likelyEbcdic;
-    }
-
-    /**
-     * Returns the asciiSuperset.
-     * 
-     * @return the asciiSuperset
-     */
-    public boolean isAsciiSuperset() {
-        return asciiSuperset;
     }
 
     /**
@@ -552,37 +440,6 @@ public String getCanonName() {
         return canonName;
     }
 
-    /**
-     * Returns the likelyEbcdic.
-     * 
-     * @return the likelyEbcdic
-     */
-    public boolean isLikelyEbcdic() {
-        return likelyEbcdic;
-    }
-
-    /**
-     * Returns the obscure.
-     * 
-     * @return the obscure
-     */
-    public boolean isObscure() {
-        return obscure;
-    }
-
-    /**
-     * Returns the shouldNot.
-     * 
-     * @return the shouldNot
-     */
-    public boolean isShouldNot() {
-        return shouldNot;
-    }
-
-    public boolean isRegistered() {
-        return !canonName.startsWith("x-");
-    }
-
     /**
      * @return
      * @see java.nio.charset.Charset#canEncode()
@@ -607,24 +464,36 @@ public CharsetEncoder newEncoder() {
         return charset.newEncoder();
     }
 
-    /**
-     * Returns the actualHtmlEncoding.
-     * 
-     * @return the actualHtmlEncoding
-     */
-    public Encoding getActualHtmlEncoding() {
-        return actualHtmlEncoding;
+    protected static String msgLegacyEncoding(String name) {
+        return "Legacy encoding \u201C" + name + "\u201D used. Documents must"
+                + " use UTF-8.";
+    }
+
+    protected static String msgIgnoredCharset(String ignored, String name) {
+        return "Internal encoding declaration specified \u201C" + ignored
+                + "\u201D. Continuing as if the encoding had been \u201C"
+                + name + "\u201D.";
+    }
+    protected static String msgNotCanonicalName(String label, String name) {
+        return "The encoding \u201C" + label + "\u201D is not the canonical"
+                + " name of the character encoding in use. The canonical name"
+                + " is \u201C" + name + "\u201D. (Charmod C024)";
+    }
+
+    protected static String msgBadInternalCharset(String internalCharset) {
+        return "Internal encoding declaration named an unsupported character"
+            + " encoding \u201C" + internalCharset + "\u201D.";
+    }
+
+    protected static String msgBadEncoding(String name) {
+        return "Unsupported character encoding name: \u201C" + name + "\u201D.";
     }
 
     public static void main(String[] args) {
-        for (Map.Entry<String, Encoding> entry : encodingByCookedName.entrySet()) {
+        for (Map.Entry<String, Encoding> entry : encodingByLabel.entrySet()) {
             String name = entry.getKey();
             Encoding enc = entry.getValue();
-            System.out.printf(
-                    "%21s: canon %21s, obs %5s, reg %5s, asc %5s, ebc %5s\n",
-                    name, enc.getCanonName(), enc.isObscure(),
-                    enc.isRegistered(), enc.isAsciiSuperset(),
-                    enc.isLikelyEbcdic());
+            System.out.printf("%21s: canon %13s\n", name, enc.getCanonName());
         }
     }
 
diff --git a/src/nu/validator/htmlparser/io/HtmlInputStreamReader.java b/src/nu/validator/htmlparser/io/HtmlInputStreamReader.java
index 4facce4a..c54e591a 100755
--- a/src/nu/validator/htmlparser/io/HtmlInputStreamReader.java
+++ b/src/nu/validator/htmlparser/io/HtmlInputStreamReader.java
@@ -139,9 +139,7 @@ public HtmlInputStreamReader(InputStream inputStream,
             if (encoding == null) {
                 declared = false;
             } else if (encoding != Encoding.UTF8) {
-                err("Legacy encoding \u201C"
-                        + encoding.getCanonName()
-                        + "\u201D used. Documents must use UTF-8.");
+                err(Encoding.msgLegacyEncoding(encoding.getCanonName()));
             }
             if (encoding == null
                     && (heuristics == Heuristics.CHARDET || heuristics == Heuristics.ALL)) {
@@ -157,7 +155,8 @@ public HtmlInputStreamReader(InputStream inputStream,
                 encoding = Encoding.WINDOWS1252;
             }
             if (!declared) {
-                err("The character encoding was not declared. Proceeding using \u201C" + encoding.getCanonName() + "\u201D.");
+                err("The character encoding was not declared. Proceeding using"
+                        + " \u201C" + encoding.getCanonName() + "\u201D.");
             }
             if (driver != null) {
                 driver.setEncoding(encoding, Confidence.TENTATIVE);
@@ -168,11 +167,10 @@ public HtmlInputStreamReader(InputStream inputStream,
                     driver.setEncoding(Encoding.UTF8, Confidence.CERTAIN);
                 }
             } else {
-                err("Legacy encoding \u201C"
-                        + encoding.getCanonName()
-                        + "\u201D used. Documents must use UTF-8.");
+                err(Encoding.msgLegacyEncoding(encoding.getCanonName()));
                 if (driver != null) {
-                    driver.setEncoding(Encoding.UTF16, Confidence.CERTAIN);
+                    // XXX Why did this previously pass Encoding.UTF16 here?
+                    driver.setEncoding(encoding, Confidence.CERTAIN);
                 }
             }
         }
diff --git a/src/nu/validator/htmlparser/io/MetaSniffer.java b/src/nu/validator/htmlparser/io/MetaSniffer.java
index 9deaef7a..60e157d0 100755
--- a/src/nu/validator/htmlparser/io/MetaSniffer.java
+++ b/src/nu/validator/htmlparser/io/MetaSniffer.java
@@ -159,55 +159,28 @@ public String getEncoding() {
     }
     
     protected boolean tryCharset(String encoding) throws SAXException {
-        encoding = Encoding.toAsciiLowerCase(encoding);
+        encoding = encoding.toLowerCase(java.util.Locale.ROOT); // locale-independent
         try {
-            // XXX spec says only UTF-16
-            if ("utf-16".equals(encoding) || "utf-16be".equals(encoding) || "utf-16le".equals(encoding) || "utf-32".equals(encoding) || "utf-32be".equals(encoding) || "utf-32le".equals(encoding)) {
+            if ("utf-16be".equals(encoding) || "utf-16le".equals(encoding)) {
                 this.characterEncoding = Encoding.UTF8;
-                err("The internal character encoding declaration specified \u201C" + encoding + "\u201D which is not a rough superset of ASCII. Using \u201CUTF-8\u201D instead.");
+                err(Encoding.msgIgnoredCharset(encoding, "utf-8"));
+                return true;
+            } else if ("x-user-defined".equals(encoding)) {
+                this.characterEncoding = Encoding.WINDOWS1252;
+                err(Encoding.msgIgnoredCharset("x-user-defined", "windows-1252"));
                 return true;
             } else {
                 Encoding cs = Encoding.forName(encoding);
                 String canonName = cs.getCanonName();
-                if (!cs.isAsciiSuperset()) {
-                    err("The encoding \u201C"
-                                + encoding
-                                + "\u201D is not an ASCII superset and, therefore, cannot be used in an internal encoding declaration. Continuing the sniffing algorithm.");
-                    return false;
-                }
-                if (!cs.isRegistered()) {
-                    if (encoding.startsWith("x-")) {
-                        err("The encoding \u201C"
-                                + encoding
-                                + "\u201D is not an IANA-registered encoding. (Charmod C022)");                    
-                    } else {
-                        err("The encoding \u201C"
-                                + encoding
-                                + "\u201D is not an IANA-registered encoding and did not use the \u201Cx-\u201D prefix. (Charmod C023)");
-                    }
-                } else if (!cs.getCanonName().equals(encoding)) {
-                    err("The encoding \u201C" + encoding
-                            + "\u201D is not the preferred name of the character encoding in use. The preferred name is \u201C"
-                            + canonName + "\u201D. (Charmod C024)");
-                }
-                if (cs.isShouldNot()) {
-                    warn("Authors should not use the character encoding \u201C"
-                            + encoding
-                            + "\u201D. It is recommended to use \u201CUTF-8\u201D.");                
-                } else if (cs.isObscure()) {
-                    warn("The character encoding \u201C" + encoding + "\u201D is not widely supported. Better interoperability may be achieved by using \u201CUTF-8\u201D.");
-                }
-                Encoding actual = cs.getActualHtmlEncoding();
-                if (actual == null) {
-                    this.characterEncoding = cs;
-                } else {
-                    warn("Using \u201C" + actual.getCanonName() + "\u201D instead of the declared encoding \u201C" + encoding + "\u201D.");
-                    this.characterEncoding = actual;
-                }
+                if (!canonName.equals(encoding)) {
+                    err(Encoding.msgNotCanonicalName(encoding, canonName));
+                }
+                this.characterEncoding = cs; // record it for canonical names too
                 return true;
             }
         } catch (UnsupportedCharsetException e) {
-            err("Unsupported character encoding name: \u201C" + encoding + "\u201D. Will continue sniffing.");
+            err(Encoding.msgBadInternalCharset(encoding)
+                    + " Will continue sniffing.");
         }
         return false;
     }