Skip to content

Commit 343d86f

Browse files
Drop “actual HTML encoding”-related code
This change removes all code related to checking and using the “actual HTML encoding”, which no longer corresponds to current spec requirements.
1 parent 6462d3f commit 343d86f

File tree

5 files changed

+5
-69
lines changed

5 files changed

+5
-69
lines changed

src/nu/validator/htmlparser/extra/ChardetSniffer.java

Lines changed: 0 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -74,10 +74,6 @@ public static void main(String[] args) {
7474
public void Notify(String charsetName) {
7575
try {
7676
Encoding enc = Encoding.forName(charsetName);
77-
Encoding actual = enc.getActualHtmlEncoding();
78-
if (actual != null) {
79-
enc = actual;
80-
}
8177
returnValue = enc;
8278
} catch (UnsupportedCharsetException e) {
8379
returnValue = null;

src/nu/validator/htmlparser/extra/IcuDetectorSniffer.java

Lines changed: 0 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -53,10 +53,6 @@ public Encoding sniff() throws IOException {
5353
detector.setText(this);
5454
CharsetMatch match = detector.detect();
5555
Encoding enc = Encoding.forName(match.getName());
56-
Encoding actual = enc.getActualHtmlEncoding();
57-
if (actual != null) {
58-
enc = actual;
59-
}
6056
if (enc != Encoding.WINDOWS1252 //
6157
&& enc != Encoding.UTF16BE && enc != Encoding.UTF16LE) {
6258
return enc;

src/nu/validator/htmlparser/io/Driver.java

Lines changed: 3 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -480,15 +480,10 @@ protected Encoding whineAboutEncodingAndReturnActual(String encoding,
480480
+ encoding
481481
+ "\u201D is not widely supported. Better interoperability may be achieved by using \u201CUTF-8\u201D.");
482482
}
483-
Encoding actual = cs.getActualHtmlEncoding();
484-
if (actual == null) {
485-
return cs;
486-
} else {
487-
tokenizer.warn("Using \u201C" + actual.getCanonName()
488-
+ "\u201D instead of the declared encoding \u201C"
489-
+ encoding + "\u201D.");
490-
return actual;
483+
if (!canonName.equals(encoding)) {
484+
tokenizer.err(Encoding.msgNotPreferredName(encoding, canonName));
491485
}
486+
return cs;
492487
}
493488

494489
private class ReparseException extends SAXException {

src/nu/validator/htmlparser/io/Encoding.java

Lines changed: 0 additions & 51 deletions
Original file line number | Diff line number | Diff line change
@@ -85,8 +85,6 @@ public class Encoding {
8585

8686
private final boolean likelyEbcdic;
8787

88-
private Encoding actualHtmlEncoding = null;
89-
9088
static {
9189
Set<Encoding> encodings = new HashSet<Encoding>();
9290

@@ -119,46 +117,6 @@ asciiSuperset, isObscure(name), isShouldNot(name),
119117
UTF16BE = forName("utf-16be");
120118
UTF16LE = forName("utf-16le");
121119
WINDOWS1252 = forName("windows-1252");
122-
try {
123-
forName("iso-8859-1").actualHtmlEncoding = forName("windows-1252");
124-
} catch (UnsupportedCharsetException e) {
125-
}
126-
try {
127-
forName("iso-8859-9").actualHtmlEncoding = forName("windows-1254");
128-
} catch (UnsupportedCharsetException e) {
129-
}
130-
try {
131-
forName("iso-8859-11").actualHtmlEncoding = forName("windows-874");
132-
} catch (UnsupportedCharsetException e) {
133-
}
134-
try {
135-
forName("x-iso-8859-11").actualHtmlEncoding = forName("windows-874");
136-
} catch (UnsupportedCharsetException e) {
137-
}
138-
try {
139-
forName("tis-620").actualHtmlEncoding = forName("windows-874");
140-
} catch (UnsupportedCharsetException e) {
141-
}
142-
try {
143-
forName("gb_2312-80").actualHtmlEncoding = forName("gbk");
144-
} catch (UnsupportedCharsetException e) {
145-
}
146-
try {
147-
forName("gb2312").actualHtmlEncoding = forName("gbk");
148-
} catch (UnsupportedCharsetException e) {
149-
}
150-
try {
151-
encodingByLabel.put("x-x-big5", forName("big5"));
152-
} catch (UnsupportedCharsetException e) {
153-
}
154-
try {
155-
encodingByLabel.put("euc-kr", forName("windows-949"));
156-
} catch (UnsupportedCharsetException e) {
157-
}
158-
try {
159-
encodingByLabel.put("ks_c_5601-1987", forName("windows-949"));
160-
} catch (UnsupportedCharsetException e) {
161-
}
162120
}
163121

164122
private static boolean isObscure(String lowerCasePreferredIanaName) {
@@ -313,15 +271,6 @@ public CharsetEncoder newEncoder() {
313271
return charset.newEncoder();
314272
}
315273

316-
/**
317-
* Returns the actualHtmlEncoding.
318-
*
319-
* @return the actualHtmlEncoding
320-
*/
321-
public Encoding getActualHtmlEncoding() {
322-
return actualHtmlEncoding;
323-
}
324-
325274
protected static String msgLegacyEncoding(String name) {
326275
return "Legacy encoding \u201C" + name + "\u201D used. Documents must"
327276
+ " use UTF-8.";

src/nu/validator/htmlparser/io/MetaSniffer.java

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -191,8 +191,8 @@ protected boolean tryCharset(String encoding) throws SAXException {
191191
} else if (cs.isObscure()) {
192192
warn("The character encoding \u201C" + encoding + "\u201D is not widely supported. Better interoperability may be achieved by using \u201CUTF-8\u201D.");
193193
}
194-
Encoding actual = cs.getActualHtmlEncoding();
195-
if (actual == null) {
194+
if (!cs.getCanonName().equals(encoding)) {
195+
err(Encoding.msgNotCanonicalName(encoding, canonName));
196196
this.characterEncoding = cs;
197197
} else {
198198
warn("Using \u201C" + actual.getCanonName() + "\u201D instead of the declared encoding \u201C" + encoding + "\u201D.");

0 commit comments

Comments
 (0)