Skip to content

Commit ff4258b

Browse files
Drop “actual HTML encoding”-related code
This change removes all code related to checking and using the “actual HTML encoding”, which no longer corresponds to current spec requirements.
1 parent 6413b44 commit ff4258b

File tree

5 files changed

+5
-69
lines changed

5 files changed

+5
-69
lines changed

src/nu/validator/htmlparser/extra/ChardetSniffer.java

Lines changed: 0 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -74,10 +74,6 @@ public static void main(String[] args) {
7474
public void Notify(String charsetName) {
7575
try {
7676
Encoding enc = Encoding.forName(charsetName);
77-
Encoding actual = enc.getActualHtmlEncoding();
78-
if (actual != null) {
79-
enc = actual;
80-
}
8177
returnValue = enc;
8278
} catch (UnsupportedCharsetException e) {
8379
returnValue = null;

src/nu/validator/htmlparser/extra/IcuDetectorSniffer.java

Lines changed: 0 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -53,10 +53,6 @@ public Encoding sniff() throws IOException {
5353
detector.setText(this);
5454
CharsetMatch match = detector.detect();
5555
Encoding enc = Encoding.forName(match.getName());
56-
Encoding actual = enc.getActualHtmlEncoding();
57-
if (actual != null) {
58-
enc = actual;
59-
}
6056
if (enc != Encoding.WINDOWS1252 //
6157
&& enc != Encoding.UTF16BE && enc != Encoding.UTF16LE) {
6258
return enc;

src/nu/validator/htmlparser/io/Driver.java

Lines changed: 3 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -497,15 +497,10 @@ protected Encoding whineAboutEncodingAndReturnActual(String encoding,
497497
+ encoding
498498
+ "\u201D is not widely supported. Better interoperability may be achieved by using \u201CUTF-8\u201D.");
499499
}
500-
Encoding actual = cs.getActualHtmlEncoding();
501-
if (actual == null) {
502-
return cs;
503-
} else {
504-
tokenizer.warn("Using \u201C" + actual.getCanonName()
505-
+ "\u201D instead of the declared encoding \u201C"
506-
+ encoding + "\u201D.");
507-
return actual;
500+
if (!canonName.equals(encoding)) {
501+
tokenizer.err(Encoding.msgNotPreferredName(encoding, canonName));
508502
}
503+
return cs;
509504
}
510505

511506
private class ReparseException extends SAXException {

src/nu/validator/htmlparser/io/Encoding.java

Lines changed: 0 additions & 51 deletions
Original file line number | Diff line number | Diff line change
@@ -310,8 +310,6 @@ public class Encoding {
310310

311311
private final boolean likelyEbcdic;
312312

313-
private Encoding actualHtmlEncoding = null;
314-
315313
static {
316314
Set<Encoding> encodings = new HashSet<Encoding>();
317315

@@ -345,46 +343,6 @@ asciiSuperset, isObscure(name),
345343
UTF16BE = forName("utf-16be");
346344
UTF16LE = forName("utf-16le");
347345
WINDOWS1252 = forName("windows-1252");
348-
try {
349-
forName("iso-8859-1").actualHtmlEncoding = forName("windows-1252");
350-
} catch (UnsupportedCharsetException e) {
351-
}
352-
try {
353-
forName("iso-8859-9").actualHtmlEncoding = forName("windows-1254");
354-
} catch (UnsupportedCharsetException e) {
355-
}
356-
try {
357-
forName("iso-8859-11").actualHtmlEncoding = forName("windows-874");
358-
} catch (UnsupportedCharsetException e) {
359-
}
360-
try {
361-
forName("x-iso-8859-11").actualHtmlEncoding = forName("windows-874");
362-
} catch (UnsupportedCharsetException e) {
363-
}
364-
try {
365-
forName("tis-620").actualHtmlEncoding = forName("windows-874");
366-
} catch (UnsupportedCharsetException e) {
367-
}
368-
try {
369-
forName("gb_2312-80").actualHtmlEncoding = forName("gbk");
370-
} catch (UnsupportedCharsetException e) {
371-
}
372-
try {
373-
forName("gb2312").actualHtmlEncoding = forName("gbk");
374-
} catch (UnsupportedCharsetException e) {
375-
}
376-
try {
377-
encodingByLabel.put("x-x-big5", forName("big5"));
378-
} catch (UnsupportedCharsetException e) {
379-
}
380-
try {
381-
encodingByLabel.put("euc-kr", forName("windows-949"));
382-
} catch (UnsupportedCharsetException e) {
383-
}
384-
try {
385-
encodingByLabel.put("ks_c_5601-1987", forName("windows-949"));
386-
} catch (UnsupportedCharsetException e) {
387-
}
388346
}
389347

390348
private static boolean isObscure(String lowerCasePreferredIanaName) {
@@ -551,15 +509,6 @@ public CharsetEncoder newEncoder() {
551509
return charset.newEncoder();
552510
}
553511

554-
/**
555-
* Returns the actualHtmlEncoding.
556-
*
557-
* @return the actualHtmlEncoding
558-
*/
559-
public Encoding getActualHtmlEncoding() {
560-
return actualHtmlEncoding;
561-
}
562-
563512
protected static String msgLegacyEncoding(String name) {
564513
return "Legacy encoding \u201C" + name + "\u201D used. Documents must"
565514
+ " use UTF-8.";

src/nu/validator/htmlparser/io/MetaSniffer.java

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -191,8 +191,8 @@ protected boolean tryCharset(String encoding) throws SAXException {
191191
} else if (cs.isObscure()) {
192192
warn("The character encoding \u201C" + encoding + "\u201D is not widely supported. Better interoperability may be achieved by using \u201CUTF-8\u201D.");
193193
}
194-
Encoding actual = cs.getActualHtmlEncoding();
195-
if (actual == null) {
194+
if (!cs.getCanonName().equals(encoding)) {
195+
err(Encoding.msgNotCanonicalName(encoding, canonName));
196196
this.characterEncoding = cs;
197197
} else {
198198
warn("Using \u201C" + actual.getCanonName() + "\u201D instead of the declared encoding \u201C" + encoding + "\u201D.");

0 commit comments

Comments
 (0)