Skip to content

Commit 28f680f

Browse files
committed
Fix detection of short comments <!-->
Closes #4.
1 parent 0c03ceb commit 28f680f

File tree

3 files changed

+40
-6
lines changed

3 files changed

+40
-6
lines changed

lib/html-encoding-sniffer.js

Lines changed: 5 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -27,8 +27,8 @@ function prescanMetaCharset(buffer) {
2727
let c = buffer[i];
2828
if (c === 0x3C) {
2929
// "<"
30-
let c1 = buffer[i + 1];
31-
let c2 = buffer[i + 2];
30+
const c1 = buffer[i + 1];
31+
const c2 = buffer[i + 2];
3232
const c3 = buffer[i + 3];
3333
const c4 = buffer[i + 4];
3434
const c5 = buffer[i + 5];
@@ -37,11 +37,10 @@ function prescanMetaCharset(buffer) {
3737
i += 4;
3838
for (; i < l; i++) {
3939
c = buffer[i];
40-
c1 = buffer[i + 1];
41-
c2 = buffer[i + 2];
40+
const cMinus1 = buffer[i - 1];
41+
const cMinus2 = buffer[i - 2];
4242
// --> (comment end)
43-
if (c === 0x2D && c1 === 0x2D && c2 === 0x3E) {
44-
i += 2;
43+
if (c === 0x3E && cMinus1 === 0x2D && cMinus2 === 0x2D) {
4544
break;
4645
}
4746
}
Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,7 @@
1+
<!DOCTYPE html>
2+
<html>
3+
<head>
4+
<!--><meta charset="iso-8859-2">
5+
</head>
6+
<body></body>
7+
</html>

test/tests.js

Lines changed: 28 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -205,6 +205,34 @@ describe("A file with no BOM and a ><meta charset>", () => {
205205
});
206206
});
207207

208+
describe("A file with no BOM and a <meta charset> preceeded by a short comment <!-->", () => {
209+
const buffer = read("no-bom-charset-short-comment.html");
210+
211+
it("should sniff as the charset value, given no options", () => {
212+
const sniffedEncoding = htmlEncodingSniffer(buffer);
213+
214+
assert.strictEqual(sniffedEncoding, "ISO-8859-2");
215+
});
216+
217+
it("should sniff as the transport layer encoding, given that", () => {
218+
const sniffedEncoding = htmlEncodingSniffer(buffer, {
219+
transportLayerEncodingLabel: "windows-1251",
220+
defaultEncoding: "ISO-8859-16"
221+
});
222+
223+
assert.strictEqual(sniffedEncoding, "windows-1251");
224+
});
225+
226+
227+
it("should sniff as the charset value, given only a default encoding", () => {
228+
const sniffedEncoding = htmlEncodingSniffer(buffer, {
229+
defaultEncoding: "ISO-8859-16"
230+
});
231+
232+
assert.strictEqual(sniffedEncoding, "ISO-8859-2");
233+
});
234+
});
235+
208236
for (const utf16Encoding of ["utf-16be", "utf-16", "utf-16le"]) {
209237
describe(`A file with a BOM and a <meta charset> of ${utf16Encoding}`, () => {
210238
const buffer = read(`no-bom-charset-${utf16Encoding}.html`);

0 commit comments

Comments
 (0)