Skip to content

Commit f42831b

Browse files
committed
Fix testing URLs with HTML entities in them
1 parent d04a8ea commit f42831b

File tree

3 files changed: +62 -56 lines changed

test/fixtures-html-validate-should-fail.mjs

Lines changed: 17 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -27,20 +27,20 @@ const requiredResults = {
2727
severity: 2,
2828
message:
2929
"external link is broken with status 500: https://freehorses.example.com/free-horses-on-1998-04-01-only.html",
30-
offset: 196,
30+
offset: 271,
3131
line: 9,
32-
column: 6,
32+
column: 81,
3333
size: 1,
3434
selector: "html > body > a:nth-child(1)",
3535
ruleUrl: "https://github.com/fulldecent/github-pages-template/#external-links",
3636
},
3737
{
3838
ruleId: "pacific-medical-training/external-links",
3939
severity: 2,
40-
message: "external link is broken with status 500: https://----.example.com",
41-
offset: 303,
40+
message: "external link is broken with status 500: https://----.example.com?a=b&c=d",
41+
offset: 348,
4242
line: 10,
43-
column: 6,
43+
column: 51,
4444
size: 1,
4545
selector: "html > body > a:nth-child(2)",
4646
ruleUrl: "https://github.com/fulldecent/github-pages-template/#external-links",
@@ -49,9 +49,9 @@ const requiredResults = {
4949
ruleId: "pacific-medical-training/external-links",
5050
severity: 2,
5151
message: "external link is broken with status 500: https://-..-..-.-.-",
52-
offset: 373,
52+
offset: 413,
5353
line: 11,
54-
column: 6,
54+
column: 34,
5555
size: 1,
5656
selector: "html > body > a:nth-child(3)",
5757
ruleUrl: "https://github.com/fulldecent/github-pages-template/#external-links",
@@ -61,9 +61,9 @@ const requiredResults = {
6161
severity: 2,
6262
message:
6363
"external link https://httpbin.org/redirect-to?url=https://example.com&status_code=301 redirects to: https://example.com",
64-
offset: 563,
64+
offset: 655,
6565
line: 14,
66-
column: 6,
66+
column: 86,
6767
size: 1,
6868
selector: "html > body > a:nth-child(6)",
6969
ruleUrl: "https://github.com/fulldecent/github-pages-template/#external-links",
@@ -85,26 +85,26 @@ const requiredResults = {
8585
],
8686
"test/fixtures/ensure-https.html": [
8787
{
88-
ruleId: "pacific-medical-training/external-links",
88+
ruleId: "pacific-medical-training/https-links",
8989
severity: 2,
90-
message: "external link http://en.wikipedia.org/wiki/Horse redirects to: https://en.wikipedia.org/wiki/Horse",
90+
message: "external link is insecure and accessible via HTTPS: http://en.wikipedia.org/wiki/Horse",
9191
offset: 196,
9292
line: 9,
9393
column: 6,
9494
size: 1,
9595
selector: "html > body > a",
96-
ruleUrl: "https://github.com/fulldecent/github-pages-template/#external-links",
96+
ruleUrl: "https://github.com/fulldecent/github-pages-template/#https-links",
9797
},
9898
{
99-
ruleId: "pacific-medical-training/https-links",
99+
ruleId: "pacific-medical-training/external-links",
100100
severity: 2,
101-
message: "external link is insecure and accessible via HTTPS: http://en.wikipedia.org/wiki/Horse",
102-
offset: 196,
101+
message: "external link http://en.wikipedia.org/wiki/Horse redirects to: https://en.wikipedia.org/wiki/Horse",
102+
offset: 239,
103103
line: 9,
104-
column: 6,
104+
column: 49,
105105
size: 1,
106106
selector: "html > body > a",
107-
ruleUrl: "https://github.com/fulldecent/github-pages-template/#https-links",
107+
ruleUrl: "https://github.com/fulldecent/github-pages-template/#external-links",
108108
},
109109
],
110110
"test/fixtures/using-jquery.html": [

test/fixtures/external-link-broken.html

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
</head>
88
<body>
99
<a href="https://freehorses.example.com/free-horses-on-1998-04-01-only.html">Get free horses here!</a>
10-
<a href="https://----.example.com">This server does not exist</a>
10+
<a href="https://----.example.com?a=b&amp;c=d">This server does not exist</a>
1111
<a href="https://-..-..-.-.-">This URL is bad</a>
1212
<a href="tel:12345678999">12345678999</a>
1313
<a href="https://dont-check-this.example.com">https://dont-check-this.example.com</a>

test/plugin.html-validate.external-links.mjs

Lines changed: 44 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ export default class ExternalLinksRule extends Rule {
5151
setup() {
5252
this.db = this.setupDatabase();
5353
this.skipUrlsRegex = this.loadSkipUrls();
54-
this.on("dom:ready", this.domReady.bind(this));
54+
this.on("tag:ready", this.tagReady.bind(this));
5555
}
5656

5757
setupDatabase() {
@@ -149,48 +149,54 @@ export default class ExternalLinksRule extends Rule {
149149
}
150150
}
151151

152-
domReady({ document }) {
153-
const aElements = document.getElementsByTagName("a");
154-
for (const aElement of aElements) {
155-
if (!aElement.hasAttribute("href")) {
156-
continue;
157-
}
152+
// Check for href external links
153+
tagReady({ target }) {
154+
// TODO: also check image.src, link.href, script.src
155+
if (target.tagName !== "a") {
156+
return;
157+
}
158158

159-
const href = aElement.getAttribute("href").value;
160-
if (!href || !/^https?:\/\//i.test(href)) {
161-
continue;
162-
}
159+
if (!target.hasAttribute("href")) {
160+
return;
161+
}
163162

164-
// Skip URLs that match the skip URLs regex
165-
const url = href;
166-
if (this.skipUrlsRegex.some((regex) => regex.test(url))) {
167-
continue;
168-
}
163+
// Decode the URL from the href attribute, see https://gitlab.com/html-validate/html-validate/-/issues/218
164+
// Quickly replace a few common HTML entities, TODO use a real approach for this
165+
const url = target.getAttribute("href").value.replace(/&amp;/g, "&").replace(/&gt;/g, ">").replace(/&lt;/g, "<");
169166

170-
// Use cache if the URL is in there
171-
const row = this.db.prepare("SELECT * FROM urls WHERE url = ?").get(url);
172-
if (row) {
173-
if (row.redirect_to) {
174-
this.report({
175-
node: aElement,
176-
message: `external link ${url} redirects to: ${row.redirect_to}`,
177-
});
178-
continue;
179-
}
180-
if (row.status < 200 || row.status >= 300) {
181-
this.report({
182-
node: aElement,
183-
message: `external link is broken with status ${row.status}: ${url}`,
184-
});
185-
continue;
186-
}
187-
}
167+
if (/^https?:\/\//i.test(url) === false) {
168+
return;
169+
}
188170

189-
if (PROXY_URL !== null) {
190-
this.checkWithProxy(url, aElement);
191-
} else {
192-
this.check(url, aElement);
171+
if (this.skipUrlsRegex.some((regex) => regex.test(url))) {
172+
return;
173+
}
174+
175+
console.log(`Checking external link: ${url}`);
176+
177+
// Use cache if the URL is in there
178+
const row = this.db.prepare("SELECT * FROM urls WHERE url = ?").get(url);
179+
if (row) {
180+
if (row.redirect_to) {
181+
this.report({
182+
node: target,
183+
message: `external link ${url} redirects to: ${row.redirect_to}`,
184+
});
185+
return;
193186
}
187+
if (row.status < 200 || row.status >= 300) {
188+
this.report({
189+
node: target,
190+
message: `external link is broken with status ${row.status}: ${url}`,
191+
});
192+
return;
193+
}
194+
}
195+
196+
if (PROXY_URL !== null) {
197+
this.checkWithProxy(url, target);
198+
} else {
199+
this.check(url, target);
194200
}
195201
}
196202
}

0 commit comments

Comments
 (0)