Skip to content

Commit 829499f

Browse files
authored
Infrastructure: Fix failing link-checker on GitHub README links (#2931)
Closes #2907. Adds support for parsing GitHub's react-partial data when it is needed to find a URI's fragment.
1 parent 16dbe83 commit 829499f

File tree

2 files changed

+82
-10
lines changed

2 files changed

+82
-10
lines changed

.link-checker.js

Lines changed: 50 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,29 @@
1+
const HTMLParser = require('node-html-parser');
2+
3+
/**
 * Searches `obj` depth-first for the first own property named `attribute`
 * and returns its value.
 *
 * The object itself is checked first; if it does not own the property, each
 * array element or own property value is searched recursively in order, and
 * the first result that is not `undefined` wins.
 *
 * @param {*} obj - Value to search. Anything that is not a non-null object
 *   yields `undefined`.
 * @param {string} attribute - Property name to look for.
 * @returns {*} The first matching value, or `undefined` when nothing matches.
 */
const getAttributeValue = (obj, attribute) => {
  if (typeof obj !== 'object' || obj === null) return undefined;

  // Go through Object.prototype rather than obj.hasOwnProperty: parsed JSON
  // may carry its own "hasOwnProperty" key, and prototype-less objects have
  // no such method at all. Either case would otherwise throw a TypeError.
  if (Object.prototype.hasOwnProperty.call(obj, attribute)) {
    return obj[attribute];
  }

  // Arrays are walked element by element; plain objects by their own values.
  const children = Array.isArray(obj) ? obj : Object.values(obj);
  for (const child of children) {
    const attributeValue = getAttributeValue(child, attribute);
    if (attributeValue !== undefined) return attributeValue;
  }

  return undefined;
};
26+
127
module.exports = {
228
filesToIgnore: [
329
// For example:
@@ -18,13 +44,34 @@ module.exports = {
1844
{
1945
name: 'github',
2046
pattern: /^https:\/\/github\.com\/.*/,
21-
matchHash: (ids, hash) =>
22-
ids.includes(hash) || ids.includes(`user-content-${hash}`),
47+
matchHash: (ids, hash, { reactPartial }) => {
48+
if (reactPartial) {
49+
// This is where the react-partial keeps data about READMEs and other *.md files
50+
const richText = getAttributeValue(reactPartial, 'richText');
51+
if (richText !== undefined) {
52+
const html = HTMLParser.parse(richText);
53+
const githubIds = html
54+
.querySelectorAll('[id]')
55+
.map((idElement) => idElement.getAttribute('id'));
56+
return githubIds.includes(`user-content-${hash}`);
57+
}
58+
}
59+
return ids.includes(hash) || ids.includes(`user-content-${hash}`);
60+
},
61+
getPartial: (html) => {
62+
return html
63+
.querySelectorAll('react-partial')
64+
.filter(
65+
(partialElement) =>
66+
partialElement.getAttribute('partial-name') === 'repos-overview' // This is the partial that handles the READMEs
67+
)
68+
.flatMap((element) => element.getElementsByTagName('script'))
69+
.map((element) => JSON.parse(element.innerHTML))[0];
70+
},
2371
},
2472
],
2573
ignoreHashesOnExternalPagesMatchingRegex: [
2674
// Some hash links are resolved with JS and are therefore difficult to check algorithmically
2775
/^https:\/\/html\.spec\.whatwg\.org\/multipage\//,
28-
'https://github.com/w3c/aria-practices#code-conformance', // TODO: Remove when #2907 is resolved
2976
],
3077
};

scripts/link-checker.js

Lines changed: 32 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,24 @@ async function checkLinks() {
3333
return getLineNumber;
3434
};
3535

36-
const checkPathForHash = (hrefOrSrc, ids = [], hash) => {
36+
// Returns the first entry of options.hashCheckHandlers whose `pattern`
// matches the given link, or undefined when no handler applies.
const getHashCheckHandler = (hrefOrSrc) => {
  const matchesLink = ({ pattern }) => pattern.test(hrefOrSrc);
  return options.hashCheckHandlers.find(matchesLink);
};
41+
42+
// Asks the hash-check handler that matches `hrefOrSrc` (if any) to extract
// its partial data from the parsed page `html`.
// Returns undefined when no handler matches or when the matching handler
// does not provide a getPartial function.
const getReactPartial = (hrefOrSrc, html) => {
  const handler = getHashCheckHandler(hrefOrSrc);
  // getPartial is treated as optional so that a handler defining only
  // matchHash does not cause a TypeError here.
  if (handler && typeof handler.getPartial === 'function') {
    return handler.getPartial(html);
  }
  return undefined;
};
47+
48+
const checkPathForHash = (
49+
hrefOrSrc,
50+
ids = [],
51+
hash,
52+
{ reactPartial } = {}
53+
) => {
3754
// On some websites, the ids may not exactly match the hash included
3855
// in the link.
3956
// For e.g. GitHub will prepend client facing ids with their own
@@ -43,10 +60,8 @@ async function checkLinks() {
4360
// as being 'user-content-foo-bar' for its own page processing purposes.
4461
//
4562
// See https://github.com/w3c/aria-practices/issues/2809
46-
const handler = options.hashCheckHandlers.find(({ pattern }) =>
47-
pattern.test(hrefOrSrc)
48-
);
49-
if (handler) return handler.matchHash(ids, hash);
63+
const handler = getHashCheckHandler(hrefOrSrc);
64+
if (handler) return handler.matchHash(ids, hash, { reactPartial });
5065
else return ids.includes(hash);
5166
};
5267

@@ -149,7 +164,15 @@ async function checkLinks() {
149164
.querySelectorAll('[id]')
150165
.map((idElement) => idElement.getAttribute('id'));
151166

152-
return { ok: response.ok, status: response.status, ids };
167+
// Handle GitHub README links.
168+
// These links are stored within a react-partial element
169+
const reactPartial = getReactPartial(hrefOrSrc, html);
170+
return {
171+
ok: response.ok,
172+
status: response.status,
173+
ids,
174+
reactPartial,
175+
};
153176
} catch (error) {
154177
return {
155178
errorMessage:
@@ -305,7 +328,9 @@ async function checkLinks() {
305328
if (
306329
!isHashCheckingDisabled &&
307330
hash &&
308-
!checkPathForHash(hrefOrSrc, pageData.ids, hash)
331+
!checkPathForHash(hrefOrSrc, pageData.ids, hash, {
332+
reactPartial: pageData.reactPartial,
333+
})
309334
) {
310335
consoleError(
311336
`Found broken external link on ${htmlPath}:${lineNumber}:${columnNumber}, ` +

0 commit comments

Comments
 (0)