Skip to content

Commit f1a68a6

Browse files
Merge pull request #353 from ilanbm/link_alternate_scrap
Fix #293 Crawl `<link rel="alternate">`
2 parents: d21328d + 0dda17e; commit: f1a68a6

File tree

3 files changed: +5 additions, -3 deletions (lines changed)

src/puppeteer_utils.js

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -108,7 +108,7 @@ const enableLogging = opt => {
108108
const getLinks = async opt => {
109109
const { page } = opt;
110110
const anchors = await page.evaluate(() =>
111-
Array.from(document.querySelectorAll("a")).map(anchor => {
111+
Array.from(document.querySelectorAll("a,link[rel='alternate']")).map(anchor => {
112112
if (anchor.href.baseVal) {
113113
const a = document.createElement("a");
114114
a.href = anchor.href.baseVal;

tests/examples/many-pages/index.html

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
<head>
44
<meta charset="utf-8">
5+
<link rel="alternate" href='/5' />
56
</head>
67

78
<body>

tests/run.test.js

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -157,13 +157,14 @@ describe("many pages", () => {
157157
} = mockFs();
158158
beforeAll(() => snapRun(fs, { source }));
159159
test("crawls all links and saves as index.html in separate folders", () => {
160-
expect(filesCreated()).toEqual(6);
160+
expect(filesCreated()).toEqual(7);
161161
expect(names()).toEqual(
162162
expect.arrayContaining([
163163
`/${source}/1/index.html`, // without slash in the end
164164
`/${source}/2/index.html`, // with slash in the end
165165
`/${source}/3/index.html`, // ignores hash
166-
`/${source}/4/index.html` // ignores query
166+
`/${source}/4/index.html`, // ignores query
167+
`/${source}/5/index.html`, // link rel="alternate"
167168
])
168169
);
169170
});

0 commit comments

Comments (0)