Skip to content

Commit 80f0f5f

Browse files
committed
fix: hopefully avoid excessive request queueing
1 parent 67404ed commit 80f0f5f

File tree

3 files changed

+60
-45
lines changed

3 files changed

+60
-45
lines changed

checker-cheerio/src/lib/handlePage.js

Lines changed: 20 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -56,20 +56,25 @@ export async function handlePage(input, requestQueue, state, { request, $, body,
5656
});
5757

5858
if (input.linkSelector) {
59-
await utils.enqueueLinks({
60-
$,
61-
selector: input.linkSelector,
62-
pseudoUrls: input.pseudoUrls.map(
63-
(req) => new PseudoUrl(req.purl, {
64-
url: request.url,
65-
headers: req.headers,
66-
method: req.method,
67-
payload: req.payload,
68-
userData: req.userData,
69-
}),
70-
),
71-
requestQueue,
72-
baseUrl: request.loadedUrl,
73-
});
59+
const info = await requestQueue.getInfo();
60+
61+
// Only enqueue more links while the queue's total request count is still below maxNumberOfPagesCheckedPerDomain (this should avoid excessive queue writes)
62+
if (input.maxNumberOfPagesCheckedPerDomain > info.totalRequestCount) {
63+
await utils.enqueueLinks({
64+
$,
65+
selector: input.linkSelector,
66+
pseudoUrls: input.pseudoUrls.map(
67+
(req) => new PseudoUrl(req.purl, {
68+
url: request.url,
69+
headers: req.headers,
70+
method: req.method,
71+
payload: req.payload,
72+
userData: req.userData,
73+
}),
74+
),
75+
requestQueue,
76+
baseUrl: request.loadedUrl,
77+
});
78+
}
7479
}
7580
}

checker-playwright/src/lib/handlePage.js

Lines changed: 20 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -81,20 +81,25 @@ export async function handlePage(input, requestQueue, state, { request, response
8181
});
8282

8383
if (input.linkSelector) {
84-
await utils.enqueueLinks({
85-
$,
86-
selector: input.linkSelector,
87-
pseudoUrls: input.pseudoUrls.map(
88-
(req) => new PseudoUrl(req.purl, {
89-
url: request.url,
90-
headers: req.headers,
91-
method: req.method,
92-
payload: req.payload,
93-
userData: req.userData,
94-
}),
95-
),
96-
requestQueue,
97-
baseUrl: request.loadedUrl,
98-
});
84+
const info = await requestQueue.getInfo();
85+
86+
// Only enqueue more links while the queue's total request count is still below maxNumberOfPagesCheckedPerDomain (this should avoid excessive queue writes)
87+
if (input.maxNumberOfPagesCheckedPerDomain > info.totalRequestCount) {
88+
await utils.enqueueLinks({
89+
$,
90+
selector: input.linkSelector,
91+
pseudoUrls: input.pseudoUrls.map(
92+
(req) => new PseudoUrl(req.purl, {
93+
url: request.url,
94+
headers: req.headers,
95+
method: req.method,
96+
payload: req.payload,
97+
userData: req.userData,
98+
}),
99+
),
100+
requestQueue,
101+
baseUrl: request.loadedUrl,
102+
});
103+
}
99104
}
100105
}

checker-puppeteer/src/lib/handlePage.js

Lines changed: 20 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -75,20 +75,25 @@ export async function handlePage(input, requestQueue, state, { request, response
7575
});
7676

7777
if (input.linkSelector) {
78-
await utils.enqueueLinks({
79-
$,
80-
selector: input.linkSelector,
81-
pseudoUrls: input.pseudoUrls.map(
82-
(req) => new PseudoUrl(req.purl, {
83-
url: request.url,
84-
headers: req.headers,
85-
method: req.method,
86-
payload: req.payload,
87-
userData: req.userData,
88-
}),
89-
),
90-
requestQueue,
91-
baseUrl: request.loadedUrl,
92-
});
78+
const info = await requestQueue.getInfo();
79+
80+
// Only enqueue more links while the queue's total request count is still below maxNumberOfPagesCheckedPerDomain (this should avoid excessive queue writes)
81+
if (input.maxNumberOfPagesCheckedPerDomain > info.totalRequestCount) {
82+
await utils.enqueueLinks({
83+
$,
84+
selector: input.linkSelector,
85+
pseudoUrls: input.pseudoUrls.map(
86+
(req) => new PseudoUrl(req.purl, {
87+
url: request.url,
88+
headers: req.headers,
89+
method: req.method,
90+
payload: req.payload,
91+
userData: req.userData,
92+
}),
93+
),
94+
requestQueue,
95+
baseUrl: request.loadedUrl,
96+
});
97+
}
9398
}
9499
}

0 commit comments

Comments
 (0)