-
Notifications
You must be signed in to change notification settings - Fork 1.3k
Expand file tree
/
Copy pathmain.js
More file actions
52 lines (41 loc) · 1.56 KB
/
main.js
File metadata and controls
52 lines (41 loc) · 1.56 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
import { Actor } from 'apify';
import { AdaptivePlaywrightCrawler } from '@crawlee/playwright';
import { LogLevel } from '@apify/log';
// Resolve the storage backend before starting the actor. The local storage
// package is only loaded when STORAGE_IMPLEMENTATION is set to 'LOCAL';
// otherwise `storage` stays undefined and is passed to Actor.init() as such.
let storage;
if (process.env.STORAGE_IMPLEMENTATION === 'LOCAL') {
    const storageLocalModule = await import('@apify/storage-local');
    storage = new storageLocalModule.ApifyStorageLocal();
}

await Actor.init({ storage });
const crawler = new AdaptivePlaywrightCrawler({
    // Custom comparator that deliberately leaves `requestHandlerMode` out of the
    // comparison: two results are 'equal' only when each holds exactly one
    // dataset item and those items agree on `url` and `heading`.
    resultComparator: (resultA, resultB) => {
        // Anything other than one item on each side is treated as a mismatch.
        if (resultA.datasetItems.length !== 1 || resultB.datasetItems.length !== 1) {
            return 'different';
        }

        const firstItem = resultA.datasetItems[0].item;
        const secondItem = resultB.datasetItems[0].item;
        const isSameContent = firstItem.url === secondItem.url && firstItem.heading === secondItem.heading;

        return isSameContent ? 'equal' : 'different';
    },

    // Stores the page URL, the text of its `h1`, and the detected handler mode,
    // then enqueues further links matching the examples glob.
    requestHandler: async (context) => {
        const { url } = context.request;

        const headingElement = await context.querySelector('h1');
        const heading = headingElement.text();

        // Mode detection: if reading the page title succeeds the run is reported
        // as 'browser'; any error thrown while doing so is reported as 'httpOnly'.
        let requestHandlerMode = 'browser';
        try {
            await context.page.title();
        } catch {
            requestHandlerMode = 'httpOnly';
        }

        await context.pushData({ url, heading, requestHandlerMode });

        await context.enqueueLinks({
            globs: ['**/next/examples/*'],
        });
    },
});
// Switch the crawler's logger to DEBUG level before the run starts.
crawler.log.setLevel(LogLevel.DEBUG);

// Single start URL for the crawl.
const startUrls = ['https://crawlee.dev/js/docs/next/examples/accept-user-input'];
await crawler.run(startUrls);

// The `exit` option is driven by whatever Actor.isAtHome() reports.
const shouldExit = Actor.isAtHome();
await Actor.exit({ exit: shouldExit });