Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions test/e2e/adaptive-playwright-default/actor/.actor/actor.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
{
"actorSpecification": 1,
"name": "test-adaptive-playwright-default",
"version": "0.0",
"buildTag": "latest",
"env": null
}
7 changes: 7 additions & 0 deletions test/e2e/adaptive-playwright-default/actor/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
.idea
.DS_Store
node_modules
package-lock.json
apify_storage
crawlee_storage
storage
23 changes: 23 additions & 0 deletions test/e2e/adaptive-playwright-default/actor/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Stage 1 ("builder"): install the production dependencies on a plain Node.js 22 image.
FROM node:22 AS builder

# Copy the local `packages` directory and the package manifests from the build context.
# NOTE(review): no WORKDIR is set in this stage, so the relative destinations resolve
# against the base image's default working directory; the `COPY --from=builder /...`
# lines in the next stage assume that directory is `/` - confirm.
COPY /packages ./packages
COPY /package*.json ./
# Turn off the npm progress bar, install production dependencies only (no optional
# packages, no audit), then run `npm update`.
RUN npm --quiet set progress=false \
&& npm install --only=prod --no-optional --no-audit \
&& npm update

# Stage 2: the runtime image the actor is executed in.
FROM apify/actor-node-playwright-chrome:22-beta

# Remove the existing `node_modules` directory (relative to the working directory)
# and replace it with the one installed in the builder stage.
RUN rm -r node_modules
COPY --from=builder /node_modules ./node_modules
COPY --from=builder /packages ./packages
COPY --from=builder /package*.json ./
# Add the actor configuration directory and the entry point from the build context.
COPY /.actor ./.actor
COPY /main.js ./

# Print the installed packages and the Node.js / npm versions during the build.
# `|| true` keeps the build going even when `npm list` exits with a non-zero code.
RUN echo "Installed NPM packages:" \
&& (npm list --only=prod --no-optional --all || true) \
&& echo "Node.js version:" \
&& node --version \
&& echo "NPM version:" \
&& npm --version
52 changes: 52 additions & 0 deletions test/e2e/adaptive-playwright-default/actor/main.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
import { Actor } from 'apify';
import { AdaptivePlaywrightCrawler } from '@crawlee/playwright';
import { LogLevel } from '@apify/log';

// Build the local storage client only when STORAGE_IMPLEMENTATION is set to
// 'LOCAL'; in every other case `storage` stays undefined.
let storage;
if (process.env.STORAGE_IMPLEMENTATION === 'LOCAL') {
    const { ApifyStorageLocal } = await import('@apify/storage-local');
    storage = new ApifyStorageLocal();
}

await Actor.init({ storage });

const crawler = new AdaptivePlaywrightCrawler({
    /**
     * Custom comparator that looks only at `url` and `heading`, so a differing
     * `requestHandlerMode` never makes two results count as different.
     * Returns 'equal' only when both results hold exactly one dataset item
     * and those two fields match; returns 'different' otherwise.
     */
    resultComparator: (resultA, resultB) => {
        if (resultA.datasetItems.length !== 1 || resultB.datasetItems.length !== 1) {
            return 'different';
        }

        const { item: first } = resultA.datasetItems[0];
        const { item: second } = resultB.datasetItems[0];

        if (first.url !== second.url || first.heading !== second.heading) {
            return 'different';
        }

        return 'equal';
    },

    /**
     * Stores the URL, the text of the `h1` element and the detected handler
     * mode for each request, then enqueues links matching the examples glob.
     */
    requestHandler: async (context) => {
        const {
            request: { url },
        } = context;

        const headingElement = await context.querySelector('h1');
        const heading = headingElement.text();

        // Probe the page object: when `context.page.title()` succeeds the mode
        // is recorded as 'browser'; any error is taken to mean 'httpOnly'.
        let requestHandlerMode;
        try {
            await context.page.title();
            requestHandlerMode = 'browser';
        } catch {
            requestHandlerMode = 'httpOnly';
        }

        await context.pushData({ url, heading, requestHandlerMode });
        await context.enqueueLinks({ globs: ['**/next/examples/*'] });
    },
});

// Switch the crawler's logger to DEBUG level before the run starts.
crawler.log.setLevel(LogLevel.DEBUG);

// Single start URL; further pages come from the links the request handler enqueues.
const startUrls = ['https://crawlee.dev/js/docs/next/examples/accept-user-input'];
await crawler.run(startUrls);

// Pass the result of `Actor.isAtHome()` through as the `exit` option.
const shouldExit = Actor.isAtHome();
await Actor.exit({ exit: shouldExit });
29 changes: 29 additions & 0 deletions test/e2e/adaptive-playwright-default/actor/package.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
{
"name": "test-adaptive-playwright-default",
"version": "0.0.1",
"description": "Adaptive Playwright Test - Default",
"dependencies": {
"apify": "next",
"@apify/storage-local": "^2.1.3",
"@crawlee/basic": "file:./packages/basic-crawler",
"@crawlee/browser": "file:./packages/browser-crawler",
"@crawlee/browser-pool": "file:./packages/browser-pool",
"@crawlee/core": "file:./packages/core",
"@crawlee/memory-storage": "file:./packages/memory-storage",
"@crawlee/playwright": "file:./packages/playwright-crawler",
"@crawlee/types": "file:./packages/types",
"@crawlee/utils": "file:./packages/utils",
"playwright": "*"
},
"overrides": {
"apify": {
"@crawlee/core": "file:./packages/core",
"@crawlee/utils": "file:./packages/utils"
}
},
"scripts": {
"start": "node main.js"
},
"type": "module",
"license": "ISC"
}
19 changes: 19 additions & 0 deletions test/e2e/adaptive-playwright-default/test.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
import { initialize, getActorTestDir, runActor, expect, validateDataset } from '../tools.mjs';

// Resolve the actor directory for this test file and prepare it.
const actorDir = getActorTestDir(import.meta.url);
await initialize(actorDir);

// Run the actor and take the dataset items it produced.
const { datasetItems: items } = await runActor(actorDir, 16384);

/** Counts the dataset items that were recorded with the given handler mode. */
const countByMode = (mode) => items.filter((item) => item.requestHandlerMode === mode).length;

// Each entry pairs a lazily evaluated condition with its message, so every
// condition is computed right before its own `expect` call, in this order.
const checks = [
    [() => items.length > 15, 'Number of dataset items'],
    [() => validateDataset(items, ['url', 'heading', 'requestHandlerMode']), 'Dataset items validation'],
    [() => countByMode('browser') >= 1, 'The crawler should handle at least one request in the browser'],
    [() => countByMode('httpOnly') >= 5, 'The crawler should handle some requests in http-only mode'],
];

for (const [evaluate, message] of checks) {
    await expect(evaluate(), message);
}
Loading