Skip to content

Commit 6c76d32

Browse files
authored
chore: remove direct dependency on idcac-playwright (#3259)
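Due to licensing issues, Crawlee now imports `idcac-playwright` dynamically instead of declaring it as a direct dependency; it is listed as an optional peer dependency. If the package is not installed, calling the `closeCookieModals` util method logs a warning with installation instructions and otherwise does nothing.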
1 parent 59f2074 commit 6c76d32

File tree

6 files changed

+93
-6
lines changed

6 files changed

+93
-6
lines changed

packages/crawlee/package.json

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,10 +70,14 @@
7070
"tslib": "^2.4.0"
7171
},
7272
"peerDependencies": {
73+
"idcac-playwright": "*",
7374
"playwright": "*",
7475
"puppeteer": "*"
7576
},
7677
"peerDependenciesMeta": {
78+
"idcac-playwright": {
79+
"optional": true
80+
},
7781
"playwright": {
7882
"optional": true
7983
},

packages/playwright-crawler/package.json

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,6 @@
6262
"@crawlee/types": "3.15.3",
6363
"@crawlee/utils": "3.15.3",
6464
"cheerio": "1.0.0-rc.12",
65-
"idcac-playwright": "^0.1.2",
6665
"jquery": "^3.6.0",
6766
"lodash.isequal": "^4.5.0",
6867
"ml-logistic-regression": "^2.0.0",
@@ -72,9 +71,13 @@
7271
"tslib": "^2.4.0"
7372
},
7473
"peerDependencies": {
74+
"idcac-playwright": "^0.1.2",
7575
"playwright": "*"
7676
},
7777
"peerDependenciesMeta": {
78+
"idcac-playwright": {
79+
"optional": true
80+
},
7881
"playwright": {
7982
"optional": true
8083
}

packages/playwright-crawler/src/internals/utils/playwright-utils.ts

Lines changed: 35 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,6 @@ import {
3333
import type { BatchAddRequestsResult } from '@crawlee/types';
3434
import { type CheerioRoot, type Dictionary, expandShadowRoots, sleep } from '@crawlee/utils';
3535
import * as cheerio from 'cheerio';
36-
import { getInjectableScript as getCookieClosingScript } from 'idcac-playwright';
3736
import ow from 'ow';
3837
import type { Page, Response, Route } from 'playwright';
3938

@@ -656,10 +655,35 @@ export async function parseWithCheerio(
656655
return cheerio.load(pageContent);
657656
}
658657

658+
/** Lazily loaded `idcac-playwright` module; stays `null` until a dynamic import succeeds. */
let idcacPlaywright: null | { getInjectableScript: () => string } = null;
/** Set after the first failed import so the install hint is logged once, not on every call. */
let idcacPlaywrightImportFailed = false;

/**
 * Loads the optional `idcac-playwright` peer dependency on first use.
 *
 * A successful import is cached in `idcacPlaywright`. A failed import is remembered too,
 * so repeated calls neither retry the import nor repeat the warning.
 *
 * @returns The loaded module, or `null` when it could not be imported.
 */
async function getIdcacPlaywright() {
    if (idcacPlaywright || idcacPlaywrightImportFailed) return idcacPlaywright;

    try {
        idcacPlaywright = await import('idcac-playwright');
    } catch (error: unknown) {
        idcacPlaywrightImportFailed = true;
        // A rejected import is not guaranteed to carry an `Error`; avoid printing `undefined`.
        const originalMessage = error instanceof Error ? error.message : String(error);
        log.warning(`Failed to import 'idcac-playwright'.

We recently made idcac-playwright an optional dependency due to licensing issues.
To use this feature, please install it manually by running

npm install idcac-playwright

Original error message follows:

${originalMessage}
`);
    }
    return idcacPlaywright;
}
679+
659680
export async function closeCookieModals(page: Page): Promise<void> {
660681
ow(page, ow.object.validate(validators.browserPage));
682+
const idcac = await getIdcacPlaywright();
661683

662-
await page.evaluate(getCookieClosingScript());
684+
if (idcac?.getInjectableScript()) {
685+
await page.evaluate(idcac.getInjectableScript());
686+
}
663687
}
664688

665689
interface HandleCloudflareChallengeOptions {
@@ -993,6 +1017,15 @@ export interface PlaywrightContextUtils {
9931017

9941018
/**
9951019
* Tries to close cookie consent modals on the page. Based on the I Don't Care About Cookies browser extension.
1020+
*
1021+
* Note that this method requires the idcac-playwright package to be installed.
1022+
* Crawlee does not include it by default due to licensing issues.
1023+
*
1024+
* To use this method, please install the package manually by running:
1025+
*
1026+
* ```bash
1027+
* npm install idcac-playwright
1028+
* ```
9961029
*/
9971030
closeCookieModals(): Promise<void>;
9981031

packages/puppeteer-crawler/package.json

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,9 +67,13 @@
6767
"tslib": "^2.4.0"
6868
},
6969
"peerDependencies": {
70+
"idcac-playwright": "^0.1.2",
7071
"puppeteer": "*"
7172
},
7273
"peerDependenciesMeta": {
74+
"idcac-playwright": {
75+
"optional": true
76+
},
7377
"puppeteer": {
7478
"optional": true
7579
}

packages/puppeteer-crawler/src/internals/utils/puppeteer_utils.ts

Lines changed: 37 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,6 @@ import type { BatchAddRequestsResult, Dictionary } from '@crawlee/types';
2727
import { type CheerioRoot, expandShadowRoots, sleep } from '@crawlee/utils';
2828
import * as cheerio from 'cheerio';
2929
import type { ProtocolMapping } from 'devtools-protocol/types/protocol-mapping.js';
30-
import { getInjectableScript } from 'idcac-playwright';
3130
import ow from 'ow';
3231
import type { HTTPRequest as PuppeteerRequest, HTTPResponse, Page, ResponseForRequest } from 'puppeteer';
3332

@@ -778,8 +777,35 @@ export async function saveSnapshot(page: Page, options: SaveSnapshotOptions = {}
778777
}
779778
}
780779

780+
/** Lazily loaded `idcac-playwright` module; stays `null` until a dynamic import succeeds. */
let idcacPlaywright: null | { getInjectableScript: () => string } = null;
/** Set after the first failed import so the install hint is logged once, not on every call. */
let idcacPlaywrightImportFailed = false;

/**
 * Loads the optional `idcac-playwright` peer dependency on first use.
 *
 * A successful import is cached in `idcacPlaywright`. A failed import is remembered too,
 * so repeated calls neither retry the import nor repeat the warning.
 *
 * @returns The loaded module, or `null` when it could not be imported.
 */
async function getIdcacPlaywright() {
    if (idcacPlaywright || idcacPlaywrightImportFailed) return idcacPlaywright;

    try {
        idcacPlaywright = await import('idcac-playwright');
    } catch (error: unknown) {
        idcacPlaywrightImportFailed = true;
        // A rejected import is not guaranteed to carry an `Error`; avoid printing `undefined`.
        const originalMessage = error instanceof Error ? error.message : String(error);
        log.warning(`Failed to import 'idcac-playwright'.

We recently made idcac-playwright an optional dependency due to licensing issues.
To use this feature, please install it manually by running

npm install idcac-playwright

Original error message follows:

${originalMessage}
`);
    }
    return idcacPlaywright;
}
801+
781802
export async function closeCookieModals(page: Page): Promise<void> {
782-
await page.evaluate(getInjectableScript());
803+
ow(page, ow.object.validate(validators.browserPage));
804+
const idcac = await getIdcacPlaywright();
805+
806+
if (idcac?.getInjectableScript()) {
807+
await page.evaluate(idcac.getInjectableScript());
808+
}
783809
}
784810

785811
/** @internal */
@@ -1057,6 +1083,15 @@ export interface PuppeteerContextUtils {
10571083

10581084
/**
10591085
* Tries to close cookie consent modals on the page. Based on the I Don't Care About Cookies browser extension.
1086+
*
1087+
* Note that this method requires the idcac-playwright package to be installed.
1088+
* Crawlee does not include it by default due to licensing issues.
1089+
*
1090+
* To use this method, please install the package manually by running:
1091+
*
1092+
* ```bash
1093+
* npm install idcac-playwright
1094+
* ```
10601095
*/
10611096
closeCookieModals(): Promise<void>;
10621097
}

yarn.lock

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -686,7 +686,6 @@ __metadata:
686686
"@crawlee/types": "npm:3.15.3"
687687
"@crawlee/utils": "npm:3.15.3"
688688
cheerio: "npm:1.0.0-rc.12"
689-
idcac-playwright: "npm:^0.1.2"
690689
jquery: "npm:^3.6.0"
691690
lodash.isequal: "npm:^4.5.0"
692691
ml-logistic-regression: "npm:^2.0.0"
@@ -695,8 +694,11 @@ __metadata:
695694
string-comparison: "npm:^1.3.0"
696695
tslib: "npm:^2.4.0"
697696
peerDependencies:
697+
idcac-playwright: ^0.1.2
698698
playwright: "*"
699699
peerDependenciesMeta:
700+
idcac-playwright:
701+
optional: true
700702
playwright:
701703
optional: true
702704
languageName: unknown
@@ -719,8 +721,11 @@ __metadata:
719721
ow: "npm:^0.28.1"
720722
tslib: "npm:^2.4.0"
721723
peerDependencies:
724+
idcac-playwright: ^0.1.2
722725
puppeteer: "*"
723726
peerDependenciesMeta:
727+
idcac-playwright:
728+
optional: true
724729
puppeteer:
725730
optional: true
726731
languageName: unknown
@@ -4934,9 +4939,12 @@ __metadata:
49344939
import-local: "npm:^3.1.0"
49354940
tslib: "npm:^2.4.0"
49364941
peerDependencies:
4942+
idcac-playwright: "*"
49374943
playwright: "*"
49384944
puppeteer: "*"
49394945
peerDependenciesMeta:
4946+
idcac-playwright:
4947+
optional: true
49404948
playwright:
49414949
optional: true
49424950
puppeteer:

0 commit comments

Comments
 (0)