Skip to content

Commit 20cb743

Browse files
authored
docs: Add an example of running PlaywrightCrawler using local Chrome and Firefox profiles. (#1426)
### Description - Add an example of running `PlaywrightCrawler` using local `Chrome` and `Firefox` profiles.
1 parent 982d423 commit 20cb743

File tree

3 files changed

+139
-0
lines changed

3 files changed

+139
-0
lines changed
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
import asyncio
2+
import shutil
3+
from pathlib import Path
4+
from tempfile import TemporaryDirectory
5+
6+
from crawlee.crawlers import PlaywrightCrawler, PlaywrightCrawlingContext
7+
8+
# Profile name to use (usually 'Default' for single profile setups)
9+
PROFILE_NAME = 'Default'
10+
11+
# Path to the Chrome user data directory on your system (example for Windows)
12+
# Open `chrome://version/` to see your profile path; this is the folder that contains it
13+
PROFILE_PATH = Path(Path.home(), 'AppData', 'Local', 'Google', 'Chrome', 'User Data')
14+
15+
16+
async def main() -> None:
    """Run a PlaywrightCrawler against a temporary copy of a local Chrome profile.

    The profile named by ``PROFILE_NAME`` is copied from ``PROFILE_PATH`` into a
    fresh temporary directory, which is then handed to the crawler as its user
    data directory. The temporary directory is removed when the ``with`` block
    exits, after the crawl has finished.
    """
    # Location of the profile that will be copied.
    source_profile = PROFILE_PATH / PROFILE_NAME

    # Create a temporary folder that will hold the copied profile.
    with TemporaryDirectory(prefix='crawlee-') as scratch_dir:
        user_data_root = Path(scratch_dir)

        # Copy the profile into the temporary folder under the same profile name.
        shutil.copytree(
            source_profile,
            user_data_root / PROFILE_NAME,
            dirs_exist_ok=True,
        )

        launch_options = {
            # Use the installed Chrome browser.
            'channel': 'chrome',
            # Slow down actions to mimic human behavior.
            'slow_mo': 200,
            # Select the copied profile inside the user data directory.
            'args': [f'--profile-directory={PROFILE_NAME}'],
        }

        crawler = PlaywrightCrawler(
            # Chromium browser type for Chrome compatibility.
            browser_type='chromium',
            headless=False,
            # Fingerprints are disabled to preserve the profile identity.
            fingerprint_generator=None,
            # The temporary folder acts as the user data directory.
            user_data_dir=user_data_root,
            browser_launch_options=launch_options,
        )

        @crawler.router.default_handler
        async def default_handler(context: PlaywrightCrawlingContext) -> None:
            """Log the URL of every request that reaches the default route."""
            context.log.info(f'Visiting {context.request.url}')

        await crawler.run(['https://crawlee.dev/'])


if __name__ == '__main__':
    asyncio.run(main())
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
import asyncio
2+
from pathlib import Path
3+
4+
from crawlee.crawlers import PlaywrightCrawler, PlaywrightCrawlingContext
5+
6+
# Replace this with your actual Firefox profile name
7+
# Find it at about:profiles in Firefox
8+
PROFILE_NAME = 'your-profile-name-here'
9+
10+
# Path to your Firefox profile on your system (example for Windows)
11+
# Use `about:profiles` to find your profile path
12+
PROFILE_PATH = Path(
13+
Path.home(), 'AppData', 'Roaming', 'Mozilla', 'Firefox', 'Profiles', PROFILE_NAME
14+
)
15+
16+
17+
async def main() -> None:
    """Run a PlaywrightCrawler directly on the local Firefox profile.

    The profile directory given by ``PROFILE_PATH`` is passed to the crawler as
    its user data directory without being copied first.
    """
    firefox_launch_options = {
        'args': [
            # Required to avoid version conflicts.
            '--allow-downgrade',
        ],
    }

    crawler = PlaywrightCrawler(
        headless=False,
        # Use the Firefox browser type.
        browser_type='firefox',
        # Fingerprints are disabled so the profile is used as is.
        fingerprint_generator=None,
        # Path to the Firefox profile.
        user_data_dir=PROFILE_PATH,
        browser_launch_options=firefox_launch_options,
    )

    @crawler.router.default_handler
    async def default_handler(context: PlaywrightCrawlingContext) -> None:
        """Log the URL of every request that reaches the default route."""
        context.log.info(f'Visiting {context.request.url}')

    await crawler.run(['https://crawlee.dev/'])


if __name__ == '__main__':
    asyncio.run(main())
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
---
2+
id: using_browser_profile
3+
title: Using browser profile
4+
---
5+
6+
import ApiLink from '@site/src/components/ApiLink';
7+
8+
import CodeBlock from '@theme/CodeBlock';
9+
10+
import ChromeProfileExample from '!!raw-loader!./code_examples/using_browser_profiles_chrome.py';
11+
import FirefoxProfileExample from '!!raw-loader!./code_examples/using_browser_profiles_firefox.py';
12+
13+
This example demonstrates how to run <ApiLink to="class/PlaywrightCrawler">`PlaywrightCrawler`</ApiLink> using your local browser profile from [Chrome](https://www.google.com/intl/us/chrome/) or [Firefox](https://www.firefox.com/).
14+
15+
Using browser profiles allows you to leverage existing login sessions, saved passwords, bookmarks, and other personalized browser data during crawling. This can be particularly useful for testing scenarios or when you need to access content that requires authentication.
16+
17+
## Chrome browser
18+
19+
To run <ApiLink to="class/PlaywrightCrawler">`PlaywrightCrawler`</ApiLink> with your Chrome profile, you need to know the path to your profile files. You can find this information by entering `chrome://version/` as a URL in your Chrome browser. If you have multiple profiles, pay attention to the profile name; if you only have one profile, it's usually `Default`.
20+
21+
You also need to use the [`channel`](https://playwright.dev/python/docs/api/class-browsertype#browser-type-launch-option-channel) parameter in `browser_launch_options` to use the Chrome browser installed on your system instead of Playwright's Chromium.
22+
23+
:::warning Profile access limitation
24+
Due to [Chrome's security policies](https://developer.chrome.com/blog/remote-debugging-port), automation cannot use your main browsing profile directly. The example copies your profile to a temporary location as a workaround.
25+
:::
26+
27+
Make sure you don't have any running Chrome browser processes before running this code:
28+
29+
<CodeBlock className="language-python" language="python">
30+
{ChromeProfileExample}
31+
</CodeBlock>
32+
33+
## Firefox browser
34+
35+
To find the path to your Firefox profile, enter `about:profiles` as a URL in your Firefox browser. Unlike Chrome, you can use your standard profile path directly without copying it first.
36+
37+
Make sure you don't have any running Firefox browser processes before running this code:
38+
39+
<CodeBlock className="language-python" language="python">
40+
{FirefoxProfileExample}
41+
</CodeBlock>

0 commit comments

Comments
 (0)