 import re
 import subprocess
 from pathlib import Path
+from typing import Literal

 import pytest
 from apify_client import ApifyClientAsync
 from cookiecutter.main import cookiecutter

 from crawlee._cli import default_start_url, template_directory
 from crawlee._utils.crypto import crypto_random_object_id
-from tests.e2e.project_template.utils import patch_crawlee_version_in_pyproject_toml_based_project
+from tests.e2e.project_template.utils import patch_crawlee_version_in_project

 # To run these tests locally, make sure you have apify-cli installed and available in the path.
 # https://docs.apify.com/cli/docs/installation


-@pytest.mark.parametrize('http_client', ['httpx', 'curl-impersonate'])
-@pytest.mark.parametrize('crawler_type', ['parsel', 'beautifulsoup'])
-@pytest.mark.parametrize('package_manager', ['uv', 'poetry'])
+@pytest.mark.parametrize(
+    'crawler_type',
+    [
+        pytest.param('playwright-camoufox', marks=pytest.mark.playwright_camoufox),
+        pytest.param('playwright', marks=pytest.mark.playwright),
+        pytest.param('parsel', marks=pytest.mark.parsel),
+        pytest.param('beautifulsoup', marks=pytest.mark.beautifulsoup),
+    ],
+)
+@pytest.mark.parametrize(
+    'http_client',
+    [
+        pytest.param('httpx', marks=pytest.mark.httpx),
+        pytest.param('curl-impersonate', marks=pytest.mark.curl_impersonate),
+    ],
+)
+@pytest.mark.parametrize(
+    'package_manager',
+    [
+        pytest.param('pip', marks=pytest.mark.pip),
+        pytest.param('uv', marks=pytest.mark.uv),
+        pytest.param('poetry', marks=pytest.mark.poetry),
+    ],
+)
 async def test_static_crawler_actor_at_apify(
-    tmp_path: Path, crawlee_wheel_path: Path, package_manager: str, crawler_type: str, http_client: str
+    tmp_path: Path,
+    crawlee_wheel_path: Path,
+    package_manager: Literal['pip', 'uv', 'poetry'],
+    crawler_type: str,
+    http_client: str,
 ) -> None:
     # Generate new actor name
     actor_name = f'crawlee-python-template-e2e-test-{crypto_random_object_id(8).lower()}'
@@ -40,8 +66,8 @@ async def test_static_crawler_actor_at_apify(
         output_dir=tmp_path,
     )

-    patch_crawlee_version_in_pyproject_toml_based_project(
-        project_path=tmp_path / actor_name, wheel_path=crawlee_wheel_path
+    patch_crawlee_version_in_project(
+        project_path=tmp_path / actor_name, wheel_path=crawlee_wheel_path, package_manager=package_manager
     )

     # Build actor using sequence of cli commands as the user would
@@ -56,16 +82,19 @@ async def test_static_crawler_actor_at_apify(
     build_process = subprocess.run(['apify', 'push'], capture_output=True, check=False, cwd=tmp_path / actor_name)  # noqa: ASYNC221, S603, S607
     # Get actor ID from build log
     actor_id_regexp = re.compile(r'https:\/\/console\.apify\.com\/actors\/(.*)#\/builds\/\d*\.\d*\.\d*')
-    # Why is it in stderr and not in stdout???
-    actor_id = re.findall(actor_id_regexp, build_process.stderr.decode())[0]
+
+    if match := re.findall(actor_id_regexp, build_process.stderr.decode()):
+        actor_id = match[0]
+    else:
+        raise AssertionError(f'Failed to find actor id in build log: {build_process.stderr.decode()}')

     client = ApifyClientAsync(token=os.getenv('APIFY_TEST_USER_API_TOKEN'))
     actor = client.actor(actor_id)

     # Run actor
     try:
         assert build_process.returncode == 0
-        started_run_data = await actor.start()
+        started_run_data = await actor.start(memory_mbytes=8192)
         actor_run = client.run(started_run_data['id'])

         finished_run_data = await actor_run.wait_for_finish()
@@ -80,6 +109,6 @@ async def test_static_crawler_actor_at_apify(
     assert finished_run_data
     assert finished_run_data['status'] == 'SUCCEEDED', additional_run_info
     assert (
-        'Crawler.stop() was called with following reason: The crawler has reached its limit of 50 requests per crawl.'
+        'Crawler.stop() was called with following reason: The crawler has reached its limit of 10 requests per crawl.'
     ) in actor_run_log, additional_run_info
-    assert int(re.findall(r'requests_finished\s*│\s*(\d*)', actor_run_log)[-1]) >= 50, additional_run_info
+    assert int(re.findall(r'requests_finished\s*│\s*(\d*)', actor_run_log)[-1]) >= 10, additional_run_info
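
Note: the marker-based matrix above assumes each marker name (`playwright_camoufox`, `httpx`, `uv`, and the rest) is registered with pytest; unregistered markers trigger `PytestUnknownMarkWarning` and fail under `--strict-markers`. Below is a minimal sketch of one way to register them, assuming a `conftest.py` next to the tests; the project may equally declare them under `[tool.pytest.ini_options]` in `pyproject.toml`.

```python
# conftest.py (hypothetical; the real project may register these markers in pyproject.toml instead)
import pytest

_E2E_MARKERS = (
    'playwright_camoufox',
    'playwright',
    'parsel',
    'beautifulsoup',
    'httpx',
    'curl_impersonate',
    'pip',
    'uv',
    'poetry',
)


def pytest_configure(config: pytest.Config) -> None:
    # Register each marker so `-m` selection works without unknown-marker warnings.
    for marker in _E2E_MARKERS:
        config.addinivalue_line('markers', f'{marker}: template e2e test matrix marker')
```

Once registered, the markers let CI or a local run select one combination at a time, e.g. `pytest -m 'uv and httpx and parsel' tests/e2e/project_template`.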
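The rewritten actor-ID lookup now fails with a clear `AssertionError` carrying the full build log, rather than an opaque `IndexError`, when the expected console URL is missing from the `apify push` stderr. A self-contained illustration of the same pattern, using a fabricated log line (the URL and actor ID here are invented for the example):

```python
import re

actor_id_regexp = re.compile(r'https:\/\/console\.apify\.com\/actors\/(.*)#\/builds\/\d*\.\d*\.\d*')

# Fabricated stderr fragment, for illustration only; real `apify push` output may differ.
stderr = 'Actor build detail: https://console.apify.com/actors/aBcDeFgHiJ#/builds/0.0.1'

if match := re.findall(actor_id_regexp, stderr):
    print(match[0])  # prints 'aBcDeFgHiJ'
else:
    raise AssertionError(f'Failed to find actor id in build log: {stderr}')
```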