
Commit ae3044e

Try to patch the integration tests for the crawlee branch
1 parent: 89e572e

File tree

3 files changed (+114 / -1 lines)


tests/integration/actor_source_base/Dockerfile

Lines changed: 1 addition & 1 deletion
@@ -16,4 +16,4 @@ RUN echo "Python version:" \
     && echo "All installed Python packages:" \
     && pip freeze

-CMD ["sh", "-c", "python -m src"]
+CMD ["sh", "-c", "python server.py & python -m src"]
Lines changed: 2 additions & 0 deletions
@@ -1,2 +1,4 @@
 # The test fixture will put the Apify SDK wheel path on the next line
 APIFY_SDK_WHEEL_PLACEHOLDER
+uvicorn[standard]
+crawlee[parsel] @ git+https://github.com/apify/crawlee-python.git@master
Lines changed: 111 additions & 0 deletions
@@ -0,0 +1,111 @@
from __future__ import annotations

from typing import TYPE_CHECKING

if TYPE_CHECKING:
    from .conftest import MakeActorFunction, RunActorFunction


async def test_actor_on_platform_max_crawl_depth(
    make_actor: MakeActorFunction,
    run_actor: RunActorFunction,
) -> None:
    """Test that the actor respects max_crawl_depth."""

    async def main() -> None:
        """The crawler entry point."""
        import re

        from crawlee.crawlers import ParselCrawler, ParselCrawlingContext

        from apify import Actor

        async with Actor:
            crawler = ParselCrawler(max_crawl_depth=2)
            finished = []
            enqueue_pattern = re.compile(r'http://localhost:8080/2+$')

            @crawler.router.default_handler
            async def default_handler(context: ParselCrawlingContext) -> None:
                """Default request handler."""
                context.log.info(f'Processing {context.request.url} ...')
                await context.enqueue_links(include=[enqueue_pattern])
                finished.append(context.request.url)

            await crawler.run(['http://localhost:8080/'])
            assert finished == ['http://localhost:8080/', 'http://localhost:8080/2', 'http://localhost:8080/22']

    actor = await make_actor(label='crawler-max-depth', main_func=main)
    run_result = await run_actor(actor)

    assert run_result.status == 'SUCCEEDED'


async def test_actor_on_platform_max_requests_per_crawl(
    make_actor: MakeActorFunction,
    run_actor: RunActorFunction,
) -> None:
    """Test that the actor respects max_requests_per_crawl."""

    async def main() -> None:
        """The crawler entry point."""
        from crawlee import ConcurrencySettings
        from crawlee.crawlers import ParselCrawler, ParselCrawlingContext

        from apify import Actor

        async with Actor:
            crawler = ParselCrawler(
                max_requests_per_crawl=3, concurrency_settings=ConcurrencySettings(max_concurrency=1)
            )
            finished = []

            @crawler.router.default_handler
            async def default_handler(context: ParselCrawlingContext) -> None:
                """Default request handler."""
                context.log.info(f'Processing {context.request.url} ...')
                await context.enqueue_links()
                finished.append(context.request.url)

            await crawler.run(['http://localhost:8080/'])
            assert len(finished) == 3

    actor = await make_actor(label='crawler-max-requests', main_func=main)
    run_result = await run_actor(actor)

    assert run_result.status == 'SUCCEEDED'


async def test_actor_on_platform_max_request_retries(
    make_actor: MakeActorFunction,
    run_actor: RunActorFunction,
) -> None:
    """Test that the actor respects max_request_retries."""

    async def main() -> None:
        """The crawler entry point."""
        from crawlee.crawlers import BasicCrawlingContext, ParselCrawler, ParselCrawlingContext

        from apify import Actor

        async with Actor:
            max_retries = 3
            crawler = ParselCrawler(max_request_retries=max_retries)
            failed_counter = 0

            @crawler.error_handler
            async def error_handler(_: BasicCrawlingContext, __: Exception) -> None:
                nonlocal failed_counter
                failed_counter += 1

            @crawler.router.default_handler
            async def default_handler(_: ParselCrawlingContext) -> None:
                raise RuntimeError('Some error')

            await crawler.run(['http://localhost:8080/'])
            assert failed_counter == max_retries, f'{failed_counter=}'

    actor = await make_actor(label='crawler-max-retries', main_func=main)
    run_result = await run_actor(actor)

    assert run_result.status == 'SUCCEEDED'
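
Note: the updated Dockerfile CMD starts server.py next to the actor, but server.py itself is not part of this commit. The sketch below is only an illustration of what such a server could look like, assuming a plain ASGI app served by uvicorn (added to the requirements above) on port 8080, where each page links one level deeper so the tests can crawl /, /2, /22, and so on; the function name, HTML, and exact responses are assumptions, not the real file.

# server.py (hypothetical sketch, not part of this diff): every page links one level
# deeper, producing the /, /2, /22, ... chain the integration tests above crawl.
async def app(scope, receive, send):
    """Minimal ASGI app: each HTML page contains a single link to the next level."""
    assert scope['type'] == 'http'
    suffix = scope['path'].strip('/')          # '' for /, '2' for /2, '22' for /22, ...
    body = f'<html><body><a href="/{suffix}2">next</a></body></html>'.encode()
    await send({
        'type': 'http.response.start',
        'status': 200,
        'headers': [(b'content-type', b'text/html')],
    })
    await send({'type': 'http.response.body', 'body': body})


if __name__ == '__main__':
    import uvicorn

    # The Dockerfile CMD runs this alongside the actor; port 8080 matches the test URLs.
    uvicorn.run(app, host='0.0.0.0', port=8080)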
