Commit 3a6afee

Add max retries test

1 parent c3d72eb commit 3a6afee

File tree: 1 file changed, +39 -6 lines

tests/integration/test_crawlers_with_storages.py
Lines changed: 39 additions & 6 deletions
@@ -1,3 +1,4 @@
+from crawlee._types import BasicCrawlingContext
 from tests.integration.conftest import MakeActorFunction, RunActorFunction
 
 
@@ -25,14 +26,12 @@ async def default_handler(context: ParselCrawlingContext) -> None:
                 """Default request handler."""
                 context.log.info(f'Processing {context.request.url} ...')
                 await context.enqueue_links(include=[enqueue_pattern])
-                await context.push_data({'Url': context.request.url})
                 finished.append(context.request.url)
 
             await crawler.run(['http://localhost:8080/'])
             assert finished == ['http://localhost:8080/', 'http://localhost:8080/2', 'http://localhost:8080/22']
-            # assert some dataset
 
-    actor = await make_actor(label='parsel-crawler', main_func=main)
+    actor = await make_actor(label='crawler-max-depth', main_func=main)
     run_result = await run_actor(actor)
 
     assert run_result.status == 'SUCCEEDED'
@@ -62,14 +61,48 @@ async def default_handler(context: ParselCrawlingContext) -> None:
                 """Default request handler."""
                 context.log.info(f'Processing {context.request.url} ...')
                 await context.enqueue_links()
-                await context.push_data({'Url': context.request.url})
                 finished.append(context.request.url)
 
             await crawler.run(['http://localhost:8080/'])
             assert len(finished) == 3
-            # assert some dataset
 
-    actor = await make_actor(label='parsel-crawler', main_func=main)
+    actor = await make_actor(label='crawler-max-requests', main_func=main)
+    run_result = await run_actor(actor)
+
+    assert run_result.status == 'SUCCEEDED'
+
+
+async def test_actor_on_platform_max_request_retries(
+    make_actor: MakeActorFunction,
+    run_actor: RunActorFunction,
+) -> None:
+    """Test that the actor respects max_request_retries."""
+
+    async def main() -> None:
+        """The crawler entry point."""
+        from crawlee.crawlers import ParselCrawler, ParselCrawlingContext
+
+        from apify import Actor
+
+        async with Actor:
+            max_retries = 2
+            crawler = ParselCrawler(max_request_retries=max_retries)
+            finished = []
+            failed = []
+
+            @crawler.failed_request_handler
+            async def failed_handler(context: BasicCrawlingContext, _: Exception) -> None:
+                failed.append(context.request.url)
+
+            @crawler.router.default_handler
+            async def default_handler(context: ParselCrawlingContext) -> None:
+                finished.append(context.request.url)
+
+            await crawler.run(['http://localhost:8080/non-existing-url'])
+            assert len(finished) == 0
+            assert len(failed) == max_retries + 1
+
+    actor = await make_actor(label='crawler-max-retries', main_func=main)
     run_result = await run_actor(actor)
 
     assert run_result.status == 'SUCCEEDED'
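
For orientation, the retry behavior the new test exercises can be reproduced outside the Apify platform. The sketch below is not part of this commit: it is a minimal local script that assumes an unreachable local port (localhost:1) so every request fails with a connection error, which crawlee retries before handing the request to the failed-request handler. It prints the recorded URLs rather than asserting an exact count; the handler and variable names are illustrative.

import asyncio

from crawlee._types import BasicCrawlingContext
from crawlee.crawlers import ParselCrawler, ParselCrawlingContext


async def main() -> None:
    max_retries = 2
    crawler = ParselCrawler(max_request_retries=max_retries)
    failed: list[str] = []

    @crawler.failed_request_handler
    async def failed_handler(context: BasicCrawlingContext, _: Exception) -> None:
        # Records the request once crawlee has given up on retrying it.
        failed.append(context.request.url)

    @crawler.router.default_handler
    async def default_handler(context: ParselCrawlingContext) -> None:
        # Never reached in this sketch: every request fails before parsing.
        context.log.info(f'Processing {context.request.url} ...')

    # Port 1 is assumed to be closed, so each attempt fails with a
    # connection error and gets retried up to max_request_retries times.
    await crawler.run(['http://localhost:1/'])
    print(f'URLs that failed after retries: {failed}')


if __name__ == '__main__':
    asyncio.run(main())

Note the handler signature: the failed-request handler receives the crawling context plus the final exception, which is why the test's failed_handler accepts and discards an Exception argument.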
