Skip to content

Commit eadab26

Browse files
committed
WIP
1 parent 553663a commit eadab26

File tree

3 files changed

+34
-4
lines changed

3 files changed

+34
-4
lines changed

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
44

55
[project]
66
name = "apify"
7-
version = "2.7.4"
7+
version = "2.8.1"
88
description = "Apify SDK for Python"
99
authors = [{ name = "Apify Technologies s.r.o.", email = "[email protected]" }]
1010
license = { file = "LICENSE" }

src/apify/storage_clients/_apify/_request_queue_client_simple.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -248,8 +248,9 @@ async def _list_head(self) -> None:
248248
self._requests_already_handled.add(request.unique_key)
249249
else:
250250
self._requests_cache[request.unique_key] = request
251-
# Add new requests to the end of the head
252-
self._head_requests.appendleft(request.unique_key)
251+
# Add new requests to the front of the head (appendleft), unless already present in head
252+
if request.unique_key not in self._head_requests:
253+
self._head_requests.appendleft(request.unique_key)
253254

254255
@override
255256
async def mark_request_as_handled(self, request: Request) -> ProcessedRequest | None:
@@ -269,7 +270,7 @@ async def mark_request_as_handled(self, request: Request) -> ProcessedRequest |
269270
request.handled_at = datetime.now(tz=timezone.utc)
270271
self._metadata.handled_request_count += 1
271272

272-
if cached_request := self._requests_cache[request.unique_key]:
273+
if cached_request := self._requests_cache.get(request.unique_key):
273274
cached_request.handled_at = request.handled_at
274275

275276
try:

tests/integration/test_crawlers_with_storages.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,35 @@ async def default_handler(context: ParselCrawlingContext) -> None:
4141
assert run_result.status == 'SUCCEEDED'
4242

4343

44+
async def test_actor_on_platform_max_crawl_depth(
45+
) -> None:
46+
"""Test that the actor respects max_crawl_depth."""
47+
48+
"""The crawler entry point."""
49+
import re
50+
51+
from crawlee.crawlers import ParselCrawler, ParselCrawlingContext
52+
53+
from apify import Actor
54+
55+
async with Actor:
56+
rq= await Actor.open_request_queue(force_cloud=True)
57+
crawler = ParselCrawler(max_crawl_depth=2, request_manager=rq)
58+
finished = []
59+
enqueue_pattern = re.compile(r'http://localhost:8080/2+$')
60+
61+
@crawler.router.default_handler
62+
async def default_handler(context: ParselCrawlingContext) -> None:
63+
"""Default request handler."""
64+
context.log.info(f'Processing {context.request.url} ...')
65+
await context.enqueue_links(include=[enqueue_pattern])
66+
finished.append(context.request.url)
67+
68+
await crawler.run(['http://localhost:8080/'])
69+
assert finished == ['http://localhost:8080/', 'http://localhost:8080/2', 'http://localhost:8080/22']
70+
71+
72+
4473
async def test_actor_on_platform_max_requests_per_crawl(
4574
make_actor: MakeActorFunction,
4675
run_actor: RunActorFunction,

0 commit comments

Comments (0)