
Commit f5189c5

Merge branch 'master' into new-apify-storage-clients

2 parents: a8881dd + 9b6e337

File tree

10 files changed (+652, -171 lines)

CHANGELOG.md

Lines changed: 34 additions & 1 deletion

@@ -2,6 +2,39 @@
 
 All notable changes to this project will be documented in this file.
 
+<!-- git-cliff-unreleased-start -->
+## 2.7.1 - **not yet released**
+
+### 🐛 Bug Fixes
+
+- Restrict apify-shared and apify-client versions ([#523](https://github.com/apify/apify-sdk-python/pull/523)) ([b3ae5a9](https://github.com/apify/apify-sdk-python/commit/b3ae5a972a65454a4998eda59c9fcc3f6b7e8579)) by [@vdusek](https://github.com/vdusek)
+
+
+<!-- git-cliff-unreleased-end -->
+## [2.7.0](https://github.com/apify/apify-sdk-python/releases/tag/v2.7.0) (2025-07-14)
+
+### 🚀 Features
+
+- **crypto:** Decrypt secret objects ([#482](https://github.com/apify/apify-sdk-python/pull/482)) ([ce9daf7](https://github.com/apify/apify-sdk-python/commit/ce9daf7381212b8dc194e8a643e5ca0dedbc0078)) by [@MFori](https://github.com/MFori)
+
+### 🐛 Bug Fixes
+
+- Sync `@docusaurus` theme version [internal] ([#500](https://github.com/apify/apify-sdk-python/pull/500)) ([a7485e7](https://github.com/apify/apify-sdk-python/commit/a7485e7d2276fde464ce862573d5b95e7d4d836a)) by [@katzino](https://github.com/katzino)
+- Tagline overlap ([#501](https://github.com/apify/apify-sdk-python/pull/501)) ([bae8340](https://github.com/apify/apify-sdk-python/commit/bae8340c46fea756ea35ea4d591da84c09d478e2)) by [@katzino](https://github.com/katzino)
+
+
+## [2.7.0](https://github.com/apify/apify-sdk-python/releases/tag/v2.7.0) (2025-07-14)
+
+### 🚀 Features
+
+- **crypto:** Decrypt secret objects ([#482](https://github.com/apify/apify-sdk-python/pull/482)) ([ce9daf7](https://github.com/apify/apify-sdk-python/commit/ce9daf7381212b8dc194e8a643e5ca0dedbc0078)) by [@MFori](https://github.com/MFori)
+
+### 🐛 Bug Fixes
+
+- Sync `@docusaurus` theme version [internal] ([#500](https://github.com/apify/apify-sdk-python/pull/500)) ([a7485e7](https://github.com/apify/apify-sdk-python/commit/a7485e7d2276fde464ce862573d5b95e7d4d836a)) by [@katzino](https://github.com/katzino)
+- Tagline overlap ([#501](https://github.com/apify/apify-sdk-python/pull/501)) ([bae8340](https://github.com/apify/apify-sdk-python/commit/bae8340c46fea756ea35ea4d591da84c09d478e2)) by [@katzino](https://github.com/katzino)
+
+
 ## [2.7.2](https://github.com/apify/apify-sdk-python/releases/tag/v2.7.2) (2025-07-30)
 
 ### 🐛 Bug Fixes
@@ -392,4 +425,4 @@ All notable changes to this project will be documented in this file.
 - Key error for storage name ([#28](https://github.com/apify/apify-sdk-python/pull/28)) ([83b30a9](https://github.com/apify/apify-sdk-python/commit/83b30a90df4d3b173302f1c6006b346091fced60)) by [@drobnikj](https://github.com/drobnikj)
 
 
-<!-- generated by git-cliff -->
+<!-- generated by git-cliff -->

pyproject.toml

Lines changed: 4 additions & 2 deletions

@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 
 [project]
 name = "apify"
-version = "2.7.3"
+version = "2.7.1"
 description = "Apify SDK for Python"
 authors = [{ name = "Apify Technologies s.r.o.", email = "[email protected]" }]
 license = { file = "LICENSE" }
@@ -63,7 +63,8 @@ scrapy = ["scrapy>=2.11.0"]
 
 [dependency-groups]
 dev = [
-    "build~=1.2.0",
+    "build~=1.3.0",
+    "crawlee[parsel]",
     "dycw-pytest-only>=2.1.1",
     "griffe~=1.9.0",
     "mypy~=1.17.0",
@@ -78,6 +79,7 @@ dev = [
     "ruff~=0.12.0",
     "setuptools", # setuptools are used by pytest but not explicitly required
     "types-cachetools>=6.0.0.20250525",
+    "uvicorn[standard]",
 ]
 
 [tool.hatch.build.targets.wheel]

tests/integration/actor_source_base/Dockerfile

Lines changed: 1 addition & 1 deletion

@@ -16,4 +16,4 @@ RUN echo "Python version:" \
     && echo "All installed Python packages:" \
     && pip freeze
 
-CMD ["python3", "-m", "src"]
+CMD ["sh", "-c", "python server.py & python -m src"]
tests/integration/actor_source_base/requirements.txt

Lines changed: 2 additions & 0 deletions

@@ -1,2 +1,4 @@
 # The test fixture will put the Apify SDK wheel path on the next line
 APIFY_SDK_WHEEL_PLACEHOLDER
+uvicorn[standard]
+crawlee[parsel]
tests/integration/actor_source_base/server.py

Lines changed: 101 additions & 0 deletions (new file)

@@ -0,0 +1,101 @@
+"""
+Test server serving an infinite site at http://localhost:8080/{any_number}, where each page links to the next 10 pages.
+For example:
+http://localhost:8080/ contains links:
+http://localhost:8080/0, http://localhost:8080/1, ..., http://localhost:8080/9
+
+http://localhost:8080/1 contains links:
+http://localhost:8080/10, http://localhost:8080/11, ..., http://localhost:8080/19
+
+... and so on.
+"""
+
+import asyncio
+import logging
+from collections.abc import Awaitable, Callable, Coroutine
+from socket import socket
+from typing import Any
+
+from uvicorn import Config
+from uvicorn.server import Server
+from yarl import URL
+
+Receive = Callable[[], Awaitable[dict[str, Any]]]
+Send = Callable[[dict[str, Any]], Coroutine[None, None, None]]
+
+
+async def send_html_response(send: Send, html_content: bytes, status: int = 200) -> None:
+    """Send an HTML response to the client."""
+    await send(
+        {
+            'type': 'http.response.start',
+            'status': status,
+            'headers': [[b'content-type', b'text/html; charset=utf-8']],
+        }
+    )
+    await send({'type': 'http.response.body', 'body': html_content})
+
+
+async def app(scope: dict[str, Any], _: Receive, send: Send) -> None:
+    """Main ASGI application handler that routes requests to specific handlers.
+
+    Args:
+        scope: The ASGI connection scope.
+        _: The ASGI receive function.
+        send: The ASGI send function.
+    """
+    assert scope['type'] == 'http'
+    path = scope['path']
+
+    links = '\n'.join(f'<a href="{path}{i}">{path}{i}</a>' for i in range(10))
+    await send_html_response(
+        send,
+        f"""\
+<html><head>
+<title>Title for {path} </title>
+</head>
+<body>
+{links}
+</body></html>""".encode(),
+    )
+
+
+class TestServer(Server):
+    """A test HTTP server implementation based on Uvicorn Server."""
+
+    @property
+    def url(self) -> URL:
+        """Get the base URL of the server.
+
+        Returns:
+            A URL instance with the server's base URL.
+        """
+        protocol = 'https' if self.config.is_ssl else 'http'
+        return URL(f'{protocol}://{self.config.host}:{self.config.port}/')
+
+    async def serve(self, sockets: list[socket] | None = None) -> None:
+        """Run the server."""
+        if sockets:
+            raise RuntimeError('Simple TestServer does not support custom sockets')
+        self.restart_requested = asyncio.Event()
+
+        loop = asyncio.get_event_loop()
+        tasks = {
+            loop.create_task(super().serve()),
+        }
+        await asyncio.wait(tasks)
+
+
+if __name__ == '__main__':
+    asyncio.run(
+        TestServer(
+            config=Config(
+                app=app,
+                lifespan='off',
+                loop='asyncio',
+                port=8080,
+                log_config=None,
+                log_level=logging.CRITICAL,
+            )
+        ).serve()
+    )
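As a quick smoke check of the structure described in the module docstring, each page /N links to /N0 through /N9; a hypothetical check against a locally running instance of this server:

from urllib.request import urlopen

# Per the `app` handler above, page /1 should link to /10 ... /19.
html = urlopen('http://localhost:8080/1').read().decode('utf-8')
assert all(f'href="/1{i}"' in html for i in range(10))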
Lines changed: 111 additions & 0 deletions (new integration test file)

@@ -0,0 +1,111 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from .conftest import MakeActorFunction, RunActorFunction
+
+
+async def test_actor_on_platform_max_crawl_depth(
+    make_actor: MakeActorFunction,
+    run_actor: RunActorFunction,
+) -> None:
+    """Test that the actor respects max_crawl_depth."""
+
+    async def main() -> None:
+        """The crawler entry point."""
+        import re
+
+        from crawlee.crawlers import ParselCrawler, ParselCrawlingContext
+
+        from apify import Actor
+
+        async with Actor:
+            crawler = ParselCrawler(max_crawl_depth=2)
+            finished = []
+            enqueue_pattern = re.compile(r'http://localhost:8080/2+$')
+
+            @crawler.router.default_handler
+            async def default_handler(context: ParselCrawlingContext) -> None:
+                """Default request handler."""
+                context.log.info(f'Processing {context.request.url} ...')
+                await context.enqueue_links(include=[enqueue_pattern])
+                finished.append(context.request.url)
+
+            await crawler.run(['http://localhost:8080/'])
+            assert finished == ['http://localhost:8080/', 'http://localhost:8080/2', 'http://localhost:8080/22']
+
+    actor = await make_actor(label='crawler-max-depth', main_func=main)
+    run_result = await run_actor(actor)
+
+    assert run_result.status == 'SUCCEEDED'
+
+
+async def test_actor_on_platform_max_requests_per_crawl(
+    make_actor: MakeActorFunction,
+    run_actor: RunActorFunction,
+) -> None:
+    """Test that the actor respects max_requests_per_crawl."""
+
+    async def main() -> None:
+        """The crawler entry point."""
+        from crawlee import ConcurrencySettings
+        from crawlee.crawlers import ParselCrawler, ParselCrawlingContext
+
+        from apify import Actor
+
+        async with Actor:
+            crawler = ParselCrawler(
+                max_requests_per_crawl=3, concurrency_settings=ConcurrencySettings(max_concurrency=1)
+            )
+            finished = []
+
+            @crawler.router.default_handler
+            async def default_handler(context: ParselCrawlingContext) -> None:
+                """Default request handler."""
+                context.log.info(f'Processing {context.request.url} ...')
+                await context.enqueue_links()
+                finished.append(context.request.url)
+
+            await crawler.run(['http://localhost:8080/'])
+            assert len(finished) == 3
+
+    actor = await make_actor(label='crawler-max-requests', main_func=main)
+    run_result = await run_actor(actor)
+
+    assert run_result.status == 'SUCCEEDED'
+
+
+async def test_actor_on_platform_max_request_retries(
+    make_actor: MakeActorFunction,
+    run_actor: RunActorFunction,
+) -> None:
+    """Test that the actor respects max_request_retries."""
+
+    async def main() -> None:
+        """The crawler entry point."""
+        from crawlee.crawlers import BasicCrawlingContext, ParselCrawler, ParselCrawlingContext
+
+        from apify import Actor
+
+        async with Actor:
+            max_retries = 3
+            crawler = ParselCrawler(max_request_retries=max_retries)
+            failed_counter = 0
+
+            @crawler.error_handler
+            async def error_handler(_: BasicCrawlingContext, __: Exception) -> None:
+                nonlocal failed_counter
+                failed_counter += 1
+
+            @crawler.router.default_handler
+            async def default_handler(_: ParselCrawlingContext) -> None:
+                raise RuntimeError('Some error')
+
+            await crawler.run(['http://localhost:8080/'])
+            assert failed_counter == max_retries, f'{failed_counter=}'
+
+    actor = await make_actor(label='crawler-max-retries', main_func=main)
+    run_result = await run_actor(actor)
+
+    assert run_result.status == 'SUCCEEDED'
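These tests drive the crawler on the Apify platform through make_actor and run_actor, but the same limit behavior can be checked locally against server.py. A minimal sketch of the max_crawl_depth case (assuming the test server from this commit is already listening on port 8080):

import asyncio
import re

from crawlee.crawlers import ParselCrawler, ParselCrawlingContext


async def main() -> None:
    # Depth counting: / is depth 0, /2 is depth 1, /22 is depth 2; with
    # max_crawl_depth=2, links found on /22 are no longer enqueued, and the
    # /2+$ pattern keeps the frontier to a single chain of pages.
    crawler = ParselCrawler(max_crawl_depth=2)
    visited: list[str] = []
    pattern = re.compile(r'http://localhost:8080/2+$')

    @crawler.router.default_handler
    async def handler(context: ParselCrawlingContext) -> None:
        visited.append(context.request.url)
        await context.enqueue_links(include=[pattern])

    await crawler.run(['http://localhost:8080/'])
    print(visited)  # expected: ['http://localhost:8080/', '.../2', '.../22']


asyncio.run(main())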

tests/integration/test_request_queue.py

Lines changed: 5 additions & 0 deletions

@@ -2,6 +2,8 @@
 
 from typing import TYPE_CHECKING
 
+import pytest
+
 from apify import Actor
 
 if TYPE_CHECKING:
@@ -742,6 +744,9 @@ async def main() -> None:
     assert run_result.status == 'SUCCEEDED'
 
 
+@pytest.mark.skip(
+    reason='The Apify RQ client is not resilient to concurrent processing, making this test flaky. See issue #529.'
+)
 async def test_concurrent_processing_simulation(
     make_actor: MakeActorFunction,
     run_actor: RunActorFunction,
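For context, the skipped test simulates multiple consumers draining one request queue at once, which is where the flakiness referenced in issue #529 appears. A hedged sketch of that access pattern (worker and request counts are illustrative, not the test's actual body):

import asyncio

from apify import Actor


async def consume(queue) -> None:
    # Each worker pulls requests and marks them handled; several workers
    # racing on the same queue is the scenario the skip reason describes.
    while request := await queue.fetch_next_request():
        await queue.mark_request_as_handled(request)


async def main() -> None:
    async with Actor:
        queue = await Actor.open_request_queue()
        for i in range(50):
            await queue.add_request(f'http://localhost:8080/{i}')
        await asyncio.gather(*(consume(queue) for _ in range(4)))


asyncio.run(main())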
