Skip to content

Commit 101fe41

Browse files
committed
Merge branch 'master' into feat/sign-public-url
2 parents 2451a25 + ade53dc commit 101fe41

16 files changed

+955
-1056
lines changed

CHANGELOG.md

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,16 @@
22

33
All notable changes to this project will be documented in this file.
44

5+
<!-- git-cliff-unreleased-start -->
6+
## 2.4.0 - **not yet released**
7+
8+
### 🚀 Features
9+
10+
- Update to Crawlee v0.6 ([#420](https://github.com/apify/apify-sdk-python/pull/420)) ([9be4336](https://github.com/apify/apify-sdk-python/commit/9be433667231cc5739861fa693d7a726860d6aca)) by [@vdusek](https://github.com/vdusek)
11+
- Add Actor `exit_process` option ([#424](https://github.com/apify/apify-sdk-python/pull/424)) ([994c832](https://github.com/apify/apify-sdk-python/commit/994c8323b994e009db0ccdcb624891a2fef97070)) by [@vdusek](https://github.com/vdusek), closes [#396](https://github.com/apify/apify-sdk-python/issues/396), [#401](https://github.com/apify/apify-sdk-python/issues/401)
12+
13+
14+
<!-- git-cliff-unreleased-end -->
515
## [2.3.1](https://github.com/apify/apify-sdk-python/releases/tag/v2.3.1) (2025-02-25)
616

717
### 🐛 Bug Fixes

docs/03_concepts/03_storages.mdx

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,12 @@ Each storage is then stored in its own folder, named after the storage, or calle
3838

3939
Each dataset item, key-value store record, or request in a request queue is then stored in its own file in the storage folder. Dataset items and request queue requests are always JSON files, and key-value store records can be any file type, based on its content type. For example, the Actor input is typically stored in `storage/key_value_stores/default/INPUT.json`.
4040

41+
## Local Actor run with remote storage
42+
43+
When developing locally, opening any storage uses local storage by default. To change this behavior and use remote storage instead, pass the `force_cloud=True` argument to [`Actor.open_dataset`](../../reference/class/Actor#open_dataset), [`Actor.open_request_queue`](../../reference/class/Actor#open_request_queue) or [`Actor.open_key_value_store`](../../reference/class/Actor#open_key_value_store). Proper use of this argument allows you to work with both local and remote storages.
44+
45+
Calling another remote Actor and accessing its default storage is a typical use case for passing the `force_cloud=True` argument to open the remote Actor's storages.
46+
4147
### Local storage persistence
4248

4349
By default, the storage contents are persisted across multiple Actor runs. To clean up the Actor storages before running the Actor, use the `--purge` flag of the [`apify run`](https://docs.apify.com/cli/docs/reference#apify-run) command of the Apify CLI.

pyproject.toml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
44

55
[project]
66
name = "apify"
7-
version = "2.3.1"
7+
version = "2.4.0"
88
description = "Apify SDK for Python"
99
authors = [{ name = "Apify Technologies s.r.o.", email = "[email protected]" }]
1010
license = { file = "LICENSE" }
@@ -35,8 +35,8 @@ keywords = [
3535
]
3636
dependencies = [
3737
"apify-client>=1.9.2",
38-
"apify-shared>=1.2.1",
39-
"crawlee~=0.5.0",
38+
"apify-shared>=1.3.0",
39+
"crawlee~=0.6.0",
4040
"cryptography>=42.0.0",
4141
"httpx>=0.27.0",
4242
"lazy-object-proxy>=1.10.0",
@@ -62,7 +62,7 @@ scrapy = ["scrapy>=2.11.0"]
6262
dev = [
6363
"build~=1.2.0",
6464
"filelock~=3.17.0",
65-
"griffe~=1.5.0",
65+
"griffe~=1.6.0",
6666
"mypy~=1.15.0",
6767
"pre-commit~=4.1.0",
6868
"pydoc-markdown~=4.8.0",

src/apify/_actor.py

Lines changed: 40 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import asyncio
44
import os
55
import sys
6+
from contextlib import suppress
67
from datetime import timedelta
78
from typing import TYPE_CHECKING, Any, Callable, Literal, TypeVar, cast, overload
89

@@ -43,7 +44,7 @@
4344
from typing_extensions import Self
4445

4546
from crawlee.proxy_configuration import _NewUrlFunction
46-
from crawlee.storage_clients import BaseStorageClient
47+
from crawlee.storage_clients import StorageClient
4748

4849
from apify._models import Webhook
4950

@@ -64,6 +65,7 @@ def __init__(
6465
configuration: Configuration | None = None,
6566
*,
6667
configure_logging: bool = True,
68+
exit_process: bool | None = None,
6769
) -> None:
6870
"""Create an Actor instance.
6971
@@ -74,7 +76,10 @@ def __init__(
7476
configuration: The Actor configuration to be used. If not passed, a new Configuration instance will
7577
be created.
7678
configure_logging: Should the default logging configuration be configured?
79+
exit_process: Whether the Actor should call `sys.exit` when the context manager exits. The default is
80+
True except for the IPython, Pytest and Scrapy environments.
7781
"""
82+
self._exit_process = self._get_default_exit_process() if exit_process is None else exit_process
7883
self._is_exiting = False
7984

8085
self._configuration = configuration or Configuration.get_global_configuration()
@@ -141,9 +146,19 @@ def __repr__(self) -> str:
141146

142147
return super().__repr__()
143148

144-
def __call__(self, configuration: Configuration | None = None, *, configure_logging: bool = True) -> Self:
149+
def __call__(
150+
self,
151+
configuration: Configuration | None = None,
152+
*,
153+
configure_logging: bool = True,
154+
exit_process: bool | None = None,
155+
) -> Self:
145156
"""Make a new Actor instance with a non-default configuration."""
146-
return self.__class__(configuration=configuration, configure_logging=configure_logging)
157+
return self.__class__(
158+
configuration=configuration,
159+
configure_logging=configure_logging,
160+
exit_process=exit_process,
161+
)
147162

148163
@property
149164
def apify_client(self) -> ApifyClientAsync:
@@ -171,7 +186,7 @@ def log(self) -> logging.Logger:
171186
return logger
172187

173188
@property
174-
def _local_storage_client(self) -> BaseStorageClient:
189+
def _local_storage_client(self) -> StorageClient:
175190
"""The local storage client the Actor instance uses."""
176191
return service_locator.get_storage_client()
177192

@@ -281,13 +296,7 @@ async def finalize() -> None:
281296
await asyncio.wait_for(finalize(), cleanup_timeout.total_seconds())
282297
self._is_initialized = False
283298

284-
if is_running_in_ipython():
285-
self.log.debug(f'Not calling sys.exit({exit_code}) because Actor is running in IPython')
286-
elif os.getenv('PYTEST_CURRENT_TEST', default=False): # noqa: PLW1508
287-
self.log.debug(f'Not calling sys.exit({exit_code}) because Actor is running in an unit test')
288-
elif os.getenv('SCRAPY_SETTINGS_MODULE'):
289-
self.log.debug(f'Not calling sys.exit({exit_code}) because Actor is running with Scrapy')
290-
else:
299+
if self._exit_process:
291300
sys.exit(exit_code)
292301

293302
async def fail(
@@ -1128,6 +1137,26 @@ async def create_proxy_configuration(
11281137

11291138
return proxy_configuration
11301139

1140+
def _get_default_exit_process(self) -> bool:
1141+
"""Returns False for IPython, Pytest, and Scrapy environments, True otherwise."""
1142+
if is_running_in_ipython():
1143+
self.log.debug('Running in IPython, setting default `exit_process` to False.')
1144+
return False
1145+
1146+
# Check if running in Pytest by detecting the relevant environment variable.
1147+
if os.getenv('PYTEST_CURRENT_TEST'):
1148+
self.log.debug('Running in Pytest, setting default `exit_process` to False.')
1149+
return False
1150+
1151+
# Check if running in Scrapy by attempting to import it.
1152+
with suppress(ImportError):
1153+
import scrapy # noqa: F401
1154+
1155+
self.log.debug('Running in Scrapy, setting default `exit_process` to False.')
1156+
return False
1157+
1158+
return True
1159+
11311160

11321161
Actor = cast(_ActorType, Proxy(_ActorType))
11331162
"""The entry point of the SDK, through which all the Actor operations should be done."""

src/apify/apify_storage_client/_apify_storage_client.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77
from apify_client import ApifyClientAsync
88
from crawlee._utils.crypto import crypto_random_object_id
9-
from crawlee.storage_clients import BaseStorageClient
9+
from crawlee.storage_clients import StorageClient
1010

1111
from apify._utils import docs_group
1212
from apify.apify_storage_client._dataset_client import DatasetClient
@@ -21,7 +21,7 @@
2121

2222

2323
@docs_group('Classes')
24-
class ApifyStorageClient(BaseStorageClient):
24+
class ApifyStorageClient(StorageClient):
2525
"""A storage client implementation based on the Apify platform storage."""
2626

2727
def __init__(self, *, configuration: Configuration) -> None:
@@ -68,5 +68,5 @@ async def purge_on_start(self) -> None:
6868
pass
6969

7070
@override
71-
def get_rate_limit_errors(self) -> dict[int, int]: # type: ignore[misc]
71+
def get_rate_limit_errors(self) -> dict[int, int]:
7272
return self._apify_client.stats.rate_limit_errors

src/apify/apify_storage_client/_dataset_client.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
from typing_extensions import override
66

7-
from crawlee.storage_clients._base import BaseDatasetClient
7+
from crawlee.storage_clients._base import DatasetClient as BaseDatasetClient
88
from crawlee.storage_clients.models import DatasetItemsListPage, DatasetMetadata
99

1010
if TYPE_CHECKING:

src/apify/apify_storage_client/_dataset_collection_client.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
from typing_extensions import override
66

7-
from crawlee.storage_clients._base import BaseDatasetCollectionClient
7+
from crawlee.storage_clients._base import DatasetCollectionClient as BaseDatasetCollectionClient
88
from crawlee.storage_clients.models import DatasetListPage, DatasetMetadata
99

1010
if TYPE_CHECKING:

src/apify/apify_storage_client/_key_value_store_client.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
from typing_extensions import override
77

8-
from crawlee.storage_clients._base import BaseKeyValueStoreClient
8+
from crawlee.storage_clients._base import KeyValueStoreClient as BaseKeyValueStoreClient
99
from crawlee.storage_clients.models import KeyValueStoreListKeysPage, KeyValueStoreMetadata, KeyValueStoreRecord
1010

1111
from apify._crypto import create_hmac_signature

src/apify/apify_storage_client/_key_value_store_collection_client.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
from typing_extensions import override
66

7-
from crawlee.storage_clients._base import BaseKeyValueStoreCollectionClient
7+
from crawlee.storage_clients._base import KeyValueStoreCollectionClient as BaseKeyValueStoreCollectionClient
88
from crawlee.storage_clients.models import KeyValueStoreListPage, KeyValueStoreMetadata
99

1010
if TYPE_CHECKING:

src/apify/apify_storage_client/_request_queue_client.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
from typing_extensions import override
66

77
from crawlee import Request
8-
from crawlee.storage_clients._base import BaseRequestQueueClient
8+
from crawlee.storage_clients._base import RequestQueueClient as BaseRequestQueueClient
99
from crawlee.storage_clients.models import (
1010
BatchRequestsOperationResponse,
1111
ProcessedRequest,

0 commit comments

Comments
 (0)