diff --git a/docs/04_upgrading/upgrading_to_v4.md b/docs/04_upgrading/upgrading_to_v4.md index a650b932..65d2f27e 100644 --- a/docs/04_upgrading/upgrading_to_v4.md +++ b/docs/04_upgrading/upgrading_to_v4.md @@ -83,6 +83,20 @@ run = await Actor.start('my-actor-id', wait_for_finish=60) run = await Actor.call('my-actor-id', wait=timedelta(seconds=60)) ``` +## Purging storages on the Apify platform + +The Apify platform storage clients do not support `purge()`. Calling `purge()` on a dataset, key-value store, or request queue opened on the platform raises `NotImplementedError`, because the platform API has no operation that clears a storage's contents in place. To delete a storage, call `drop()` instead. + +```python +# Raises NotImplementedError on the Apify platform. +await dataset.purge() + +# Delete the storage instead. +await dataset.drop() +``` + +Local file-system storages are unaffected and still support `purge()`. + ## Built on apify-client v3 The SDK is now built on [`apify-client`](https://docs.apify.com/api/client/python) v3 and no longer depends on `apify-shared`. The sections below cover the user-visible consequences; see the client's [Upgrading to v3](https://docs.apify.com/api/client/python/docs/upgrading/upgrading-to-v3) guide for the full list of changes in the client itself. diff --git a/src/apify/_actor.py b/src/apify/_actor.py index 23423296..d4d00659 100644 --- a/src/apify/_actor.py +++ b/src/apify/_actor.py @@ -118,7 +118,7 @@ def __init__( configuration: The Actor configuration to use. If not provided, a default configuration is created. configure_logging: Whether to set up the default logging configuration. exit_process: Whether the Actor should call `sys.exit` when the context manager exits. - Defaults to True, except in IPython, Pytest, and Scrapy environments. + Defaults to True, except in IPython and Scrapy environments. exit_code: The exit code the Actor should use when exiting. 
status_message: Final status message to display upon Actor termination. event_listeners_timeout: Maximum time to wait for Actor event listeners to complete before exiting. @@ -913,7 +913,7 @@ async def start( a non-zero status code. memory_mbytes: Memory limit for the run, in megabytes. By default, the run uses a memory limit specified in the default run configuration for the Actor. - timeout: Optional timeout for the run, in seconds. By default, the run uses timeout specified in + timeout: Optional timeout for the run. By default, the run uses timeout specified in the default run configuration for the Actor. Using `inherit` will set timeout of the other Actor to the time remaining from this Actor timeout. force_permission_level: Override the Actor's permissions for this run. If not set, the Actor will run @@ -1020,7 +1020,7 @@ async def call( a non-zero status code. memory_mbytes: Memory limit for the run, in megabytes. By default, the run uses a memory limit specified in the default run configuration for the Actor. - timeout: Optional timeout for the run, in seconds. By default, the run uses timeout specified in + timeout: Optional timeout for the run. By default, the run uses timeout specified in the default run configuration for the Actor. Using `inherit` will set timeout of the other Actor to the time remaining from this Actor timeout. force_permission_level: Override the Actor's permissions for this run. If not set, the Actor will run @@ -1089,17 +1089,16 @@ async def call_task( directly rather than an Actor task, please use the `Actor.call` Args: - task_id: The ID of the Actor to be run. + task_id: The ID of the Actor task to be run. task_input: Overrides the input to pass to the Actor run. token: The Apify API token to use for this request (defaults to the `APIFY_TOKEN` environment variable). - content_type: The content type of the input. build: Specifies the Actor build to run. It can be either a build tag or build number. 
By default, the run uses the build specified in the default run configuration for the Actor (typically latest). restart_on_error: If true, the Task run process will be restarted whenever it exits with a non-zero status code. memory_mbytes: Memory limit for the run, in megabytes. By default, the run uses a memory limit specified in the default run configuration for the Actor. - timeout: Optional timeout for the run, in seconds. By default, the run uses timeout specified in + timeout: Optional timeout for the run. By default, the run uses timeout specified in the default run configuration for the Actor. Using `inherit` will set timeout of the other Actor to the time remaining from this Actor timeout. webhooks: Optional webhooks (https://docs.apify.com/webhooks) associated with the Actor run, which can @@ -1214,7 +1213,7 @@ async def reboot( # Call all the listeners for the PERSIST_STATE and MIGRATING events, and wait for them to finish. # PERSIST_STATE listeners are called to allow the Actor to persist its state before the reboot. # MIGRATING listeners are called to allow the Actor to gracefully stop in-progress tasks before the reboot. - # Typically, crawlers are listening for the MIIGRATING event to stop processing new requests. + # Typically, crawlers are listening for the MIGRATING event to stop processing new requests. # We can't just emit the events and wait for all listeners to finish, # because this method might be called from an event listener itself, and we would deadlock. persist_state_listeners = flatten( diff --git a/src/apify/_configuration.py b/src/apify/_configuration.py index 05710dd9..4103c684 100644 --- a/src/apify/_configuration.py +++ b/src/apify/_configuration.py @@ -426,7 +426,7 @@ class Configuration(CrawleeConfiguration): 'actor_web_server_port', 'apify_container_port', ), - description='TCP port for the Actor to start an HTTP server on' + description='TCP port for the Actor to start an HTTP server on. 
' 'This server can be used to receive external messages or expose monitoring and control interfaces', ), ] = 4321 @@ -458,7 +458,7 @@ class Configuration(CrawleeConfiguration): | None, Field( alias='apify_actor_pricing_info', - description='JSON string with prising info of the actor', + description='JSON string with pricing info of the actor', discriminator='pricing_model', ), BeforeValidator(_parse_actor_pricing_info), diff --git a/src/apify/events/_types.py b/src/apify/events/_types.py index 9e39fd67..c7ba0ada 100644 --- a/src/apify/events/_types.py +++ b/src/apify/events/_types.py @@ -20,9 +20,8 @@ ActorEventTypes = Literal['systemInfo', 'persistState', 'migrating', 'aborting'] """Event types emitted by the Apify platform during an Actor run. -This is the Apify-specific subset of [`Event`][crawlee.events.Event] — for the full set -(including framework-level events like `SESSION_RETIRED` or `BROWSER_LAUNCHED`) use -[`Event`][crawlee.events.Event] from `apify`. +This is the Apify-specific subset of `Event`. For the full set (including framework-level events +like `SESSION_RETIRED` or `BROWSER_LAUNCHED`), use `Event` from `apify`. """ diff --git a/src/apify/storage_clients/_apify/_models.py b/src/apify/storage_clients/_apify/_models.py index a0a1d1e0..a8aba476 100644 --- a/src/apify/storage_clients/_apify/_models.py +++ b/src/apify/storage_clients/_apify/_models.py @@ -102,7 +102,13 @@ class CachedRequest(BaseModel): """The expiration time of the lock on the request.""" +@docs_group('Storage data') class ApifyRequestQueueMetadata(RequestQueueMetadata): + """Extended request queue metadata model for the Apify platform. + + Includes additional Apify-specific fields. 
+ """ + model_config = ConfigDict(alias_generator=to_camel) stats: Annotated[ diff --git a/src/apify/storage_clients/_file_system/_storage_client.py b/src/apify/storage_clients/_file_system/_storage_client.py index 8332f0fd..be31dc29 100644 --- a/src/apify/storage_clients/_file_system/_storage_client.py +++ b/src/apify/storage_clients/_file_system/_storage_client.py @@ -9,6 +9,7 @@ from ._dataset_client import ApifyFileSystemDatasetClient from ._key_value_store_client import ApifyFileSystemKeyValueStoreClient +from apify._utils import docs_group if TYPE_CHECKING: from collections.abc import Hashable @@ -16,6 +17,7 @@ from crawlee.storage_clients._file_system import FileSystemKeyValueStoreClient +@docs_group('Storage clients') class ApifyFileSystemStorageClient(FileSystemStorageClient): """Apify-specific implementation of the file system storage client.