From 91dd5e75d31edf162206cbab2fa3caf31ba7bed0 Mon Sep 17 00:00:00 2001 From: Jan Buchar Date: Wed, 28 Aug 2024 16:12:50 +0200 Subject: [PATCH 1/8] Introduce Webhook model --- src/apify/__init__.py | 13 ++++++++++++- src/apify/_actor.py | 40 ++++++++++++++++++++-------------------- src/apify/_models.py | 27 +++++++++++++++++++++++++++ 3 files changed, 59 insertions(+), 21 deletions(-) create mode 100644 src/apify/_models.py diff --git a/src/apify/__init__.py b/src/apify/__init__.py index 71ca3d2a..a88db6bb 100644 --- a/src/apify/__init__.py +++ b/src/apify/__init__.py @@ -1,11 +1,22 @@ from importlib import metadata +from apify_shared.consts import WebhookEventType from crawlee.events._types import Event from apify._actor import Actor from apify._configuration import Configuration +from apify._models import Webhook from apify._proxy_configuration import ProxyConfiguration, ProxyInfo __version__ = metadata.version('apify') -__all__ = ['Actor', 'Event', 'Configuration', 'ProxyConfiguration', 'ProxyInfo', '__version__'] +__all__ = [ + 'Actor', + 'Event', + 'Configuration', + 'ProxyConfiguration', + 'ProxyInfo', + 'Webhook', + 'WebhookEventType', + '__version__', +] diff --git a/src/apify/_actor.py b/src/apify/_actor.py index 3a1a19bf..db0fd71c 100644 --- a/src/apify/_actor.py +++ b/src/apify/_actor.py @@ -11,7 +11,7 @@ from typing_extensions import Self from apify_client import ApifyClientAsync -from apify_shared.consts import ActorEnvVars, ActorExitCodes, ApifyEnvVars, WebhookEventType +from apify_shared.consts import ActorEnvVars, ActorExitCodes, ApifyEnvVars from apify_shared.utils import ignore_docs, maybe_extract_enum_member_value from crawlee import service_container from crawlee.events._types import Event, EventPersistStateData @@ -32,6 +32,8 @@ from crawlee.proxy_configuration import _NewUrlFunction + from apify._models import Webhook + MainReturnType = TypeVar('MainReturnType') @@ -533,7 +535,7 @@ async def start( memory_mbytes: int | None = None, timeout: timedelta | None = None, wait_for_finish: int | None = None, - webhooks: list[dict] | None = None, + webhooks: list[Webhook] | None = None, ) -> dict: """Run an Actor on the Apify platform. @@ -555,10 +557,6 @@ async def start( webhooks: Optional ad-hoc webhooks (https://docs.apify.com/webhooks/ad-hoc-webhooks) associated with the Actor run which can be used to receive a notification, e.g. when the Actor finished or failed. If you already have a webhook set up for the Actor or task, you do not have to add it again here. - Each webhook is represented by a dictionary containing these items: - * `event_types`: list of `WebhookEventType` values which trigger the webhook - * `request_url`: URL to which to send the webhook HTTP request - * `payload_template` (optional): Optional template for the request payload Returns: Info about the started Actor run @@ -574,7 +572,9 @@ async def start( memory_mbytes=memory_mbytes, timeout_secs=int(timeout.total_seconds()) if timeout is not None else None, wait_for_finish=wait_for_finish, - webhooks=webhooks, + webhooks=[hook.model_dump(by_alias=True, exclude_unset=True, exclude_defaults=True) for hook in webhooks] + if webhooks + else None, ) async def abort( @@ -619,7 +619,7 @@ async def call( build: str | None = None, memory_mbytes: int | None = None, timeout: timedelta | None = None, - webhooks: list[dict] | None = None, + webhooks: list[Webhook] | None = None, wait: timedelta | None = None, ) -> dict | None: """Start an Actor on the Apify Platform and wait for it to finish before returning. @@ -656,7 +656,9 @@ async def call( build=build, memory_mbytes=memory_mbytes, timeout_secs=int(timeout.total_seconds()) if timeout is not None else None, - webhooks=webhooks, + webhooks=[hook.model_dump(by_alias=True, exclude_defaults=True, exclude_unset=True) for hook in webhooks] + if webhooks + else [], wait_secs=int(wait.total_seconds()) if wait is not None else None, ) @@ -668,7 +670,7 @@ async def call_task( build: str | None = None, memory_mbytes: int | None = None, timeout: timedelta | None = None, - webhooks: list[dict] | None = None, + webhooks: list[Webhook] | None = None, wait: timedelta | None = None, token: str | None = None, ) -> dict | None: @@ -708,7 +710,9 @@ async def call_task( build=build, memory_mbytes=memory_mbytes, timeout_secs=int(timeout.total_seconds()) if timeout is not None else None, - webhooks=webhooks, + webhooks=[hook.model_dump(by_alias=True, exclude_defaults=True, exclude_unset=True) for hook in webhooks] + if webhooks + else [], wait_secs=int(wait.total_seconds()) if wait is not None else None, ) @@ -796,10 +800,8 @@ async def reboot( async def add_webhook( self, + webhook: Webhook, *, - event_types: list[WebhookEventType], - request_url: str, - payload_template: str | None = None, ignore_ssl_errors: bool | None = None, do_not_retry: bool | None = None, idempotency_key: str | None = None, @@ -814,9 +816,7 @@ async def add_webhook( For more information about Apify Actor webhooks, please see the [documentation](https://docs.apify.com/webhooks). Args: - event_types: List of event types that should trigger the webhook. At least one is required. - request_url: URL that will be invoked once the webhook is triggered. - payload_template: Specification of the payload that will be sent to request_url + webhook: The webhook to be added ignore_ssl_errors: Whether the webhook should ignore SSL errors returned by request_url do_not_retry: Whether the webhook should retry sending the payload to request_url upon failure. idempotency_key: A unique identifier of a webhook. You can use it to ensure that you won't create @@ -837,9 +837,9 @@ async def add_webhook( return await self._apify_client.webhooks().create( actor_run_id=self._configuration.actor_run_id, - event_types=event_types, - request_url=request_url, - payload_template=payload_template, + event_types=webhook.event_types, + request_url=webhook.request_url, + payload_template=webhook.payload_template, ignore_ssl_errors=ignore_ssl_errors, do_not_retry=do_not_retry, idempotency_key=idempotency_key, diff --git a/src/apify/_models.py b/src/apify/_models.py new file mode 100644 index 00000000..88f0f5f1 --- /dev/null +++ b/src/apify/_models.py @@ -0,0 +1,27 @@ +# ruff: noqa: TCH001 TCH002 TCH003 (Pydantic) +from __future__ import annotations + +from typing import Annotated + +from pydantic import BaseModel, BeforeValidator, ConfigDict, Field + +from apify_shared.consts import WebhookEventType +from crawlee._utils.urls import validate_http_url + + +class Webhook(BaseModel): + __model_config__ = ConfigDict(populate_by_name=True) + + event_types: Annotated[ + list[WebhookEventType], + Field(description='Event types that should trigger the webhook'), + ] + request_url: Annotated[ + str, + Field(description='URL that the webhook should call'), + BeforeValidator(validate_http_url), + ] + payload_template: Annotated[ + str | None, + Field(description='Template for the payload sent by the webook'), + ] = None From eb4284b5177d98408427e275bb653a2080732883 Mon Sep 17 00:00:00 2001 From: Jan Buchar Date: Wed, 28 Aug 2024 17:41:00 +0200 Subject: [PATCH 2/8] Add ActorRun model --- src/apify/_actor.py | 81 ++++++++++++++++++++++++------------------ src/apify/_models.py | 83 +++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 129 insertions(+), 35 deletions(-) diff --git a/src/apify/_actor.py b/src/apify/_actor.py index db0fd71c..51f863a1 100644 --- a/src/apify/_actor.py +++ b/src/apify/_actor.py @@ -19,6 +19,7 @@ from apify._configuration import Configuration from apify._consts import EVENT_LISTENERS_TIMEOUT from apify._crypto import decrypt_input_secrets, load_private_key +from apify._models import ActorRun from apify._platform_event_manager import EventManager, LocalEventManager, PlatformEventManager from apify._proxy_configuration import ProxyConfiguration from apify._utils import get_system_info, is_running_in_ipython @@ -536,7 +537,7 @@ async def start( timeout: timedelta | None = None, wait_for_finish: int | None = None, webhooks: list[Webhook] | None = None, - ) -> dict: + ) -> ActorRun: """Run an Actor on the Apify platform. Unlike `Actor.call`, this method just starts the run without waiting for finish. @@ -565,16 +566,20 @@ async def start( client = self.new_client(token=token) if token else self._apify_client - return await client.actor(actor_id).start( - run_input=run_input, - content_type=content_type, - build=build, - memory_mbytes=memory_mbytes, - timeout_secs=int(timeout.total_seconds()) if timeout is not None else None, - wait_for_finish=wait_for_finish, - webhooks=[hook.model_dump(by_alias=True, exclude_unset=True, exclude_defaults=True) for hook in webhooks] - if webhooks - else None, + return ActorRun.model_validate( + await client.actor(actor_id).start( + run_input=run_input, + content_type=content_type, + build=build, + memory_mbytes=memory_mbytes, + timeout_secs=int(timeout.total_seconds()) if timeout is not None else None, + wait_for_finish=wait_for_finish, + webhooks=[ + hook.model_dump(by_alias=True, exclude_unset=True, exclude_defaults=True) for hook in webhooks + ] + if webhooks + else None, + ) ) async def abort( @@ -584,7 +589,7 @@ async def abort( token: str | None = None, status_message: str | None = None, gracefully: bool | None = None, - ) -> dict: + ) -> ActorRun: """Abort given Actor run on the Apify platform using the current user account. The user account is determined by the `APIFY_TOKEN` environment variable. @@ -607,7 +612,7 @@ async def abort( if status_message: await client.run(run_id).update(status_message=status_message) - return await client.run(run_id).abort(gracefully=gracefully) + return ActorRun.model_validate(await client.run(run_id).abort(gracefully=gracefully)) async def call( self, @@ -621,7 +626,7 @@ async def call( timeout: timedelta | None = None, webhooks: list[Webhook] | None = None, wait: timedelta | None = None, - ) -> dict | None: + ) -> ActorRun | None: """Start an Actor on the Apify Platform and wait for it to finish before returning. It waits indefinitely, unless the wait argument is provided. @@ -650,16 +655,20 @@ async def call( client = self.new_client(token=token) if token else self._apify_client - return await client.actor(actor_id).call( - run_input=run_input, - content_type=content_type, - build=build, - memory_mbytes=memory_mbytes, - timeout_secs=int(timeout.total_seconds()) if timeout is not None else None, - webhooks=[hook.model_dump(by_alias=True, exclude_defaults=True, exclude_unset=True) for hook in webhooks] - if webhooks - else [], - wait_secs=int(wait.total_seconds()) if wait is not None else None, + return ActorRun.model_validate( + await client.actor(actor_id).call( + run_input=run_input, + content_type=content_type, + build=build, + memory_mbytes=memory_mbytes, + timeout_secs=int(timeout.total_seconds()) if timeout is not None else None, + webhooks=[ + hook.model_dump(by_alias=True, exclude_defaults=True, exclude_unset=True) for hook in webhooks + ] + if webhooks + else [], + wait_secs=int(wait.total_seconds()) if wait is not None else None, + ) ) async def call_task( @@ -673,7 +682,7 @@ async def call_task( webhooks: list[Webhook] | None = None, wait: timedelta | None = None, token: str | None = None, - ) -> dict | None: + ) -> ActorRun | None: """Start an Actor task on the Apify Platform and wait for it to finish before returning. It waits indefinitely, unless the wait argument is provided. @@ -705,15 +714,19 @@ async def call_task( client = self.new_client(token=token) if token else self._apify_client - return await client.task(task_id).call( - task_input=task_input, - build=build, - memory_mbytes=memory_mbytes, - timeout_secs=int(timeout.total_seconds()) if timeout is not None else None, - webhooks=[hook.model_dump(by_alias=True, exclude_defaults=True, exclude_unset=True) for hook in webhooks] - if webhooks - else [], - wait_secs=int(wait.total_seconds()) if wait is not None else None, + return ActorRun.model_validate( + await client.task(task_id).call( + task_input=task_input, + build=build, + memory_mbytes=memory_mbytes, + timeout_secs=int(timeout.total_seconds()) if timeout is not None else None, + webhooks=[ + hook.model_dump(by_alias=True, exclude_defaults=True, exclude_unset=True) for hook in webhooks + ] + if webhooks + else [], + wait_secs=int(wait.total_seconds()) if wait is not None else None, + ) ) async def metamorph( diff --git a/src/apify/_models.py b/src/apify/_models.py index 88f0f5f1..501a0a18 100644 --- a/src/apify/_models.py +++ b/src/apify/_models.py @@ -1,11 +1,13 @@ # ruff: noqa: TCH001 TCH002 TCH003 (Pydantic) from __future__ import annotations +from datetime import datetime, timedelta from typing import Annotated from pydantic import BaseModel, BeforeValidator, ConfigDict, Field -from apify_shared.consts import WebhookEventType +from apify_shared.consts import ActorJobStatus, MetaOrigin, WebhookEventType +from crawlee._utils.models import timedelta_ms from crawlee._utils.urls import validate_http_url @@ -25,3 +27,82 @@ class Webhook(BaseModel): str | None, Field(description='Template for the payload sent by the webook'), ] = None + + +class ActorRunMeta(BaseModel): + __model_config__ = ConfigDict(populate_by_name=True) + + origin: Annotated[MetaOrigin, Field()] + + +class ActorRunStats(BaseModel): + __model_config__ = ConfigDict(populate_by_name=True) + + input_body_len: Annotated[int, Field(alias='inputBodyLen')] + restart_count: Annotated[int, Field(alias='restartCount')] + resurrect_count: Annotated[int, Field(alias='resurrectCount')] + mem_avg_bytes: Annotated[float, Field(alias='memAvgBytes')] + mem_max_bytes: Annotated[int, Field(alias='memMaxBytes')] + mem_current_bytes: Annotated[int, Field(alias='memCurrentBytes')] + cpu_avg_usage: Annotated[float, Field(alias='cpuAvgUsage')] + cpu_max_usage: Annotated[float, Field(alias='cpuMaxUsage')] + cpu_current_usage: Annotated[float, Field(alias='cpuCurrentUsage')] + net_rx_bytes: Annotated[int, Field(alias='netRxBytes')] + net_tx_bytes: Annotated[int, Field(alias='netTxBytes')] + duration: Annotated[timedelta_ms, Field(alias='durationMillis')] + run_time: Annotated[timedelta, Field(alias='runTimeSecs')] + metamorph: Annotated[int, Field(alias='metamorph')] + compute_units: Annotated[float, Field(alias='computeUnits')] + + +class ActorRunOptions(BaseModel): + __model_config__ = ConfigDict(populate_by_name=True) + build: str + timeout: Annotated[timedelta, Field(alias='timeoutSecs')] + memory_mbytes: Annotated[int, Field(alias='memoryMbytes')] + disk_mbytes: Annotated[int, Field(alias='diskMbytes')] + + +class ActorRunUsage(BaseModel): + __model_config__ = ConfigDict(populate_by_name=True) + actor_compute_units: Annotated[int | None, Field(alias='ACTOR_COMPUTE_UNITS')] = None + dataset_reads: Annotated[int | None, Field(alias='DATASET_READS')] = None + dataset_writes: Annotated[int | None, Field(alias='DATASET_WRITES')] = None + key_value_store_reads: Annotated[int | None, Field(alias='KEY_VALUE_STORE_READS')] = None + key_value_store_writes: Annotated[int | None, Field(alias='KEY_VALUE_STORE_WRITES')] = None + key_value_store_lists: Annotated[int | None, Field(alias='KEY_VALUE_STORE_LISTS')] = None + request_queue_reads: Annotated[int | None, Field(alias='REQUEST_QUEUE_READS')] = None + request_queue_writes: Annotated[int | None, Field(alias='REQUEST_QUEUE_WRITES')] = None + data_transfer_internal_gbytes: Annotated[int | None, Field(alias='DATA_TRANSFER_INTERNAL_GBYTES')] = None + data_transfer_external_gbytes: Annotated[int | None, Field(alias='DATA_TRANSFER_EXTERNAL_GBYTES')] = None + proxy_residential_transfer_gbytes: Annotated[int | None, Field(alias='PROXY_RESIDENTIAL_TRANSFER_GBYTES')] = None + proxy_serps: Annotated[int | None, Field(alias='PROXY_SERPS')] = None + + +class ActorRun(BaseModel): + __model_config__ = ConfigDict(populate_by_name=True) + + id: Annotated[str, Field(alias='id')] + act_id: Annotated[str, Field(alias='actId')] + user_id: Annotated[str, Field(alias='userId')] + actor_task_id: Annotated[str | None, Field(alias='actorTaskId')] = None + started_at: Annotated[datetime, Field(alias='startedAt')] + finished_at: Annotated[datetime | None, Field(alias='finishedAt')] = None + status: Annotated[ActorJobStatus, Field(alias='status')] + status_message: Annotated[str | None, Field(alias='statusMessage')] = None + is_status_message_terminal: Annotated[bool, Field(alias='isStatusMessageTerminal')] = False + meta: Annotated[ActorRunMeta, Field(alias='meta')] + stats: Annotated[ActorRunStats, Field(alias='stats')] + options: Annotated[ActorRunOptions, Field(alias='options')] + build_id: Annotated[str, Field(alias='buildId')] + exit_code: Annotated[int | None, Field(alias='exitCode')] = None + default_key_value_store_id: Annotated[str, Field(alias='defaultKeyValueStoreId')] + default_dataset_id: Annotated[str, Field(alias='defaultDatasetId')] + default_request_queue_id: Annotated[str, Field(alias='defaultRequestQueueId')] + build_number: Annotated[str, Field(alias='buildNumber')] + container_url: Annotated[str, Field(alias='containerUrl')] + is_container_server_ready: Annotated[bool, Field(alias='isContainerServerReady')] = False + git_branch_name: Annotated[str | None, Field(alias='gitBranchName')] = None + usage: Annotated[ActorRunUsage | None, Field(alias='usage')] = None + usage_total_usd: Annotated[float | None, Field(alias='usageTotalUsd')] = None + usage_usd: Annotated[float | None, Field(alias='usageUsd')] = None From 45226a65929a8394a02aaab49d4517cb7aca7150 Mon Sep 17 00:00:00 2001 From: Jan Buchar Date: Wed, 28 Aug 2024 17:46:37 +0200 Subject: [PATCH 3/8] Fix remaining return types --- src/apify/_actor.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/apify/_actor.py b/src/apify/_actor.py index 51f863a1..ad9d5317 100644 --- a/src/apify/_actor.py +++ b/src/apify/_actor.py @@ -818,7 +818,7 @@ async def add_webhook( ignore_ssl_errors: bool | None = None, do_not_retry: bool | None = None, idempotency_key: str | None = None, - ) -> dict | None: + ) -> None: """Create an ad-hoc webhook for the current Actor run. This webhook lets you receive a notification when the Actor run finished or failed. @@ -842,13 +842,13 @@ async def add_webhook( if not self.is_at_home(): self.log.error('Actor.add_webhook() is only supported when running on the Apify platform.') - return None + return # If is_at_home() is True, config.actor_run_id is always set if not self._configuration.actor_run_id: raise RuntimeError('actor_run_id cannot be None when running on the Apify platform.') - return await self._apify_client.webhooks().create( + await self._apify_client.webhooks().create( actor_run_id=self._configuration.actor_run_id, event_types=webhook.event_types, request_url=webhook.request_url, @@ -863,7 +863,7 @@ async def set_status_message( status_message: str, *, is_terminal: bool | None = None, - ) -> dict | None: + ) -> ActorRun | None: """Set the status message for the current Actor run. Args: @@ -884,8 +884,10 @@ async def set_status_message( if not self._configuration.actor_run_id: raise RuntimeError('actor_run_id cannot be None when running on the Apify platform.') - return await self._apify_client.run(self._configuration.actor_run_id).update( - status_message=status_message, is_status_message_terminal=is_terminal + return ActorRun.model_validate( + await self._apify_client.run(self._configuration.actor_run_id).update( + status_message=status_message, is_status_message_terminal=is_terminal + ) ) async def create_proxy_configuration( From 7caed869bba04bb4314fafe39efdd072a7d2b479 Mon Sep 17 00:00:00 2001 From: Jan Buchar Date: Thu, 29 Aug 2024 11:30:06 +0200 Subject: [PATCH 4/8] Update unit tests --- tests/unit/actor/test_actor_helpers.py | 53 ++++++++++++++++++++++---- 1 file changed, 46 insertions(+), 7 deletions(-) diff --git a/tests/unit/actor/test_actor_helpers.py b/tests/unit/actor/test_actor_helpers.py index 33997d80..f8494a22 100644 --- a/tests/unit/actor/test_actor_helpers.py +++ b/tests/unit/actor/test_actor_helpers.py @@ -1,11 +1,11 @@ from __future__ import annotations -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, ClassVar from apify_client import ApifyClientAsync from apify_shared.consts import ApifyEnvVars, WebhookEventType -from apify import Actor +from apify import Actor, Webhook from apify._actor import _ActorType if TYPE_CHECKING: @@ -34,11 +34,50 @@ async def test_actor_new_client_config(self: TestActorNewClient, monkeypatch: py class TestActorCallStartAbortActor: + FAKE_ACTOR_RUN: ClassVar = { + 'id': 'asdfasdf', + 'buildId': '3ads35', + 'buildNumber': '3.4.5', + 'actId': 'actor_id', + 'actorId': 'actor_id', + 'userId': 'user_id', + 'startedAt': '2024-08-08 12:12:44', + 'status': 'RUNNING', + 'meta': {'origin': 'API'}, + 'containerUrl': 'http://0.0.0.0:3333', + 'defaultDatasetId': 'dhasdrfughaerguoi', + 'defaultKeyValueStoreId': 'asjkldhguiofg', + 'defaultRequestQueueId': 'lkjgklserjghios', + 'stats': { + 'inputBodyLen': 0, + 'restartCount': 0, + 'resurrectCount': 0, + 'memAvgBytes': 0, + 'memMaxBytes': 0, + 'memCurrentBytes': 0, + 'cpuAvgUsage': 0, + 'cpuMaxUsage': 0, + 'cpuCurrentUsage': 0, + 'netRxBytes': 0, + 'netTxBytes': 0, + 'durationMillis': 3333, + 'runTimeSecs': 33, + 'metamorph': 0, + 'computeUnits': 4.33, + }, + 'options': { + 'build': '', + 'timeoutSecs': 44, + 'memoryMbytes': 4096, + 'diskMbytes': 16384, + }, + } + async def test_actor_call( self: TestActorCallStartAbortActor, apify_client_async_patcher: ApifyClientAsyncPatcher, ) -> None: - apify_client_async_patcher.patch('actor', 'call', return_value=None) + apify_client_async_patcher.patch('actor', 'call', return_value=self.FAKE_ACTOR_RUN) actor_id = 'some-actor-id' async with Actor: @@ -52,7 +91,7 @@ async def test_actor_call_task( self: TestActorCallStartAbortActor, apify_client_async_patcher: ApifyClientAsyncPatcher, ) -> None: - apify_client_async_patcher.patch('task', 'call', return_value=None) + apify_client_async_patcher.patch('task', 'call', return_value=self.FAKE_ACTOR_RUN) task_id = 'some-task-id' async with Actor: @@ -65,7 +104,7 @@ async def test_actor_start( self: TestActorCallStartAbortActor, apify_client_async_patcher: ApifyClientAsyncPatcher, ) -> None: - apify_client_async_patcher.patch('actor', 'start', return_value=None) + apify_client_async_patcher.patch('actor', 'start', return_value=self.FAKE_ACTOR_RUN) actor_id = 'some-id' async with Actor: @@ -78,7 +117,7 @@ async def test_actor_abort( self: TestActorCallStartAbortActor, apify_client_async_patcher: ApifyClientAsyncPatcher, ) -> None: - apify_client_async_patcher.patch('run', 'abort', return_value=None) + apify_client_async_patcher.patch('run', 'abort', return_value=self.FAKE_ACTOR_RUN) run_id = 'some-run-id' async with Actor: @@ -119,7 +158,7 @@ async def test_actor_add_webhook_not_work_locally( ) -> None: async with Actor: await Actor.add_webhook( - event_types=[WebhookEventType.ACTOR_BUILD_ABORTED], request_url='https://example.com' + Webhook(event_types=[WebhookEventType.ACTOR_BUILD_ABORTED], request_url='https://example.com') ) assert len(caplog.records) == 1 From 3c491afcaf6ac7ea96dc71117fa241427dbc3c51 Mon Sep 17 00:00:00 2001 From: Jan Buchar Date: Thu, 29 Aug 2024 12:39:56 +0200 Subject: [PATCH 5/8] Adjust ActorRun model so that we actually pass integration tests --- src/apify/_models.py | 54 ++++++++++----------- tests/integration/test_actor_api_helpers.py | 8 +-- 2 files changed, 32 insertions(+), 30 deletions(-) diff --git a/src/apify/_models.py b/src/apify/_models.py index 501a0a18..dfec5d29 100644 --- a/src/apify/_models.py +++ b/src/apify/_models.py @@ -41,17 +41,17 @@ class ActorRunStats(BaseModel): input_body_len: Annotated[int, Field(alias='inputBodyLen')] restart_count: Annotated[int, Field(alias='restartCount')] resurrect_count: Annotated[int, Field(alias='resurrectCount')] - mem_avg_bytes: Annotated[float, Field(alias='memAvgBytes')] - mem_max_bytes: Annotated[int, Field(alias='memMaxBytes')] - mem_current_bytes: Annotated[int, Field(alias='memCurrentBytes')] - cpu_avg_usage: Annotated[float, Field(alias='cpuAvgUsage')] - cpu_max_usage: Annotated[float, Field(alias='cpuMaxUsage')] - cpu_current_usage: Annotated[float, Field(alias='cpuCurrentUsage')] - net_rx_bytes: Annotated[int, Field(alias='netRxBytes')] - net_tx_bytes: Annotated[int, Field(alias='netTxBytes')] - duration: Annotated[timedelta_ms, Field(alias='durationMillis')] - run_time: Annotated[timedelta, Field(alias='runTimeSecs')] - metamorph: Annotated[int, Field(alias='metamorph')] + mem_avg_bytes: Annotated[float | None, Field(alias='memAvgBytes')] = None + mem_max_bytes: Annotated[int | None, Field(alias='memMaxBytes')] = None + mem_current_bytes: Annotated[int | None, Field(alias='memCurrentBytes')] = None + cpu_avg_usage: Annotated[float | None, Field(alias='cpuAvgUsage')] = None + cpu_max_usage: Annotated[float | None, Field(alias='cpuMaxUsage')] = None + cpu_current_usage: Annotated[float | None, Field(alias='cpuCurrentUsage')] = None + net_rx_bytes: Annotated[int | None, Field(alias='netRxBytes')] = None + net_tx_bytes: Annotated[int | None, Field(alias='netTxBytes')] = None + duration: Annotated[timedelta_ms | None, Field(alias='durationMillis')] = None + run_time: Annotated[timedelta | None, Field(alias='runTimeSecs')] = None + metamorph: Annotated[int | None, Field(alias='metamorph')] = None compute_units: Annotated[float, Field(alias='computeUnits')] @@ -65,18 +65,18 @@ class ActorRunOptions(BaseModel): class ActorRunUsage(BaseModel): __model_config__ = ConfigDict(populate_by_name=True) - actor_compute_units: Annotated[int | None, Field(alias='ACTOR_COMPUTE_UNITS')] = None - dataset_reads: Annotated[int | None, Field(alias='DATASET_READS')] = None - dataset_writes: Annotated[int | None, Field(alias='DATASET_WRITES')] = None - key_value_store_reads: Annotated[int | None, Field(alias='KEY_VALUE_STORE_READS')] = None - key_value_store_writes: Annotated[int | None, Field(alias='KEY_VALUE_STORE_WRITES')] = None - key_value_store_lists: Annotated[int | None, Field(alias='KEY_VALUE_STORE_LISTS')] = None - request_queue_reads: Annotated[int | None, Field(alias='REQUEST_QUEUE_READS')] = None - request_queue_writes: Annotated[int | None, Field(alias='REQUEST_QUEUE_WRITES')] = None - data_transfer_internal_gbytes: Annotated[int | None, Field(alias='DATA_TRANSFER_INTERNAL_GBYTES')] = None - data_transfer_external_gbytes: Annotated[int | None, Field(alias='DATA_TRANSFER_EXTERNAL_GBYTES')] = None - proxy_residential_transfer_gbytes: Annotated[int | None, Field(alias='PROXY_RESIDENTIAL_TRANSFER_GBYTES')] = None - proxy_serps: Annotated[int | None, Field(alias='PROXY_SERPS')] = None + actor_compute_units: Annotated[float | None, Field(alias='ACTOR_COMPUTE_UNITS')] = None + dataset_reads: Annotated[float | None, Field(alias='DATASET_READS')] = None + dataset_writes: Annotated[float | None, Field(alias='DATASET_WRITES')] = None + key_value_store_reads: Annotated[float | None, Field(alias='KEY_VALUE_STORE_READS')] = None + key_value_store_writes: Annotated[float | None, Field(alias='KEY_VALUE_STORE_WRITES')] = None + key_value_store_lists: Annotated[float | None, Field(alias='KEY_VALUE_STORE_LISTS')] = None + request_queue_reads: Annotated[float | None, Field(alias='REQUEST_QUEUE_READS')] = None + request_queue_writes: Annotated[float | None, Field(alias='REQUEST_QUEUE_WRITES')] = None + data_transfer_internal_gbytes: Annotated[float | None, Field(alias='DATA_TRANSFER_INTERNAL_GBYTES')] = None + data_transfer_external_gbytes: Annotated[float | None, Field(alias='DATA_TRANSFER_EXTERNAL_GBYTES')] = None + proxy_residential_transfer_gbytes: Annotated[float | None, Field(alias='PROXY_RESIDENTIAL_TRANSFER_GBYTES')] = None + proxy_serps: Annotated[float | None, Field(alias='PROXY_SERPS')] = None class ActorRun(BaseModel): @@ -90,7 +90,7 @@ class ActorRun(BaseModel): finished_at: Annotated[datetime | None, Field(alias='finishedAt')] = None status: Annotated[ActorJobStatus, Field(alias='status')] status_message: Annotated[str | None, Field(alias='statusMessage')] = None - is_status_message_terminal: Annotated[bool, Field(alias='isStatusMessageTerminal')] = False + is_status_message_terminal: Annotated[bool | None, Field(alias='isStatusMessageTerminal')] = None meta: Annotated[ActorRunMeta, Field(alias='meta')] stats: Annotated[ActorRunStats, Field(alias='stats')] options: Annotated[ActorRunOptions, Field(alias='options')] @@ -99,10 +99,10 @@ class ActorRun(BaseModel): default_key_value_store_id: Annotated[str, Field(alias='defaultKeyValueStoreId')] default_dataset_id: Annotated[str, Field(alias='defaultDatasetId')] default_request_queue_id: Annotated[str, Field(alias='defaultRequestQueueId')] - build_number: Annotated[str, Field(alias='buildNumber')] + build_number: Annotated[str | None, Field(alias='buildNumber')] = None container_url: Annotated[str, Field(alias='containerUrl')] - is_container_server_ready: Annotated[bool, Field(alias='isContainerServerReady')] = False + is_container_server_ready: Annotated[bool | None, Field(alias='isContainerServerReady')] = None git_branch_name: Annotated[str | None, Field(alias='gitBranchName')] = None usage: Annotated[ActorRunUsage | None, Field(alias='usage')] = None usage_total_usd: Annotated[float | None, Field(alias='usageTotalUsd')] = None - usage_usd: Annotated[float | None, Field(alias='usageUsd')] = None + usage_usd: Annotated[ActorRunUsage | None, Field(alias='usageUsd')] = None diff --git a/tests/integration/test_actor_api_helpers.py b/tests/integration/test_actor_api_helpers.py index a0788a69..8073e292 100644 --- a/tests/integration/test_actor_api_helpers.py +++ b/tests/integration/test_actor_api_helpers.py @@ -406,15 +406,17 @@ def do_POST(self) -> None: # noqa: N802 await Actor.set_value('WEBHOOK_BODY', webhook_body) async def main_client() -> None: - from apify_shared.consts import WebhookEventType + from apify import Webhook, WebhookEventType async with Actor: actor_input = await Actor.get_input() or {} server_actor_container_url = str(actor_input.get('server_actor_container_url')) await Actor.add_webhook( - event_types=[WebhookEventType.ACTOR_RUN_SUCCEEDED], - request_url=server_actor_container_url, + Webhook( + event_types=[WebhookEventType.ACTOR_RUN_SUCCEEDED], + request_url=server_actor_container_url, + ) ) server_actor, client_actor = await asyncio.gather( From 63e666dc332871ddcc120561d8f351cbbdbb17ca Mon Sep 17 00:00:00 2001 From: Jan Buchar Date: Thu, 29 Aug 2024 12:55:30 +0200 Subject: [PATCH 6/8] Update upgrading guide --- docs/04-upgrading/upgrading_to_v20.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/04-upgrading/upgrading_to_v20.md b/docs/04-upgrading/upgrading_to_v20.md index 43c18b1a..4f396a76 100644 --- a/docs/04-upgrading/upgrading_to_v20.md +++ b/docs/04-upgrading/upgrading_to_v20.md @@ -19,7 +19,9 @@ Attributes suffixed with `_millis` were renamed to remove said suffix and have t ## Actor -The `Actor.main` method has been removed as it brings no benefits compared to using `async with Actor`. +- The `Actor.main` method has been removed as it brings no benefits compared to using `async with Actor`. +- The `Actor.add_webhook`, `Actor.start`, `Actor.call` and `Actor.start_task` methods now accept instances of the `apify.Webhook` model instead of an untyped `dict`. +- `Actor.start`, `Actor.call`, `Actor.start_task`, `Actor.set_status_message` and `Actor.abort` return instances of the `ActorRun` model instead of an untyped `dict`. ## Scrapy integration From 2e92adbee4541202eb1b42f60d336579a50b58e0 Mon Sep 17 00:00:00 2001 From: Jan Buchar Date: Thu, 29 Aug 2024 14:24:34 +0200 Subject: [PATCH 7/8] Apply suggestions from code review Co-authored-by: Vlada Dusek --- src/apify/_models.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/apify/_models.py b/src/apify/_models.py index dfec5d29..5963ec9a 100644 --- a/src/apify/_models.py +++ b/src/apify/_models.py @@ -57,6 +57,7 @@ class ActorRunStats(BaseModel): class ActorRunOptions(BaseModel): __model_config__ = ConfigDict(populate_by_name=True) + build: str timeout: Annotated[timedelta, Field(alias='timeoutSecs')] memory_mbytes: Annotated[int, Field(alias='memoryMbytes')] @@ -65,6 +66,7 @@ class ActorRunOptions(BaseModel): class ActorRunUsage(BaseModel): __model_config__ = ConfigDict(populate_by_name=True) + actor_compute_units: Annotated[float | None, Field(alias='ACTOR_COMPUTE_UNITS')] = None dataset_reads: Annotated[float | None, Field(alias='DATASET_READS')] = None dataset_writes: Annotated[float | None, Field(alias='DATASET_WRITES')] = None From dde7fc2fd5dd23b73be5c431dcae5c3e13c26f07 Mon Sep 17 00:00:00 2001 From: Jan Buchar Date: Thu, 29 Aug 2024 14:49:24 +0200 Subject: [PATCH 8/8] More intermediate variables --- src/apify/_actor.py | 103 ++++++++++++++++++++++++-------------------- 1 file changed, 57 insertions(+), 46 deletions(-) diff --git a/src/apify/_actor.py b/src/apify/_actor.py index ad9d5317..bc5f4915 100644 --- a/src/apify/_actor.py +++ b/src/apify/_actor.py @@ -566,22 +566,25 @@ async def start( client = self.new_client(token=token) if token else self._apify_client - return ActorRun.model_validate( - await client.actor(actor_id).start( - run_input=run_input, - content_type=content_type, - build=build, - memory_mbytes=memory_mbytes, - timeout_secs=int(timeout.total_seconds()) if timeout is not None else None, - wait_for_finish=wait_for_finish, - webhooks=[ - hook.model_dump(by_alias=True, exclude_unset=True, exclude_defaults=True) for hook in webhooks - ] - if webhooks - else None, - ) + if webhooks: + serialized_webhooks = [ + hook.model_dump(by_alias=True, exclude_unset=True, exclude_defaults=True) for hook in webhooks + ] + else: + serialized_webhooks = None + + api_result = await client.actor(actor_id).start( + run_input=run_input, + content_type=content_type, + build=build, + memory_mbytes=memory_mbytes, + timeout_secs=int(timeout.total_seconds()) if timeout is not None else None, + wait_for_finish=wait_for_finish, + webhooks=serialized_webhooks, ) + return ActorRun.model_validate(api_result) + async def abort( self, run_id: str, @@ -612,7 +615,9 @@ async def abort( if status_message: await client.run(run_id).update(status_message=status_message) - return ActorRun.model_validate(await client.run(run_id).abort(gracefully=gracefully)) + api_result = await client.run(run_id).abort(gracefully=gracefully) + + return ActorRun.model_validate(api_result) async def call( self, @@ -655,22 +660,25 @@ async def call( client = self.new_client(token=token) if token else self._apify_client - return ActorRun.model_validate( - await client.actor(actor_id).call( - run_input=run_input, - content_type=content_type, - build=build, - memory_mbytes=memory_mbytes, - timeout_secs=int(timeout.total_seconds()) if timeout is not None else None, - webhooks=[ - hook.model_dump(by_alias=True, exclude_defaults=True, exclude_unset=True) for hook in webhooks - ] - if webhooks - else [], - wait_secs=int(wait.total_seconds()) if wait is not None else None, - ) + if webhooks: + serialized_webhooks = [ + hook.model_dump(by_alias=True, exclude_unset=True, exclude_defaults=True) for hook in webhooks + ] + else: + serialized_webhooks = None + + api_result = await client.actor(actor_id).call( + run_input=run_input, + content_type=content_type, + build=build, + memory_mbytes=memory_mbytes, + timeout_secs=int(timeout.total_seconds()) if timeout is not None else None, + webhooks=serialized_webhooks, + wait_secs=int(wait.total_seconds()) if wait is not None else None, ) + return ActorRun.model_validate(api_result) + async def call_task( self, task_id: str, @@ -714,21 +722,24 @@ async def call_task( client = self.new_client(token=token) if token else self._apify_client - return ActorRun.model_validate( - await client.task(task_id).call( - task_input=task_input, - build=build, - memory_mbytes=memory_mbytes, - timeout_secs=int(timeout.total_seconds()) if timeout is not None else None, - webhooks=[ - hook.model_dump(by_alias=True, exclude_defaults=True, exclude_unset=True) for hook in webhooks - ] - if webhooks - else [], - wait_secs=int(wait.total_seconds()) if wait is not None else None, - ) + if webhooks: + serialized_webhooks = [ + hook.model_dump(by_alias=True, exclude_unset=True, exclude_defaults=True) for hook in webhooks + ] + else: + serialized_webhooks = None + + api_result = await client.task(task_id).call( + task_input=task_input, + build=build, + memory_mbytes=memory_mbytes, + timeout_secs=int(timeout.total_seconds()) if timeout is not None else None, + webhooks=serialized_webhooks, + wait_secs=int(wait.total_seconds()) if wait is not None else None, ) + return ActorRun.model_validate(api_result) + async def metamorph( self, target_actor_id: str, @@ -884,12 +895,12 @@ async def set_status_message( if not self._configuration.actor_run_id: raise RuntimeError('actor_run_id cannot be None when running on the Apify platform.') - return ActorRun.model_validate( - await self._apify_client.run(self._configuration.actor_run_id).update( - status_message=status_message, is_status_message_terminal=is_terminal - ) + api_result = await self._apify_client.run(self._configuration.actor_run_id).update( + status_message=status_message, is_status_message_terminal=is_terminal ) + return ActorRun.model_validate(api_result) + async def create_proxy_configuration( self, *,