Skip to content

Commit abb87e7

Browse files
janbucharvdusek
andauthored
feat: Better Actor API typing (#256)
Methods such as `Actor.add_webhook` or `Actor.start` now use Pydantic models to ensure correctness and enable type checking and completions. - closes #243 --------- Co-authored-by: Vlada Dusek <[email protected]>
1 parent 71141a8 commit abb87e7

File tree

6 files changed

+235
-45
lines changed

6 files changed

+235
-45
lines changed

docs/04-upgrading/upgrading_to_v20.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,9 @@ Attributes suffixed with `_millis` were renamed to remove said suffix and have t
1919

2020
## Actor
2121

22-
The `Actor.main` method has been removed as it brings no benefits compared to using `async with Actor`.
22+
- The `Actor.main` method has been removed as it brings no benefits compared to using `async with Actor`.
23+
- The `Actor.add_webhook`, `Actor.start`, `Actor.call` and `Actor.start_task` methods now accept instances of the `apify.Webhook` model instead of an untyped `dict`.
24+
- `Actor.start`, `Actor.call`, `Actor.start_task`, `Actor.set_status_message` and `Actor.abort` return instances of the `ActorRun` model instead of an untyped `dict`.
2325

2426
## Scrapy integration
2527

src/apify/__init__.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,22 @@
11
from importlib import metadata
22

3+
from apify_shared.consts import WebhookEventType
34
from crawlee.events._types import Event
45

56
from apify._actor import Actor
67
from apify._configuration import Configuration
8+
from apify._models import Webhook
79
from apify._proxy_configuration import ProxyConfiguration, ProxyInfo
810

911
__version__ = metadata.version('apify')
1012

11-
__all__ = ['Actor', 'Event', 'Configuration', 'ProxyConfiguration', 'ProxyInfo', '__version__']
13+
__all__ = [
14+
'Actor',
15+
'Event',
16+
'Configuration',
17+
'ProxyConfiguration',
18+
'ProxyInfo',
19+
'Webhook',
20+
'WebhookEventType',
21+
'__version__',
22+
]

src/apify/_actor.py

Lines changed: 59 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -11,14 +11,15 @@
1111
from typing_extensions import Self
1212

1313
from apify_client import ApifyClientAsync
14-
from apify_shared.consts import ActorEnvVars, ActorExitCodes, ApifyEnvVars, WebhookEventType
14+
from apify_shared.consts import ActorEnvVars, ActorExitCodes, ApifyEnvVars
1515
from apify_shared.utils import ignore_docs, maybe_extract_enum_member_value
1616
from crawlee import service_container
1717
from crawlee.events._types import Event, EventPersistStateData
1818

1919
from apify._configuration import Configuration
2020
from apify._consts import EVENT_LISTENERS_TIMEOUT
2121
from apify._crypto import decrypt_input_secrets, load_private_key
22+
from apify._models import ActorRun
2223
from apify._platform_event_manager import EventManager, LocalEventManager, PlatformEventManager
2324
from apify._proxy_configuration import ProxyConfiguration
2425
from apify._utils import get_system_info, is_running_in_ipython
@@ -32,6 +33,8 @@
3233

3334
from crawlee.proxy_configuration import _NewUrlFunction
3435

36+
from apify._models import Webhook
37+
3538

3639
MainReturnType = TypeVar('MainReturnType')
3740

@@ -533,8 +536,8 @@ async def start(
533536
memory_mbytes: int | None = None,
534537
timeout: timedelta | None = None,
535538
wait_for_finish: int | None = None,
536-
webhooks: list[dict] | None = None,
537-
) -> dict:
539+
webhooks: list[Webhook] | None = None,
540+
) -> ActorRun:
538541
"""Run an Actor on the Apify platform.
539542
540543
Unlike `Actor.call`, this method just starts the run without waiting for finish.
@@ -555,10 +558,6 @@ async def start(
555558
webhooks: Optional ad-hoc webhooks (https://docs.apify.com/webhooks/ad-hoc-webhooks) associated with
556559
the Actor run which can be used to receive a notification, e.g. when the Actor finished or failed.
557560
If you already have a webhook set up for the Actor or task, you do not have to add it again here.
558-
Each webhook is represented by a dictionary containing these items:
559-
* `event_types`: list of `WebhookEventType` values which trigger the webhook
560-
* `request_url`: URL to which to send the webhook HTTP request
561-
* `payload_template` (optional): Optional template for the request payload
562561
563562
Returns:
564563
Info about the started Actor run
@@ -567,24 +566,33 @@ async def start(
567566

568567
client = self.new_client(token=token) if token else self._apify_client
569568

570-
return await client.actor(actor_id).start(
569+
if webhooks:
570+
serialized_webhooks = [
571+
hook.model_dump(by_alias=True, exclude_unset=True, exclude_defaults=True) for hook in webhooks
572+
]
573+
else:
574+
serialized_webhooks = None
575+
576+
api_result = await client.actor(actor_id).start(
571577
run_input=run_input,
572578
content_type=content_type,
573579
build=build,
574580
memory_mbytes=memory_mbytes,
575581
timeout_secs=int(timeout.total_seconds()) if timeout is not None else None,
576582
wait_for_finish=wait_for_finish,
577-
webhooks=webhooks,
583+
webhooks=serialized_webhooks,
578584
)
579585

586+
return ActorRun.model_validate(api_result)
587+
580588
async def abort(
581589
self,
582590
run_id: str,
583591
*,
584592
token: str | None = None,
585593
status_message: str | None = None,
586594
gracefully: bool | None = None,
587-
) -> dict:
595+
) -> ActorRun:
588596
"""Abort given Actor run on the Apify platform using the current user account.
589597
590598
The user account is determined by the `APIFY_TOKEN` environment variable.
@@ -607,7 +615,9 @@ async def abort(
607615
if status_message:
608616
await client.run(run_id).update(status_message=status_message)
609617

610-
return await client.run(run_id).abort(gracefully=gracefully)
618+
api_result = await client.run(run_id).abort(gracefully=gracefully)
619+
620+
return ActorRun.model_validate(api_result)
611621

612622
async def call(
613623
self,
@@ -619,9 +629,9 @@ async def call(
619629
build: str | None = None,
620630
memory_mbytes: int | None = None,
621631
timeout: timedelta | None = None,
622-
webhooks: list[dict] | None = None,
632+
webhooks: list[Webhook] | None = None,
623633
wait: timedelta | None = None,
624-
) -> dict | None:
634+
) -> ActorRun | None:
625635
"""Start an Actor on the Apify Platform and wait for it to finish before returning.
626636
627637
It waits indefinitely, unless the wait argument is provided.
@@ -650,16 +660,25 @@ async def call(
650660

651661
client = self.new_client(token=token) if token else self._apify_client
652662

653-
return await client.actor(actor_id).call(
663+
if webhooks:
664+
serialized_webhooks = [
665+
hook.model_dump(by_alias=True, exclude_unset=True, exclude_defaults=True) for hook in webhooks
666+
]
667+
else:
668+
serialized_webhooks = None
669+
670+
api_result = await client.actor(actor_id).call(
654671
run_input=run_input,
655672
content_type=content_type,
656673
build=build,
657674
memory_mbytes=memory_mbytes,
658675
timeout_secs=int(timeout.total_seconds()) if timeout is not None else None,
659-
webhooks=webhooks,
676+
webhooks=serialized_webhooks,
660677
wait_secs=int(wait.total_seconds()) if wait is not None else None,
661678
)
662679

680+
return ActorRun.model_validate(api_result)
681+
663682
async def call_task(
664683
self,
665684
task_id: str,
@@ -668,10 +687,10 @@ async def call_task(
668687
build: str | None = None,
669688
memory_mbytes: int | None = None,
670689
timeout: timedelta | None = None,
671-
webhooks: list[dict] | None = None,
690+
webhooks: list[Webhook] | None = None,
672691
wait: timedelta | None = None,
673692
token: str | None = None,
674-
) -> dict | None:
693+
) -> ActorRun | None:
675694
"""Start an Actor task on the Apify Platform and wait for it to finish before returning.
676695
677696
It waits indefinitely, unless the wait argument is provided.
@@ -703,15 +722,24 @@ async def call_task(
703722

704723
client = self.new_client(token=token) if token else self._apify_client
705724

706-
return await client.task(task_id).call(
725+
if webhooks:
726+
serialized_webhooks = [
727+
hook.model_dump(by_alias=True, exclude_unset=True, exclude_defaults=True) for hook in webhooks
728+
]
729+
else:
730+
serialized_webhooks = None
731+
732+
api_result = await client.task(task_id).call(
707733
task_input=task_input,
708734
build=build,
709735
memory_mbytes=memory_mbytes,
710736
timeout_secs=int(timeout.total_seconds()) if timeout is not None else None,
711-
webhooks=webhooks,
737+
webhooks=serialized_webhooks,
712738
wait_secs=int(wait.total_seconds()) if wait is not None else None,
713739
)
714740

741+
return ActorRun.model_validate(api_result)
742+
715743
async def metamorph(
716744
self,
717745
target_actor_id: str,
@@ -796,14 +824,12 @@ async def reboot(
796824

797825
async def add_webhook(
798826
self,
827+
webhook: Webhook,
799828
*,
800-
event_types: list[WebhookEventType],
801-
request_url: str,
802-
payload_template: str | None = None,
803829
ignore_ssl_errors: bool | None = None,
804830
do_not_retry: bool | None = None,
805831
idempotency_key: str | None = None,
806-
) -> dict | None:
832+
) -> None:
807833
"""Create an ad-hoc webhook for the current Actor run.
808834
809835
This webhook lets you receive a notification when the Actor run finished or failed.
@@ -814,9 +840,7 @@ async def add_webhook(
814840
For more information about Apify Actor webhooks, please see the [documentation](https://docs.apify.com/webhooks).
815841
816842
Args:
817-
event_types: List of event types that should trigger the webhook. At least one is required.
818-
request_url: URL that will be invoked once the webhook is triggered.
819-
payload_template: Specification of the payload that will be sent to request_url
843+
webhook: The webhook to be added
820844
ignore_ssl_errors: Whether the webhook should ignore SSL errors returned by request_url
821845
do_not_retry: Whether the webhook should retry sending the payload to request_url upon failure.
822846
idempotency_key: A unique identifier of a webhook. You can use it to ensure that you won't create
@@ -829,17 +853,17 @@ async def add_webhook(
829853

830854
if not self.is_at_home():
831855
self.log.error('Actor.add_webhook() is only supported when running on the Apify platform.')
832-
return None
856+
return
833857

834858
# If is_at_home() is True, config.actor_run_id is always set
835859
if not self._configuration.actor_run_id:
836860
raise RuntimeError('actor_run_id cannot be None when running on the Apify platform.')
837861

838-
return await self._apify_client.webhooks().create(
862+
await self._apify_client.webhooks().create(
839863
actor_run_id=self._configuration.actor_run_id,
840-
event_types=event_types,
841-
request_url=request_url,
842-
payload_template=payload_template,
864+
event_types=webhook.event_types,
865+
request_url=webhook.request_url,
866+
payload_template=webhook.payload_template,
843867
ignore_ssl_errors=ignore_ssl_errors,
844868
do_not_retry=do_not_retry,
845869
idempotency_key=idempotency_key,
@@ -850,7 +874,7 @@ async def set_status_message(
850874
status_message: str,
851875
*,
852876
is_terminal: bool | None = None,
853-
) -> dict | None:
877+
) -> ActorRun | None:
854878
"""Set the status message for the current Actor run.
855879
856880
Args:
@@ -871,10 +895,12 @@ async def set_status_message(
871895
if not self._configuration.actor_run_id:
872896
raise RuntimeError('actor_run_id cannot be None when running on the Apify platform.')
873897

874-
return await self._apify_client.run(self._configuration.actor_run_id).update(
898+
api_result = await self._apify_client.run(self._configuration.actor_run_id).update(
875899
status_message=status_message, is_status_message_terminal=is_terminal
876900
)
877901

902+
return ActorRun.model_validate(api_result)
903+
878904
async def create_proxy_configuration(
879905
self,
880906
*,

src/apify/_models.py

Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
# ruff: noqa: TCH001 TCH002 TCH003 (Pydantic)
2+
from __future__ import annotations
3+
4+
from datetime import datetime, timedelta
5+
from typing import Annotated
6+
7+
from pydantic import BaseModel, BeforeValidator, ConfigDict, Field
8+
9+
from apify_shared.consts import ActorJobStatus, MetaOrigin, WebhookEventType
10+
from crawlee._utils.models import timedelta_ms
11+
from crawlee._utils.urls import validate_http_url
12+
13+
14+
class Webhook(BaseModel):
15+
__model_config__ = ConfigDict(populate_by_name=True)
16+
17+
event_types: Annotated[
18+
list[WebhookEventType],
19+
Field(description='Event types that should trigger the webhook'),
20+
]
21+
request_url: Annotated[
22+
str,
23+
Field(description='URL that the webhook should call'),
24+
BeforeValidator(validate_http_url),
25+
]
26+
payload_template: Annotated[
27+
str | None,
28+
Field(description='Template for the payload sent by the webook'),
29+
] = None
30+
31+
32+
class ActorRunMeta(BaseModel):
33+
__model_config__ = ConfigDict(populate_by_name=True)
34+
35+
origin: Annotated[MetaOrigin, Field()]
36+
37+
38+
class ActorRunStats(BaseModel):
39+
__model_config__ = ConfigDict(populate_by_name=True)
40+
41+
input_body_len: Annotated[int, Field(alias='inputBodyLen')]
42+
restart_count: Annotated[int, Field(alias='restartCount')]
43+
resurrect_count: Annotated[int, Field(alias='resurrectCount')]
44+
mem_avg_bytes: Annotated[float | None, Field(alias='memAvgBytes')] = None
45+
mem_max_bytes: Annotated[int | None, Field(alias='memMaxBytes')] = None
46+
mem_current_bytes: Annotated[int | None, Field(alias='memCurrentBytes')] = None
47+
cpu_avg_usage: Annotated[float | None, Field(alias='cpuAvgUsage')] = None
48+
cpu_max_usage: Annotated[float | None, Field(alias='cpuMaxUsage')] = None
49+
cpu_current_usage: Annotated[float | None, Field(alias='cpuCurrentUsage')] = None
50+
net_rx_bytes: Annotated[int | None, Field(alias='netRxBytes')] = None
51+
net_tx_bytes: Annotated[int | None, Field(alias='netTxBytes')] = None
52+
duration: Annotated[timedelta_ms | None, Field(alias='durationMillis')] = None
53+
run_time: Annotated[timedelta | None, Field(alias='runTimeSecs')] = None
54+
metamorph: Annotated[int | None, Field(alias='metamorph')] = None
55+
compute_units: Annotated[float, Field(alias='computeUnits')]
56+
57+
58+
class ActorRunOptions(BaseModel):
59+
__model_config__ = ConfigDict(populate_by_name=True)
60+
61+
build: str
62+
timeout: Annotated[timedelta, Field(alias='timeoutSecs')]
63+
memory_mbytes: Annotated[int, Field(alias='memoryMbytes')]
64+
disk_mbytes: Annotated[int, Field(alias='diskMbytes')]
65+
66+
67+
class ActorRunUsage(BaseModel):
68+
__model_config__ = ConfigDict(populate_by_name=True)
69+
70+
actor_compute_units: Annotated[float | None, Field(alias='ACTOR_COMPUTE_UNITS')] = None
71+
dataset_reads: Annotated[float | None, Field(alias='DATASET_READS')] = None
72+
dataset_writes: Annotated[float | None, Field(alias='DATASET_WRITES')] = None
73+
key_value_store_reads: Annotated[float | None, Field(alias='KEY_VALUE_STORE_READS')] = None
74+
key_value_store_writes: Annotated[float | None, Field(alias='KEY_VALUE_STORE_WRITES')] = None
75+
key_value_store_lists: Annotated[float | None, Field(alias='KEY_VALUE_STORE_LISTS')] = None
76+
request_queue_reads: Annotated[float | None, Field(alias='REQUEST_QUEUE_READS')] = None
77+
request_queue_writes: Annotated[float | None, Field(alias='REQUEST_QUEUE_WRITES')] = None
78+
data_transfer_internal_gbytes: Annotated[float | None, Field(alias='DATA_TRANSFER_INTERNAL_GBYTES')] = None
79+
data_transfer_external_gbytes: Annotated[float | None, Field(alias='DATA_TRANSFER_EXTERNAL_GBYTES')] = None
80+
proxy_residential_transfer_gbytes: Annotated[float | None, Field(alias='PROXY_RESIDENTIAL_TRANSFER_GBYTES')] = None
81+
proxy_serps: Annotated[float | None, Field(alias='PROXY_SERPS')] = None
82+
83+
84+
class ActorRun(BaseModel):
85+
__model_config__ = ConfigDict(populate_by_name=True)
86+
87+
id: Annotated[str, Field(alias='id')]
88+
act_id: Annotated[str, Field(alias='actId')]
89+
user_id: Annotated[str, Field(alias='userId')]
90+
actor_task_id: Annotated[str | None, Field(alias='actorTaskId')] = None
91+
started_at: Annotated[datetime, Field(alias='startedAt')]
92+
finished_at: Annotated[datetime | None, Field(alias='finishedAt')] = None
93+
status: Annotated[ActorJobStatus, Field(alias='status')]
94+
status_message: Annotated[str | None, Field(alias='statusMessage')] = None
95+
is_status_message_terminal: Annotated[bool | None, Field(alias='isStatusMessageTerminal')] = None
96+
meta: Annotated[ActorRunMeta, Field(alias='meta')]
97+
stats: Annotated[ActorRunStats, Field(alias='stats')]
98+
options: Annotated[ActorRunOptions, Field(alias='options')]
99+
build_id: Annotated[str, Field(alias='buildId')]
100+
exit_code: Annotated[int | None, Field(alias='exitCode')] = None
101+
default_key_value_store_id: Annotated[str, Field(alias='defaultKeyValueStoreId')]
102+
default_dataset_id: Annotated[str, Field(alias='defaultDatasetId')]
103+
default_request_queue_id: Annotated[str, Field(alias='defaultRequestQueueId')]
104+
build_number: Annotated[str | None, Field(alias='buildNumber')] = None
105+
container_url: Annotated[str, Field(alias='containerUrl')]
106+
is_container_server_ready: Annotated[bool | None, Field(alias='isContainerServerReady')] = None
107+
git_branch_name: Annotated[str | None, Field(alias='gitBranchName')] = None
108+
usage: Annotated[ActorRunUsage | None, Field(alias='usage')] = None
109+
usage_total_usd: Annotated[float | None, Field(alias='usageTotalUsd')] = None
110+
usage_usd: Annotated[ActorRunUsage | None, Field(alias='usageUsd')] = None

0 commit comments

Comments
 (0)