diff --git a/src/apify/_actor.py b/src/apify/_actor.py
index e69e213b..8f3c3c51 100644
--- a/src/apify/_actor.py
+++ b/src/apify/_actor.py
@@ -13,7 +13,7 @@
 from apify_client import ApifyClientAsync
 from apify_shared.consts import ActorEnvVars, ActorExitCodes, ApifyEnvVars
-from apify_shared.utils import ignore_docs, maybe_extract_enum_member_value
+from apify_shared.utils import maybe_extract_enum_member_value
 from crawlee import service_locator
 from crawlee.events import (
     Event,
@@ -54,9 +54,46 @@
 @docs_name('Actor')
-@docs_group('Classes')
+@docs_group('Actor')
 class _ActorType:
-    """The class of `Actor`. Only make a new instance if you're absolutely sure you need to."""
+    """The core class for building Actors on the Apify platform.
+
+    Actors are serverless programs running in the cloud that can perform anything from simple actions
+    (such as filling out a web form or sending an email) to complex operations (such as crawling an
+    entire website or removing duplicates from a large dataset). They are packaged as Docker containers
+    which accept well-defined JSON input, perform an action, and optionally produce well-defined output.
+
+    ### References
+
+    - Apify platform documentation: https://docs.apify.com/platform/actors
+    - Actor whitepaper: https://whitepaper.actor/
+
+    ### Usage
+
+    ```python
+    import asyncio
+
+    import httpx
+    from apify import Actor
+    from bs4 import BeautifulSoup
+
+
+    async def main() -> None:
+        async with Actor:
+            actor_input = await Actor.get_input()
+            async with httpx.AsyncClient() as client:
+                response = await client.get(actor_input['url'])
+            soup = BeautifulSoup(response.content, 'html.parser')
+            data = {
+                'url': actor_input['url'],
+                'title': soup.title.string if soup.title else None,
+            }
+            await Actor.push_data(data)
+
+
+    if __name__ == '__main__':
+        asyncio.run(main())
+    ```
+    """
 
     _is_rebooting = False
     _is_any_instance_initialized = False
@@ -108,7 +145,6 @@ def __init__(
 
         self._is_initialized = False
 
-    @ignore_docs
     async def __aenter__(self) -> Self:
         """Initialize the Actor.
 
@@ -120,7 +156,6 @@ async def __aexit__(
         await self.init()
         return self
 
-    @ignore_docs
     async def __aexit__(
         self,
         _exc_type: type[BaseException] | None,
diff --git a/src/apify/_charging.py b/src/apify/_charging.py
index 3aee2777..65cce951 100644
--- a/src/apify/_charging.py
+++ b/src/apify/_charging.py
@@ -8,7 +8,6 @@
 
 from pydantic import TypeAdapter
 
-from apify_shared.utils import ignore_docs
 from crawlee._utils.context import ensure_context
 
 from apify._models import ActorRun, PricingModel
@@ -26,9 +25,18 @@
 run_validator = TypeAdapter[ActorRun | None](ActorRun | None)
 
 
-@docs_group('Interfaces')
+@docs_group('Charging')
 class ChargingManager(Protocol):
-    """Provides fine-grained access to pay-per-event functionality."""
+    """Provides fine-grained access to pay-per-event functionality.
+
+    The ChargingManager allows you to charge for specific events in your Actor when using
+    the pay-per-event pricing model. This enables precise cost control and transparent
+    billing for different operations within your Actor.
+
+    ### References
+
+    - Apify platform documentation: https://docs.apify.com/platform/actors/publishing/monetize
+    """
 
     async def charge(self, event_name: str, count: int = 1) -> ChargeResult:
         """Charge for a specified number of events - sub-operations of the Actor.
@@ -57,7 +65,7 @@ def get_pricing_info(self) -> ActorPricingInfo:
         """
 
 
-@docs_group('Data structures')
+@docs_group('Charging')
 @dataclass(frozen=True)
 class ChargeResult:
     """Result of the `ChargingManager.charge` method."""
@@ -72,7 +80,7 @@ class ChargeResult:
     """How many events of each known type can still be charged within the limit."""
 
 
-@docs_group('Data structures')
+@docs_group('Charging')
 @dataclass
 class ActorPricingInfo:
     """Result of the `ChargingManager.get_pricing_info` method."""
@@ -90,7 +98,6 @@ class ActorPricingInfo:
     """Price of every known event type."""
 
 
-@ignore_docs
 class ChargingManagerImplementation(ChargingManager):
     """Implementation of the `ChargingManager` Protocol - this is only meant to be instantiated internally."""
diff --git a/src/apify/_configuration.py b/src/apify/_configuration.py
index 4e12304c..91d6954d 100644
--- a/src/apify/_configuration.py
+++ b/src/apify/_configuration.py
@@ -25,7 +25,7 @@ def _transform_to_list(value: Any) -> list[str] | None:
     return value if isinstance(value, list) else str(value).split(',')
 
 
-@docs_group('Classes')
+@docs_group('Configuration')
 class Configuration(CrawleeConfiguration):
     """A class for specifying the configuration of an Actor.
diff --git a/src/apify/_crypto.py b/src/apify/_crypto.py
index fe8fb23c..c7c2bc16 100644
--- a/src/apify/_crypto.py
+++ b/src/apify/_crypto.py
@@ -12,7 +12,6 @@
 from cryptography.hazmat.primitives.asymmetric import padding, rsa
 from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes
 
-from apify_shared.utils import ignore_docs
 from crawlee._utils.crypto import crypto_random_object_id
 
 from apify._consts import ENCRYPTED_INPUT_VALUE_REGEXP, ENCRYPTED_JSON_VALUE_PREFIX, ENCRYPTED_STRING_VALUE_PREFIX
@@ -22,7 +21,6 @@
 ENCRYPTION_AUTH_TAG_LENGTH = 16
 
 
-@ignore_docs
 def public_encrypt(value: str, *, public_key: rsa.RSAPublicKey) -> dict:
     """Encrypts the given value using AES cipher and the password for encryption using the public key.
 
@@ -66,7 +64,6 @@
     }
 
 
-@ignore_docs
 def private_decrypt(
     encrypted_password: str,
     encrypted_value: str,
@@ -118,7 +115,6 @@
     return decipher_bytes.decode('utf-8')
 
 
-@ignore_docs
 def load_private_key(private_key_file_base64: str, private_key_password: str) -> rsa.RSAPrivateKey:
     private_key = serialization.load_pem_private_key(
         base64.b64decode(private_key_file_base64.encode('utf-8')),
@@ -138,7 +134,6 @@
     return public_key
 
 
-@ignore_docs
 def decrypt_input_secrets(private_key: rsa.RSAPrivateKey, input_data: Any) -> Any:
     """Decrypt input secrets."""
     if not isinstance(input_data, dict):
@@ -180,7 +175,6 @@
     return res
 
 
-@ignore_docs
 def create_hmac_signature(secret_key: str, message: str) -> str:
     """Generate an HMAC signature and encodes it using Base62. Base62 encoding reduces the signature length.
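
For context on the `ChargingManager` protocol regrouped above: under the pay-per-event pricing model, an Actor calls `charge` for each billable sub-operation and can consult `get_pricing_info` before doing chargeable work. Below is a minimal sketch of how that might look inside an Actor. It assumes the active manager is exposed via `Actor.get_charging_manager()` and that `ChargeResult` carries an `event_charge_limit_reached` flag (neither is shown in this diff); the `'result-item'` event name is hypothetical.

```python
from apify import Actor


async def main() -> None:
    async with Actor:
        # Assumption: the active ChargingManager is exposed on the Actor instance.
        charging_manager = Actor.get_charging_manager()

        # get_pricing_info() is synchronous per the protocol above.
        pricing = charging_manager.get_pricing_info()
        Actor.log.info(f'Pricing info: {pricing}')

        # Charge for one 'result-item' event (a hypothetical event name that
        # would be defined in the Actor's pay-per-event pricing configuration).
        result = await charging_manager.charge(event_name='result-item', count=1)

        # Assumption: ChargeResult signals when the charging limit is reached,
        # so the Actor can stop doing chargeable work early.
        if result.event_charge_limit_reached:
            await Actor.exit()
```
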
diff --git a/src/apify/_models.py b/src/apify/_models.py
index 5898a3ee..6dd72dca 100644
--- a/src/apify/_models.py
+++ b/src/apify/_models.py
@@ -16,7 +16,7 @@
     from typing import TypeAlias
 
 
-@docs_group('Data structures')
+@docs_group('Actor')
 class Webhook(BaseModel):
     __model_config__ = ConfigDict(populate_by_name=True)
 
@@ -35,14 +35,14 @@ class Webhook(BaseModel):
     ] = None
 
 
-@docs_group('Data structures')
+@docs_group('Actor')
 class ActorRunMeta(BaseModel):
     __model_config__ = ConfigDict(populate_by_name=True)
 
     origin: Annotated[MetaOrigin, Field()]
 
 
-@docs_group('Data structures')
+@docs_group('Actor')
 class ActorRunStats(BaseModel):
     __model_config__ = ConfigDict(populate_by_name=True)
 
@@ -63,7 +63,7 @@ class ActorRunStats(BaseModel):
     compute_units: Annotated[float, Field(alias='computeUnits')]
 
 
-@docs_group('Data structures')
+@docs_group('Actor')
 class ActorRunOptions(BaseModel):
     __model_config__ = ConfigDict(populate_by_name=True)
 
@@ -74,7 +74,7 @@ class ActorRunOptions(BaseModel):
     max_total_charge_usd: Annotated[Decimal | None, Field(alias='maxTotalChargeUsd')] = None
 
 
-@docs_group('Data structures')
+@docs_group('Actor')
 class ActorRunUsage(BaseModel):
     __model_config__ = ConfigDict(populate_by_name=True)
 
@@ -92,7 +92,7 @@ class ActorRunUsage(BaseModel):
     proxy_serps: Annotated[float | None, Field(alias='PROXY_SERPS')] = None
 
 
-@docs_group('Data structures')
+@docs_group('Actor')
 class ActorRun(BaseModel):
     __model_config__ = ConfigDict(populate_by_name=True)
diff --git a/src/apify/_platform_event_manager.py b/src/apify/_platform_event_manager.py
index 65540a85..41d9379e 100644
--- a/src/apify/_platform_event_manager.py
+++ b/src/apify/_platform_event_manager.py
@@ -27,17 +27,10 @@
     from apify._configuration import Configuration
 
-
 __all__ = ['EventManager', 'LocalEventManager', 'PlatformEventManager']
 
 
-@docs_group('Data structures')
-class PersistStateEvent(BaseModel):
-    name: Literal[Event.PERSIST_STATE]
-    data: Annotated[EventPersistStateData, Field(default_factory=lambda: EventPersistStateData(is_migrating=False))]
-
-
-@docs_group('Data structures')
+@docs_group('Event data')
 class SystemInfoEventData(BaseModel):
     mem_avg_bytes: Annotated[float, Field(alias='memAvgBytes')]
     mem_current_bytes: Annotated[float, Field(alias='memCurrentBytes')]
@@ -64,31 +57,37 @@ def to_crawlee_format(self, dedicated_cpus: float) -> EventSystemInfoData:
     )
 
 
-@docs_group('Data structures')
+@docs_group('Events')
+class PersistStateEvent(BaseModel):
+    name: Literal[Event.PERSIST_STATE]
+    data: Annotated[EventPersistStateData, Field(default_factory=lambda: EventPersistStateData(is_migrating=False))]
+
+
+@docs_group('Events')
 class SystemInfoEvent(BaseModel):
     name: Literal[Event.SYSTEM_INFO]
     data: SystemInfoEventData
 
 
-@docs_group('Data structures')
+@docs_group('Events')
 class MigratingEvent(BaseModel):
     name: Literal[Event.MIGRATING]
     data: Annotated[EventMigratingData, Field(default_factory=EventMigratingData)]
 
 
-@docs_group('Data structures')
+@docs_group('Events')
 class AbortingEvent(BaseModel):
     name: Literal[Event.ABORTING]
     data: Annotated[EventAbortingData, Field(default_factory=EventAbortingData)]
 
 
-@docs_group('Data structures')
+@docs_group('Events')
 class ExitEvent(BaseModel):
     name: Literal[Event.EXIT]
     data: Annotated[EventExitData, Field(default_factory=EventExitData)]
 
 
-@docs_group('Data structures')
+@docs_group('Events')
 class EventWithoutData(BaseModel):
     name: Literal[
         Event.SESSION_RETIRED,
@@ -101,13 +100,13 @@ class EventWithoutData(BaseModel):
     data: Any = None
 
 
-@docs_group('Data structures')
+@docs_group('Events')
 class DeprecatedEvent(BaseModel):
     name: Literal['cpuInfo']
     data: Annotated[dict[str, Any], Field(default_factory=dict)]
 
 
-@docs_group('Data structures')
+@docs_group('Events')
 class UnknownEvent(BaseModel):
     name: str
     data: Annotated[dict[str, Any], Field(default_factory=dict)]
@@ -120,7 +119,7 @@
 )
 
 
-@docs_group('Classes')
+@docs_group('Event managers')
 class PlatformEventManager(EventManager):
     """A class for managing Actor events.
diff --git a/src/apify/_proxy_configuration.py b/src/apify/_proxy_configuration.py
index 1d5b9f72..37ec01ca 100644
--- a/src/apify/_proxy_configuration.py
+++ b/src/apify/_proxy_configuration.py
@@ -10,7 +10,6 @@
 import httpx
 
 from apify_shared.consts import ApifyEnvVars
-from apify_shared.utils import ignore_docs
 from crawlee.proxy_configuration import ProxyConfiguration as CrawleeProxyConfiguration
 from crawlee.proxy_configuration import ProxyInfo as CrawleeProxyInfo
 from crawlee.proxy_configuration import _NewUrlFunction
@@ -28,7 +27,6 @@
 SESSION_ID_MAX_LENGTH = 50
 
 
-@ignore_docs
 def is_url(url: str) -> bool:
     """Check if the given string is a valid URL."""
     try:
@@ -69,7 +67,7 @@ def _check(
     raise ValueError(f'{error_str} does not match pattern {pattern.pattern!r}')
 
 
-@docs_group('Classes')
+@docs_group('Configuration')
 @dataclass
 class ProxyInfo(CrawleeProxyInfo):
     """Provides information about a proxy connection that is used for requests."""
@@ -89,7 +87,7 @@ class ProxyInfo(CrawleeProxyInfo):
     """
 
 
-@docs_group('Classes')
+@docs_group('Configuration')
 class ProxyConfiguration(CrawleeProxyConfiguration):
     """Configures a connection to a proxy server with the provided options.
@@ -104,7 +102,6 @@ class ProxyConfiguration(CrawleeProxyConfiguration):
     _configuration: Configuration
 
-    @ignore_docs
     def __init__(
         self,
         *,
diff --git a/src/apify/_utils.py b/src/apify/_utils.py
index 8686d5c1..6c768e0c 100644
--- a/src/apify/_utils.py
+++ b/src/apify/_utils.py
@@ -30,7 +30,19 @@ def is_running_in_ipython() -> bool:
     return getattr(builtins, '__IPYTHON__', False)
 
 
-GroupName = Literal['Classes', 'Abstract classes', 'Interfaces', 'Data structures', 'Errors', 'Functions']
+# The order of the rendered API groups is defined in the website/docusaurus.config.js file.
+GroupName = Literal[
+    'Actor',
+    'Charging',
+    'Configuration',
+    'Event data',
+    'Event managers',
+    'Events',
+    'Request loaders',
+    'Storage clients',
+    'Storage data',
+    'Storages',
+]
 
 
 def docs_group(group_name: GroupName) -> Callable:  # noqa: ARG001
diff --git a/src/apify/apify_storage_client/_apify_storage_client.py b/src/apify/apify_storage_client/_apify_storage_client.py
index 51e3fc24..0a544d58 100644
--- a/src/apify/apify_storage_client/_apify_storage_client.py
+++ b/src/apify/apify_storage_client/_apify_storage_client.py
@@ -20,7 +20,7 @@
     from apify._configuration import Configuration
 
 
-@docs_group('Classes')
+@docs_group('Storage clients')
 class ApifyStorageClient(StorageClient):
     """A storage client implementation based on the Apify platform storage."""
diff --git a/src/apify/log.py b/src/apify/log.py
index 970a37a6..d5440345 100644
--- a/src/apify/log.py
+++ b/src/apify/log.py
@@ -2,7 +2,6 @@
 
 import logging
 
-from apify_shared.utils import ignore_docs
 from crawlee._log_config import CrawleeLogFormatter, configure_logger, get_configured_log_level
 
 # Name of the logger used throughout the library (resolves to 'apify')
@@ -12,7 +11,6 @@
 logger = logging.getLogger(logger_name)
 
 
-@ignore_docs
 class ActorLogFormatter(CrawleeLogFormatter):  # noqa: D101 (Inherited from parent class)
     pass
diff --git a/src/apify/storages/_request_list.py b/src/apify/storages/_request_list.py
index cbc56dfb..28994041 100644
--- a/src/apify/storages/_request_list.py
+++ b/src/apify/storages/_request_list.py
@@ -38,7 +38,7 @@ class _SimpleUrlInput(_RequestDetails):
 url_input_adapter = TypeAdapter(list[_RequestsFromUrlInput | _SimpleUrlInput])
 
 
-@docs_group('Classes')
+@docs_group('Request loaders')
 class RequestList(CrawleeRequestList):
     """Extends crawlee RequestList.
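
To illustrate the grouping mechanism the changes above rely on: `docs_group` takes one of the `GroupName` literals, and the unused-argument suppression (`noqa: ARG001`) suggests it is a no-op at runtime whose argument is read only by the documentation tooling. A minimal sketch of that assumed identity-decorator shape, with an abridged literal and a hypothetical `ExampleStorage` class for illustration:

```python
from collections.abc import Callable
from typing import Literal

# Mirrors the GroupName literal introduced above (abridged to three entries).
GroupName = Literal['Actor', 'Charging', 'Storages']


def docs_group(group_name: GroupName) -> Callable:  # noqa: ARG001
    """Assumed shape: a no-op decorator whose argument only feeds the docs tooling."""

    def wrapper(symbol: Callable) -> Callable:
        return symbol

    return wrapper


# 'Storages' must be one of the GroupName literals; the old names such as
# 'Classes' or 'Data structures' would now fail type checking.
@docs_group('Storages')
class ExampleStorage:  # hypothetical class, for illustration only
    """Rendered under the 'Storages' group in the API reference."""
```
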
diff --git a/website/docusaurus.config.js b/website/docusaurus.config.js
index 461be0ab..e15af975 100644
--- a/website/docusaurus.config.js
+++ b/website/docusaurus.config.js
@@ -5,8 +5,16 @@ const { config } = require('@apify/docs-theme');
 const { externalLinkProcessor } = require('./tools/utils/externalLink');
 
 const GROUP_ORDER = [
-    'Classes',
-    'Data structures',
+    'Actor',
+    'Charging',
+    'Configuration',
+    'Event data',
+    'Event managers',
+    'Events',
+    'Request loaders',
+    'Storage clients',
+    'Storage data',
+    'Storages',
 ];
 
 const groupSort = (g1, g2) => {
@@ -112,21 +120,132 @@ module.exports = {
                 moduleShortcutsPath: path.join(__dirname, '/module_shortcuts.json'),
             },
             reexports: [
+                // Storages
+                {
+                    url: 'https://crawlee.dev/python/api/class/Storage',
+                    group: 'Storages',
+                },
                 {
                     url: 'https://crawlee.dev/python/api/class/Dataset',
-                    group: 'Classes',
+                    group: 'Storages',
                 },
                 {
                     url: 'https://crawlee.dev/python/api/class/KeyValueStore',
-                    group: 'Classes',
+                    group: 'Storages',
                 },
                 {
                     url: 'https://crawlee.dev/python/api/class/RequestQueue',
-                    group: 'Classes',
+                    group: 'Storages',
+                },
+                // Storage data
+                {
+                    url: 'https://crawlee.dev/python/api/class/AddRequestsResponse',
+                    group: 'Storage data',
+                },
+                {
+                    url: 'https://crawlee.dev/python/api/class/DatasetItemsListPage',
+                    group: 'Storage data',
+                },
+                {
+                    url: 'https://crawlee.dev/python/api/class/DatasetMetadata',
+                    group: 'Storage data',
+                },
+                {
+                    url: 'https://crawlee.dev/python/api/class/KeyValueStoreMetadata',
+                    group: 'Storage data',
+                },
+                {
+                    url: 'https://crawlee.dev/python/api/class/KeyValueStoreRecord',
+                    group: 'Storage data',
+                },
+                {
+                    url: 'https://crawlee.dev/python/api/class/KeyValueStoreRecordMetadata',
+                    group: 'Storage data',
+                },
+                {
+                    url: 'https://crawlee.dev/python/api/class/ProcessedRequest',
+                    group: 'Storage data',
                 },
                 {
                     url: 'https://crawlee.dev/python/api/class/Request',
-                    group: 'Classes',
+                    group: 'Storage data',
+                },
+                {
+                    url: 'https://crawlee.dev/python/api/class/RequestQueueMetadata',
+                    group: 'Storage data',
+                },
+                {
+                    url: 'https://crawlee.dev/python/api/class/StorageMetadata',
+                    group: 'Storage data',
+                },
+                {
+                    url: 'https://crawlee.dev/python/api/class/UnprocessedRequest',
+                    group: 'Storage data',
+                },
+                // Event managers
+                {
+                    url: 'https://crawlee.dev/python/api/class/EventManager',
+                    group: 'Event managers',
+                },
+                {
+                    url: 'https://crawlee.dev/python/api/class/LocalEventManager',
+                    group: 'Event managers',
+                },
+                // Events
+                {
+                    url: 'https://crawlee.dev/python/api/enum/Event',
+                    group: 'Events',
+                },
+                // Event data
+                {
+                    url: 'https://crawlee.dev/python/api/class/EventAbortingData',
+                    group: 'Event data',
+                },
+                {
+                    url: 'https://crawlee.dev/python/api/class/EventExitData',
+                    group: 'Event data',
+                },
+                {
+                    url: 'https://crawlee.dev/python/api/class/EventMigratingData',
+                    group: 'Event data',
+                },
+                {
+                    url: 'https://crawlee.dev/python/api/class/EventPersistStateData',
+                    group: 'Event data',
+                },
+                {
+                    url: 'https://crawlee.dev/python/api/class/EventSystemInfoData',
+                    group: 'Event data',
+                },
+                // Storage clients
+                {
+                    url: 'https://crawlee.dev/python/api/class/StorageClient',
+                    group: 'Storage clients',
+                },
+                {
+                    url: 'https://crawlee.dev/python/api/class/MemoryStorageClient',
+                    group: 'Storage clients',
+                },
+                {
+                    url: 'https://crawlee.dev/python/api/class/FileSystemStorageClient',
+                    group: 'Storage clients',
+                },
+                // Request loaders
+                {
+                    url: 'https://crawlee.dev/python/api/class/RequestLoader',
+                    group: 'Request loaders',
+                },
+                {
+                    url: 'https://crawlee.dev/python/api/class/RequestManager',
+                    group: 'Request loaders',
+                },
+                {
+                    url: 'https://crawlee.dev/python/api/class/RequestManagerTandem',
+                    group: 'Request loaders',
+                },
+                {
+                    url: 'https://crawlee.dev/python/api/class/SitemapRequestLoader',
+                    group: 'Request loaders',
                 },
             ],
         },
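
The body of the `groupSort` comparator falls outside this hunk, but given the comment added in `src/apify/_utils.py` above, it presumably orders groups by their position in `GROUP_ORDER`, which is why that array must stay in sync with the `GroupName` literal. A Python sketch of that assumed index-based ordering (the comparator itself is JavaScript; this only illustrates the logic):

```python
# Mirrors the GROUP_ORDER array from website/docusaurus.config.js.
GROUP_ORDER = [
    'Actor',
    'Charging',
    'Configuration',
    'Event data',
    'Event managers',
    'Events',
    'Request loaders',
    'Storage clients',
    'Storage data',
    'Storages',
]


def group_sort_key(group: str) -> int:
    """Assumed comparator logic: known groups in GROUP_ORDER order, unknown groups last."""
    return GROUP_ORDER.index(group) if group in GROUP_ORDER else len(GROUP_ORDER)


# Example: 'Actor' sorts before 'Storages'; an unrecognized group lands at the end.
assert sorted(['Storages', 'Actor', 'Other'], key=group_sort_key) == ['Actor', 'Storages', 'Other']
```
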