diff --git a/.github/workflows/build_and_deploy_docs.yaml b/.github/workflows/build_and_deploy_docs.yaml
index acbc2963..efea84f4 100644
--- a/.github/workflows/build_and_deploy_docs.yaml
+++ b/.github/workflows/build_and_deploy_docs.yaml
@@ -67,6 +67,8 @@ jobs:
- name: Build Docusaurus docs
run: make build-docs
+ env:
+ APIFY_SIGNING_TOKEN: ${{ secrets.APIFY_SIGNING_TOKEN }}
- name: Set up GitHub Pages
uses: actions/configure-pages@v5
diff --git a/.github/workflows/run_code_checks.yaml b/.github/workflows/run_code_checks.yaml
index 4323b479..6b1b3d69 100644
--- a/.github/workflows/run_code_checks.yaml
+++ b/.github/workflows/run_code_checks.yaml
@@ -33,6 +33,7 @@ jobs:
docs_check:
name: Docs check
uses: apify/workflows/.github/workflows/python_docs_check.yaml@main
+ secrets: inherit
integration_tests:
name: Integration tests
diff --git a/docs/01_overview/01_introduction.mdx b/docs/01_overview/01_introduction.mdx
index 33433f21..7e5bc56e 100644
--- a/docs/01_overview/01_introduction.mdx
+++ b/docs/01_overview/01_introduction.mdx
@@ -3,15 +3,15 @@ id: introduction
title: Introduction
---
-import CodeBlock from '@theme/CodeBlock';
+import RunnableCodeBlock from '@site/src/components/RunnableCodeBlock';
-import IntroductionExample from '!!raw-loader!./code/01_introduction.py';
+import IntroductionExample from '!!raw-loader!roa-loader!./code/01_introduction.py';
The Apify SDK for Python is the official library for creating [Apify Actors](https://docs.apify.com/platform/actors) using Python.
-
+
{IntroductionExample}
-
+
## What are Actors?
diff --git a/docs/01_overview/03_actor_structure.mdx b/docs/01_overview/03_actor_structure.mdx
index f59b645d..1cff2661 100644
--- a/docs/01_overview/03_actor_structure.mdx
+++ b/docs/01_overview/03_actor_structure.mdx
@@ -3,12 +3,12 @@ id: actor-structure
title: Actor structure
---
-import CodeBlock from '@theme/CodeBlock';
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';
+import CodeBlock from '@theme/CodeBlock';
-import UnderscoreMainExample from '!!raw-loader!./code/actor_structure/main.py';
-import MainExample from '!!raw-loader!./code/actor_structure/__main__.py';
+import UnderscoreMainExample from '!!raw-loader!./code/actor_structure/__main__.py';
+import MainExample from '!!raw-loader!./code/actor_structure/main.py';
All Python Actor templates follow the same structure.
@@ -20,14 +20,14 @@ which follows the [standard requirements file format](https://pip.pypa.io/en/sta
The Actor's source code is in the `src/` folder. This folder contains two important files: `main.py`, which contains the main function of the Actor, and `__main__.py`, which is the entrypoint of the Actor package, setting up the Actor [logger](../concepts/logging) and executing the Actor's main function via [`asyncio.run`](https://docs.python.org/3/library/asyncio-runner.html#asyncio.run).
-
+
- {MainExample}
+ {UnderscoreMainExample}
-
+
- {UnderscoreMainExample}
+ {MainExample}
diff --git a/docs/01_overview/code/01_introduction.py b/docs/01_overview/code/01_introduction.py
index a3eaba25..3c875170 100644
--- a/docs/01_overview/code/01_introduction.py
+++ b/docs/01_overview/code/01_introduction.py
@@ -1,3 +1,5 @@
+import asyncio
+
import httpx
from bs4 import BeautifulSoup
@@ -6,12 +8,17 @@
async def main() -> None:
async with Actor:
- actor_input = await Actor.get_input()
+ actor_input = await Actor.get_input() or {}
+ url = actor_input.get('url', 'https://apify.com')
async with httpx.AsyncClient() as client:
- response = await client.get(actor_input['url'])
+ response = await client.get(url)
soup = BeautifulSoup(response.content, 'html.parser')
data = {
- 'url': actor_input['url'],
+ 'url': url,
'title': soup.title.string if soup.title else None,
}
await Actor.push_data(data)
+
+
+if __name__ == '__main__':
+ asyncio.run(main())
diff --git a/docs/02_concepts/01_actor_lifecycle.mdx b/docs/02_concepts/01_actor_lifecycle.mdx
index be96e7d4..bf07cf5d 100644
--- a/docs/02_concepts/01_actor_lifecycle.mdx
+++ b/docs/02_concepts/01_actor_lifecycle.mdx
@@ -3,21 +3,21 @@ id: actor-lifecycle
title: Actor lifecycle
---
-import CodeBlock from '@theme/CodeBlock';
+import RunnableCodeBlock from '@site/src/components/RunnableCodeBlock';
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';
-import ClassContextExample from '!!raw-loader!./code/01_class_context.py';
-import ClassManualExample from '!!raw-loader!./code/01_class_manual.py';
-import InstanceContextExample from '!!raw-loader!./code/01_instance_context.py';
-import InstanceManualExample from '!!raw-loader!./code/01_instance_manual.py';
+import ClassContextExample from '!!raw-loader!roa-loader!./code/01_class_context.py';
+import ClassManualExample from '!!raw-loader!roa-loader!./code/01_class_manual.py';
+import InstanceContextExample from '!!raw-loader!roa-loader!./code/01_instance_context.py';
+import InstanceManualExample from '!!raw-loader!roa-loader!./code/01_instance_manual.py';
-import ErrorHandlingContextExample from '!!raw-loader!./code/01_error_handling_context.py';
-import ErrorHandlingManualExample from '!!raw-loader!./code/01_error_handling_manual.py';
+import ErrorHandlingContextExample from '!!raw-loader!roa-loader!./code/01_error_handling_context.py';
+import ErrorHandlingManualExample from '!!raw-loader!roa-loader!./code/01_error_handling_manual.py';
-import RebootExample from '!!raw-loader!./code/01_reboot.py';
+import RebootExample from '!!raw-loader!roa-loader!./code/01_reboot.py';
-import StatusMessageExample from '!!raw-loader!./code/01_status_message.py';
+import StatusMessageExample from '!!raw-loader!roa-loader!./code/01_status_message.py';
This guide explains how an **Apify Actor** starts, runs, and shuts down, describing the complete Actor lifecycle. For information about the core concepts such as Actors, the Apify Console, storages, and events, check out the [Apify platform documentation](https://docs.apify.com/platform).
@@ -31,14 +31,14 @@ When the Actor exits, either normally or due to an exception, the SDK performs a
-
+
{ClassContextExample}
-
+
-
+
{ClassManualExample}
-
+
@@ -46,14 +46,14 @@ You can also create an [`Actor`](https://docs.apify.com/sdk/python/reference/cla
-
+
{InstanceContextExample}
-
+
-
+
{InstanceManualExample}
-
+
@@ -72,11 +72,11 @@ Catch exceptions only when necessary - for example, to retry network timeouts or
Below is a minimal context-manager example where an unhandled exception automatically fails the run, followed by a manual pattern giving you more control.
-{ErrorHandlingContextExample}
+{ErrorHandlingContextExample}
If you need explicit control over exit codes or status messages, you can manage the Actor manually using [`Actor.init`](https://docs.apify.com/sdk/python/reference/class/Actor#init), [`Actor.exit`](https://docs.apify.com/sdk/python/reference/class/Actor#exit), and [`Actor.fail`](https://docs.apify.com/sdk/python/reference/class/Actor#fail).
-{ErrorHandlingManualExample}
+{ErrorHandlingManualExample}
## Reboot
@@ -84,7 +84,7 @@ Rebooting (available on the Apify platform only) instructs the platform worker t
Before triggering a reboot, persist any essential state externally (e.g., to the key-value store or dataset), as all in-memory data is lost after reboot. The example below tracks a reboot counter in the default key-value store and allows at most three restarts before exiting normally.
-{RebootExample}
+{RebootExample}
## Status message
@@ -94,7 +94,7 @@ Update the status only when the user's understanding of progress changes - avoid
The SDK optimizes updates by sending an API request only when the message text changes, so repeating the same message incurs no additional cost.
-{StatusMessageExample}
+{StatusMessageExample}
## Conclusion
diff --git a/docs/02_concepts/02_actor_input.mdx b/docs/02_concepts/02_actor_input.mdx
index ec68b849..3a4deabb 100644
--- a/docs/02_concepts/02_actor_input.mdx
+++ b/docs/02_concepts/02_actor_input.mdx
@@ -3,9 +3,9 @@ id: actor-input
title: Actor input
---
-import CodeBlock from '@theme/CodeBlock';
+import RunnableCodeBlock from '@site/src/components/RunnableCodeBlock';
-import InputExample from '!!raw-loader!./code/02_input.py';
+import InputExample from '!!raw-loader!roa-loader!./code/02_input.py';
The Actor gets its [input](https://docs.apify.com/platform/actors/running/input) from the input record in its default [key-value store](https://docs.apify.com/platform/storage/key-value-store).
@@ -13,6 +13,6 @@ To access it, instead of reading the record manually, you can use the [`Actor.ge
For example, if an Actor received a JSON input with two fields, `{ "firstNumber": 1, "secondNumber": 2 }`, this is how you might process it:
-
+
{InputExample}
-
+
diff --git a/docs/02_concepts/03_storages.mdx b/docs/02_concepts/03_storages.mdx
index a56a54c1..193ce36f 100644
--- a/docs/02_concepts/03_storages.mdx
+++ b/docs/02_concepts/03_storages.mdx
@@ -3,16 +3,16 @@ id: storages
title: Working with storages
---
-import CodeBlock from '@theme/CodeBlock';
+import RunnableCodeBlock from '@site/src/components/RunnableCodeBlock';
-import OpeningStoragesExample from '!!raw-loader!./code/03_opening_storages.py';
-import DeletingStoragesExample from '!!raw-loader!./code/03_deleting_storages.py';
-import DatasetReadWriteExample from '!!raw-loader!./code/03_dataset_read_write.py';
-import DatasetExportsExample from '!!raw-loader!./code/03_dataset_exports.py';
-import KvsReadWriteExample from '!!raw-loader!./code/03_kvs_read_write.py';
-import KvsIteratingExample from '!!raw-loader!./code/03_kvs_iterating.py';
-import KvsPublicRecordExample from '!!raw-loader!./code/03_kvs_public_url.py';
-import RqExample from '!!raw-loader!./code/03_rq.py';
+import OpeningStoragesExample from '!!raw-loader!roa-loader!./code/03_opening_storages.py';
+import DeletingStoragesExample from '!!raw-loader!roa-loader!./code/03_deleting_storages.py';
+import DatasetReadWriteExample from '!!raw-loader!roa-loader!./code/03_dataset_read_write.py';
+import DatasetExportsExample from '!!raw-loader!roa-loader!./code/03_dataset_exports.py';
+import KvsReadWriteExample from '!!raw-loader!roa-loader!./code/03_kvs_read_write.py';
+import KvsIteratingExample from '!!raw-loader!roa-loader!./code/03_kvs_iterating.py';
+import KvsPublicRecordExample from '!!raw-loader!roa-loader!./code/03_kvs_public_url.py';
+import RqExample from '!!raw-loader!roa-loader!./code/03_rq.py';
The `Actor` class provides methods to work either with the default storages of the Actor, or with any other storage, named or unnamed.
@@ -65,18 +65,18 @@ There are several methods for directly working with the default key-value store
The [`Actor.open_dataset`](../../reference/class/Actor#open_dataset), [`Actor.open_key_value_store`](../../reference/class/Actor#open_key_value_store) and [`Actor.open_request_queue`](../../reference/class/Actor#open_request_queue) methods can be used to open any storage for reading and writing. You can either use them without arguments to open the default storages, or you can pass a storage ID or name to open another storage.
-
+
{OpeningStoragesExample}
-
+
## Deleting storages
To delete a storage, you can use the [`Dataset.drop`](../../reference/class/Dataset#drop),
[`KeyValueStore.drop`](../../reference/class/KeyValueStore#drop) or [`RequestQueue.drop`](../../reference/class/RequestQueue#drop) methods.
-
+
{DeletingStoragesExample}
-
+
## Working with datasets
@@ -90,9 +90,9 @@ To read data from a dataset, you can use the [`Dataset.get_data`](../../referenc
To get an iterator of the data, you can use the [`Dataset.iterate_items`](../../reference/class/Dataset#iterate_items) method.
-
+
{DatasetReadWriteExample}
-
+
### Exporting items
@@ -100,9 +100,9 @@ You can also export the dataset items into a key-value store, as either a CSV or
using the [`Dataset.export_to_csv`](../../reference/class/Dataset#export_to_csv)
or [`Dataset.export_to_json`](../../reference/class/Dataset#export_to_json) method.
-
+
{DatasetExportsExample}
-
+
## Working with key-value stores
@@ -116,27 +116,27 @@ To write records into a key-value store, you can use the [`KeyValueStore.set_val
You can set the content type of a record with the `content_type` argument.
To delete a record, set its value to `None`.
-
+
{KvsReadWriteExample}
-
+
### Iterating keys
To get an iterator of the key-value store record keys,
you can use the [`KeyValueStore.iterate_keys`](../../reference/class/KeyValueStore#iterate_keys) method.
-
+
{KvsIteratingExample}
-
+
### Public URLs of records
To get a publicly accessible URL of a key-value store record,
you can use the [`KeyValueStore.get_public_url`](../../reference/class/KeyValueStore#get_public_url) method.
-
+
{KvsPublicRecordExample}
-
+
## Working with request queues
@@ -168,6 +168,6 @@ To check if all the requests in the queue are handled, you can use the [`Request
### Full example
-
+
{RqExample}
-
+
diff --git a/docs/02_concepts/04_actor_events.mdx b/docs/02_concepts/04_actor_events.mdx
index 1be6cc63..9ce7fd59 100644
--- a/docs/02_concepts/04_actor_events.mdx
+++ b/docs/02_concepts/04_actor_events.mdx
@@ -3,9 +3,9 @@ id: actor-events
title: Actor events & state persistence
---
-import CodeBlock from '@theme/CodeBlock';
+import RunnableCodeBlock from '@site/src/components/RunnableCodeBlock';
-import ActorEventsExample from '!!raw-loader!./code/04_actor_events.py';
+import ActorEventsExample from '!!raw-loader!roa-loader!./code/04_actor_events.py';
During its runtime, the Actor receives Actor events sent by the Apify platform or generated by the Apify SDK itself.
@@ -76,6 +76,6 @@ During its runtime, the Actor receives Actor events sent by the Apify platform o
To add handlers to these events, you use the [`Actor.on`](../../reference/class/Actor#on) method,
and to remove them, you use the [`Actor.off`](../../reference/class/Actor#off) method.
-
+
{ActorEventsExample}
-
+
diff --git a/docs/02_concepts/05_proxy_management.mdx b/docs/02_concepts/05_proxy_management.mdx
index 1f15cfae..64420eee 100644
--- a/docs/02_concepts/05_proxy_management.mdx
+++ b/docs/02_concepts/05_proxy_management.mdx
@@ -3,15 +3,15 @@ id: proxy-management
title: Proxy management
---
-import CodeBlock from '@theme/CodeBlock';
+import RunnableCodeBlock from '@site/src/components/RunnableCodeBlock';
-import ApifyProxyExample from '!!raw-loader!./code/05_apify_proxy.py';
-import CustomProxyExample from '!!raw-loader!./code/05_custom_proxy.py';
-import ProxyRotationExample from '!!raw-loader!./code/05_proxy_rotation.py';
-import ApifyProxyConfig from '!!raw-loader!./code/05_apify_proxy_config.py';
-import CustomProxyFunctionExample from '!!raw-loader!./code/05_custom_proxy_function.py';
-import ProxyActorInputExample from '!!raw-loader!./code/05_proxy_actor_input.py';
-import ProxyHttpxExample from '!!raw-loader!./code/05_proxy_httpx.py';
+import ApifyProxyExample from '!!raw-loader!roa-loader!./code/05_apify_proxy.py';
+import CustomProxyExample from '!!raw-loader!roa-loader!./code/05_custom_proxy.py';
+import ProxyRotationExample from '!!raw-loader!roa-loader!./code/05_proxy_rotation.py';
+import ApifyProxyConfig from '!!raw-loader!roa-loader!./code/05_apify_proxy_config.py';
+import CustomProxyFunctionExample from '!!raw-loader!roa-loader!./code/05_custom_proxy_function.py';
+import ProxyActorInputExample from '!!raw-loader!roa-loader!./code/05_proxy_actor_input.py';
+import ProxyHttpxExample from '!!raw-loader!roa-loader!./code/05_proxy_httpx.py';
[IP address blocking](https://en.wikipedia.org/wiki/IP_address_blocking) is one of the oldest and most effective ways of preventing access to a website. It is therefore paramount for a good web scraping library to provide easy to use but powerful tools which can work around IP blocking. The most powerful weapon in your anti IP blocking arsenal is a [proxy server](https://en.wikipedia.org/wiki/Proxy_server).
@@ -23,15 +23,15 @@ If you want to use Apify Proxy locally, make sure that you run your Actors via t
### Using Apify proxy
-
+
{ApifyProxyExample}
-
+
### Using your own proxies
-
+
{CustomProxyExample}
-
+
## Proxy configuration
@@ -49,17 +49,17 @@ The difference is easy to remember. Using the `proxy_url` or `new_url_function`
When no `session_id` is provided, your custom proxy URLs are rotated round-robin, whereas Apify Proxy manages their rotation using black magic to get the best performance.
-
+
{ProxyRotationExample}
-
+
### Apify proxy configuration
With Apify Proxy, you can select specific proxy groups to use, or countries to connect from. This allows you to get better proxy performance after some initial research.
-
+
{ApifyProxyConfig}
-
+
Now your connections using proxy_url will use only Residential proxies from the US. Note that you must first get access to a proxy group before you are able to use it. You can find your available proxy groups in the [proxy dashboard](https://console.apify.com/proxy).
@@ -71,15 +71,15 @@ There are two options how to make `ProxyConfiguration` work with your own proxie
Either you can pass it a list of your own proxy servers:
-
+
{CustomProxyExample}
-
+
Or you can pass it a method (accepting one optional argument, the session ID), to generate proxy URLs automatically:
-
+
{CustomProxyFunctionExample}
-
+
### Configuring proxy based on Actor input
@@ -87,9 +87,9 @@ To make selecting the proxies that the Actor uses easier, you can use an input f
You can then use that input to create the proxy configuration:
-
+
{ProxyActorInputExample}
-
+
## Using the generated proxy URLs
@@ -97,9 +97,9 @@ You can then use that input to create the proxy configuration:
To use the generated proxy URLs with the `httpx` library, use the [`proxies`](https://www.python-httpx.org/advanced/#http-proxying) argument:
-
+
{ProxyHttpxExample}
-
+
Make sure you have the `httpx` library installed:
diff --git a/docs/02_concepts/06_interacting_with_other_actors.mdx b/docs/02_concepts/06_interacting_with_other_actors.mdx
index d9b0b3d0..880cbb89 100644
--- a/docs/02_concepts/06_interacting_with_other_actors.mdx
+++ b/docs/02_concepts/06_interacting_with_other_actors.mdx
@@ -3,12 +3,12 @@ id: interacting-with-other-actors
title: Interacting with other Actors
---
-import CodeBlock from '@theme/CodeBlock';
+import RunnableCodeBlock from '@site/src/components/RunnableCodeBlock';
-import InteractingStartExample from '!!raw-loader!./code/06_interacting_start.py';
-import InteractingCallExample from '!!raw-loader!./code/06_interacting_call.py';
-import InteractingCallTaskExample from '!!raw-loader!./code/06_interacting_call_task.py';
-import InteractingMetamorphExample from '!!raw-loader!./code/06_interacting_metamorph.py';
+import InteractingStartExample from '!!raw-loader!roa-loader!./code/06_interacting_start.py';
+import InteractingCallExample from '!!raw-loader!roa-loader!./code/06_interacting_call.py';
+import InteractingCallTaskExample from '!!raw-loader!roa-loader!./code/06_interacting_call_task.py';
+import InteractingMetamorphExample from '!!raw-loader!roa-loader!./code/06_interacting_metamorph.py';
There are several methods that interact with other Actors and Actor tasks on the Apify platform.
@@ -16,25 +16,25 @@ There are several methods that interact with other Actors and Actor tasks on the
The [`Actor.start`](../../reference/class/Actor#start) method starts another Actor on the Apify platform, and immediately returns the details of the started Actor run.
-
+
{InteractingStartExample}
-
+
## Actor call
The [`Actor.call`](../../reference/class/Actor#call) method starts another Actor on the Apify platform, and waits for the started Actor run to finish.
-
+
{InteractingCallExample}
-
+
## Actor call task
The [`Actor.call_task`](../../reference/class/Actor#call_task) method starts an [Actor task](https://docs.apify.com/platform/actors/tasks) on the Apify platform, and waits for the started Actor run to finish.
-
+
{InteractingCallTaskExample}
-
+
## Actor metamorph
@@ -46,6 +46,6 @@ To make you Actor compatible with the metamorph operation, use [`Actor.get_input
For example, imagine you have an Actor that accepts a hotel URL on input, and then internally uses the [`apify/web-scraper`](https://apify.com/apify/web-scraper) public Actor to scrape all the hotel reviews. The metamorphing code would look as follows:
-
+
{InteractingMetamorphExample}
-
+
diff --git a/docs/02_concepts/07_webhooks.mdx b/docs/02_concepts/07_webhooks.mdx
index 9dd11531..04697bbb 100644
--- a/docs/02_concepts/07_webhooks.mdx
+++ b/docs/02_concepts/07_webhooks.mdx
@@ -3,10 +3,10 @@ id: webhooks
title: Creating webhooks
---
-import CodeBlock from '@theme/CodeBlock';
+import RunnableCodeBlock from '@site/src/components/RunnableCodeBlock';
-import WebhookExample from '!!raw-loader!./code/07_webhook.py';
-import WebhookPreventingExample from '!!raw-loader!./code/07_webhook_preventing.py';
+import WebhookExample from '!!raw-loader!roa-loader!./code/07_webhook.py';
+import WebhookPreventingExample from '!!raw-loader!roa-loader!./code/07_webhook_preventing.py';
Webhooks allow you to configure the Apify platform to perform an action when a certain event occurs. For example, you can use them to start another Actor when the current run finishes or fails.
@@ -16,9 +16,9 @@ You can learn more in the [documentation for webhooks](https://docs.apify.com/pl
Besides creating webhooks manually in Apify Console, or through the Apify API, you can also create [ad-hoc webhooks](https://docs.apify.com/platform/integrations/webhooks/ad-hoc-webhooks) dynamically from the code of your Actor using the [`Actor.add_webhook`](../../reference/class/Actor#add_webhook) method:
-
+
{WebhookExample}
-
+
Note that webhooks are only supported when running on the Apify platform. When running the Actor locally, the method will print a warning and have no effect.
@@ -26,6 +26,6 @@ Note that webhooks are only supported when running on the Apify platform. When r
To ensure that duplicate ad-hoc webhooks won't get created in a case of Actor restart, you can use the `idempotency_key` parameter. The idempotency key must be unique across all the webhooks of a user so that only one webhook gets created for a given value. You can use, for example, the Actor run ID as the idempotency key:
-
+
{WebhookPreventingExample}
-
+
diff --git a/docs/02_concepts/08_access_apify_api.mdx b/docs/02_concepts/08_access_apify_api.mdx
index d3fc05bf..ff6fefb1 100644
--- a/docs/02_concepts/08_access_apify_api.mdx
+++ b/docs/02_concepts/08_access_apify_api.mdx
@@ -3,10 +3,10 @@ id: access-apify-api
title: Accessing Apify API
---
-import CodeBlock from '@theme/CodeBlock';
+import RunnableCodeBlock from '@site/src/components/RunnableCodeBlock';
-import ActorClientExample from '!!raw-loader!./code/08_actor_client.py';
-import ActorNewClientExample from '!!raw-loader!./code/08_actor_new_client.py';
+import ActorClientExample from '!!raw-loader!roa-loader!./code/08_actor_client.py';
+import ActorNewClientExample from '!!raw-loader!roa-loader!./code/08_actor_new_client.py';
The Apify SDK contains many useful features for making Actor development easier. However, it does not cover all the features the Apify API offers.
@@ -18,14 +18,14 @@ To access the provided instance of [`ApifyClientAsync`](https://docs.apify.com/a
For example, to get the details of your user, you can use this snippet:
-
+
{ActorClientExample}
-
+
## Actor new client
If you want to create a completely new instance of the client, for example, to get a client for a different user or change the configuration of the client, you can use the [`Actor.new_client`](../../reference/class/Actor#new_client) method:
-
+
{ActorNewClientExample}
-
+
diff --git a/docs/02_concepts/09_running_webserver.mdx b/docs/02_concepts/09_running_webserver.mdx
index c35bf598..30a80722 100644
--- a/docs/02_concepts/09_running_webserver.mdx
+++ b/docs/02_concepts/09_running_webserver.mdx
@@ -3,9 +3,9 @@ id: running-webserver
title: Running webserver in your Actor
---
-import CodeBlock from '@theme/CodeBlock';
+import RunnableCodeBlock from '@site/src/components/RunnableCodeBlock';
-import WebserverExample from '!!raw-loader!./code/09_webserver.py';
+import WebserverExample from '!!raw-loader!roa-loader!./code/09_webserver.py';
Each Actor run on the Apify platform is assigned a unique hard-to-guess URL (for example `https://8segt5i81sokzm.runs.apify.net`), which enables HTTP access to an optional web server running inside the Actor run's container.
@@ -21,6 +21,6 @@ The web server running inside the container must listen at the port defined by t
The following example demonstrates how to start a simple web server in your Actor, which will respond to every GET request with the number of items that the Actor has processed so far:
-
+
{WebserverExample}
-
+
diff --git a/docs/02_concepts/10_logging.mdx b/docs/02_concepts/10_logging.mdx
index b8dc5a74..6bdb754b 100644
--- a/docs/02_concepts/10_logging.mdx
+++ b/docs/02_concepts/10_logging.mdx
@@ -3,12 +3,12 @@ id: logging
title: Logging
---
-import CodeBlock from '@theme/CodeBlock';
+import RunnableCodeBlock from '@site/src/components/RunnableCodeBlock';
-import LogConfigExample from '!!raw-loader!./code/10_log_config.py';
-import LoggerUsageExample from '!!raw-loader!./code/10_logger_usage.py';
-import RedirectLog from '!!raw-loader!./code/10_redirect_log.py';
-import RedirectLogExistingRun from '!!raw-loader!./code/10_redirect_log_existing_run.py';
+import LogConfigExample from '!!raw-loader!roa-loader!./code/10_log_config.py';
+import LoggerUsageExample from '!!raw-loader!roa-loader!./code/10_logger_usage.py';
+import RedirectLog from '!!raw-loader!roa-loader!./code/10_redirect_log.py';
+import RedirectLogExistingRun from '!!raw-loader!roa-loader!./code/10_redirect_log_existing_run.py';
The Apify SDK is logging useful information through the [`logging`](https://docs.python.org/3/library/logging.html) module from Python's standard library, into the logger with the name `apify`.
@@ -30,9 +30,9 @@ By default, only the log message is printed out to the output, without any forma
To configure and test the logger, you can use this snippet:
-
+
{LogConfigExample}
-
+
This configuration will cause all levels of messages to be printed to the standard output, with some pretty formatting.
@@ -42,9 +42,9 @@ Here you can see how all the log levels would look like.
You can use the `extra` argument for all log levels, it's not specific to the warning level. When you use `Logger.exception`, there is no need to pass the Exception object to the log manually, it will automatically infer it from the current execution context and print the exception details.
-
+
{LoggerUsageExample}
-
+
Result:
@@ -93,9 +93,9 @@ In some situations, one Actor is going to start one or more other Actors and wai
Typical use case for log redirection is to call another Actor using the [`Actor.call`](../../reference/class/Actor#call) method. This method has an optional `logger` argument, which is by default set to the `default` literal. This means that the logs of the called Actor will be automatically redirected to the parent Actor's logs with default formatting and filtering. If you set the `logger` argument to `None`, then no log redirection happens. The third option is to pass your own `Logger` instance with the possibility to define your own formatter, filter, and handler. Below you can see those three possible ways of log redirection when starting another Actor run through [`Actor.call`](../../reference/class/Actor#call).
-
+
{RedirectLog}
-
+
Each default redirect logger log entry will have a specific format. After the timestamp, it will contain cyan colored text that will contain the redirect information - the other actor's name and the run ID. The rest of the log message will be printed in the same manner as the parent Actor's logger is configured.
@@ -109,6 +109,6 @@ In some cases, you might want to connect to an already running Actor run and red
You can further decide whether you want to redirect just new logs of the ongoing Actor run, or if you also want to redirect historical logs from that Actor's run, so all logs it has produced since it was started. Both options are shown in the example code below.
-
+
{RedirectLogExistingRun}
-
+
diff --git a/docs/02_concepts/11_configuration.mdx b/docs/02_concepts/11_configuration.mdx
index 36ea66b5..bcc4ea00 100644
--- a/docs/02_concepts/11_configuration.mdx
+++ b/docs/02_concepts/11_configuration.mdx
@@ -3,9 +3,9 @@ id: actor-configuration
title: Actor configuration
---
-import CodeBlock from '@theme/CodeBlock';
+import RunnableCodeBlock from '@site/src/components/RunnableCodeBlock';
-import ConfigExample from '!!raw-loader!./code/11_config.py';
+import ConfigExample from '!!raw-loader!roa-loader!./code/11_config.py';
The [`Actor`](../../reference/class/Actor) class gets configured using the [`Configuration`](../../reference/class/Configuration) class, which initializes itself based on the provided environment variables.
@@ -19,9 +19,9 @@ To see the full list of configuration options, check the `Configuration` class o
This will cause the Actor to persist its state every 10 seconds:
-
+
{ConfigExample}
-
+
## Configuring via environment variables
diff --git a/docs/02_concepts/12_pay_per_event.mdx b/docs/02_concepts/12_pay_per_event.mdx
index 3e52e332..503d3158 100644
--- a/docs/02_concepts/12_pay_per_event.mdx
+++ b/docs/02_concepts/12_pay_per_event.mdx
@@ -4,10 +4,10 @@ title: Pay-per-event monetization
description: Monetize your Actors using the pay-per-event pricing model
---
-import ActorChargeSource from '!!raw-loader!./code/actor_charge.py';
-import ConditionalActorChargeSource from '!!raw-loader!./code/conditional_actor_charge.py';
+import ActorChargeSource from '!!raw-loader!roa-loader!./code/12_actor_charge.py';
+import ConditionalActorChargeSource from '!!raw-loader!roa-loader!./code/12_conditional_actor_charge.py';
import ApiLink from '@site/src/components/ApiLink';
-import CodeBlock from '@theme/CodeBlock';
+import RunnableCodeBlock from '@site/src/components/RunnableCodeBlock';
Apify provides several [pricing models](https://docs.apify.com/platform/actors/publishing/monetize) for monetizing your Actors. The most recent and most flexible one is [pay-per-event](https://docs.apify.com/platform/actors/running/actors-in-store#pay-per-event), which lets you charge your users programmatically directly from your Actor. As the name suggests, you may charge the users each time a specific event occurs, for example a call to an external API or when you return a result.
@@ -23,9 +23,9 @@ If you want more details about PPE pricing, please refer to our [PPE documentati
After monetization is set in the Apify console, you can add `Actor.charge` calls to your code and start monetizing!
-
-{ActorChargeSource}
-
+
+ {ActorChargeSource}
+
Then you just push your code to Apify and that's it! The SDK will even keep track of the max total charge setting for you, so you will not provide more value than what the user chose to pay for.
@@ -35,9 +35,9 @@ If you need finer control over charging, you can access call `ChargingManager.get_pricing_info()` method which returns information about the current pricing model.
-
-{ConditionalActorChargeSource}
-
+
+ {ConditionalActorChargeSource}
+
## Local development
diff --git a/docs/02_concepts/code/02_input.py b/docs/02_concepts/code/02_input.py
index b3bd3034..7dd0f86e 100644
--- a/docs/02_concepts/code/02_input.py
+++ b/docs/02_concepts/code/02_input.py
@@ -1,3 +1,5 @@
+import asyncio
+
from apify import Actor
@@ -7,3 +9,7 @@ async def main() -> None:
first_number = actor_input.get('firstNumber', 0)
second_number = actor_input.get('secondNumber', 0)
Actor.log.info('Sum: %s', first_number + second_number)
+
+
+if __name__ == '__main__':
+ asyncio.run(main())
diff --git a/docs/02_concepts/code/03_dataset_exports.py b/docs/02_concepts/code/03_dataset_exports.py
index 4f0c01c4..bd879fc6 100644
--- a/docs/02_concepts/code/03_dataset_exports.py
+++ b/docs/02_concepts/code/03_dataset_exports.py
@@ -1,3 +1,5 @@
+import asyncio
+
from apify import Actor
@@ -29,3 +31,7 @@ async def main() -> None:
json_data = await store.get_value('data.json')
Actor.log.info(f'JSON data: {json_data}')
+
+
+if __name__ == '__main__':
+ asyncio.run(main())
diff --git a/docs/02_concepts/code/03_dataset_read_write.py b/docs/02_concepts/code/03_dataset_read_write.py
index 6d8ac7f0..f5c6333e 100644
--- a/docs/02_concepts/code/03_dataset_read_write.py
+++ b/docs/02_concepts/code/03_dataset_read_write.py
@@ -1,3 +1,5 @@
+import asyncio
+
from apify import Actor
@@ -14,3 +16,7 @@ async def main() -> None:
# Iterate over the second half
second_half = [item async for item in dataset.iterate_items(offset=500)]
Actor.log.info(f'The second half of items = {second_half}')
+
+
+if __name__ == '__main__':
+ asyncio.run(main())
diff --git a/docs/02_concepts/code/03_deleting_storages.py b/docs/02_concepts/code/03_deleting_storages.py
index 68925bd9..75d8a69a 100644
--- a/docs/02_concepts/code/03_deleting_storages.py
+++ b/docs/02_concepts/code/03_deleting_storages.py
@@ -1,3 +1,5 @@
+import asyncio
+
from apify import Actor
@@ -11,3 +13,7 @@ async def main() -> None:
# Now we don't want it anymore
await key_value_store.drop()
+
+
+if __name__ == '__main__':
+ asyncio.run(main())
diff --git a/docs/02_concepts/code/03_kvs_iterating.py b/docs/02_concepts/code/03_kvs_iterating.py
index f5944095..a5a3009b 100644
--- a/docs/02_concepts/code/03_kvs_iterating.py
+++ b/docs/02_concepts/code/03_kvs_iterating.py
@@ -1,3 +1,5 @@
+import asyncio
+
from apify import Actor
@@ -16,3 +18,7 @@ async def main() -> None:
async for key, info in kvs.iterate_keys():
Actor.log.info(f'key={key}, info={info}')
+
+
+if __name__ == '__main__':
+ asyncio.run(main())
diff --git a/docs/02_concepts/code/03_kvs_public_url.py b/docs/02_concepts/code/03_kvs_public_url.py
index fe8ae07a..f2a870e4 100644
--- a/docs/02_concepts/code/03_kvs_public_url.py
+++ b/docs/02_concepts/code/03_kvs_public_url.py
@@ -1,3 +1,5 @@
+import asyncio
+
from apify import Actor
@@ -9,3 +11,7 @@ async def main() -> None:
# Get the public URL of a record
my_record_url = await store.get_public_url('my_record')
Actor.log.info(f'URL of "my_record": {my_record_url}')
+
+
+if __name__ == '__main__':
+ asyncio.run(main())
diff --git a/docs/02_concepts/code/03_kvs_read_write.py b/docs/02_concepts/code/03_kvs_read_write.py
index 239aa2e2..ba4a1dd2 100644
--- a/docs/02_concepts/code/03_kvs_read_write.py
+++ b/docs/02_concepts/code/03_kvs_read_write.py
@@ -1,3 +1,5 @@
+import asyncio
+
from apify import Actor
@@ -23,3 +25,7 @@ async def main() -> None:
# Delete the `automatic_text` value
await kvs.set_value('automatic_text', None)
+
+
+if __name__ == '__main__':
+ asyncio.run(main())
diff --git a/docs/02_concepts/code/03_opening_storages.py b/docs/02_concepts/code/03_opening_storages.py
index b4ccbd09..39730dfb 100644
--- a/docs/02_concepts/code/03_opening_storages.py
+++ b/docs/02_concepts/code/03_opening_storages.py
@@ -1,3 +1,5 @@
+import asyncio
+
from apify import Actor, Request
@@ -14,3 +16,7 @@ async def main() -> None:
# Work with the request queue with the name 'my-queue'
request_queue = await Actor.open_request_queue(name='my-queue')
await request_queue.add_request(Request.from_url('https://apify.com'))
+
+
+if __name__ == '__main__':
+ asyncio.run(main())
diff --git a/docs/02_concepts/code/03_rq.py b/docs/02_concepts/code/03_rq.py
index e9ad6a51..4823570b 100644
--- a/docs/02_concepts/code/03_rq.py
+++ b/docs/02_concepts/code/03_rq.py
@@ -48,3 +48,7 @@ async def main() -> None:
# processed again.
Actor.log.warning('Request failed, will retry!')
await queue.reclaim_request(request)
+
+
+if __name__ == '__main__':
+ asyncio.run(main())
diff --git a/docs/02_concepts/code/04_actor_events.py b/docs/02_concepts/code/04_actor_events.py
index 1c8c785d..591fddcd 100644
--- a/docs/02_concepts/code/04_actor_events.py
+++ b/docs/02_concepts/code/04_actor_events.py
@@ -36,3 +36,7 @@ async def save_state(event_data: Any) -> None:
for j in range(10):
Actor.log.info(f'Processing item {j} of another kind...')
await asyncio.sleep(1)
+
+
+if __name__ == '__main__':
+ asyncio.run(main())
diff --git a/docs/02_concepts/code/05_apify_proxy.py b/docs/02_concepts/code/05_apify_proxy.py
index 96e7104c..12d1dcca 100644
--- a/docs/02_concepts/code/05_apify_proxy.py
+++ b/docs/02_concepts/code/05_apify_proxy.py
@@ -1,12 +1,18 @@
+import asyncio
+
from apify import Actor
async def main() -> None:
async with Actor:
- proxy_configuration = await Actor.create_proxy_configuration()
+ proxy_cfg = await Actor.create_proxy_configuration()
- if not proxy_configuration:
+ if not proxy_cfg:
raise RuntimeError('No proxy configuration available.')
- proxy_url = await proxy_configuration.new_url()
+ proxy_url = await proxy_cfg.new_url()
Actor.log.info(f'Using proxy URL: {proxy_url}')
+
+
+if __name__ == '__main__':
+ asyncio.run(main())
diff --git a/docs/02_concepts/code/05_apify_proxy_config.py b/docs/02_concepts/code/05_apify_proxy_config.py
index ba078b35..68f39e09 100644
--- a/docs/02_concepts/code/05_apify_proxy_config.py
+++ b/docs/02_concepts/code/05_apify_proxy_config.py
@@ -1,15 +1,21 @@
+import asyncio
+
from apify import Actor
async def main() -> None:
async with Actor:
- proxy_configuration = await Actor.create_proxy_configuration(
+ proxy_cfg = await Actor.create_proxy_configuration(
groups=['RESIDENTIAL'],
country_code='US',
)
- if not proxy_configuration:
+ if not proxy_cfg:
raise RuntimeError('No proxy configuration available.')
- proxy_url = await proxy_configuration.new_url()
+ proxy_url = await proxy_cfg.new_url()
Actor.log.info(f'Proxy URL: {proxy_url}')
+
+
+if __name__ == '__main__':
+ asyncio.run(main())
diff --git a/docs/02_concepts/code/05_custom_proxy.py b/docs/02_concepts/code/05_custom_proxy.py
index d4c8a24a..43ad2dbf 100644
--- a/docs/02_concepts/code/05_custom_proxy.py
+++ b/docs/02_concepts/code/05_custom_proxy.py
@@ -1,17 +1,23 @@
+import asyncio
+
from apify import Actor
async def main() -> None:
async with Actor:
- proxy_configuration = await Actor.create_proxy_configuration(
+ proxy_cfg = await Actor.create_proxy_configuration(
proxy_urls=[
'http://proxy-1.com',
'http://proxy-2.com',
],
)
- if not proxy_configuration:
+ if not proxy_cfg:
raise RuntimeError('No proxy configuration available.')
- proxy_url = await proxy_configuration.new_url()
+ proxy_url = await proxy_cfg.new_url()
Actor.log.info(f'Using proxy URL: {proxy_url}')
+
+
+if __name__ == '__main__':
+ asyncio.run(main())
diff --git a/docs/02_concepts/code/05_custom_proxy_function.py b/docs/02_concepts/code/05_custom_proxy_function.py
index 71aced2a..1ff56db5 100644
--- a/docs/02_concepts/code/05_custom_proxy_function.py
+++ b/docs/02_concepts/code/05_custom_proxy_function.py
@@ -1,5 +1,7 @@
from __future__ import annotations
+import asyncio
+
from apify import Actor, Request
@@ -14,15 +16,19 @@ async def custom_new_url_function(
async def main() -> None:
async with Actor:
- proxy_configuration = await Actor.create_proxy_configuration(
+ proxy_cfg = await Actor.create_proxy_configuration(
new_url_function=custom_new_url_function, # type: ignore[arg-type]
)
- if not proxy_configuration:
+ if not proxy_cfg:
raise RuntimeError('No proxy configuration available.')
- proxy_url_with_session = await proxy_configuration.new_url('a')
+ proxy_url_with_session = await proxy_cfg.new_url('a')
Actor.log.info(f'Using proxy URL: {proxy_url_with_session}')
- proxy_url_without_session = await proxy_configuration.new_url()
+ proxy_url_without_session = await proxy_cfg.new_url()
Actor.log.info(f'Using proxy URL: {proxy_url_without_session}')
+
+
+if __name__ == '__main__':
+ asyncio.run(main())
diff --git a/docs/02_concepts/code/05_proxy_actor_input.py b/docs/02_concepts/code/05_proxy_actor_input.py
index 3ca0344d..adf0d8d3 100644
--- a/docs/02_concepts/code/05_proxy_actor_input.py
+++ b/docs/02_concepts/code/05_proxy_actor_input.py
@@ -1,3 +1,5 @@
+import asyncio
+
from apify import Actor
@@ -5,12 +7,16 @@ async def main() -> None:
async with Actor:
actor_input = await Actor.get_input() or {}
proxy_settings = actor_input.get('proxySettings')
- proxy_configuration = await Actor.create_proxy_configuration(
+ proxy_cfg = await Actor.create_proxy_configuration(
actor_proxy_input=proxy_settings
)
- if not proxy_configuration:
+ if not proxy_cfg:
raise RuntimeError('No proxy configuration available.')
- proxy_url = await proxy_configuration.new_url()
+ proxy_url = await proxy_cfg.new_url()
Actor.log.info(f'Using proxy URL: {proxy_url}')
+
+
+if __name__ == '__main__':
+ asyncio.run(main())
diff --git a/docs/02_concepts/code/05_proxy_httpx.py b/docs/02_concepts/code/05_proxy_httpx.py
index a124d1a5..dbabacaa 100644
--- a/docs/02_concepts/code/05_proxy_httpx.py
+++ b/docs/02_concepts/code/05_proxy_httpx.py
@@ -1,3 +1,5 @@
+import asyncio
+
import httpx
from apify import Actor
@@ -5,18 +7,22 @@
async def main() -> None:
async with Actor:
- proxy_configuration = await Actor.create_proxy_configuration(
+ proxy_cfg = await Actor.create_proxy_configuration(
proxy_urls=[
'http://proxy-1.com',
'http://proxy-2.com',
],
)
- if not proxy_configuration:
+ if not proxy_cfg:
raise RuntimeError('No proxy configuration available.')
- proxy_url = await proxy_configuration.new_url()
+ proxy_url = await proxy_cfg.new_url()
async with httpx.AsyncClient(proxy=proxy_url) as httpx_client:
response = await httpx_client.get('http://example.com')
Actor.log.info(f'Response: {response}')
+
+
+if __name__ == '__main__':
+ asyncio.run(main())
diff --git a/docs/02_concepts/code/05_proxy_rotation.py b/docs/02_concepts/code/05_proxy_rotation.py
index 8e6a5de0..427eb585 100644
--- a/docs/02_concepts/code/05_proxy_rotation.py
+++ b/docs/02_concepts/code/05_proxy_rotation.py
@@ -1,31 +1,29 @@
+import asyncio
+
from apify import Actor
async def main() -> None:
async with Actor:
- proxy_configuration = await Actor.create_proxy_configuration(
+ proxy_cfg = await Actor.create_proxy_configuration(
proxy_urls=[
'http://proxy-1.com',
'http://proxy-2.com',
],
)
- if not proxy_configuration:
+ if not proxy_cfg:
raise RuntimeError('No proxy configuration available.')
- proxy_url = await proxy_configuration.new_url() # http://proxy-1.com
- proxy_url = await proxy_configuration.new_url() # http://proxy-2.com
- proxy_url = await proxy_configuration.new_url() # http://proxy-1.com
- proxy_url = await proxy_configuration.new_url() # http://proxy-2.com
- proxy_url = await proxy_configuration.new_url(
- session_id='a'
- ) # http://proxy-1.com
- proxy_url = await proxy_configuration.new_url(
- session_id='b'
- ) # http://proxy-2.com
- proxy_url = await proxy_configuration.new_url(
- session_id='b'
- ) # http://proxy-2.com
- proxy_url = await proxy_configuration.new_url(
- session_id='a'
- ) # http://proxy-1.com
+ proxy_url = await proxy_cfg.new_url() # http://proxy-1.com
+ proxy_url = await proxy_cfg.new_url() # http://proxy-2.com
+ proxy_url = await proxy_cfg.new_url() # http://proxy-1.com
+ proxy_url = await proxy_cfg.new_url() # http://proxy-2.com
+ proxy_url = await proxy_cfg.new_url(session_id='a') # http://proxy-1.com
+ proxy_url = await proxy_cfg.new_url(session_id='b') # http://proxy-2.com
+ proxy_url = await proxy_cfg.new_url(session_id='b') # http://proxy-2.com
+ proxy_url = await proxy_cfg.new_url(session_id='a') # http://proxy-1.com
+
+
+if __name__ == '__main__':
+ asyncio.run(main())
diff --git a/docs/02_concepts/code/06_interacting_call.py b/docs/02_concepts/code/06_interacting_call.py
index 46a0a90a..664e261f 100644
--- a/docs/02_concepts/code/06_interacting_call.py
+++ b/docs/02_concepts/code/06_interacting_call.py
@@ -1,3 +1,5 @@
+import asyncio
+
from apify import Actor
@@ -20,3 +22,7 @@ async def main() -> None:
kvs_client = run_client.key_value_store()
output = await kvs_client.get_record('OUTPUT')
Actor.log.info(f'Actor output: {output}')
+
+
+if __name__ == '__main__':
+ asyncio.run(main())
diff --git a/docs/02_concepts/code/06_interacting_call_task.py b/docs/02_concepts/code/06_interacting_call_task.py
index 75335d69..4796be4d 100644
--- a/docs/02_concepts/code/06_interacting_call_task.py
+++ b/docs/02_concepts/code/06_interacting_call_task.py
@@ -1,3 +1,5 @@
+import asyncio
+
from apify import Actor
@@ -17,3 +19,7 @@ async def main() -> None:
dataset_client = run_client.dataset()
items = await dataset_client.list_items()
Actor.log.info(f'Task run dataset items: {items}')
+
+
+if __name__ == '__main__':
+ asyncio.run(main())
diff --git a/docs/02_concepts/code/06_interacting_metamorph.py b/docs/02_concepts/code/06_interacting_metamorph.py
index 53d48882..b1db959a 100644
--- a/docs/02_concepts/code/06_interacting_metamorph.py
+++ b/docs/02_concepts/code/06_interacting_metamorph.py
@@ -1,3 +1,5 @@
+import asyncio
+
from apify import Actor
@@ -22,3 +24,7 @@ async def main() -> None:
# This code will not be called, since the `metamorph` action terminates
# the current Actor run container.
Actor.log.info('You will not see this!')
+
+
+if __name__ == '__main__':
+ asyncio.run(main())
diff --git a/docs/02_concepts/code/06_interacting_start.py b/docs/02_concepts/code/06_interacting_start.py
index 075347c2..b5e5fe54 100644
--- a/docs/02_concepts/code/06_interacting_start.py
+++ b/docs/02_concepts/code/06_interacting_start.py
@@ -1,3 +1,5 @@
+import asyncio
+
from apify import Actor
@@ -11,3 +13,7 @@ async def main() -> None:
# Log the Actor run ID.
Actor.log.info(f'Actor run ID: {actor_run.id}')
+
+
+if __name__ == '__main__':
+ asyncio.run(main())
diff --git a/docs/02_concepts/code/07_webhook.py b/docs/02_concepts/code/07_webhook.py
index c2e382cf..0fd15abe 100644
--- a/docs/02_concepts/code/07_webhook.py
+++ b/docs/02_concepts/code/07_webhook.py
@@ -1,3 +1,5 @@
+import asyncio
+
from apify import Actor, Webhook
@@ -14,3 +16,7 @@ async def main() -> None:
# Raise an error to simulate a failed run.
raise RuntimeError('I am an error and I know it!')
+
+
+if __name__ == '__main__':
+ asyncio.run(main())
diff --git a/docs/02_concepts/code/07_webhook_preventing.py b/docs/02_concepts/code/07_webhook_preventing.py
index de5f189a..5c30ade7 100644
--- a/docs/02_concepts/code/07_webhook_preventing.py
+++ b/docs/02_concepts/code/07_webhook_preventing.py
@@ -1,3 +1,5 @@
+import asyncio
+
from apify import Actor, Webhook
@@ -15,3 +17,7 @@ async def main() -> None:
# Raise an error to simulate a failed run.
raise RuntimeError('I am an error and I know it!')
+
+
+if __name__ == '__main__':
+ asyncio.run(main())
diff --git a/docs/02_concepts/code/08_actor_client.py b/docs/02_concepts/code/08_actor_client.py
index 68f5c2d7..304fdf09 100644
--- a/docs/02_concepts/code/08_actor_client.py
+++ b/docs/02_concepts/code/08_actor_client.py
@@ -1,3 +1,5 @@
+import asyncio
+
from apify import Actor
@@ -9,3 +11,7 @@ async def main() -> None:
# Get information about the current user.
me = await user_client.get()
Actor.log.info(f'User: {me}')
+
+
+if __name__ == '__main__':
+ asyncio.run(main())
diff --git a/docs/02_concepts/code/08_actor_new_client.py b/docs/02_concepts/code/08_actor_new_client.py
index da59e6fc..0569dfa6 100644
--- a/docs/02_concepts/code/08_actor_new_client.py
+++ b/docs/02_concepts/code/08_actor_new_client.py
@@ -1,3 +1,5 @@
+import asyncio
+
from apify import Actor
TOKEN = 'ANOTHER_USERS_TOKEN'
@@ -12,3 +14,7 @@ async def main() -> None:
# Get information about the another user.
them = await user_client.get()
Actor.log.info(f'Another user: {them}')
+
+
+if __name__ == '__main__':
+ asyncio.run(main())
diff --git a/docs/02_concepts/code/09_webserver.py b/docs/02_concepts/code/09_webserver.py
index e8b54200..d4bc0655 100644
--- a/docs/02_concepts/code/09_webserver.py
+++ b/docs/02_concepts/code/09_webserver.py
@@ -47,3 +47,7 @@ async def main() -> None:
# Signal the HTTP server to shut down, and wait for it to finish.
http_server.shutdown()
await run_server_task
+
+
+if __name__ == '__main__':
+ asyncio.run(main())
diff --git a/docs/02_concepts/code/10_log_config.py b/docs/02_concepts/code/10_log_config.py
index 520df753..c3c93610 100644
--- a/docs/02_concepts/code/10_log_config.py
+++ b/docs/02_concepts/code/10_log_config.py
@@ -1,3 +1,4 @@
+import asyncio
import logging
from apify.log import ActorLogFormatter
@@ -10,3 +11,7 @@ async def main() -> None:
apify_logger = logging.getLogger('apify')
apify_logger.setLevel(logging.DEBUG)
apify_logger.addHandler(handler)
+
+
+if __name__ == '__main__':
+ asyncio.run(main())
diff --git a/docs/02_concepts/code/10_logger_usage.py b/docs/02_concepts/code/10_logger_usage.py
index a707ab5c..74f2beb1 100644
--- a/docs/02_concepts/code/10_logger_usage.py
+++ b/docs/02_concepts/code/10_logger_usage.py
@@ -1,3 +1,4 @@
+import asyncio
import logging
from apify import Actor
@@ -21,3 +22,7 @@ async def main() -> None:
raise RuntimeError('Ouch!')
except RuntimeError:
Actor.log.exception('This is an exceptional message')
+
+
+if __name__ == '__main__':
+ asyncio.run(main())
diff --git a/docs/02_concepts/code/10_redirect_log.py b/docs/02_concepts/code/10_redirect_log.py
index 89bda9b3..387e6d1d 100644
--- a/docs/02_concepts/code/10_redirect_log.py
+++ b/docs/02_concepts/code/10_redirect_log.py
@@ -1,3 +1,4 @@
+import asyncio
import logging
from apify import Actor
@@ -11,5 +12,10 @@ async def main() -> None:
await Actor.call(actor_id='some_actor_id', logger=None)
# Custom redirect logger
await Actor.call(
- actor_id='some_actor_id', logger=logging.getLogger('custom_logger')
+ actor_id='some_actor_id',
+ logger=logging.getLogger('custom_logger'),
)
+
+
+if __name__ == '__main__':
+ asyncio.run(main())
diff --git a/docs/02_concepts/code/10_redirect_log_existing_run.py b/docs/02_concepts/code/10_redirect_log_existing_run.py
index 6de420a7..cd982833 100644
--- a/docs/02_concepts/code/10_redirect_log_existing_run.py
+++ b/docs/02_concepts/code/10_redirect_log_existing_run.py
@@ -22,3 +22,7 @@ async def main() -> None:
streamed_log.start()
await asyncio.sleep(5)
await streamed_log.stop()
+
+
+if __name__ == '__main__':
+ asyncio.run(main())
diff --git a/docs/02_concepts/code/11_config.py b/docs/02_concepts/code/11_config.py
index 10b07079..f7e00c16 100644
--- a/docs/02_concepts/code/11_config.py
+++ b/docs/02_concepts/code/11_config.py
@@ -1,16 +1,23 @@
+import asyncio
from datetime import timedelta
from apify import Actor, Configuration, Event
async def main() -> None:
- global_config = Configuration.get_global_configuration()
- global_config.persist_state_interval = timedelta(seconds=10)
+ configuration = Configuration(
+ persist_state_interval=timedelta(seconds=10)
+ # Set other configuration options here as needed.
+ )
- async with Actor:
+ async with Actor(configuration=configuration):
# Define a handler that will be called for every persist state event.
async def save_state() -> None:
await Actor.set_value('STATE', 'Hello, world!')
# The save_state handler will be called every 10 seconds now.
Actor.on(Event.PERSIST_STATE, save_state)
+
+
+if __name__ == '__main__':
+ asyncio.run(main())
diff --git a/docs/02_concepts/code/actor_charge.py b/docs/02_concepts/code/12_actor_charge.py
similarity index 92%
rename from docs/02_concepts/code/actor_charge.py
rename to docs/02_concepts/code/12_actor_charge.py
index 3478f60f..fc8a4433 100644
--- a/docs/02_concepts/code/actor_charge.py
+++ b/docs/02_concepts/code/12_actor_charge.py
@@ -1,3 +1,5 @@
+import asyncio
+
from apify import Actor
@@ -28,3 +30,7 @@ async def main() -> None:
count=len(result),
)
# highlight-end
+
+
+if __name__ == '__main__':
+ asyncio.run(main())
diff --git a/docs/02_concepts/code/conditional_actor_charge.py b/docs/02_concepts/code/12_conditional_actor_charge.py
similarity index 91%
rename from docs/02_concepts/code/conditional_actor_charge.py
rename to docs/02_concepts/code/12_conditional_actor_charge.py
index 12b03d96..193284fd 100644
--- a/docs/02_concepts/code/conditional_actor_charge.py
+++ b/docs/02_concepts/code/12_conditional_actor_charge.py
@@ -1,3 +1,5 @@
+import asyncio
+
from apify import Actor
@@ -16,3 +18,7 @@ async def main() -> None:
elif charged_items < (Actor.configuration.max_paid_dataset_items or 0):
await Actor.push_data({'hello': 'world'})
charged_items += 1
+
+
+if __name__ == '__main__':
+ asyncio.run(main())
diff --git a/docs/03_guides/01_beautifulsoup_httpx.mdx b/docs/03_guides/01_beautifulsoup_httpx.mdx
index b6a69c01..42452a2a 100644
--- a/docs/03_guides/01_beautifulsoup_httpx.mdx
+++ b/docs/03_guides/01_beautifulsoup_httpx.mdx
@@ -3,9 +3,9 @@ id: beautifulsoup-httpx
title: Using BeautifulSoup with HTTPX
---
-import CodeBlock from '@theme/CodeBlock';
+import RunnableCodeBlock from '@site/src/components/RunnableCodeBlock';
-import BeautifulSoupHttpxExample from '!!raw-loader!./code/01_beautifulsoup_httpx.py';
+import BeautifulSoupHttpxExample from '!!raw-loader!roa-loader!./code/01_beautifulsoup_httpx.py';
In this guide, you'll learn how to use the [BeautifulSoup](https://www.crummy.com/software/BeautifulSoup/) library with the [HTTPX](https://www.python-httpx.org/) library in your Apify Actors.
@@ -21,9 +21,9 @@ To create an Actor which uses those libraries, start from the [BeautifulSoup & P
Below is a simple Actor that recursively scrapes titles from all linked websites, up to a specified maximum depth, starting from URLs provided in the Actor input. It uses [HTTPX](https://www.python-httpx.org/) for fetching pages and [BeautifulSoup](https://www.crummy.com/software/BeautifulSoup/) for parsing their content to extract titles and links to other pages.
-
+
{BeautifulSoupHttpxExample}
-
+
## Conclusion
diff --git a/docs/03_guides/02_parsel_impit.mdx b/docs/03_guides/02_parsel_impit.mdx
index 2ac4d610..0b572bf8 100644
--- a/docs/03_guides/02_parsel_impit.mdx
+++ b/docs/03_guides/02_parsel_impit.mdx
@@ -3,9 +3,9 @@ id: parsel-impit
title: Using Parsel with Impit
---
-import CodeBlock from '@theme/CodeBlock';
+import RunnableCodeBlock from '@site/src/components/RunnableCodeBlock';
-import ParselImpitExample from '!!raw-loader!./code/02_parsel_impit.py';
+import ParselImpitExample from '!!raw-loader!roa-loader!./code/02_parsel_impit.py';
In this guide, you'll learn how to combine the [Parsel](https://github.com/scrapy/parsel) and [Impit](https://github.com/apify/impit) libraries when building Apify Actors.
@@ -19,9 +19,9 @@ In this guide, you'll learn how to combine the [Parsel](https://github.com/scrap
The following example shows a simple Actor that recursively scrapes titles from linked pages, up to a user-defined maximum depth. It uses [Impit](https://github.com/apify/impit) to fetch pages and [Parsel](https://github.com/scrapy/parsel) to extract titles and discover new links.
-
+
{ParselImpitExample}
-
+
## Conclusion
diff --git a/docs/03_guides/03_playwright.mdx b/docs/03_guides/03_playwright.mdx
index 8cada682..2c7428a5 100644
--- a/docs/03_guides/03_playwright.mdx
+++ b/docs/03_guides/03_playwright.mdx
@@ -6,8 +6,9 @@ title: Using Playwright
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';
import CodeBlock from '@theme/CodeBlock';
+import RunnableCodeBlock from '@site/src/components/RunnableCodeBlock';
-import PlaywrightExample from '!!raw-loader!./code/03_playwright.py';
+import PlaywrightExample from '!!raw-loader!roa-loader!./code/03_playwright.py';
[Playwright](https://playwright.dev) is a tool for web automation and testing that can also be used for web scraping. It allows you to control a web browser programmatically and interact with web pages just as a human would.
@@ -47,9 +48,9 @@ This is a simple Actor that recursively scrapes titles from all linked websites,
It uses Playwright to open the pages in an automated Chrome browser, and to extract the title and anchor elements after the pages load.
-
+
{PlaywrightExample}
-
+
## Conclusion
diff --git a/docs/03_guides/04_selenium.mdx b/docs/03_guides/04_selenium.mdx
index 834dc33c..bbc6abe1 100644
--- a/docs/03_guides/04_selenium.mdx
+++ b/docs/03_guides/04_selenium.mdx
@@ -3,9 +3,9 @@ id: selenium
title: Using Selenium
---
-import CodeBlock from '@theme/CodeBlock';
+import RunnableCodeBlock from '@site/src/components/RunnableCodeBlock';
-import SeleniumExample from '!!raw-loader!./code/04_selenium.py';
+import SeleniumExample from '!!raw-loader!roa-loader!./code/04_selenium.py';
[Selenium](https://www.selenium.dev/) is a tool for web automation and testing that can also be used for web scraping. It allows you to control a web browser programmatically and interact with web pages just as a human would.
@@ -37,9 +37,9 @@ This is a simple Actor that recursively scrapes titles from all linked websites,
It uses Selenium ChromeDriver to open the pages in an automated Chrome browser, and to extract the title and anchor elements after the pages load.
-
+
{SeleniumExample}
-
+
## Conclusion
diff --git a/docs/03_guides/05_crawlee.mdx b/docs/03_guides/05_crawlee.mdx
index 6b513417..ed805dea 100644
--- a/docs/03_guides/05_crawlee.mdx
+++ b/docs/03_guides/05_crawlee.mdx
@@ -3,11 +3,11 @@ id: crawlee
title: Using Crawlee
---
-import CodeBlock from '@theme/CodeBlock';
+import RunnableCodeBlock from '@site/src/components/RunnableCodeBlock';
-import CrawleeBeautifulSoupExample from '!!raw-loader!./code/05_crawlee_beautifulsoup.py';
-import CrawleeParselExample from '!!raw-loader!./code/05_crawlee_parsel.py';
-import CrawleePlaywrightExample from '!!raw-loader!./code/05_crawlee_playwright.py';
+import CrawleeBeautifulSoupExample from '!!raw-loader!roa-loader!./code/05_crawlee_beautifulsoup.py';
+import CrawleeParselExample from '!!raw-loader!roa-loader!./code/05_crawlee_parsel.py';
+import CrawleePlaywrightExample from '!!raw-loader!roa-loader!./code/05_crawlee_playwright.py';
In this guide you'll learn how to use the [Crawlee](https://crawlee.dev/python) library in your Apify Actors.
@@ -21,25 +21,25 @@ In this guide, you'll learn how to use Crawlee with [`BeautifulSoupCrawler`](htt
The [`BeautifulSoupCrawler`](https://crawlee.dev/python/api/class/BeautifulSoupCrawler) is ideal for extracting data from static HTML pages. It uses [BeautifulSoup](https://www.crummy.com/software/BeautifulSoup/bs4/doc/) for parsing and [`ImpitHttpClient`](https://crawlee.dev/python/api/class/ImpitHttpClient) for HTTP communication, ensuring efficient and lightweight scraping. If you do not need to execute JavaScript on the page, [`BeautifulSoupCrawler`](https://crawlee.dev/python/api/class/BeautifulSoupCrawler) is a great choice for your scraping tasks. Below is an example of how to use it in an Apify Actor.
-
+
{CrawleeBeautifulSoupExample}
-
+
## Actor with ParselCrawler
The [`ParselCrawler`](https://crawlee.dev/python/api/class/ParselCrawler) works in the same way as [`BeautifulSoupCrawler`](https://crawlee.dev/python/api/class/BeautifulSoupCrawler), but it uses the [Parsel](https://parsel.readthedocs.io/en/latest/) library for HTML parsing. This allows for more powerful and flexible data extraction using [XPath](https://en.wikipedia.org/wiki/XPath) selectors. It should be faster than [`BeautifulSoupCrawler`](https://crawlee.dev/python/api/class/BeautifulSoupCrawler). Below is an example of how to use [`ParselCrawler`](https://crawlee.dev/python/api/class/ParselCrawler) in an Apify Actor.
-
+
{CrawleeParselExample}
-
+
## Actor with PlaywrightCrawler
The [`PlaywrightCrawler`](https://crawlee.dev/python/api/class/PlaywrightCrawler) is built for handling dynamic web pages that rely on JavaScript for content rendering. Using the [Playwright](https://playwright.dev/) library, it provides a browser-based automation environment to interact with complex websites. Below is an example of how to use [`PlaywrightCrawler`](https://crawlee.dev/python/api/class/PlaywrightCrawler) in an Apify Actor.
-
+
{CrawleePlaywrightExample}
-
+
## Conclusion
diff --git a/docs/03_guides/06_scrapy.mdx b/docs/03_guides/06_scrapy.mdx
index 95f34fae..1697b8bb 100644
--- a/docs/03_guides/06_scrapy.mdx
+++ b/docs/03_guides/06_scrapy.mdx
@@ -95,7 +95,7 @@ The following example demonstrates a Scrapy Actor that scrapes page titles and e
-## Dealing with ‘imminent migration to another host’
+## Dealing with imminent migration to another host
Under some circumstances, the platform may decide to [migrate your Actor](https://docs.apify.com/academy/expert-scraping-with-apify/migrations-maintaining-state) from one piece of infrastructure to another while it's in progress. While [Crawlee](https://crawlee.dev/python)-based projects can pause and resume their work after a restart, achieving the same with a Scrapy-based project can be challenging.
diff --git a/docs/03_guides/code/scrapy_project/src/main.py b/docs/03_guides/code/scrapy_project/src/main.py
index a5586a25..608a867b 100644
--- a/docs/03_guides/code/scrapy_project/src/main.py
+++ b/docs/03_guides/code/scrapy_project/src/main.py
@@ -1,4 +1,5 @@
from __future__ import annotations
+import asyncio
from scrapy.crawler import CrawlerRunner
from scrapy.utils.defer import deferred_to_future
@@ -30,3 +31,7 @@ async def main() -> None:
allowed_domains=allowed_domains,
)
await deferred_to_future(crawl_deferred)
+
+
+if __name__ == '__main__':
+ asyncio.run(main())
diff --git a/website/roa-loader/index.js b/website/roa-loader/index.js
new file mode 100644
index 00000000..9c602dd2
--- /dev/null
+++ b/website/roa-loader/index.js
@@ -0,0 +1,95 @@
+const { createHash } = require('node:crypto');
+const { inspect } = require('node:util');
+
+const { urlToRequest } = require('loader-utils');
+
+// Endpoint used to sign run configurations; the token from the environment is sent as a query parameter.
+const signingUrl = new URL('https://api.apify.com/v2/tools/encode-and-sign');
+signingUrl.searchParams.set('token', process.env.APIFY_SIGNING_TOKEN);
+// Signing tasks parked while another call is in progress (see `working`).
+const queue = [];
+// Signed results keyed by the SHA-1 of the source that was signed.
+const cache = {};
+// True while one call is actively signing and draining `queue`.
+let working = false;
+
+/**
+ * Returns the hex-encoded SHA-1 digest of `source`.
+ *
+ * @param {string} source Text to fingerprint.
+ * @returns {string} Lowercase hexadecimal digest.
+ */
+function hash(source) {
+    const digest = createHash('sha1');
+    digest.update(source);
+    return digest.digest('hex');
+}
+
+/**
+ * Asks the `encode-and-sign` endpoint for a signed run configuration that embeds `source`.
+ *
+ * Successful results are memoized in `cache` under the SHA-1 of the source, so an identical
+ * snippet is signed only once per process. Failures are logged and reported as the placeholder
+ * string `'invalid-token'`; they are not cached.
+ *
+ * @param {string} source Code of the example to embed in the run configuration.
+ * @returns {Promise<string>} The encoded, signed configuration, or `'invalid-token'` on failure.
+ */
+async function getHash(source) {
+    const cacheKey = hash(source);
+
+    if (cache[cacheKey]) {
+        return cache[cacheKey];
+    }
+
+    // Sources that mention a browser automation library request more memory.
+    const memory = source.match(/playwright|puppeteer/i) ? 4096 : 1024;
+    const res = await fetch(signingUrl, {
+        method: 'POST',
+        body: JSON.stringify({
+            input: JSON.stringify({ code: source }),
+            options: {
+                build: 'latest',
+                contentType: 'application/json; charset=utf-8',
+                memory,
+                timeout: 180,
+            },
+        }),
+        headers: {
+            'Content-Type': 'application/json; charset=utf-8',
+        },
+    });
+
+    if (!res.ok) {
+        console.error(`Signing failed: ${res.status} ${res.statusText}`, await res.text());
+        return 'invalid-token';
+    }
+
+    const body = await res.json();
+
+    if (!body?.data?.encoded) {
+        // `inspect(undefined)` is the truthy string 'undefined', so the fallback
+        // must be selected before formatting, not with `||` afterwards.
+        const reason = body?.error ? inspect(body.error) : 'Unknown error';
+        console.error(`Signing failed: ${reason}`, body);
+        return 'invalid-token';
+    }
+
+    cache[cacheKey] = body.data.encoded;
+    // Brief pause after a fresh signing call; presumably spaces out requests to the API (TODO confirm).
+    await new Promise((resolve) => setTimeout(resolve, 100));
+
+    return body.data.encoded;
+}
+
+/**
+ * Signs `source` while keeping at most one signing request in flight.
+ *
+ * The first caller becomes the worker: it signs its own source and then runs every task
+ * that was queued in the meantime. Callers that arrive while the worker is busy are parked
+ * in `queue` and settle when the worker reaches them.
+ *
+ * @param {string} source Code of the example to sign.
+ * @returns {Promise<string>} The signed configuration, or `'invalid-token'` when no
+ *     `APIFY_SIGNING_TOKEN` is set or signing fails.
+ */
+async function encodeAndSign(source) {
+    if (!process.env.APIFY_SIGNING_TOKEN) {
+        return 'invalid-token';
+    }
+
+    if (working) {
+        return new Promise((resolve, reject) => {
+            queue.push(() => getHash(source).then(resolve, reject));
+        });
+    }
+
+    working = true;
+
+    try {
+        return await getHash(source);
+    } finally {
+        // Drain here rather than after the first request: if that request throws,
+        // callers already parked in `queue` must still be served, otherwise their
+        // promises would never settle.
+        try {
+            while (queue.length) {
+                await queue.shift()();
+            }
+        } finally {
+            working = false;
+        }
+    }
+}
+
+/**
+ * Loader entry point: wraps the example source in `{ code, hash }`.
+ *
+ * When `APIFY_SDK_DOCS_FAST` is set, signing is skipped and `hash` is the literal `'fast'`.
+ * Must stay a regular `function` because it reads `this.resourcePath`.
+ *
+ * @param {string} code Source of the imported example file.
+ * @returns {Promise<{code: string, hash: string}>} The source together with its signed hash.
+ */
+module.exports = async function (code) {
+    const skipSigning = Boolean(process.env.APIFY_SDK_DOCS_FAST);
+
+    if (!skipSigning) {
+        console.log(`Signing ${urlToRequest(this.resourcePath)}...`, { working, queue: queue.length });
+        return { code, hash: await encodeAndSign(code) };
+    }
+
+    return { code, hash: 'fast' };
+};
diff --git a/website/roa-loader/package.json b/website/roa-loader/package.json
new file mode 100644
index 00000000..b04ff790
--- /dev/null
+++ b/website/roa-loader/package.json
@@ -0,0 +1,15 @@
+{
+ "name": "roa-loader",
+ "version": "1.0.0",
+ "description": "",
+ "main": "index.js",
+ "scripts": {
+ "test": "echo \"Error: no test specified\" && exit 1"
+ },
+ "keywords": [],
+ "author": "",
+ "license": "ISC",
+ "dependencies": {
+ "loader-utils": "^3.2.1"
+ }
+}
diff --git a/website/src/components/RunnableCodeBlock.jsx b/website/src/components/RunnableCodeBlock.jsx
index 0bdbd54a..8749fd6d 100644
--- a/website/src/components/RunnableCodeBlock.jsx
+++ b/website/src/components/RunnableCodeBlock.jsx
@@ -4,13 +4,9 @@ import CodeBlock from '@theme/CodeBlock';
import Link from '@docusaurus/Link';
import styles from './RunnableCodeBlock.module.css';
-const EXAMPLE_RUNNERS = {
- playwright: '6i5QsHBMtm3hKph70',
- puppeteer: '7tWSD8hrYzuc9Lte7',
- cheerio: 'kk67IcZkKSSBTslXI',
-};
+const PYTHON_ACTOR_RUNNER = 'HH9rhkFXiZbheuq1V';
-const RunnableCodeBlock = ({ children, actor, hash, type, ...props }) => {
+const RunnableCodeBlock = ({ children, actor, hash, ...props }) => {
hash = hash ?? children.hash;
if (!children.code) {
@@ -26,7 +22,7 @@ Make sure you are importing the code block contents with the roa-loader.`);
);
}
- const href = `https://console.apify.com/actors/${actor ?? EXAMPLE_RUNNERS[type ?? 'playwright']}?runConfig=${hash}&asrc=run_on_apify`;
+ const href = `https://console.apify.com/actors/${actor ?? PYTHON_ACTOR_RUNNER}?runConfig=${hash}&asrc=run_on_apify`;
return (
diff --git a/website/src/components/RunnableCodeBlock.module.css b/website/src/components/RunnableCodeBlock.module.css
index 5cbeabc5..fce26f12 100644
--- a/website/src/components/RunnableCodeBlock.module.css
+++ b/website/src/components/RunnableCodeBlock.module.css
@@ -2,7 +2,7 @@
display: inline-block;
padding: 3px 10px;
position: absolute;
- top: 9px;
+ top: calc(var(--ifm-pre-padding) / 2);
right: 9px;
z-index: 1;
font-size: 16px;