From 6b5712b9a8df57bfe5a8d4f687f99c305d4e7463 Mon Sep 17 00:00:00 2001 From: Jan Buchar Date: Tue, 25 Feb 2025 13:08:16 +0100 Subject: [PATCH 1/5] docs: Add PPE guide --- docs/02_guides/code/actor_charge.py | 30 ++++++++++++ .../code/conditional_actor_charge.py | 18 ++++++++ docs/02_guides/pay_per_event.mdx | 46 +++++++++++++++++++ uv.lock | 2 +- 4 files changed, 95 insertions(+), 1 deletion(-) create mode 100644 docs/02_guides/code/actor_charge.py create mode 100644 docs/02_guides/code/conditional_actor_charge.py create mode 100644 docs/02_guides/pay_per_event.mdx diff --git a/docs/02_guides/code/actor_charge.py b/docs/02_guides/code/actor_charge.py new file mode 100644 index 00000000..d8d771a3 --- /dev/null +++ b/docs/02_guides/code/actor_charge.py @@ -0,0 +1,30 @@ +from apify import Actor + + +async def main() -> None: + async with Actor: + # highlight-start + # Charge for a single occurence of an event + await Actor.charge(event_name='init') + # highlight-end + + # Prepare some mock results + result = [ + {'word': 'Lorem'}, + {'word': 'Ipsum'}, + {'word': 'Dolor'}, + {'word': 'Sit'}, + {'word': 'Amet'}, + ] + # highlight-start + # Shortcut for charging for each pushed dataset item + await Actor.push_data(result, 'result-item') + # highlight-end + + # highlight-start + # Or you can charge for a given number of events manually + await Actor.charge( + event_name='result-item', + count=len(result), + ) + # highlight-end diff --git a/docs/02_guides/code/conditional_actor_charge.py b/docs/02_guides/code/conditional_actor_charge.py new file mode 100644 index 00000000..926c591d --- /dev/null +++ b/docs/02_guides/code/conditional_actor_charge.py @@ -0,0 +1,18 @@ +from apify import Actor + + +async def main() -> None: + async with Actor: + # Check the dataset because there might already be items + # if the run migrated or was restarted + default_dataset = await Actor.open_dataset() + dataset_info = await default_dataset.get_info() + charged_items = 
dataset_info.item_count if dataset_info else 0 + + # highlight-start + if Actor.get_charging_manager().get_pricing_info().is_pay_per_event: + # highlight-end + await Actor.push_data({'hello': 'world'}, 'dataset-item') + elif charged_items < (Actor.config.max_paid_dataset_items or 0): + await Actor.push_data({'hello': 'world'}) + charged_items += 1 diff --git a/docs/02_guides/pay_per_event.mdx b/docs/02_guides/pay_per_event.mdx new file mode 100644 index 00000000..7482f10d --- /dev/null +++ b/docs/02_guides/pay_per_event.mdx @@ -0,0 +1,46 @@ +--- +id: pay-per-event +title: Pay-per-event Monetization +description: Monetize your Actors using the pay-per-event pricing model +--- + +import ActorChargeSource from '!!raw-loader!./code/actor_charge.py'; +import ConditionalActorChargeSource from '!!raw-loader!./code/conditional_actor_charge.py'; +import ApiLink from '@site/src/components/ApiLink'; +import CodeBlock from '@theme/CodeBlock'; + +Apify provides several [pricing models](https://docs.apify.com/platform/actors/publishing/monetize) for monetizing your Actors. The most recent and most flexible one is [pay-per-event](https://docs.apify.com/platform/actors/running/actors-in-store#pay-per-event), which lets you charge your users programmatically directly from your Actor. As the name suggests, you may charge the users each time a specific event occurs, for example a call to an external API or when you return a result. + +To use the pay-per-event pricing model, you first need to [set it up](https://docs.apify.com/platform/actors/running/actors-in-store#pay-per-event) for your Actor in the Apify console. After that, you're free to start charging for events. + +## Charging for events + +After monetization is set in the Apify console, you can add `Actor.charge` calls to your code and start monetizing! + + +{ActorChargeSource} + + +Then you just push your code to Apify and that's it! 
The SDK will even keep track of the max total charge setting for you, so you will not provide more value than what the user chose to pay for. + +If you need finer control over charging, you can access call `Actor.get_charging_manager()` to access the `ChargingManager`, which can provide more detailed information - for example how many events of each type can be charged before reaching the configured limit. + +## Transitioning from a different pricing model + +When you plan to start using the pay-per-event pricing model for an Actor that is already monetized with a different pricing model, your source code will need support both pricing models during the transition period enforced by the Apify platform. Arguably the most frequent case is the transition from the pay-per-result model which utilizes the `ACTOR_MAX_PAID_DATASET_ITEMS` environment variable to prevent returning unpaid dataset items. The following is an example how to handle such scenarios. The key part is the `ChargingManager.get_pricing_info` method which returns information about the current pricing model. + + +{ConditionalActorChargeSource} + + +## Local development + +It is encouraged to test your monetization code on your machine before releasing it to the public. To tell your Actor that it should work in pay-per-event mode, pass it the `ACTOR_TEST_PAY_PER_EVENT` environment variable: + +```shell +ACTOR_TEST_PAY_PER_EVENT=true npm start +``` + +If you also wish to see a log of all the events charged throughout the run, the Apify SDK keeps a log of charged events in a so called charging dataset. Your charging dataset can be found under the `charging_log` name (unless you change your storage settings, this dataset is stored in `storage/datasets/charging_log/`). Please note that this log is not available when running the Actor in production on the Apify platform. + +Because pricing configuration is stored by the Apify platform, all events will have a default price of $1. 
diff --git a/uv.lock b/uv.lock index ce374801..6e690635 100644 --- a/uv.lock +++ b/uv.lock @@ -37,7 +37,7 @@ wheels = [ [[package]] name = "apify" -version = "2.3.0" +version = "2.3.1" source = { editable = "." } dependencies = [ { name = "apify-client" }, From c3e0a8b30cb0f90c73118fddac9f4add02985553 Mon Sep 17 00:00:00 2001 From: Jan Buchar Date: Tue, 25 Feb 2025 13:47:49 +0100 Subject: [PATCH 2/5] Add components --- website/src/components/ApiLink.jsx | 32 ++++++ website/src/components/Gradients.jsx | 20 ++++ website/src/components/Highlights.jsx | 104 ++++++++++++++++++ website/src/components/Highlights.module.css | 46 ++++++++ website/src/components/RunnableCodeBlock.jsx | 44 ++++++++ .../components/RunnableCodeBlock.module.css | 39 +++++++ 6 files changed, 285 insertions(+) create mode 100644 website/src/components/ApiLink.jsx create mode 100644 website/src/components/Gradients.jsx create mode 100644 website/src/components/Highlights.jsx create mode 100644 website/src/components/Highlights.module.css create mode 100644 website/src/components/RunnableCodeBlock.jsx create mode 100644 website/src/components/RunnableCodeBlock.module.css diff --git a/website/src/components/ApiLink.jsx b/website/src/components/ApiLink.jsx new file mode 100644 index 00000000..af50edc5 --- /dev/null +++ b/website/src/components/ApiLink.jsx @@ -0,0 +1,32 @@ +import React from 'react'; +import Link from '@docusaurus/Link'; +// eslint-disable-next-line import/no-extraneous-dependencies +import { useDocsVersion } from '@docusaurus/theme-common/internal'; +import useDocusaurusContext from '@docusaurus/useDocusaurusContext'; + +// const pkg = require('../../../packages/crawlee/package.json'); +// +// const [v1, v2] = pkg.version.split('.'); +// const stable = [v1, v2].join('.'); + +const ApiLink = ({ to, children }) => { + return ( + {children} + ); + + // const version = useDocsVersion(); + // const { siteConfig } = useDocusaurusContext(); + // + // // if 
(siteConfig.presets[0][1].docs.disableVersioning || version.version === stable) { + // if (siteConfig.presets[0][1].docs.disableVersioning) { + // return ( + // {children} + // ); + // } + // + // return ( + // {children} + // ); +}; + +export default ApiLink; diff --git a/website/src/components/Gradients.jsx b/website/src/components/Gradients.jsx new file mode 100644 index 00000000..aadc0fe4 --- /dev/null +++ b/website/src/components/Gradients.jsx @@ -0,0 +1,20 @@ +import React from 'react'; + +export default function Gradients() { + return ( + + + + + + + + + + + + + ); +} diff --git a/website/src/components/Highlights.jsx b/website/src/components/Highlights.jsx new file mode 100644 index 00000000..f58ee1f5 --- /dev/null +++ b/website/src/components/Highlights.jsx @@ -0,0 +1,104 @@ +import React from 'react'; +import clsx from 'clsx'; +import styles from './Highlights.module.css'; +import Gradients from './Gradients'; + +const FeatureList = [ + { + title: 'Python with type hints', + Svg: require('../../static/img/features/runs-on-py.svg').default, + description: ( + <> + Crawlee for Python is written in a modern way using type hints, providing code completion in your IDE + and helping you catch bugs early on build time. + + ), + }, + // { + // title: 'HTTP scraping', + // Svg: require('../../static/img/features/fingerprints.svg').default, + // description: ( + // <> + // Crawlee makes HTTP requests that mimic browser headers and TLS fingerprints. + // It also rotates them automatically based on data about real-world traffic. Popular HTML + // parsers Cheerio  + // and JSDOM are included. + // + // ), + // }, + { + title: 'Headless browsers', + Svg: require('../../static/img/features/works-everywhere.svg').default, + description: ( + <> + Switch your crawlers from HTTP to a headless browser in 3 lines of code. + Crawlee builds on top of Playwright and adds its own features. Chrome, Firefox and more. 
+ + ), + + // TODO: this is not true yet + // Crawlee builds on top of Playwright and adds its own anti-blocking features and human-like fingerprints. Chrome, Firefox and more. + }, + { + title: 'Automatic scaling and proxy management', + Svg: require('../../static/img/features/auto-scaling.svg').default, + description: ( + <> + Crawlee automatically manages concurrency based on available system resources and  + smartly rotates proxies. + Proxies that often time-out, return network errors or bad HTTP codes like 401 or 403 are discarded. + + ), + }, + // { + // title: 'Queue and Storage', + // Svg: require('../../static/img/features/storage.svg').default, + // description: ( + // <> + // You can save files, screenshots and JSON results to disk with one line of code + // or plug an adapter for your DB. Your URLs are kept in a queue that ensures their + // uniqueness and that you don't lose progress when something fails. + // + // ), + // }, + // { + // title: 'Helpful utils and configurability', + // Svg: require('../../static/img/features/node-requests.svg').default, + // description: ( + // <> + // Crawlee includes tools for extracting social handles or phone numbers, infinite scrolling, blocking + // unwanted assets and many more. It works great out of the box, but also provides  + // rich configuration options. + // + // ), + // }, +]; + +function Feature({ Svg, title, description }) { + return ( +
+
+
+ {Svg ? : null} +
+

{title}

+

{description}

+
+
+ ); +} + +export default function Highlights() { + return ( +
+ +
+
+ {FeatureList.map((props, idx) => ( + + ))} +
+
+
+ ); +} diff --git a/website/src/components/Highlights.module.css b/website/src/components/Highlights.module.css new file mode 100644 index 00000000..447d29da --- /dev/null +++ b/website/src/components/Highlights.module.css @@ -0,0 +1,46 @@ +.features { + display: flex; + align-items: center; + width: 100%; + font-size: 18px; + line-height: 32px; + color: #41465d; +} + +html[data-theme="dark"] .features { + color: #b3b8d2; +} + +.feature svg { + height: 60px; + width: 60px; +} + +.features svg path:nth-child(1) { + fill: url(#gradient-1) !important; +} + +.features svg path:nth-child(n + 1) { + fill: url(#gradient-2) !important; +} + +html[data-theme="dark"] .featureIcon { + background: #272c3d; +} + +.featureIcon { + display: flex; + justify-content: center; + align-items: center; + margin-bottom: 24px; + border-radius: 8px; + background-color: #f2f3fb; + width: 48px; + height: 48px; +} + +.features h3 { + font-weight: 700; + font-size: 18px; + line-height: 32px; +} diff --git a/website/src/components/RunnableCodeBlock.jsx b/website/src/components/RunnableCodeBlock.jsx new file mode 100644 index 00000000..c7b8e2d6 --- /dev/null +++ b/website/src/components/RunnableCodeBlock.jsx @@ -0,0 +1,44 @@ +import React from 'react'; +import clsx from 'clsx'; +import CodeBlock from '@theme/CodeBlock'; +import Link from '@docusaurus/Link'; +import styles from './RunnableCodeBlock.module.css'; + +const EXAMPLE_RUNNERS = { + playwright: '6i5QsHBMtm3hKph70', + puppeteer: '7tWSD8hrYzuc9Lte7', + cheerio: 'kk67IcZkKSSBTslXI', +}; + +const RunnableCodeBlock = ({ children, actor, hash, type, ...props }) => { + hash = hash ?? children.hash; + + if (!children.code) { + throw new Error(`RunnableCodeBlock requires "code" and "hash" props +Make sure you are importing the code block contents with the roa-loader.`); + } + + if (!hash) { + return ( + + { children.code } + + ); + } + + const href = `https://console.apify.com/actors/${actor ?? EXAMPLE_RUNNERS[type ?? 
'playwright']}?runConfig=${hash}&asrc=run_on_apify`; + + return ( +
+ + Run on + + + + { children.code } + +
+ ); +}; + +export default RunnableCodeBlock; diff --git a/website/src/components/RunnableCodeBlock.module.css b/website/src/components/RunnableCodeBlock.module.css new file mode 100644 index 00000000..5cbeabc5 --- /dev/null +++ b/website/src/components/RunnableCodeBlock.module.css @@ -0,0 +1,39 @@ +.button { + display: inline-block; + padding: 3px 10px; + position: absolute; + top: 9px; + right: 9px; + z-index: 1; + font-size: 16px; + line-height: 28px; + background: var(--prism-background-color); + color: var(--prism-color); + border: 1px solid var(--ifm-color-emphasis-300); + border-radius: var(--ifm-global-radius); + opacity: 0.7; + font-weight: 600; + width: 155px; +} + +@media screen and (max-width: 768px) { + .button { + display: none; + } +} + +.button svg { + height: 20px; + position: absolute; + top: 7.5px; + right: 0; +} + +.button:hover { + opacity: 1; + color: var(--prism-color); +} + +.container { + position: relative; +} From 326ef1bbc8d6957470f6eecfdff2b56d1d9bbe72 Mon Sep 17 00:00:00 2001 From: Jan Buchar Date: Thu, 6 Mar 2025 14:30:32 +0100 Subject: [PATCH 3/5] Update docs/02_guides/pay_per_event.mdx Co-authored-by: Vlada Dusek --- docs/02_guides/pay_per_event.mdx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/02_guides/pay_per_event.mdx b/docs/02_guides/pay_per_event.mdx index 7482f10d..f7e5f551 100644 --- a/docs/02_guides/pay_per_event.mdx +++ b/docs/02_guides/pay_per_event.mdx @@ -1,6 +1,6 @@ --- id: pay-per-event -title: Pay-per-event Monetization +title: Pay-per-event monetization description: Monetize your Actors using the pay-per-event pricing model --- From c92e6e363391f8aa1206958d5e86e06264c2d0ca Mon Sep 17 00:00:00 2001 From: Jan Buchar Date: Thu, 6 Mar 2025 17:34:25 +0100 Subject: [PATCH 4/5] Address review feedback --- .../pay_per_event.mdx => 03_concepts/12_pay_per_event.mdx} | 2 +- docs/{02_guides => 03_concepts}/code/actor_charge.py | 0 .../{02_guides => 03_concepts}/code/conditional_actor_charge.py | 0 
website/src/components/ApiLink.jsx | 2 +- 4 files changed, 2 insertions(+), 2 deletions(-) rename docs/{02_guides/pay_per_event.mdx => 03_concepts/12_pay_per_event.mdx} (98%) rename docs/{02_guides => 03_concepts}/code/actor_charge.py (100%) rename docs/{02_guides => 03_concepts}/code/conditional_actor_charge.py (100%) diff --git a/docs/02_guides/pay_per_event.mdx b/docs/03_concepts/12_pay_per_event.mdx similarity index 98% rename from docs/02_guides/pay_per_event.mdx rename to docs/03_concepts/12_pay_per_event.mdx index f7e5f551..01cf922c 100644 --- a/docs/02_guides/pay_per_event.mdx +++ b/docs/03_concepts/12_pay_per_event.mdx @@ -38,7 +38,7 @@ When you plan to start using the pay-per-event pricing model for an Actor that i It is encouraged to test your monetization code on your machine before releasing it to the public. To tell your Actor that it should work in pay-per-event mode, pass it the `ACTOR_TEST_PAY_PER_EVENT` environment variable: ```shell -ACTOR_TEST_PAY_PER_EVENT=true npm start +ACTOR_TEST_PAY_PER_EVENT=true python -m youractor ``` If you also wish to see a log of all the events charged throughout the run, the Apify SDK keeps a log of charged events in a so called charging dataset. Your charging dataset can be found under the `charging_log` name (unless you change your storage settings, this dataset is stored in `storage/datasets/charging_log/`). Please note that this log is not available when running the Actor in production on the Apify platform. 
diff --git a/docs/02_guides/code/actor_charge.py b/docs/03_concepts/code/actor_charge.py similarity index 100% rename from docs/02_guides/code/actor_charge.py rename to docs/03_concepts/code/actor_charge.py diff --git a/docs/02_guides/code/conditional_actor_charge.py b/docs/03_concepts/code/conditional_actor_charge.py similarity index 100% rename from docs/02_guides/code/conditional_actor_charge.py rename to docs/03_concepts/code/conditional_actor_charge.py diff --git a/website/src/components/ApiLink.jsx b/website/src/components/ApiLink.jsx index af50edc5..ffe4e1a8 100644 --- a/website/src/components/ApiLink.jsx +++ b/website/src/components/ApiLink.jsx @@ -11,7 +11,7 @@ import useDocusaurusContext from '@docusaurus/useDocusaurusContext'; const ApiLink = ({ to, children }) => { return ( - {children} + {children} ); // const version = useDocsVersion(); From d099e173c372b820de968fa183daca0654f93771 Mon Sep 17 00:00:00 2001 From: Jan Buchar Date: Fri, 7 Mar 2025 12:06:13 +0100 Subject: [PATCH 5/5] review feedback --- docs/03_concepts/12_pay_per_event.mdx | 2 +- website/src/components/ApiLink.jsx | 22 ---------------------- 2 files changed, 1 insertion(+), 23 deletions(-) diff --git a/docs/03_concepts/12_pay_per_event.mdx b/docs/03_concepts/12_pay_per_event.mdx index 01cf922c..0d8edbbc 100644 --- a/docs/03_concepts/12_pay_per_event.mdx +++ b/docs/03_concepts/12_pay_per_event.mdx @@ -27,7 +27,7 @@ If you need finer control over charging, you can access call `ChargingManager.get_pricing_info` method which returns information about the current pricing model. +When you plan to start using the pay-per-event pricing model for an Actor that is already monetized with a different pricing model, your source code will need to support both pricing models during the transition period enforced by the Apify platform.
Arguably the most frequent case is the transition from the pay-per-result model which utilizes the `ACTOR_MAX_PAID_DATASET_ITEMS` environment variable to prevent returning unpaid dataset items. The following is an example of how to handle such scenarios. The key part is the `ChargingManager.get_pricing_info()` method which returns information about the current pricing model. {ConditionalActorChargeSource} diff --git a/website/src/components/ApiLink.jsx b/website/src/components/ApiLink.jsx index ffe4e1a8..44bba352 100644 --- a/website/src/components/ApiLink.jsx +++ b/website/src/components/ApiLink.jsx @@ -1,32 +1,10 @@ import React from 'react'; import Link from '@docusaurus/Link'; -// eslint-disable-next-line import/no-extraneous-dependencies -import { useDocsVersion } from '@docusaurus/theme-common/internal'; -import useDocusaurusContext from '@docusaurus/useDocusaurusContext'; - -// const pkg = require('../../../packages/crawlee/package.json'); -// -// const [v1, v2] = pkg.version.split('.'); -// const stable = [v1, v2].join('.'); const ApiLink = ({ to, children }) => { return ( {children} ); - - // const version = useDocsVersion(); - // const { siteConfig } = useDocusaurusContext(); - // - // // if (siteConfig.presets[0][1].docs.disableVersioning || version.version === stable) { - // if (siteConfig.presets[0][1].docs.disableVersioning) { - // return ( - // {children} - // ); - // } - // - // return ( - // {children} - // ); }; export default ApiLink;