diff --git a/docs/03_concepts/01_actor_lifecycle.mdx b/docs/02_concepts/01_actor_lifecycle.mdx
similarity index 100%
rename from docs/03_concepts/01_actor_lifecycle.mdx
rename to docs/02_concepts/01_actor_lifecycle.mdx
diff --git a/docs/03_concepts/02_actor_input.mdx b/docs/02_concepts/02_actor_input.mdx
similarity index 100%
rename from docs/03_concepts/02_actor_input.mdx
rename to docs/02_concepts/02_actor_input.mdx
diff --git a/docs/03_concepts/03_storages.mdx b/docs/02_concepts/03_storages.mdx
similarity index 100%
rename from docs/03_concepts/03_storages.mdx
rename to docs/02_concepts/03_storages.mdx
diff --git a/docs/03_concepts/04_actor_events.mdx b/docs/02_concepts/04_actor_events.mdx
similarity index 100%
rename from docs/03_concepts/04_actor_events.mdx
rename to docs/02_concepts/04_actor_events.mdx
diff --git a/docs/03_concepts/05_proxy_management.mdx b/docs/02_concepts/05_proxy_management.mdx
similarity index 100%
rename from docs/03_concepts/05_proxy_management.mdx
rename to docs/02_concepts/05_proxy_management.mdx
diff --git a/docs/03_concepts/06_interacting_with_other_actors.mdx b/docs/02_concepts/06_interacting_with_other_actors.mdx
similarity index 100%
rename from docs/03_concepts/06_interacting_with_other_actors.mdx
rename to docs/02_concepts/06_interacting_with_other_actors.mdx
diff --git a/docs/03_concepts/07_webhooks.mdx b/docs/02_concepts/07_webhooks.mdx
similarity index 100%
rename from docs/03_concepts/07_webhooks.mdx
rename to docs/02_concepts/07_webhooks.mdx
diff --git a/docs/03_concepts/08_access_apify_api.mdx b/docs/02_concepts/08_access_apify_api.mdx
similarity index 100%
rename from docs/03_concepts/08_access_apify_api.mdx
rename to docs/02_concepts/08_access_apify_api.mdx
diff --git a/docs/03_concepts/09_running_webserver.mdx b/docs/02_concepts/09_running_webserver.mdx
similarity index 100%
rename from docs/03_concepts/09_running_webserver.mdx
rename to docs/02_concepts/09_running_webserver.mdx
diff --git a/docs/03_concepts/10_logging.mdx b/docs/02_concepts/10_logging.mdx
similarity index 100%
rename from docs/03_concepts/10_logging.mdx
rename to docs/02_concepts/10_logging.mdx
diff --git a/docs/03_concepts/11_configuration.mdx b/docs/02_concepts/11_configuration.mdx
similarity index 100%
rename from docs/03_concepts/11_configuration.mdx
rename to docs/02_concepts/11_configuration.mdx
diff --git a/docs/03_concepts/12_pay_per_event.mdx b/docs/02_concepts/12_pay_per_event.mdx
similarity index 100%
rename from docs/03_concepts/12_pay_per_event.mdx
rename to docs/02_concepts/12_pay_per_event.mdx
diff --git a/docs/03_concepts/code/01_context_manager.py b/docs/02_concepts/code/01_context_manager.py
similarity index 100%
rename from docs/03_concepts/code/01_context_manager.py
rename to docs/02_concepts/code/01_context_manager.py
diff --git a/docs/03_concepts/code/01_init_exit.py b/docs/02_concepts/code/01_init_exit.py
similarity index 100%
rename from docs/03_concepts/code/01_init_exit.py
rename to docs/02_concepts/code/01_init_exit.py
diff --git a/docs/03_concepts/code/01_reboot.py b/docs/02_concepts/code/01_reboot.py
similarity index 100%
rename from docs/03_concepts/code/01_reboot.py
rename to docs/02_concepts/code/01_reboot.py
diff --git a/docs/03_concepts/code/01_status_message.py b/docs/02_concepts/code/01_status_message.py
similarity index 100%
rename from docs/03_concepts/code/01_status_message.py
rename to docs/02_concepts/code/01_status_message.py
diff --git a/docs/03_concepts/code/02_input.py b/docs/02_concepts/code/02_input.py
similarity index 100%
rename from docs/03_concepts/code/02_input.py
rename to docs/02_concepts/code/02_input.py
diff --git a/docs/03_concepts/code/03_dataset_exports.py b/docs/02_concepts/code/03_dataset_exports.py
similarity index 100%
rename from docs/03_concepts/code/03_dataset_exports.py
rename to docs/02_concepts/code/03_dataset_exports.py
diff --git a/docs/03_concepts/code/03_dataset_read_write.py b/docs/02_concepts/code/03_dataset_read_write.py
similarity index 100%
rename from docs/03_concepts/code/03_dataset_read_write.py
rename to docs/02_concepts/code/03_dataset_read_write.py
diff --git a/docs/03_concepts/code/03_deleting_storages.py b/docs/02_concepts/code/03_deleting_storages.py
similarity index 100%
rename from docs/03_concepts/code/03_deleting_storages.py
rename to docs/02_concepts/code/03_deleting_storages.py
diff --git a/docs/03_concepts/code/03_kvs_iterating.py b/docs/02_concepts/code/03_kvs_iterating.py
similarity index 100%
rename from docs/03_concepts/code/03_kvs_iterating.py
rename to docs/02_concepts/code/03_kvs_iterating.py
diff --git a/docs/03_concepts/code/03_kvs_public_url.py b/docs/02_concepts/code/03_kvs_public_url.py
similarity index 100%
rename from docs/03_concepts/code/03_kvs_public_url.py
rename to docs/02_concepts/code/03_kvs_public_url.py
diff --git a/docs/03_concepts/code/03_kvs_read_write.py b/docs/02_concepts/code/03_kvs_read_write.py
similarity index 100%
rename from docs/03_concepts/code/03_kvs_read_write.py
rename to docs/02_concepts/code/03_kvs_read_write.py
diff --git a/docs/03_concepts/code/03_opening_storages.py b/docs/02_concepts/code/03_opening_storages.py
similarity index 100%
rename from docs/03_concepts/code/03_opening_storages.py
rename to docs/02_concepts/code/03_opening_storages.py
diff --git a/docs/03_concepts/code/03_rq.py b/docs/02_concepts/code/03_rq.py
similarity index 100%
rename from docs/03_concepts/code/03_rq.py
rename to docs/02_concepts/code/03_rq.py
diff --git a/docs/03_concepts/code/04_actor_events.py b/docs/02_concepts/code/04_actor_events.py
similarity index 100%
rename from docs/03_concepts/code/04_actor_events.py
rename to docs/02_concepts/code/04_actor_events.py
diff --git a/docs/03_concepts/code/05_apify_proxy.py b/docs/02_concepts/code/05_apify_proxy.py
similarity index 100%
rename from docs/03_concepts/code/05_apify_proxy.py
rename to docs/02_concepts/code/05_apify_proxy.py
diff --git a/docs/03_concepts/code/05_apify_proxy_config.py b/docs/02_concepts/code/05_apify_proxy_config.py
similarity index 100%
rename from docs/03_concepts/code/05_apify_proxy_config.py
rename to docs/02_concepts/code/05_apify_proxy_config.py
diff --git a/docs/03_concepts/code/05_custom_proxy.py b/docs/02_concepts/code/05_custom_proxy.py
similarity index 100%
rename from docs/03_concepts/code/05_custom_proxy.py
rename to docs/02_concepts/code/05_custom_proxy.py
diff --git a/docs/03_concepts/code/05_custom_proxy_function.py b/docs/02_concepts/code/05_custom_proxy_function.py
similarity index 100%
rename from docs/03_concepts/code/05_custom_proxy_function.py
rename to docs/02_concepts/code/05_custom_proxy_function.py
diff --git a/docs/03_concepts/code/05_proxy_actor_input.py b/docs/02_concepts/code/05_proxy_actor_input.py
similarity index 100%
rename from docs/03_concepts/code/05_proxy_actor_input.py
rename to docs/02_concepts/code/05_proxy_actor_input.py
diff --git a/docs/03_concepts/code/05_proxy_httpx.py b/docs/02_concepts/code/05_proxy_httpx.py
similarity index 100%
rename from docs/03_concepts/code/05_proxy_httpx.py
rename to docs/02_concepts/code/05_proxy_httpx.py
diff --git a/docs/03_concepts/code/05_proxy_rotation.py b/docs/02_concepts/code/05_proxy_rotation.py
similarity index 100%
rename from docs/03_concepts/code/05_proxy_rotation.py
rename to docs/02_concepts/code/05_proxy_rotation.py
diff --git a/docs/03_concepts/code/06_interacting_call.py b/docs/02_concepts/code/06_interacting_call.py
similarity index 100%
rename from docs/03_concepts/code/06_interacting_call.py
rename to docs/02_concepts/code/06_interacting_call.py
diff --git a/docs/03_concepts/code/06_interacting_call_task.py b/docs/02_concepts/code/06_interacting_call_task.py
similarity index 100%
rename from docs/03_concepts/code/06_interacting_call_task.py
rename to docs/02_concepts/code/06_interacting_call_task.py
diff --git a/docs/03_concepts/code/06_interacting_metamorph.py b/docs/02_concepts/code/06_interacting_metamorph.py
similarity index 100%
rename from docs/03_concepts/code/06_interacting_metamorph.py
rename to docs/02_concepts/code/06_interacting_metamorph.py
diff --git a/docs/03_concepts/code/06_interacting_start.py b/docs/02_concepts/code/06_interacting_start.py
similarity index 100%
rename from docs/03_concepts/code/06_interacting_start.py
rename to docs/02_concepts/code/06_interacting_start.py
diff --git a/docs/03_concepts/code/07_webhook.py b/docs/02_concepts/code/07_webhook.py
similarity index 100%
rename from docs/03_concepts/code/07_webhook.py
rename to docs/02_concepts/code/07_webhook.py
diff --git a/docs/03_concepts/code/07_webhook_preventing.py b/docs/02_concepts/code/07_webhook_preventing.py
similarity index 100%
rename from docs/03_concepts/code/07_webhook_preventing.py
rename to docs/02_concepts/code/07_webhook_preventing.py
diff --git a/docs/03_concepts/code/08_actor_client.py b/docs/02_concepts/code/08_actor_client.py
similarity index 100%
rename from docs/03_concepts/code/08_actor_client.py
rename to docs/02_concepts/code/08_actor_client.py
diff --git a/docs/03_concepts/code/08_actor_new_client.py b/docs/02_concepts/code/08_actor_new_client.py
similarity index 100%
rename from docs/03_concepts/code/08_actor_new_client.py
rename to docs/02_concepts/code/08_actor_new_client.py
diff --git a/docs/03_concepts/code/09_webserver.py b/docs/02_concepts/code/09_webserver.py
similarity index 100%
rename from docs/03_concepts/code/09_webserver.py
rename to docs/02_concepts/code/09_webserver.py
diff --git a/docs/03_concepts/code/10_log_config.py b/docs/02_concepts/code/10_log_config.py
similarity index 100%
rename from docs/03_concepts/code/10_log_config.py
rename to docs/02_concepts/code/10_log_config.py
diff --git a/docs/03_concepts/code/10_logger_usage.py b/docs/02_concepts/code/10_logger_usage.py
similarity index 100%
rename from docs/03_concepts/code/10_logger_usage.py
rename to docs/02_concepts/code/10_logger_usage.py
diff --git a/docs/03_concepts/code/10_redirect_log.py b/docs/02_concepts/code/10_redirect_log.py
similarity index 100%
rename from docs/03_concepts/code/10_redirect_log.py
rename to docs/02_concepts/code/10_redirect_log.py
diff --git a/docs/03_concepts/code/10_redirect_log_existing_run.py b/docs/02_concepts/code/10_redirect_log_existing_run.py
similarity index 100%
rename from docs/03_concepts/code/10_redirect_log_existing_run.py
rename to docs/02_concepts/code/10_redirect_log_existing_run.py
diff --git a/docs/03_concepts/code/11_config.py b/docs/02_concepts/code/11_config.py
similarity index 100%
rename from docs/03_concepts/code/11_config.py
rename to docs/02_concepts/code/11_config.py
diff --git a/docs/03_concepts/code/actor_charge.py b/docs/02_concepts/code/actor_charge.py
similarity index 100%
rename from docs/03_concepts/code/actor_charge.py
rename to docs/02_concepts/code/actor_charge.py
diff --git a/docs/03_concepts/code/conditional_actor_charge.py b/docs/02_concepts/code/conditional_actor_charge.py
similarity index 100%
rename from docs/03_concepts/code/conditional_actor_charge.py
rename to docs/02_concepts/code/conditional_actor_charge.py
diff --git a/docs/02_guides/01_beautifulsoup_httpx.mdx b/docs/02_guides/01_beautifulsoup_httpx.mdx
deleted file mode 100644
index 4ecabd6e..00000000
--- a/docs/02_guides/01_beautifulsoup_httpx.mdx
+++ /dev/null
@@ -1,30 +0,0 @@
----
-id: beautifulsoup-httpx
-title: Using BeautifulSoup with HTTPX
----
-
-import CodeBlock from '@theme/CodeBlock';
-
-import BeautifulSoupHttpxExample from '!!raw-loader!./code/01_beautifulsoup_httpx.py';
-
-In this guide, you'll learn how to use the [BeautifulSoup](https://www.crummy.com/software/BeautifulSoup/) library with the [HTTPX](https://www.python-httpx.org/) library in your Apify Actors.
-
-## Introduction
-
-`BeautifulSoup` is a Python library for extracting data from HTML and XML files. It provides simple methods and Pythonic idioms for navigating, searching, and modifying a website's element tree, enabling efficient data extraction.
-
-`HTTPX` is a modern, high-level HTTP client library for Python. It provides a simple interface for making HTTP requests and supports both synchronous and asynchronous requests.
-
-To create an `Actor` which uses those libraries, start from the [BeautifulSoup & Python](https://apify.com/templates/categories/python) Actor template. This template includes the `BeautifulSoup` and `HTTPX` libraries preinstalled, allowing you to begin development immediately.
-
-## Example Actor
-
-Below is a simple Actor that recursively scrapes titles from all linked websites, up to a specified maximum depth, starting from URLs provided in the Actor input. It uses `HTTPX` for fetching pages and `BeautifulSoup` for parsing their content to extract titles and links to other pages.
-
-
- {BeautifulSoupHttpxExample}
-
-
-## Conclusion
-
-In this guide, you learned how to use the `BeautifulSoup` with the `HTTPX` in your Apify Actors. By combining these libraries, you can efficiently extract data from HTML or XML files, making it easy to build web scraping tasks in Python. See the [Actor templates](https://apify.com/templates/categories/python) to get started with your own scraping tasks. If you have questions or need assistance, feel free to reach out on our [GitHub](https://github.com/apify/apify-sdk-python) or join our [Discord community](https://discord.com/invite/jyEM2PRvMU). Happy scraping!
diff --git a/docs/02_guides/02_crawlee.mdx b/docs/02_guides/02_crawlee.mdx
deleted file mode 100644
index b040cad2..00000000
--- a/docs/02_guides/02_crawlee.mdx
+++ /dev/null
@@ -1,37 +0,0 @@
----
-id: crawlee
-title: Using Crawlee
----
-
-import CodeBlock from '@theme/CodeBlock';
-
-import CrawleeBeautifulSoupExample from '!!raw-loader!./code/02_crawlee_beautifulsoup.py';
-import CrawleePlaywrightExample from '!!raw-loader!./code/02_crawlee_playwright.py';
-
-In this guide you'll learn how to use the [Crawlee](https://crawlee.dev/python) library in your Apify Actors.
-
-## Introduction
-
-`Crawlee` is a Python library for web scraping and browser automation that provides a robust and flexible framework for building web scraping tasks. It seamlessly integrates with the Apify platform and supports a variety of scraping techniques, from static HTML parsing to dynamic JavaScript-rendered content handling. Crawlee offers a range of crawlers, including HTTP-based crawlers like [`HttpCrawler`](https://crawlee.dev/python/api/class/HttpCrawler), [`BeautifulSoupCrawler`](https://crawlee.dev/python/api/class/BeautifulSoupCrawler) and [`ParselCrawler`](https://crawlee.dev/python/api/class/ParselCrawler), and browser-based crawlers like [`PlaywrightCrawler`](https://crawlee.dev/python/api/class/PlaywrightCrawler), to suit different scraping needs.
-
-In this guide, you'll learn how to use Crawlee with `BeautifulSoupCrawler` and `PlaywrightCrawler` to build Apify Actors for web scraping.
-
-## Actor with BeautifulSoupCrawler
-
-The `BeautifulSoupCrawler` is ideal for extracting data from static HTML pages. It uses `BeautifulSoup` for parsing and [`HttpxHttpClient`](https://crawlee.dev/python/api/class/HttpxHttpClient) for HTTP communication, ensuring efficient and lightweight scraping. If you do not need to execute JavaScript on the page, `BeautifulSoupCrawler` is a great choice for your scraping tasks. Below is an example of how to use `BeautifulSoupCrawler` in an Apify Actor.
-
-
- {CrawleeBeautifulSoupExample}
-
-
-## Actor with PlaywrightCrawler
-
-The `PlaywrightCrawler` is built for handling dynamic web pages that rely on JavaScript for content generation. Using the [Playwright](https://playwright.dev/) library, it provides a browser-based automation environment to interact with complex websites. Below is an example of how to use `PlaywrightCrawler` in an Apify Actor.
-
-
- {CrawleePlaywrightExample}
-
-
-## Conclusion
-
-In this guide, you learned how to use the `Crawlee` library in your Apify Actors. By using the `BeautifulSoupCrawler` and `PlaywrightCrawler` crawlers, you can efficiently scrape static or dynamic web pages, making it easy to build web scraping tasks in Python. See the [Actor templates](https://apify.com/templates/categories/python) to get started with your own scraping tasks. If you have questions or need assistance, feel free to reach out on our [GitHub](https://github.com/apify/apify-sdk-python) or join our [Discord community](https://discord.com/invite/jyEM2PRvMU). Happy scraping!
diff --git a/docs/02_guides/code/02_crawlee_beautifulsoup.py b/docs/02_guides/code/02_crawlee_beautifulsoup.py
deleted file mode 100644
index e2dba8a1..00000000
--- a/docs/02_guides/code/02_crawlee_beautifulsoup.py
+++ /dev/null
@@ -1,55 +0,0 @@
-from __future__ import annotations
-
-from crawlee.crawlers import BeautifulSoupCrawler, BeautifulSoupCrawlingContext
-
-from apify import Actor
-
-
-async def main() -> None:
- # Enter the context of the Actor.
- async with Actor:
- # Retrieve the Actor input, and use default values if not provided.
- actor_input = await Actor.get_input() or {}
- start_urls = [
- url.get('url')
- for url in actor_input.get(
- 'start_urls',
- [{'url': 'https://apify.com'}],
- )
- ]
-
- # Exit if no start URLs are provided.
- if not start_urls:
- Actor.log.info('No start URLs specified in Actor input, exiting...')
- await Actor.exit()
-
- # Create a crawler.
- crawler = BeautifulSoupCrawler(
- # Limit the crawl to max requests.
- # Remove or increase it for crawling all links.
- max_requests_per_crawl=50,
- )
-
- # Define a request handler, which will be called for every request.
- @crawler.router.default_handler
- async def request_handler(context: BeautifulSoupCrawlingContext) -> None:
- url = context.request.url
- Actor.log.info(f'Scraping {url}...')
-
- # Extract the desired data.
- data = {
- 'url': context.request.url,
- 'title': context.soup.title.string if context.soup.title else None,
- 'h1s': [h1.text for h1 in context.soup.find_all('h1')],
- 'h2s': [h2.text for h2 in context.soup.find_all('h2')],
- 'h3s': [h3.text for h3 in context.soup.find_all('h3')],
- }
-
- # Store the extracted data to the default dataset.
- await context.push_data(data)
-
- # Enqueue additional links found on the current page.
- await context.enqueue_links()
-
- # Run the crawler with the starting requests.
- await crawler.run(start_urls)
diff --git a/docs/02_guides/code/02_crawlee_playwright.py b/docs/02_guides/code/02_crawlee_playwright.py
deleted file mode 100644
index 2f0f110f..00000000
--- a/docs/02_guides/code/02_crawlee_playwright.py
+++ /dev/null
@@ -1,68 +0,0 @@
-from __future__ import annotations
-
-from crawlee.crawlers import PlaywrightCrawler, PlaywrightCrawlingContext
-
-from apify import Actor
-
-
-async def main() -> None:
- # Enter the context of the Actor.
- async with Actor:
- # Retrieve the Actor input, and use default values if not provided.
- actor_input = await Actor.get_input() or {}
- start_urls = [
- url.get('url')
- for url in actor_input.get(
- 'start_urls',
- [{'url': 'https://apify.com'}],
- )
- ]
-
- # Exit if no start URLs are provided.
- if not start_urls:
- Actor.log.info('No start URLs specified in Actor input, exiting...')
- await Actor.exit()
-
- # Create a crawler.
- crawler = PlaywrightCrawler(
- # Limit the crawl to max requests.
- # Remove or increase it for crawling all links.
- max_requests_per_crawl=50,
- headless=True,
- browser_launch_options={
- 'args': ['--disable-gpu'],
- },
- )
-
- # Define a request handler, which will be called for every request.
- @crawler.router.default_handler
- async def request_handler(context: PlaywrightCrawlingContext) -> None:
- url = context.request.url
- Actor.log.info(f'Scraping {url}...')
-
- # Extract the desired data.
- data = {
- 'url': context.request.url,
- 'title': await context.page.title(),
- 'h1s': [
- await h1.text_content()
- for h1 in await context.page.locator('h1').all()
- ],
- 'h2s': [
- await h2.text_content()
- for h2 in await context.page.locator('h2').all()
- ],
- 'h3s': [
- await h3.text_content()
- for h3 in await context.page.locator('h3').all()
- ],
- }
-
- # Store the extracted data to the default dataset.
- await context.push_data(data)
-
- # Enqueue additional links found on the current page.
- await context.enqueue_links()
-
- # Run the crawler with the starting requests.
- await crawler.run(start_urls)
diff --git a/docs/03_guides/01_beautifulsoup_httpx.mdx b/docs/03_guides/01_beautifulsoup_httpx.mdx
new file mode 100644
index 00000000..b6a69c01
--- /dev/null
+++ b/docs/03_guides/01_beautifulsoup_httpx.mdx
@@ -0,0 +1,30 @@
+---
+id: beautifulsoup-httpx
+title: Using BeautifulSoup with HTTPX
+---
+
+import CodeBlock from '@theme/CodeBlock';
+
+import BeautifulSoupHttpxExample from '!!raw-loader!./code/01_beautifulsoup_httpx.py';
+
+In this guide, you'll learn how to use the [BeautifulSoup](https://www.crummy.com/software/BeautifulSoup/) library with the [HTTPX](https://www.python-httpx.org/) library in your Apify Actors.
+
+## Introduction
+
+[BeautifulSoup](https://www.crummy.com/software/BeautifulSoup/) is a Python library for extracting data from HTML and XML files. It provides simple methods and Pythonic idioms for navigating, searching, and modifying a website's element tree, enabling efficient data extraction.
+
+[HTTPX](https://www.python-httpx.org/) is a modern, high-level HTTP client library for Python. It provides a simple interface for making HTTP requests and supports both synchronous and asynchronous requests.
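+
+As a quick illustration of how the two libraries fit together, here is a minimal, standalone sketch, separate from the Example Actor below; the `fetch_title` helper is just an illustrative name, not part of the SDK or the template.
+
+```python
+import asyncio
+
+import httpx
+from bs4 import BeautifulSoup
+
+
+async def fetch_title(url: str) -> str | None:
+    # Download the page with HTTPX and parse its <title> element with BeautifulSoup.
+    async with httpx.AsyncClient(follow_redirects=True) as client:
+        response = await client.get(url)
+    soup = BeautifulSoup(response.text, 'html.parser')
+    return soup.title.string if soup.title else None
+
+
+if __name__ == '__main__':
+    print(asyncio.run(fetch_title('https://apify.com')))
+```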
+
+To create an Actor that uses these libraries, start from the [BeautifulSoup & Python](https://apify.com/templates/categories/python) Actor template. This template includes the [BeautifulSoup](https://www.crummy.com/software/BeautifulSoup/) and [HTTPX](https://www.python-httpx.org/) libraries preinstalled, allowing you to begin development immediately.
+
+## Example Actor
+
+Below is a simple Actor that recursively scrapes titles from all linked websites, up to a specified maximum depth, starting from URLs provided in the Actor input. It uses [HTTPX](https://www.python-httpx.org/) for fetching pages and [BeautifulSoup](https://www.crummy.com/software/BeautifulSoup/) for parsing their content to extract titles and links to other pages.
+
+
+ {BeautifulSoupHttpxExample}
+
+
+## Conclusion
+
+In this guide, you learned how to use [BeautifulSoup](https://www.crummy.com/software/BeautifulSoup/) together with [HTTPX](https://www.python-httpx.org/) in your Apify Actors. By combining these libraries, you can efficiently extract data from HTML or XML files, making it easy to build web scraping tasks in Python. See the [Actor templates](https://apify.com/templates/categories/python) to get started with your own scraping tasks. If you have questions or need assistance, feel free to reach out on our [GitHub](https://github.com/apify/apify-sdk-python) or join our [Discord community](https://discord.com/invite/jyEM2PRvMU). Happy scraping!
diff --git a/docs/03_guides/02_parsel_impit.mdx b/docs/03_guides/02_parsel_impit.mdx
new file mode 100644
index 00000000..2ac4d610
--- /dev/null
+++ b/docs/03_guides/02_parsel_impit.mdx
@@ -0,0 +1,28 @@
+---
+id: parsel-impit
+title: Using Parsel with Impit
+---
+
+import CodeBlock from '@theme/CodeBlock';
+
+import ParselImpitExample from '!!raw-loader!./code/02_parsel_impit.py';
+
+In this guide, you'll learn how to combine the [Parsel](https://github.com/scrapy/parsel) and [Impit](https://github.com/apify/impit) libraries when building Apify Actors.
+
+## Introduction
+
+[Parsel](https://github.com/scrapy/parsel) is a Python library for extracting data from HTML and XML documents using CSS selectors and [XPath](https://en.wikipedia.org/wiki/XPath) expressions. It offers an intuitive API for navigating and extracting structured data, making it a popular choice for web scraping. It also generally delivers better performance than [BeautifulSoup](https://www.crummy.com/software/BeautifulSoup/).
+
+[Impit](https://github.com/apify/impit) is Apify's high-performance HTTP client for Python. It supports both synchronous and asynchronous workflows and is built for large-scale web scraping, where making thousands of requests efficiently is essential. With built-in browser impersonation and anti-blocking features, it simplifies handling modern websites.
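+
+To see how the pieces combine before diving into the full Actor, here is a minimal, hedged sketch that uses only the calls appearing in the example below ([Impit](https://github.com/apify/impit)'s `AsyncClient.get()` and [Parsel](https://github.com/scrapy/parsel)'s `Selector`); the `fetch_first_heading` helper is purely illustrative.
+
+```python
+import asyncio
+
+import impit
+import parsel
+
+
+async def fetch_first_heading(url: str) -> str | None:
+    # Fetch the page with Impit and query the HTML with a Parsel selector.
+    async with impit.AsyncClient() as client:
+        response = await client.get(url)
+    selector = parsel.Selector(text=response.text)
+    # The same data could be selected with the CSS selector 'h1::text'.
+    return selector.xpath('//h1/text()').get()
+
+
+if __name__ == '__main__':
+    print(asyncio.run(fetch_first_heading('https://apify.com')))
+```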
+
+## Example Actor
+
+The following example shows a simple Actor that recursively scrapes titles from linked pages, up to a user-defined maximum depth. It uses [Impit](https://github.com/apify/impit) to fetch pages and [Parsel](https://github.com/scrapy/parsel) to extract titles and discover new links.
+
+
+ {ParselImpitExample}
+
+
+## Conclusion
+
+In this guide, you learned how to use [Parsel](https://github.com/scrapy/parsel) with [Impit](https://github.com/apify/impit) in your Apify Actors. By combining these libraries, you get a powerful and efficient solution for web scraping: [Parsel](https://github.com/scrapy/parsel) provides excellent CSS selector and XPath support for data extraction, while [Impit](https://github.com/apify/impit) offers a fast and simple HTTP client built by Apify. This combination makes it easy to build scalable web scraping tasks in Python. See the [Actor templates](https://apify.com/templates/categories/python) to get started with your own scraping tasks. If you have questions or need assistance, feel free to reach out on our [GitHub](https://github.com/apify/apify-sdk-python) or join our [Discord community](https://discord.com/invite/jyEM2PRvMU). Happy scraping!
diff --git a/docs/02_guides/03_playwright.mdx b/docs/03_guides/03_playwright.mdx
similarity index 100%
rename from docs/02_guides/03_playwright.mdx
rename to docs/03_guides/03_playwright.mdx
diff --git a/docs/02_guides/04_selenium.mdx b/docs/03_guides/04_selenium.mdx
similarity index 100%
rename from docs/02_guides/04_selenium.mdx
rename to docs/03_guides/04_selenium.mdx
diff --git a/docs/03_guides/05_crawlee.mdx b/docs/03_guides/05_crawlee.mdx
new file mode 100644
index 00000000..6b513417
--- /dev/null
+++ b/docs/03_guides/05_crawlee.mdx
@@ -0,0 +1,46 @@
+---
+id: crawlee
+title: Using Crawlee
+---
+
+import CodeBlock from '@theme/CodeBlock';
+
+import CrawleeBeautifulSoupExample from '!!raw-loader!./code/05_crawlee_beautifulsoup.py';
+import CrawleeParselExample from '!!raw-loader!./code/05_crawlee_parsel.py';
+import CrawleePlaywrightExample from '!!raw-loader!./code/05_crawlee_playwright.py';
+
+In this guide, you'll learn how to use the [Crawlee](https://crawlee.dev/python) library in your Apify Actors.
+
+## Introduction
+
+[Crawlee](https://crawlee.dev/python) is a Python library for web scraping and browser automation that provides a robust and flexible framework for building web scraping tasks. It seamlessly integrates with the Apify platform and supports a variety of scraping techniques, from static HTML parsing to dynamic JavaScript-rendered content handling. Crawlee offers a range of crawlers, including HTTP-based crawlers like [`HttpCrawler`](https://crawlee.dev/python/api/class/HttpCrawler), [`BeautifulSoupCrawler`](https://crawlee.dev/python/api/class/BeautifulSoupCrawler) and [`ParselCrawler`](https://crawlee.dev/python/api/class/ParselCrawler), and browser-based crawlers like [`PlaywrightCrawler`](https://crawlee.dev/python/api/class/PlaywrightCrawler), to suit different scraping needs.
+
+In this guide, you'll learn how to use Crawlee with [`BeautifulSoupCrawler`](https://crawlee.dev/python/api/class/BeautifulSoupCrawler), [`ParselCrawler`](https://crawlee.dev/python/api/class/ParselCrawler), and [`PlaywrightCrawler`](https://crawlee.dev/python/api/class/PlaywrightCrawler) to build Apify Actors for web scraping.
+
+## Actor with BeautifulSoupCrawler
+
+The [`BeautifulSoupCrawler`](https://crawlee.dev/python/api/class/BeautifulSoupCrawler) is ideal for extracting data from static HTML pages. It uses [BeautifulSoup](https://www.crummy.com/software/BeautifulSoup/bs4/doc/) for parsing and [`ImpitHttpClient`](https://crawlee.dev/python/api/class/ImpitHttpClient) for HTTP communication, ensuring efficient and lightweight scraping. If you do not need to execute JavaScript on the page, [`BeautifulSoupCrawler`](https://crawlee.dev/python/api/class/BeautifulSoupCrawler) is a great choice for your scraping tasks. Below is an example of how to use it in an Apify Actor.
+
+
+ {CrawleeBeautifulSoupExample}
+
+
+## Actor with ParselCrawler
+
+The [`ParselCrawler`](https://crawlee.dev/python/api/class/ParselCrawler) works in the same way as [`BeautifulSoupCrawler`](https://crawlee.dev/python/api/class/BeautifulSoupCrawler), but it uses the [Parsel](https://parsel.readthedocs.io/en/latest/) library for HTML parsing. This allows for more powerful and flexible data extraction using [XPath](https://en.wikipedia.org/wiki/XPath) selectors, and it is also typically faster than [`BeautifulSoupCrawler`](https://crawlee.dev/python/api/class/BeautifulSoupCrawler). Below is an example of how to use [`ParselCrawler`](https://crawlee.dev/python/api/class/ParselCrawler) in an Apify Actor.
+
+
+ {CrawleeParselExample}
+
+
+## Actor with PlaywrightCrawler
+
+The [`PlaywrightCrawler`](https://crawlee.dev/python/api/class/PlaywrightCrawler) is built for handling dynamic web pages that rely on JavaScript for content rendering. Using the [Playwright](https://playwright.dev/) library, it provides a browser-based automation environment to interact with complex websites. Below is an example of how to use [`PlaywrightCrawler`](https://crawlee.dev/python/api/class/PlaywrightCrawler) in an Apify Actor.
+
+
+ {CrawleePlaywrightExample}
+
+
+## Conclusion
+
+In this guide, you learned how to use the [Crawlee](https://crawlee.dev/python) library in your Apify Actors. By using the [`BeautifulSoupCrawler`](https://crawlee.dev/python/api/class/BeautifulSoupCrawler), [`ParselCrawler`](https://crawlee.dev/python/api/class/ParselCrawler), and [`PlaywrightCrawler`](https://crawlee.dev/python/api/class/PlaywrightCrawler) crawlers, you can efficiently scrape static or dynamic web pages, making it easy to build web scraping tasks in Python. See the [Actor templates](https://apify.com/templates/categories/python) to get started with your own scraping tasks. If you have questions or need assistance, feel free to reach out on our [GitHub](https://github.com/apify/apify-sdk-python) or join our [Discord community](https://discord.com/invite/jyEM2PRvMU). Happy scraping!
diff --git a/docs/02_guides/05_scrapy.mdx b/docs/03_guides/06_scrapy.mdx
similarity index 100%
rename from docs/02_guides/05_scrapy.mdx
rename to docs/03_guides/06_scrapy.mdx
diff --git a/docs/02_guides/code/01_beautifulsoup_httpx.py b/docs/03_guides/code/01_beautifulsoup_httpx.py
similarity index 96%
rename from docs/02_guides/code/01_beautifulsoup_httpx.py
rename to docs/03_guides/code/01_beautifulsoup_httpx.py
index 36d3bca7..157948d0 100644
--- a/docs/02_guides/code/01_beautifulsoup_httpx.py
+++ b/docs/03_guides/code/01_beautifulsoup_httpx.py
@@ -1,9 +1,8 @@
-from __future__ import annotations
-
+import asyncio
from urllib.parse import urljoin
+import httpx
from bs4 import BeautifulSoup
-from httpx import AsyncClient
from apify import Actor, Request
@@ -32,7 +31,7 @@ async def main() -> None:
await request_queue.add_request(new_request)
# Create an HTTPX client to fetch the HTML content of the URLs.
- async with AsyncClient() as client:
+ async with httpx.AsyncClient() as client:
# Process the URLs from the request queue.
while request := await request_queue.fetch_next_request():
url = request.url
@@ -83,3 +82,7 @@ async def main() -> None:
finally:
# Mark the request as handled to ensure it is not processed again.
await request_queue.mark_request_as_handled(new_request)
+
+
+if __name__ == '__main__':
+ asyncio.run(main())
diff --git a/docs/03_guides/code/02_parsel_impit.py b/docs/03_guides/code/02_parsel_impit.py
new file mode 100644
index 00000000..21b5e74f
--- /dev/null
+++ b/docs/03_guides/code/02_parsel_impit.py
@@ -0,0 +1,94 @@
+import asyncio
+from urllib.parse import urljoin
+
+import impit
+import parsel
+
+from apify import Actor, Request
+
+
+async def main() -> None:
+ # Enter the context of the Actor.
+ async with Actor:
+ # Retrieve the Actor input, and use default values if not provided.
+ actor_input = await Actor.get_input() or {}
+ start_urls = actor_input.get('start_urls', [{'url': 'https://apify.com'}])
+ max_depth = actor_input.get('max_depth', 1)
+
+ # Exit if no start URLs are provided.
+ if not start_urls:
+ Actor.log.info('No start URLs specified in Actor input, exiting...')
+ await Actor.exit()
+
+ # Open the default request queue for handling URLs to be processed.
+ request_queue = await Actor.open_request_queue()
+
+ # Enqueue the start URLs with an initial crawl depth of 0.
+ for start_url in start_urls:
+ url = start_url.get('url')
+ Actor.log.info(f'Enqueuing {url} ...')
+ new_request = Request.from_url(url, user_data={'depth': 0})
+ await request_queue.add_request(new_request)
+
+ # Create an Impit client to fetch the HTML content of the URLs.
+ async with impit.AsyncClient() as client:
+ # Process the URLs from the request queue.
+ while request := await request_queue.fetch_next_request():
+ url = request.url
+
+ if not isinstance(request.user_data['depth'], (str, int)):
+ raise TypeError('Request.depth is an unexpected type.')
+
+ depth = int(request.user_data['depth'])
+ Actor.log.info(f'Scraping {url} (depth={depth}) ...')
+
+ try:
+ # Fetch the HTTP response from the specified URL using Impit.
+ response = await client.get(url)
+
+ # Parse the HTML content using Parsel Selector.
+ selector = parsel.Selector(text=response.text)
+
+ # If the current depth is less than max_depth, find nested links
+ # and enqueue them.
+ if depth < max_depth:
+ # Extract all links using CSS selector
+ links = selector.css('a::attr(href)').getall()
+ for link_href in links:
+ link_url = urljoin(url, link_href)
+
+ if link_url.startswith(('http://', 'https://')):
+ Actor.log.info(f'Enqueuing {link_url} ...')
+ new_request = Request.from_url(
+ link_url,
+ user_data={'depth': depth + 1},
+ )
+ await request_queue.add_request(new_request)
+
+ # Extract the desired data using Parsel selectors.
+ title = selector.css('title::text').get()
+ h1s = selector.css('h1::text').getall()
+ h2s = selector.css('h2::text').getall()
+ h3s = selector.css('h3::text').getall()
+
+ data = {
+ 'url': url,
+ 'title': title,
+ 'h1s': h1s,
+ 'h2s': h2s,
+ 'h3s': h3s,
+ }
+
+ # Store the extracted data to the default dataset.
+ await Actor.push_data(data)
+
+ except Exception:
+ Actor.log.exception(f'Cannot extract data from {url}.')
+
+ finally:
+ # Mark the request as handled to ensure it is not processed again.
+ await request_queue.mark_request_as_handled(request)
+
+
+if __name__ == '__main__':
+ asyncio.run(main())
diff --git a/docs/02_guides/code/03_playwright.py b/docs/03_guides/code/03_playwright.py
similarity index 98%
rename from docs/02_guides/code/03_playwright.py
rename to docs/03_guides/code/03_playwright.py
index 78ebdda3..14868ad8 100644
--- a/docs/02_guides/code/03_playwright.py
+++ b/docs/03_guides/code/03_playwright.py
@@ -1,5 +1,4 @@
-from __future__ import annotations
-
+import asyncio
from urllib.parse import urljoin
from playwright.async_api import async_playwright
@@ -92,3 +91,7 @@ async def main() -> None:
await page.close()
# Mark the request as handled to ensure it is not processed again.
await request_queue.mark_request_as_handled(request)
+
+
+if __name__ == '__main__':
+ asyncio.run(main())
diff --git a/docs/02_guides/code/04_selenium.py b/docs/03_guides/code/04_selenium.py
similarity index 98%
rename from docs/02_guides/code/04_selenium.py
rename to docs/03_guides/code/04_selenium.py
index 75c55b2f..8cffe606 100644
--- a/docs/02_guides/code/04_selenium.py
+++ b/docs/03_guides/code/04_selenium.py
@@ -1,5 +1,3 @@
-from __future__ import annotations
-
import asyncio
from urllib.parse import urljoin
@@ -102,3 +100,7 @@ async def main() -> None:
await request_queue.mark_request_as_handled(request)
driver.quit()
+
+
+if __name__ == '__main__':
+ asyncio.run(main())
diff --git a/docs/03_guides/code/05_crawlee_beautifulsoup.py b/docs/03_guides/code/05_crawlee_beautifulsoup.py
new file mode 100644
index 00000000..4d3a81d7
--- /dev/null
+++ b/docs/03_guides/code/05_crawlee_beautifulsoup.py
@@ -0,0 +1,55 @@
+import asyncio
+
+from crawlee.crawlers import BeautifulSoupCrawler, BeautifulSoupCrawlingContext
+
+from apify import Actor
+
+# Create a crawler.
+crawler = BeautifulSoupCrawler(
+ # Limit the crawl to max requests. Remove or increase it for crawling all links.
+ max_requests_per_crawl=50,
+)
+
+
+# Define a request handler, which will be called for every request.
+@crawler.router.default_handler
+async def request_handler(context: BeautifulSoupCrawlingContext) -> None:
+ Actor.log.info(f'Scraping {context.request.url}...')
+
+ # Extract the desired data.
+ data = {
+ 'url': context.request.url,
+ 'title': context.soup.title.string if context.soup.title else None,
+ 'h1s': [h1.text for h1 in context.soup.find_all('h1')],
+ 'h2s': [h2.text for h2 in context.soup.find_all('h2')],
+ 'h3s': [h3.text for h3 in context.soup.find_all('h3')],
+ }
+
+ # Store the extracted data to the default dataset.
+ await context.push_data(data)
+
+ # Enqueue additional links found on the current page.
+ await context.enqueue_links(strategy='same-domain')
+
+
+async def main() -> None:
+ # Enter the context of the Actor.
+ async with Actor:
+ # Retrieve the Actor input, and use default values if not provided.
+ actor_input = await Actor.get_input() or {}
+ start_urls = [
+ url.get('url')
+ for url in actor_input.get('start_urls', [{'url': 'https://apify.com'}])
+ ]
+
+ # Exit if no start URLs are provided.
+ if not start_urls:
+ Actor.log.info('No start URLs specified in Actor input, exiting...')
+ await Actor.exit()
+
+ # Run the crawler with the starting requests.
+ await crawler.run(start_urls)
+
+
+if __name__ == '__main__':
+ asyncio.run(main())
diff --git a/docs/03_guides/code/05_crawlee_parsel.py b/docs/03_guides/code/05_crawlee_parsel.py
new file mode 100644
index 00000000..31f39d8b
--- /dev/null
+++ b/docs/03_guides/code/05_crawlee_parsel.py
@@ -0,0 +1,55 @@
+import asyncio
+
+from crawlee.crawlers import ParselCrawler, ParselCrawlingContext
+
+from apify import Actor
+
+# Create a crawler.
+crawler = ParselCrawler(
+ # Limit the crawl to max requests. Remove or increase it for crawling all links.
+ max_requests_per_crawl=50,
+)
+
+
+# Define a request handler, which will be called for every request.
+@crawler.router.default_handler
+async def request_handler(context: ParselCrawlingContext) -> None:
+ Actor.log.info(f'Scraping {context.request.url}...')
+
+ # Extract the desired data.
+ data = {
+ 'url': context.request.url,
+ 'title': context.selector.xpath('//title/text()').get(),
+ 'h1s': context.selector.xpath('//h1/text()').getall(),
+ 'h2s': context.selector.xpath('//h2/text()').getall(),
+ 'h3s': context.selector.xpath('//h3/text()').getall(),
+ }
+
+ # Store the extracted data to the default dataset.
+ await context.push_data(data)
+
+ # Enqueue additional links found on the current page.
+ await context.enqueue_links(strategy='same-domain')
+
+
+async def main() -> None:
+ # Enter the context of the Actor.
+ async with Actor:
+ # Retrieve the Actor input, and use default values if not provided.
+ actor_input = await Actor.get_input() or {}
+ start_urls = [
+ url.get('url')
+ for url in actor_input.get('start_urls', [{'url': 'https://apify.com'}])
+ ]
+
+ # Exit if no start URLs are provided.
+ if not start_urls:
+ Actor.log.info('No start URLs specified in Actor input, exiting...')
+ await Actor.exit()
+
+ # Run the crawler with the starting requests.
+ await crawler.run(start_urls)
+
+
+if __name__ == '__main__':
+ asyncio.run(main())
diff --git a/docs/03_guides/code/05_crawlee_playwright.py b/docs/03_guides/code/05_crawlee_playwright.py
new file mode 100644
index 00000000..be4ea29e
--- /dev/null
+++ b/docs/03_guides/code/05_crawlee_playwright.py
@@ -0,0 +1,58 @@
+import asyncio
+
+from crawlee.crawlers import PlaywrightCrawler, PlaywrightCrawlingContext
+
+from apify import Actor
+
+# Create a crawler.
+crawler = PlaywrightCrawler(
+ # Limit the crawl to max requests. Remove or increase it for crawling all links.
+ max_requests_per_crawl=50,
+ # Run the browser in a headless mode.
+ headless=True,
+ browser_launch_options={'args': ['--disable-gpu']},
+)
+
+
+# Define a request handler, which will be called for every request.
+@crawler.router.default_handler
+async def request_handler(context: PlaywrightCrawlingContext) -> None:
+ Actor.log.info(f'Scraping {context.request.url}...')
+
+ # Extract the desired data.
+ data = {
+ 'url': context.request.url,
+ 'title': await context.page.title(),
+ 'h1s': [await h1.text_content() for h1 in await context.page.locator('h1').all()],
+ 'h2s': [await h2.text_content() for h2 in await context.page.locator('h2').all()],
+ 'h3s': [await h3.text_content() for h3 in await context.page.locator('h3').all()],
+ }
+
+ # Store the extracted data to the default dataset.
+ await context.push_data(data)
+
+ # Enqueue additional links found on the current page.
+ await context.enqueue_links(strategy='same-domain')
+
+
+async def main() -> None:
+ # Enter the context of the Actor.
+ async with Actor:
+ # Retrieve the Actor input, and use default values if not provided.
+ actor_input = await Actor.get_input() or {}
+ start_urls = [
+ url.get('url')
+ for url in actor_input.get('start_urls', [{'url': 'https://apify.com'}])
+ ]
+
+ # Exit if no start URLs are provided.
+ if not start_urls:
+ Actor.log.info('No start URLs specified in Actor input, exiting...')
+ await Actor.exit()
+
+ # Run the crawler with the starting requests.
+ await crawler.run(start_urls)
+
+
+if __name__ == '__main__':
+ asyncio.run(main())
diff --git a/docs/02_guides/code/scrapy_project/src/__init__.py b/docs/03_guides/code/scrapy_project/src/__init__.py
similarity index 100%
rename from docs/02_guides/code/scrapy_project/src/__init__.py
rename to docs/03_guides/code/scrapy_project/src/__init__.py
diff --git a/docs/02_guides/code/scrapy_project/src/__main__.py b/docs/03_guides/code/scrapy_project/src/__main__.py
similarity index 100%
rename from docs/02_guides/code/scrapy_project/src/__main__.py
rename to docs/03_guides/code/scrapy_project/src/__main__.py
diff --git a/docs/02_guides/code/scrapy_project/src/items.py b/docs/03_guides/code/scrapy_project/src/items.py
similarity index 100%
rename from docs/02_guides/code/scrapy_project/src/items.py
rename to docs/03_guides/code/scrapy_project/src/items.py
diff --git a/docs/02_guides/code/scrapy_project/src/main.py b/docs/03_guides/code/scrapy_project/src/main.py
similarity index 100%
rename from docs/02_guides/code/scrapy_project/src/main.py
rename to docs/03_guides/code/scrapy_project/src/main.py
diff --git a/docs/02_guides/code/scrapy_project/src/py.typed b/docs/03_guides/code/scrapy_project/src/py.typed
similarity index 100%
rename from docs/02_guides/code/scrapy_project/src/py.typed
rename to docs/03_guides/code/scrapy_project/src/py.typed
diff --git a/docs/02_guides/code/scrapy_project/src/settings.py b/docs/03_guides/code/scrapy_project/src/settings.py
similarity index 100%
rename from docs/02_guides/code/scrapy_project/src/settings.py
rename to docs/03_guides/code/scrapy_project/src/settings.py
diff --git a/docs/02_guides/code/scrapy_project/src/spiders/__init__.py b/docs/03_guides/code/scrapy_project/src/spiders/__init__.py
similarity index 100%
rename from docs/02_guides/code/scrapy_project/src/spiders/__init__.py
rename to docs/03_guides/code/scrapy_project/src/spiders/__init__.py
diff --git a/docs/02_guides/code/scrapy_project/src/spiders/py.typed b/docs/03_guides/code/scrapy_project/src/spiders/py.typed
similarity index 100%
rename from docs/02_guides/code/scrapy_project/src/spiders/py.typed
rename to docs/03_guides/code/scrapy_project/src/spiders/py.typed
diff --git a/docs/02_guides/code/scrapy_project/src/spiders/title.py b/docs/03_guides/code/scrapy_project/src/spiders/title.py
similarity index 100%
rename from docs/02_guides/code/scrapy_project/src/spiders/title.py
rename to docs/03_guides/code/scrapy_project/src/spiders/title.py
diff --git a/tests/integration/test_actor_scrapy.py b/tests/integration/test_actor_scrapy.py
index 9365521e..410ea904 100644
--- a/tests/integration/test_actor_scrapy.py
+++ b/tests/integration/test_actor_scrapy.py
@@ -11,7 +11,7 @@ async def test_actor_scrapy_title_spider(
make_actor: MakeActorFunction,
run_actor: RunActorFunction,
) -> None:
- base_path = Path('docs/02_guides/code/scrapy_project')
+ base_path = Path('docs/03_guides/code/scrapy_project')
actor_source_files = {
'src/__init__.py': (base_path / 'src/__init__.py').read_text(),
diff --git a/website/sidebars.js b/website/sidebars.js
index f6b2040e..c4a31842 100644
--- a/website/sidebars.js
+++ b/website/sidebars.js
@@ -13,23 +13,23 @@ module.exports = {
},
{
type: 'category',
- label: 'Guides',
+ label: 'Concepts',
collapsed: true,
items: [
{
type: 'autogenerated',
- dirName: '02_guides',
+ dirName: '02_concepts',
},
],
},
{
type: 'category',
- label: 'Concepts',
+ label: 'Guides',
collapsed: true,
items: [
{
type: 'autogenerated',
- dirName: '03_concepts',
+ dirName: '03_guides',
},
],
},