Commit b8afba7
docs: Update Playwright home page code example (#1548)
- Set headless mode to True by default to improve performance when running via "Run on Apify", preventing slow execution and potential timeout errors.
- Align the Python example more closely with its JavaScript counterpart to keep the documentation consistent across languages.
- Add `asyncio.run(main())` so the snippet is fully copy-and-play ready.
- Also, a minor update regarding the docs code examples (same as in apify/apify-sdk-python#673).
1 parent cb7e609

File tree

3 files changed (+16, -13 lines)

.github/workflows/run_code_checks.yaml

Lines changed: 1 addition & 0 deletions
@@ -40,3 +40,4 @@ jobs:
   docs_check:
     name: Docs check
     uses: apify/workflows/.github/workflows/python_docs_check.yaml@main
+    secrets: inherit
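Note: a reusable workflow called with `uses:` does not automatically receive the caller's secrets, so `secrets: inherit` forwards the repository's secrets to the docs check, presumably needed now that the Makefile below no longer supplies a placeholder Apify token.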

Makefile

Lines changed: 1 addition & 4 deletions
@@ -4,9 +4,6 @@
 # This is default for local testing, but GitHub workflows override it to a higher value in CI
 E2E_TESTS_CONCURRENCY = 1
 
-# Placeholder token; replace with a real one for local docs testing if needed
-APIFY_TOKEN = apify_api_token_placeholder
-
 clean:
 	rm -rf .mypy_cache .pytest_cache .ruff_cache build dist htmlcov .coverage
 
@@ -58,4 +55,4 @@ build-docs:
 	cd website && corepack enable && yarn && uv run yarn build
 
 run-docs: build-api-reference
-	export APIFY_SIGNING_TOKEN=$(APIFY_TOKEN) && cd website && corepack enable && yarn && uv run yarn start
+	cd website && corepack enable && yarn && uv run yarn start
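With the placeholder `APIFY_TOKEN` and the `APIFY_SIGNING_TOKEN` export removed, `make run-docs` now starts the website without any local token setup; in CI, a real token can instead flow in through the `secrets: inherit` change above.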
Lines changed: 14 additions & 9 deletions
@@ -1,37 +1,42 @@
+import asyncio
+
 from crawlee.crawlers import PlaywrightCrawler, PlaywrightCrawlingContext
 
 
 async def main() -> None:
     crawler = PlaywrightCrawler(
         max_requests_per_crawl=10,  # Limit the max requests per crawl.
-        headless=False,  # Show the browser window.
-        browser_type='firefox',  # Use the Firefox browser.
+        headless=True,  # Run in headless mode (set to False to see the browser).
+        browser_type='firefox',  # Use Firefox browser.
     )
 
     # Define the default request handler, which will be called for every request.
     @crawler.router.default_handler
     async def request_handler(context: PlaywrightCrawlingContext) -> None:
         context.log.info(f'Processing {context.request.url} ...')
 
-        # Extract and enqueue all links found on the page.
-        await context.enqueue_links()
-
         # Extract data from the page using Playwright API.
         data = {
             'url': context.request.url,
             'title': await context.page.title(),
-            'content': (await context.page.content())[:100],
         }
 
         # Push the extracted data to the default dataset.
         await context.push_data(data)
 
+        # Extract all links on the page and enqueue them.
+        await context.enqueue_links()
+
     # Run the crawler with the initial list of URLs.
     await crawler.run(['https://crawlee.dev'])
 
-    # Export the entire dataset to a JSON file.
-    await crawler.export_data('results.json')
+    # Export the entire dataset to a CSV file.
+    await crawler.export_data('results.csv')
 
-    # Or work with the data directly.
+    # Or access the data directly.
     data = await crawler.get_data()
     crawler.log.info(f'Extracted data: {data.items}')
+
+
+if __name__ == '__main__':
+    asyncio.run(main())
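Assembled from the hunks above, the example reads as follows after this commit (the file's path is not shown in this view, so treat this standalone listing as a reconstruction of the post-commit state):

import asyncio

from crawlee.crawlers import PlaywrightCrawler, PlaywrightCrawlingContext


async def main() -> None:
    crawler = PlaywrightCrawler(
        max_requests_per_crawl=10,  # Limit the max requests per crawl.
        headless=True,  # Run in headless mode (set to False to see the browser).
        browser_type='firefox',  # Use Firefox browser.
    )

    # Define the default request handler, which will be called for every request.
    @crawler.router.default_handler
    async def request_handler(context: PlaywrightCrawlingContext) -> None:
        context.log.info(f'Processing {context.request.url} ...')

        # Extract data from the page using Playwright API.
        data = {
            'url': context.request.url,
            'title': await context.page.title(),
        }

        # Push the extracted data to the default dataset.
        await context.push_data(data)

        # Extract all links on the page and enqueue them.
        await context.enqueue_links()

    # Run the crawler with the initial list of URLs.
    await crawler.run(['https://crawlee.dev'])

    # Export the entire dataset to a CSV file.
    await crawler.export_data('results.csv')

    # Or access the data directly.
    data = await crawler.get_data()
    crawler.log.info(f'Extracted data: {data.items}')


if __name__ == '__main__':
    asyncio.run(main())

To run it locally you will typically need Crawlee with its Playwright extra installed (pip install 'crawlee[playwright]') and the Firefox browser fetched via playwright install firefox.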
