Merged
5 changes: 4 additions & 1 deletion docs/01_overview/code/01_introduction.py
@@ -10,5 +10,8 @@ async def main() -> None:
async with httpx.AsyncClient() as client:
response = await client.get(actor_input['url'])
soup = BeautifulSoup(response.content, 'html.parser')
data = {'url': actor_input['url'], 'title': soup.title.string if soup.title else None}
data = {
'url': actor_input['url'],
'title': soup.title.string if soup.title else None,
}
await Actor.push_data(data)
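For orientation (not part of this diff): the hunk above sits inside the standard Actor entry point. A minimal sketch of the full example, assuming the usual imports and `Actor.get_input()` boilerplate from the surrounding file, could look like this:

import httpx
from apify import Actor
from bs4 import BeautifulSoup


async def main() -> None:
    async with Actor:
        # Read the Actor input; the default URL here is only an illustrative assumption.
        actor_input = await Actor.get_input() or {'url': 'https://apify.com'}
        async with httpx.AsyncClient() as client:
            response = await client.get(actor_input['url'])
        soup = BeautifulSoup(response.content, 'html.parser')
        data = {
            'url': actor_input['url'],
            'title': soup.title.string if soup.title else None,
        }
        await Actor.push_data(data)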
3 changes: 2 additions & 1 deletion docs/02_guides/code/02_crawlee_beautifulsoup.py
@@ -25,7 +25,8 @@ async def main() -> None:

# Create a crawler.
crawler = BeautifulSoupCrawler(
# Limit the crawl to max requests. Remove or increase it for crawling all links.
# Limit the crawl to max requests.
# Remove or increase it for crawling all links.
max_requests_per_crawl=50,
)

18 changes: 14 additions & 4 deletions docs/02_guides/code/02_crawlee_playwright.py
@@ -25,7 +25,8 @@ async def main() -> None:

# Create a crawler.
crawler = PlaywrightCrawler(
# Limit the crawl to max requests. Remove or increase it for crawling all links.
# Limit the crawl to max requests.
# Remove or increase it for crawling all links.
max_requests_per_crawl=50,
headless=True,
browser_launch_options={
@@ -43,9 +44,18 @@ async def request_handler(context: PlaywrightCrawlingContext) -> None:
data = {
'url': context.request.url,
'title': await context.page.title(),
'h1s': [await h1.text_content() for h1 in await context.page.locator('h1').all()],
'h2s': [await h2.text_content() for h2 in await context.page.locator('h2').all()],
'h3s': [await h3.text_content() for h3 in await context.page.locator('h3').all()],
'h1s': [
await h1.text_content()
for h1 in await context.page.locator('h1').all()
],
'h2s': [
await h2.text_content()
for h2 in await context.page.locator('h2').all()
],
'h3s': [
await h3.text_content()
for h3 in await context.page.locator('h3').all()
],
}

# Store the extracted data to the default dataset.
3 changes: 2 additions & 1 deletion docs/02_guides/code/scrapy_project/src/__main__.py
@@ -2,7 +2,8 @@

from twisted.internet import asyncioreactor

# Install Twisted's asyncio reactor before importing any other Twisted or Scrapy components.
# Install Twisted's asyncio reactor before importing any other Twisted or
# Scrapy components.
asyncioreactor.install() # type: ignore[no-untyped-call]

import os
3 changes: 2 additions & 1 deletion docs/02_guides/code/scrapy_project/src/spiders/title.py
@@ -60,7 +60,8 @@ def parse(self, response: Response) -> Generator[TitleItem | Request, None, None]:
title = response.css('title::text').extract_first()
yield TitleItem(url=url, title=title)

# Extract all links from the page, create `Request` objects out of them, and yield them.
# Extract all links from the page, create `Request` objects out of them,
# and yield them.
for link_href in response.css('a::attr("href")'):
link_url = urljoin(response.url, link_href.get())
if link_url.startswith(('http://', 'https://')):
11 changes: 7 additions & 4 deletions docs/03_concepts/code/03_rq.py
@@ -19,7 +19,9 @@ async def main() -> None:
await queue.add_request(Request.from_url('http://example.com/0'), forefront=True)

# If you try to add an existing request again, it will not do anything
add_request_info = await queue.add_request(Request.from_url('http://different-example.com/5'))
add_request_info = await queue.add_request(
Request.from_url('http://different-example.com/5')
)
Actor.log.info(f'Add request info: {add_request_info}')

processed_request = await queue.get_request(add_request_info.id)
@@ -29,8 +31,8 @@ async def main() -> None:
while not await queue.is_finished():
# Fetch the next unhandled request in the queue
request = await queue.fetch_next_request()
# This can happen due to the eventual consistency of the underlying request queue storage,
# best solution is just to sleep a bit
# This can happen due to the eventual consistency of the underlying request
# queue storage; the best solution is just to sleep a bit.
if request is None:
await asyncio.sleep(1)
continue
@@ -45,6 +47,7 @@ async def main() -> None:
Actor.log.info('Request successful.')
await queue.mark_request_as_handled(request)
else:
# If processing the request was unsuccessful, reclaim it so it can be processed again
# If processing the request was unsuccessful, reclaim it so it can be
# processed again.
Actor.log.warning('Request failed, will retry!')
await queue.reclaim_request(request)
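For orientation (an assumption, not shown in this diff): the queue used in the hunks above is typically opened from the Actor's default request queue storage before any requests are added, for example:

    # Sketch: open the Actor's default request queue; the surrounding file is assumed
    # to do something equivalent before the hunks shown above.
    queue = await Actor.open_request_queue()
    # Enqueue a few URLs with the same add_request call used in the diff above.
    for i in range(3):
        await queue.add_request(Request.from_url(f'http://example.com/{i}'))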
4 changes: 3 additions & 1 deletion docs/03_concepts/code/05_proxy_actor_input.py
@@ -5,7 +5,9 @@ async def main() -> None:
async with Actor:
actor_input = await Actor.get_input() or {}
proxy_settings = actor_input.get('proxySettings')
proxy_configuration = await Actor.create_proxy_configuration(actor_proxy_input=proxy_settings)
proxy_configuration = await Actor.create_proxy_configuration(
actor_proxy_input=proxy_settings
)

if not proxy_configuration:
raise RuntimeError('No proxy configuration available.')
16 changes: 12 additions & 4 deletions docs/03_concepts/code/05_proxy_rotation.py
@@ -17,7 +17,15 @@ async def main() -> None:
proxy_url = await proxy_configuration.new_url() # http://proxy-2.com
proxy_url = await proxy_configuration.new_url() # http://proxy-1.com
proxy_url = await proxy_configuration.new_url() # http://proxy-2.com
proxy_url = await proxy_configuration.new_url(session_id='a') # http://proxy-1.com
proxy_url = await proxy_configuration.new_url(session_id='b') # http://proxy-2.com
proxy_url = await proxy_configuration.new_url(session_id='b') # http://proxy-2.com
proxy_url = await proxy_configuration.new_url(session_id='a') # http://proxy-1.com
proxy_url = await proxy_configuration.new_url(
session_id='a'
) # http://proxy-1.com
proxy_url = await proxy_configuration.new_url(
session_id='b'
) # http://proxy-2.com
proxy_url = await proxy_configuration.new_url(
session_id='b'
) # http://proxy-2.com
proxy_url = await proxy_configuration.new_url(
session_id='a'
) # http://proxy-1.com
4 changes: 3 additions & 1 deletion docs/03_concepts/code/09_webserver.py
@@ -21,7 +21,9 @@ def run_server() -> None:
# Start the HTTP server on the provided port,
# and save a reference to the server.
global http_server
with ThreadingHTTPServer(('', Actor.config.web_server_port), RequestHandler) as server:
with ThreadingHTTPServer(
('', Actor.config.web_server_port), RequestHandler
) as server:
Actor.log.info(f'Server running on {Actor.config.web_server_port}')
http_server = server
server.serve_forever()
9 changes: 9 additions & 0 deletions docs/pyproject.toml
@@ -0,0 +1,9 @@
# Line length differs from the rest of the codebase so that the example code displayed on the generated
# documentation webpages is shown without a horizontal scrollbar, making it more readable.

[tool.ruff]
# Inherit everything from the project's top-level configuration file.
extend = "../pyproject.toml"

# Override just line length
line-length = 90 # Maximum length that fits on the docs webpage. Longer lines need a scrollbar.