
Commit 758424b

add AbstractHttpCrawler section

1 parent 247ef79

13 files changed: +306 −67 lines

docs/guides/code_examples/crawler_custom_parser/__init__.py

Whitespace-only changes.

docs/guides/code_examples/httpcrawler_custom_parser/selectolax_parser.py renamed to docs/guides/code_examples/crawler_custom_parser/lexbor_parser.py

File renamed without changes.

docs/guides/code_examples/httpcrawler_custom_parser/lxml_parser.py renamed to docs/guides/code_examples/crawler_custom_parser/lxml_parser.py

File renamed without changes.

docs/guides/code_examples/httpcrawler_custom_parser/lxml_saxonche_parser.py renamed to docs/guides/code_examples/crawler_custom_parser/lxml_saxonche_parser.py

File renamed without changes.

docs/guides/code_examples/httpcrawler_custom_parser/pyquery_parser.py renamed to docs/guides/code_examples/crawler_custom_parser/pyquery_parser.py

File renamed without changes.

docs/guides/code_examples/httpcrawler_custom_parser/scrapling_parser.py renamed to docs/guides/code_examples/crawler_custom_parser/scrapling_parser.py

File renamed without changes.
Lines changed: 33 additions & 0 deletions

@@ -0,0 +1,33 @@

from dataclasses import dataclass, fields

from selectolax.lexbor import LexborHTMLParser
from typing_extensions import Self

from crawlee.crawlers._abstract_http import ParsedHttpCrawlingContext


@dataclass(frozen=True)
class SelectolaxLexborContext(ParsedHttpCrawlingContext[LexborHTMLParser]):
    """Crawling context providing access to the parsed page.

    This context is passed to request handlers and includes all standard
    context methods (push_data, enqueue_links, etc.) plus custom helpers.
    """

    @property
    def parser(self) -> LexborHTMLParser:
        """Convenient alias for accessing the parsed document."""
        return self.parsed_content

    @classmethod
    def from_parsed_http_crawling_context(
        cls, context: ParsedHttpCrawlingContext[LexborHTMLParser]
    ) -> Self:
        """Create a custom context from the base context.

        Copies all fields from the base context to preserve framework
        functionality while adding a custom interface.
        """
        return cls(
            **{field.name: getattr(context, field.name) for field in fields(context)}
        )
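
For readers skimming the diff: `from_parsed_http_crawling_context` relies on `dataclasses.fields()` to copy every declared field from the base context into the frozen subclass. A minimal standalone sketch of that pattern, using hypothetical `Base`/`Derived` classes rather than Crawlee types:

from dataclasses import dataclass, fields


@dataclass(frozen=True)
class Base:
    url: str
    status: int


@dataclass(frozen=True)
class Derived(Base):
    @property
    def ok(self) -> bool:
        return self.status == 200


base = Base(url='https://example.com', status=200)
# Copy every declared field of the base instance into the subclass.
derived = Derived(**{f.name: getattr(base, f.name) for f in fields(base)})
assert derived.ok

Because the dataclass is frozen, this copy-construct approach is the idiomatic way to "upgrade" an instance to a subclass without mutation.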
Lines changed: 42 additions & 0 deletions

@@ -0,0 +1,42 @@

from __future__ import annotations

from typing import TYPE_CHECKING

from selectolax.lexbor import LexborHTMLParser, LexborNode

from crawlee.crawlers import AbstractHttpCrawler, HttpCrawlerOptions

from .selectolax_context import SelectolaxLexborContext
from .selectolax_parser import SelectolaxLexborParser

if TYPE_CHECKING:
    from collections.abc import AsyncGenerator

    from typing_extensions import Unpack

    from crawlee.crawlers._abstract_http import ParsedHttpCrawlingContext


class SelectolaxLexborCrawler(
    AbstractHttpCrawler[SelectolaxLexborContext, LexborHTMLParser, LexborNode]
):
    """Custom crawler using Selectolax Lexbor for HTML parsing."""

    def __init__(
        self,
        **kwargs: Unpack[HttpCrawlerOptions[SelectolaxLexborContext]],
    ) -> None:
        # Final step converts the base context to the custom context type.
        async def final_step(
            context: ParsedHttpCrawlingContext[LexborHTMLParser],
        ) -> AsyncGenerator[SelectolaxLexborContext, None]:
            yield SelectolaxLexborContext.from_parsed_http_crawling_context(context)

        # Build context pipeline: HTTP request -> parsing -> custom context.
        kwargs['_context_pipeline'] = (
            self._create_static_content_crawler_pipeline().compose(final_step)
        )
        super().__init__(
            parser=SelectolaxLexborParser(),
            **kwargs,
        )
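
The `final_step` above is an async-generator middleware step: it yields exactly one converted context, and any code after the `yield` would run as cleanup once downstream handlers finish. A minimal sketch of the same pattern in isolation (hypothetical `BaseCtx`/`CustomCtx` types, not Crawlee classes):

import asyncio
from collections.abc import AsyncGenerator
from dataclasses import dataclass


@dataclass(frozen=True)
class BaseCtx:
    url: str


@dataclass(frozen=True)
class CustomCtx(BaseCtx):
    pass


async def final_step(context: BaseCtx) -> AsyncGenerator[CustomCtx, None]:
    # Yield exactly one converted context; code placed after the yield
    # would run as cleanup after the consumer is done with it.
    yield CustomCtx(url=context.url)


async def demo() -> None:
    async for custom in final_step(BaseCtx(url='https://example.com')):
        print(type(custom).__name__, custom.url)


asyncio.run(demo())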
Lines changed: 27 additions & 0 deletions

@@ -0,0 +1,27 @@

import asyncio

from .selectolax_crawler import SelectolaxLexborContext, SelectolaxLexborCrawler


async def main() -> None:
    crawler = SelectolaxLexborCrawler(
        max_requests_per_crawl=10,
    )

    @crawler.router.default_handler
    async def handle_request(context: SelectolaxLexborContext) -> None:
        context.log.info(f'Processing {context.request.url} ...')

        data = {
            'url': context.request.url,
            'title': context.parser.css_first('title').text(),
        }

        await context.push_data(data)
        await context.enqueue_links()

    await crawler.run(['https://crawlee.dev/'])


if __name__ == '__main__':
    asyncio.run(main())
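
One edge case worth flagging in the handler above: Selectolax's `css_first()` returns `None` when nothing matches, so `context.parser.css_first('title').text()` raises `AttributeError` on pages without a `<title>` element. A standalone sketch of a None-safe lookup (an editorial suggestion, not part of the commit):

from selectolax.lexbor import LexborHTMLParser

# css_first() returns None when no element matches, so guard before
# calling .text() on the result.
doc = LexborHTMLParser('<html><body><p>No title here</p></body></html>')
title_node = doc.css_first('title')
title = title_node.text() if title_node is not None else None
assert title is None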
Lines changed: 60 additions & 0 deletions

@@ -0,0 +1,60 @@

from __future__ import annotations

import asyncio
from typing import TYPE_CHECKING

from selectolax.lexbor import LexborHTMLParser, LexborNode
from typing_extensions import override

from crawlee.crawlers._abstract_http import AbstractHttpParser

if TYPE_CHECKING:
    from collections.abc import Iterable, Sequence

    from crawlee.http_clients import HttpResponse


class SelectolaxLexborParser(AbstractHttpParser[LexborHTMLParser, LexborNode]):
    """Parser for HTTP responses using Selectolax Lexbor."""

    @override
    async def parse(self, response: HttpResponse) -> LexborHTMLParser:
        """Parse HTTP response body into a document object."""
        response_body = await response.read()
        # Run parsing in a thread to avoid blocking the event loop.
        return await asyncio.to_thread(lambda: LexborHTMLParser(response_body))

    @override
    async def parse_text(self, text: str) -> LexborHTMLParser:
        """Parse raw HTML string into a document object."""
        return LexborHTMLParser(text)

    @override
    async def select(
        self, parsed_content: LexborHTMLParser, selector: str
    ) -> Sequence[LexborNode]:
        """Select elements matching a CSS selector."""
        return tuple(match for match in parsed_content.css(selector))

    @override
    def is_matching_selector(
        self, parsed_content: LexborHTMLParser, selector: str
    ) -> bool:
        """Check if any element matches the selector."""
        return parsed_content.css_first(selector) is not None

    @override
    def find_links(
        self, parsed_content: LexborHTMLParser, selector: str
    ) -> Iterable[str]:
        """Extract href attributes from elements matching the selector.

        Used by `enqueue_links` helper to discover URLs.
        """
        link: LexborNode
        urls: list[str] = []
        for link in parsed_content.css(selector):
            url = link.attributes.get('href')
            if url:
                urls.append(url.strip())
        return urls
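
To sanity-check the parser without a full crawler run, its methods can be exercised directly on an inline HTML string. A quick standalone sketch (illustrative, not part of the commit; assumes `selectolax` is installed and the `SelectolaxLexborParser` class from the file above is in scope):

import asyncio


async def demo() -> None:
    parser = SelectolaxLexborParser()
    document = await parser.parse_text(
        '<html><head><title>Hi</title></head>'
        '<body><a href=" /docs ">Docs</a></body></html>'
    )
    print(parser.is_matching_selector(document, 'title'))  # True
    print(list(parser.find_links(document, 'a')))  # ['/docs']


asyncio.run(demo())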
