-Pydoll automates Chromium-based browsers (Chrome, Edge) by connecting directly to the Chrome DevTools Protocol over WebSocket. No WebDriver binary, no `navigator.webdriver` flag, no compatibility issues.
+Pydoll automates Chromium-based browsers (Chrome, Edge) by connecting directly to the Chrome DevTools Protocol over WebSocket. **No WebDriver binary, no `navigator.webdriver` flag, no compatibility issues.**
-It combines a high-level API for common tasks with low-level CDP access for fine-grained control over network, fingerprinting, and browser behavior. The entire codebase is async-native and fully type-checked with mypy.
+It combines a high-level API for stealthy automation with low-level CDP access for fine-grained control over network, fingerprinting, and browser behavior. Its new **Pydantic-powered extraction engine** maps the DOM directly to structured, validated Python objects.
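"Connecting directly to CDP" means exchanging JSON messages over the browser's DevTools WebSocket endpoint. A minimal sketch of what one such command looks like on the wire (illustrative only; Pydoll assembles and dispatches these for you):

```python
import json

# Every CDP command is a JSON object with an id (used to match the
# browser's reply), a method name such as 'Page.navigate', and params.
def cdp_command(cmd_id: int, method: str, **params) -> str:
    return json.dumps({'id': cmd_id, 'method': method, 'params': params})

msg = cdp_command(1, 'Page.navigate', url='https://example.com')
```

The browser answers with a JSON message carrying the same `id`, which is how responses are paired with the commands that triggered them.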
### Top Sponsors
@@ -48,11 +48,11 @@ It combines a high-level API for common tasks with low-level CDP access for fine
### Why Pydoll
-- **Stealth-first**: Human-like mouse movement, realistic typing, and granular [browser preference](https://pydoll.tech/docs/features/configuration/browser-preferences/) control for fingerprint management.
+- **Structured extraction**: Define a [Pydantic](https://docs.pydantic.dev/) model, call `tab.extract()`, get typed and validated data back. No manual element-by-element querying.
- **Async and typed**: Built on `asyncio` from the ground up, 100% type-checked with `mypy`. Full IDE autocompletion and static error checking.
+- **Stealth built in**: Human-like mouse movement, realistic typing, and granular [browser preference](https://pydoll.tech/docs/features/configuration/browser-preferences/) control for fingerprint management.
- **Network control**: [Intercept](https://pydoll.tech/docs/features/network/interception/) requests to block ads/trackers, [monitor](https://pydoll.tech/docs/features/network/monitoring/) traffic for API discovery, and make [authenticated HTTP requests](https://pydoll.tech/docs/features/network/http-requests/) that inherit the browser session.
- **Shadow DOM and iframes**: Full support for [shadow roots](https://pydoll.tech/docs/deep-dive/architecture/shadow-dom/) (including closed) and cross-origin iframes. Discover, query, and interact with elements inside them using the same API.
-- **Ergonomic API**: `tab.find()` for most cases, `tab.query()` for complex [CSS/XPath selectors](https://pydoll.tech/docs/deep-dive/guides/selectors-guide/).
## Installation
@@ -62,55 +62,124 @@ pip install pydoll-python
No WebDriver binaries or external dependencies required.
-## What's New
+## Getting Started
-
-HAR Network Recording
-
+### 1. Stateful Automation & Evasion
-Record network activity during a browser session and export as HAR 1.2. Replay recorded requests to reproduce exact API sequences.
+When you need to navigate, bypass challenges, or interact with dynamic UI, Pydoll's imperative API handles it with humanized timing by default.
```python
-from pydoll.browser.chromium import Chrome
+import asyncio
+from pydoll.browser import Chrome
+from pydoll.constants import Key
-async with Chrome() as browser:
- tab = await browser.start()
+async def google_search(query: str):
+ async with Chrome() as browser:
+ tab = await browser.start()
+ await tab.go_to('https://www.google.com')
- async with tab.request.record() as capture:
- await tab.go_to('https://example.com')
+ # Find elements and interact with human-like timing
+ search_box = await tab.find(tag_name='textarea', name='q')
+ await search_box.insert_text(query)
+ await tab.keyboard.press(Key.ENTER)
- capture.save('flow.har')
- print(f'Captured {len(capture.entries)} requests')
+ first_result = await tab.find(
+ tag_name='h3',
+ text='autoscrape-labs/pydoll',
+ timeout=10,
+ )
+ await first_result.click()
+ print(f"Page loaded: {await tab.title}")
- responses = await tab.request.replay('flow.har')
+asyncio.run(google_search('pydoll site:github.com'))
```
-Filter by resource type:
+### 2. Structured Data Extraction
+
+Once you reach the target page, switch to the declarative engine. Define what you want with a model, and Pydoll extracts it — typed, validated, and ready to use.
```python
-from pydoll.protocol.network.types import ResourceType
+from pydoll.browser.chromium import Chrome
+from pydoll.extractor import ExtractionModel, Field
+
+class Quote(ExtractionModel):
+ text: str = Field(selector='.text', description='The quote text')
+ author: str = Field(selector='.author', description='Who said it')
+ tags: list[str] = Field(selector='.tag', description='Tags')
+ year: int | None = Field(selector='.year', description='Year', default=None)
-async with tab.request.record(
- resource_types=[ResourceType.FETCH, ResourceType.XHR]
-) as capture:
- await tab.go_to('https://example.com')
+async def extract_quotes():
+ async with Chrome() as browser:
+ tab = await browser.start()
+ await tab.go_to('https://quotes.toscrape.com')
+
+ quotes = await tab.extract_all(Quote, scope='.quote', timeout=5)
+
+ for q in quotes:
+ print(f'{q.author}: {q.text}') # fully typed, IDE autocomplete works
+ print(q.tags) # list[str], not a raw element
+ print(q.model_dump_json()) # pydantic serialization built-in
+
+asyncio.run(extract_quotes())
```
-[HAR Recording Docs](https://pydoll.tech/docs/features/network/network-recording/)
+Models support CSS/XPath auto-detection, HTML attribute targeting, custom transforms, and nested models.
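The CSS/XPath auto-detection can be pictured with a heuristic like the following (an illustrative sketch of the idea, not Pydoll's actual implementation):

```python
def looks_like_xpath(selector: str) -> bool:
    # XPath expressions typically begin with '/', './', or '(',
    # while CSS selectors start with a tag name, '.', '#', or '['.
    s = selector.strip()
    return s.startswith(('/', './', '('))

looks_like_xpath('//div[@class="quote"]')  # True: treated as XPath
looks_like_xpath('.quote')                 # False: treated as CSS
```

In practice this means a field can take either selector style and the engine picks the right query mechanism without an explicit flag.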
+
+
+Nested models, transforms, and attribute extraction
+
+
+```python
+from datetime import datetime
+from pydoll.extractor import ExtractionModel, Field
+
+def parse_date(raw: str) -> datetime:
+ return datetime.strptime(raw.strip(), '%B %d, %Y')
+
+class Author(ExtractionModel):
+ name: str = Field(selector='.author-title')
+ born: datetime = Field(
+ selector='.author-born-date',
+ transform=parse_date,
+ )
+
+class Article(ExtractionModel):
+ title: str = Field(selector='h1')
+ url: str = Field(selector='.source-link', attribute='href')
+ author: Author = Field(selector='.author-card', description='Nested model')
+
+article = await tab.extract(Article, timeout=5)
+article.author.born.year # int — types are preserved all the way down
+```
+## Features
+
-Page Bundles
+Humanized Mouse Movement
-Save the current page and all its assets (CSS, JS, images, fonts) as a `.zip` bundle for offline viewing. Optionally inline everything into a single HTML file.
+Mouse operations produce human-like cursor movement by default:
+
+- **Bezier curve paths** with asymmetric control points
+- **Fitts's Law timing**: duration scales with distance
+- **Minimum-jerk velocity**: bell-shaped speed profile
+- **Physiological tremor**: Gaussian noise scaled with velocity
+- **Overshoot correction**: ~70% chance on fast movements, then corrects back
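The timing and velocity profiles above can be sketched in a few lines (an illustrative model of the math; the helper names are not Pydoll internals):

```python
import math

def minimum_jerk(t: float, duration: float, start: float, end: float) -> float:
    # The 10t^3 - 15t^4 + 6t^5 polynomial has zero velocity and
    # acceleration at both endpoints, producing the bell-shaped speed
    # profile of natural human reaches.
    tau = min(max(t / duration, 0.0), 1.0)  # normalized time in [0, 1]
    s = 10 * tau**3 - 15 * tau**4 + 6 * tau**5
    return start + (end - start) * s

def fitts_duration(distance: float, width: float = 16.0,
                   a: float = 0.1, b: float = 0.15) -> float:
    # Fitts's Law: movement time grows with log2(D/W + 1),
    # so farther targets take longer to reach.
    return a + b * math.log2(distance / width + 1)

minimum_jerk(0.5, 1.0, 0, 100)  # 50.0: by symmetry, halfway in time is halfway in space
```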
```python
-await tab.save_bundle('page.zip')
-await tab.save_bundle('page-inline.zip', inline_assets=True)
+await tab.mouse.move(500, 300)
+await tab.mouse.click(500, 300)
+await tab.mouse.drag(100, 200, 500, 400)
+
+button = await tab.find(id='submit')
+await button.click()
+
+# Opt out when speed matters
+await tab.mouse.click(500, 300, humanize=False)
```
-[Screenshots, PDFs & Bundles Docs](https://pydoll.tech/docs/features/automation/screenshots-and-pdfs/)
+[Mouse Control Docs](https://pydoll.tech/docs/features/automation/mouse-control/)
@@ -139,75 +208,46 @@ Highlights:
- `deep=True` traverses cross-origin iframes (OOPIFs)
- Standard `find()`, `query()`, `click()` API inside shadow roots
-```python
-# Cloudflare Turnstile inside a cross-origin iframe
-shadow_roots = await tab.find_shadow_roots(deep=True, timeout=10)
-for sr in shadow_roots:
- checkbox = await sr.query('input[type="checkbox"]', raise_exc=False)
- if checkbox:
- await checkbox.click()
-```
-
[Shadow DOM Docs](https://pydoll.tech/docs/deep-dive/architecture/shadow-dom/)
-Humanized Mouse Movement
+HAR Network Recording
-Mouse operations produce human-like cursor movement by default:
-
-- **Bezier curve paths** with asymmetric control points
-- **Fitts's Law timing**: duration scales with distance
-- **Minimum-jerk velocity**: bell-shaped speed profile
-- **Physiological tremor**: Gaussian noise scaled with velocity
-- **Overshoot correction**: ~70% chance on fast movements, then corrects back
+Record network activity during a browser session and export as HAR 1.2. Replay recorded requests to reproduce exact API sequences.
```python
-await tab.mouse.move(500, 300)
-await tab.mouse.click(500, 300)
-await tab.mouse.drag(100, 200, 500, 400)
-
-button = await tab.find(id='submit')
-await button.click()
-
-# Opt out when speed matters
-await tab.mouse.click(500, 300, humanize=False)
-```
+from pydoll.browser.chromium import Chrome
-[Mouse Control Docs](https://pydoll.tech/docs/features/automation/mouse-control/)
-
+async with Chrome() as browser:
+ tab = await browser.start()
-## Getting Started
+ async with tab.request.record() as capture:
+ await tab.go_to('https://example.com')
-```python
-import asyncio
-from pydoll.browser import Chrome
-from pydoll.constants import Key
+ capture.save('flow.har')
+ print(f'Captured {len(capture.entries)} requests')
-async def google_search(query: str):
- async with Chrome() as browser:
- tab = await browser.start()
- await tab.go_to('https://www.google.com')
+ responses = await tab.request.replay('flow.har')
+```
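For orientation, the exported file is plain JSON following the HAR 1.2 shape: a top-level `log` with a `version`, a `creator`, and one entry per captured request. An abridged sketch (real exports carry many more fields per entry):

```python
import json

# Abridged HAR 1.2 skeleton; real entries also include headers,
# timings, cache info, and body metadata.
har = {
    'log': {
        'version': '1.2',
        'creator': {'name': 'example-recorder', 'version': '0.1'},
        'entries': [
            {
                'startedDateTime': '2026-01-01T00:00:00.000Z',
                'time': 42.0,
                'request': {'method': 'GET', 'url': 'https://example.com/'},
                'response': {'status': 200, 'statusText': 'OK'},
            }
        ],
    }
}

serialized = json.dumps(har, indent=2)
```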
- search_box = await tab.find(tag_name='textarea', name='q')
- await search_box.insert_text(query)
- await tab.keyboard.press(Key.ENTER)
+[HAR Recording Docs](https://pydoll.tech/docs/features/network/network-recording/)
+
- first_result = await tab.find(
- tag_name='h3',
- text='autoscrape-labs/pydoll',
- timeout=10,
- )
- await first_result.click()
+
+Page Bundles
+
- await tab.find(id='repository-container-header', timeout=10)
- print(f"Page loaded: {await tab.title}")
+Save the current page and all its assets (CSS, JS, images, fonts) as a `.zip` bundle for offline viewing. Optionally inline everything into a single HTML file.
-asyncio.run(google_search('pydoll site:github.com'))
+```python
+await tab.save_bundle('page.zip')
+await tab.save_bundle('page-inline.zip', inline_assets=True)
```
-## Features
+[Screenshots, PDFs & Bundles Docs](https://pydoll.tech/docs/features/automation/screenshots-and-pdfs/)
+Hybrid Automation (UI + API)
From af7cc3cf3d80c8bb2eb3852a5bfd9867b174e33b Mon Sep 17 00:00:00 2001
From: Thalison Fernandes
Date: Sun, 22 Mar 2026 17:54:47 -0300
Subject: [PATCH 17/21] docs: update landing pages with extractor examples in
all languages
---
docs/en/index.md | 177 ++++++++++++++++++++++++++--------------------
docs/pt/index.md | 164 ++++++++++++++++++++++++------------------
docs/zh/index.md | 180 +++++++++++++++++++++++++++--------------------
3 files changed, 301 insertions(+), 220 deletions(-)
diff --git a/docs/en/index.md b/docs/en/index.md
index dd8e94f4..18c20454 100644
--- a/docs/en/index.md
+++ b/docs/en/index.md
@@ -50,6 +50,7 @@ $ pip install git+https://github.com/autoscrape-labs/pydoll.git
- **Powerful Network Monitoring**: Intercept, modify, and analyze all network traffic with ease, giving you complete control over requests.
- **Event-Driven Architecture**: React to page events, network requests, and user interactions in real-time.
- **Intuitive Element Finding**: Modern `find()` and `query()` methods that make sense and work as you'd expect.
+- **Structured Extraction**: Define a [Pydantic](https://docs.pydantic.dev/) model, call `tab.extract()`, get typed and validated data back. No manual element-by-element querying.
- **Robust Type Safety**: Comprehensive type system for better IDE support and error prevention.
@@ -57,9 +58,11 @@ Ready to dive in? The following pages will guide you through installation, basic
Let's start automating the web, the right way! 🚀
-## Quick Start Guide: A simple example
+## Quick Start Guide
-Let's start with a practical example. The following script will open the Pydoll GitHub repository and star it:
+### 1. Stateful Automation & Evasion
+
+When you need to navigate, bypass challenges, or interact with dynamic UI, Pydoll's imperative API handles it with humanized timing by default.
```python
import asyncio
@@ -69,7 +72,8 @@ async def main():
async with Chrome() as browser:
tab = await browser.start()
await tab.go_to('https://github.com/autoscrape-labs/pydoll')
-
+
+ # Find elements and interact with human-like timing
star_button = await tab.find(
tag_name='button',
timeout=5,
@@ -85,101 +89,123 @@ async def main():
asyncio.run(main())
```
-This example demonstrates how to navigate to a website, wait for an element to appear, and interact with it. You can adapt this pattern to automate many different web tasks.
+### 2. Structured Data Extraction
-??? note "Or use without context manager..."
- If you prefer not to use the context manager pattern, you can manually manage the browser instance:
-
- ```python
- import asyncio
- from pydoll.browser.chromium import Chrome
-
- async def main():
- browser = Chrome()
+Once you reach the target page, switch to the declarative engine. Define what you want with a model, and Pydoll extracts it — typed, validated, and ready to use.
+
+```python
+import asyncio
+from pydoll.browser.chromium import Chrome
+from pydoll.extractor import ExtractionModel, Field
+
+class Quote(ExtractionModel):
+ text: str = Field(selector='.text', description='The quote text')
+ author: str = Field(selector='.author', description='Who said it')
+ tags: list[str] = Field(selector='.tag', description='Tags')
+ year: int | None = Field(selector='.year', description='Year', default=None)
+
+async def extract_quotes():
+ async with Chrome() as browser:
tab = await browser.start()
- await tab.go_to('https://github.com/autoscrape-labs/pydoll')
-
- star_button = await tab.find(
- tag_name='button',
- timeout=5,
- raise_exc=False
- )
- if not star_button:
- print("Ops! The button was not found.")
- return
+ await tab.go_to('https://quotes.toscrape.com')
- await star_button.click()
- await asyncio.sleep(3)
- await browser.stop()
-
- asyncio.run(main())
- ```
-
- Note that when not using the context manager, you'll need to explicitly call `browser.stop()` to release resources.
+ quotes = await tab.extract_all(Quote, scope='.quote', timeout=5)
+
+ for q in quotes:
+ print(f'{q.author}: {q.text}') # fully typed, IDE autocomplete works
+ print(q.tags) # list[str], not a raw element
+ print(q.model_dump_json()) # pydantic serialization built-in
-## Extended Example: Custom Browser Configuration
+asyncio.run(extract_quotes())
+```
+
+Models support CSS/XPath auto-detection, HTML attribute targeting, custom transforms, and nested models.
+
+??? note "Nested models, transforms, and attribute extraction"
+ ```python
+ from datetime import datetime
+ from pydoll.extractor import ExtractionModel, Field
+
+ def parse_date(raw: str) -> datetime:
+ return datetime.strptime(raw.strip(), '%B %d, %Y')
+
+ class Author(ExtractionModel):
+ name: str = Field(selector='.author-title')
+ born: datetime = Field(
+ selector='.author-born-date',
+ transform=parse_date,
+ )
-For more advanced usage scenarios, Pydoll allows you to customize your browser configuration using the `ChromiumOptions` class. This is useful when you need to:
+ class Article(ExtractionModel):
+ title: str = Field(selector='h1')
+ url: str = Field(selector='.source-link', attribute='href')
+ author: Author = Field(selector='.author-card', description='Nested model')
-- Run in headless mode (no visible browser window)
-- Specify a custom browser executable path
-- Configure proxies, user agents, or other browser settings
-- Set window dimensions or startup arguments
+ article = await tab.extract(Article, timeout=5)
+ article.author.born.year # int — types are preserved all the way down
+ ```
+
+## Extended Example: Combining Both Approaches
-Here's an example showing how to use custom options for Chrome:
+A real-world scraping task typically combines both approaches: imperative automation to navigate and bypass challenges, then declarative extraction to collect structured data.
-```python hl_lines="8-12 30-32 34-38"
+```python
import asyncio
-import os
+from typing import Optional
+
from pydoll.browser.chromium import Chrome
from pydoll.browser.options import ChromiumOptions
+from pydoll.extractor import ExtractionModel, Field
+
+
+class GitHubRepo(ExtractionModel):
+ name: str = Field(
+ selector='[itemprop="name"] a',
+ description='Repository name',
+ )
+ description: Optional[str] = Field(
+ selector='[itemprop="description"]',
+ description='Repository description',
+ default=None,
+ )
+ language: Optional[str] = Field(
+ selector='[itemprop="programmingLanguage"]',
+ description='Primary programming language',
+ default=None,
+ )
+
async def main():
options = ChromiumOptions()
- options.binary_location = '/usr/bin/google-chrome-stable'
options.add_argument('--headless=new')
- options.add_argument('--start-maximized')
- options.add_argument('--disable-notifications')
-
+
async with Chrome(options=options) as browser:
tab = await browser.start()
- await tab.go_to('https://github.com/autoscrape-labs/pydoll')
-
- star_button = await tab.find(
- tag_name='button',
- timeout=5,
- raise_exc=False
- )
- if not star_button:
- print("Ops! The button was not found.")
- return
- await star_button.click()
- await asyncio.sleep(3)
+ # 1. Navigate and interact (imperative)
+ await tab.go_to('https://github.com/autoscrape-labs')
- screenshot_path = os.path.join(os.getcwd(), 'pydoll_repo.png')
- await tab.take_screenshot(path=screenshot_path)
- print(f"Screenshot saved to: {screenshot_path}")
-
- base64_screenshot = await tab.take_screenshot(as_base64=True)
-
- repo_description_element = await tab.find(
- class_name='f4.my-3'
+ # 2. Extract structured data (declarative)
+ repos = await tab.extract_all(
+ GitHubRepo,
+ scope='article.Box-row',
+ timeout=10,
)
- repo_description = await repo_description_element.text
- print(f"Repository description: {repo_description}")
+
+ for repo in repos:
+ print(f'{repo.name} ({repo.language}): {repo.description}')
+ print(repo.model_dump_json())
if __name__ == "__main__":
asyncio.run(main())
```
-This extended example demonstrates:
+This example demonstrates:
-1. Creating and configuring browser options
-2. Setting a custom Chrome binary path
-3. Enabling headless mode for invisible operation
-4. Setting additional browser flags
-5. Taking screenshots (especially useful in headless mode)
+1. Defining a typed model for GitHub repository data
+2. Configuring headless mode for invisible operation
+3. Using `extract_all` to collect multiple repositories at once
+4. Getting fully typed objects with IDE autocomplete and pydantic serialization
??? info "About Chromium Options"
The `options.add_argument()` method allows you to pass any Chromium command-line argument to customize browser behavior. There are hundreds of available options to control everything from networking to rendering behavior.
@@ -232,10 +258,11 @@ Pydoll relies on just a few carefully selected packages:
```
python = "^3.10"
-websockets = "^13.1"
+websockets = "^14"
aiohttp = "^3.9.5"
-aiofiles = "^23.2.1"
-bs4 = "^0.0.2"
+aiofiles = "^25.1.0"
+pydantic = "^2.0"
+typing_extensions = "^4.14.0"
```
That's it! This minimal dependency approach means:
diff --git a/docs/pt/index.md b/docs/pt/index.md
index 048de490..58a8e44e 100644
--- a/docs/pt/index.md
+++ b/docs/pt/index.md
@@ -50,6 +50,7 @@ $ pip install git+https://github.com/autoscrape-labs/pydoll.git
- **Monitoramento de Rede Poderoso**: Intercepte, modifique e analise todo o tráfego de rede com facilidade, dando a você controle total sobre as requisições.
- **Arquitetura Orientada a Eventos**: Reaja a eventos da página, requisições de rede e interações do usuário em tempo real.
- **Localização de Elementos Intuitiva**: Métodos modernos `find()` e `query()` que fazem sentido e funcionam como você esperaria.
+- **Extração Estruturada**: Defina um modelo [Pydantic](https://docs.pydantic.dev/), chame `tab.extract()` e receba dados tipados e validados. Sem consulta manual elemento por elemento.
- **Segurança de Tipos Robusta**: Sistema de tipos abrangente para melhor suporte da IDE e prevenção de erros.
@@ -57,9 +58,11 @@ Pronto para começar? As páginas a seguir guiarão você pela instalação, uso
Vamos começar a automatizar a web, da maneira certa! 🚀
-## Guia de Início Rápido: Um exemplo simples
+## Guia de Início Rápido
-Vamos começar com um exemplo prático. O script a seguir abrirá o repositório Pydoll no GitHub e o marcará como favorito:
+### 1. Automação Stateful e Evasão
+
+Quando você precisa navegar, contornar desafios ou interagir com UIs dinâmicas, a API imperativa do Pydoll cuida de tudo com timing humanizado por padrão.
```python
import asyncio
@@ -70,6 +73,7 @@ async def main():
tab = await browser.start()
await tab.go_to('https://github.com/autoscrape-labs/pydoll')
+ # Encontra elementos e interage com timing humano
star_button = await tab.find(
tag_name='button',
timeout=5,
@@ -85,100 +89,123 @@ async def main():
asyncio.run(main())
```
-Este exemplo demonstra como navegar até um site, esperar que um elemento apareça e interagir com ele. Você pode adaptar esse padrão para automatizar diversas tarefas web.
+### 2. Extração Estruturada de Dados
-??? note "Ou use sem o gerenciador de contexto..."
- Se preferir não usar o padrão de gerenciador de contexto, você pode gerenciar a instância do navegador manualmente:
- ```python
- import asyncio
- from pydoll.browser.chromium import Chrome
+Ao chegar na página alvo, mude para o motor declarativo. Defina o que você quer com um modelo, e o Pydoll extrai — tipado, validado e pronto para uso.
+
+```python
+import asyncio
+from pydoll.browser.chromium import Chrome
+from pydoll.extractor import ExtractionModel, Field
- async def main():
- browser = Chrome()
+class Quote(ExtractionModel):
+ text: str = Field(selector='.text', description='O texto da citação')
+ author: str = Field(selector='.author', description='Quem disse')
+ tags: list[str] = Field(selector='.tag', description='Tags')
+ year: int | None = Field(selector='.year', description='Ano', default=None)
+
+async def extract_quotes():
+ async with Chrome() as browser:
tab = await browser.start()
- await tab.go_to('https://github.com/autoscrape-labs/pydoll')
+ await tab.go_to('https://quotes.toscrape.com')
- star_button = await tab.find(
- tag_name='button',
- timeout=5,
- raise_exc=False
- )
- if not star_button:
- print("Ops! O botão não foi encontrado.")
- return
+ quotes = await tab.extract_all(Quote, scope='.quote', timeout=5)
- await star_button.click()
- await asyncio.sleep(3)
- await browser.stop()
+ for q in quotes:
+ print(f'{q.author}: {q.text}') # totalmente tipado, autocomplete da IDE funciona
+ print(q.tags) # list[str], não um elemento bruto
+ print(q.model_dump_json()) # serialização pydantic embutida
- asyncio.run(main())
- ```
- Observe que, ao não usar o gerenciador de contexto, você precisará chamar explicitamente `browser.stop()` para liberar os recursos.
+asyncio.run(extract_quotes())
+```
+
+Modelos suportam auto-detecção CSS/XPath, extração de atributos HTML, transforms customizados e modelos aninhados.
+??? note "Modelos aninhados, transforms e extração de atributos"
+ ```python
+ from datetime import datetime
+ from pydoll.extractor import ExtractionModel, Field
+
+ def parse_date(raw: str) -> datetime:
+ return datetime.strptime(raw.strip(), '%B %d, %Y')
+
+ class Author(ExtractionModel):
+ name: str = Field(selector='.author-title')
+ born: datetime = Field(
+ selector='.author-born-date',
+ transform=parse_date,
+ )
-## Exemplo Estendido: Configuração personalizada do navegador
+ class Article(ExtractionModel):
+ title: str = Field(selector='h1')
+ url: str = Field(selector='.source-link', attribute='href')
+ author: Author = Field(selector='.author-card', description='Modelo aninhado')
-Para cenários de uso mais avançados, o Pydoll permite personalizar a configuração do seu navegador usando a classe `ChromiumOptions`. Isso é útil quando você precisa:
+ article = await tab.extract(Article, timeout=5)
+ article.author.born.year # int — tipos preservados em toda a cadeia
+ ```
-- Executar em modo headless (sem janela do navegador visível)
-- Especificar um caminho personalizado para o executável do navegador
-- Configurar proxies, user agents ou outras configurações do navegador
-- Definir as dimensões da janela ou argumentos de inicialização
+## Exemplo Estendido: Combinando as Duas Abordagens
-Aqui está um exemplo mostrando como usar opções personalizadas para o Chrome:
+Uma tarefa real de scraping tipicamente combina as duas abordagens: automação imperativa para navegar e contornar desafios, depois extração declarativa para coletar dados estruturados.
-```python hl_lines="8-12 30-32 34-38"
+```python
import asyncio
-import os
+from typing import Optional
+
from pydoll.browser.chromium import Chrome
from pydoll.browser.options import ChromiumOptions
+from pydoll.extractor import ExtractionModel, Field
+
+
+class GitHubRepo(ExtractionModel):
+ name: str = Field(
+ selector='[itemprop="name"] a',
+ description='Nome do repositório',
+ )
+ description: Optional[str] = Field(
+ selector='[itemprop="description"]',
+ description='Descrição do repositório',
+ default=None,
+ )
+ language: Optional[str] = Field(
+ selector='[itemprop="programmingLanguage"]',
+ description='Linguagem de programação principal',
+ default=None,
+ )
+
async def main():
options = ChromiumOptions()
- options.binary_location = '/usr/bin/google-chrome-stable'
options.add_argument('--headless=new')
- options.add_argument('--start-maximized')
- options.add_argument('--disable-notifications')
async with Chrome(options=options) as browser:
tab = await browser.start()
- await tab.go_to('https://github.com/autoscrape-labs/pydoll')
- star_button = await tab.find(
- tag_name='button',
- timeout=5,
- raise_exc=False
- )
- if not star_button:
- print("Ops! O botão não foi encontrado.")
- return
+ # 1. Navegar e interagir (imperativo)
+ await tab.go_to('https://github.com/autoscrape-labs')
- await star_button.click()
- await asyncio.sleep(3)
-
- screenshot_path = os.path.join(os.getcwd(), 'pydoll_repo.png')
- await tab.take_screenshot(path=screenshot_path)
- print(f"Captura de tela salva em: {screenshot_path}")
-
- base64_screenshot = await tab.take_screenshot(as_base64=True)
-
- repo_description_element = await tab.find(
- class_name='f4.my-3'
+ # 2. Extrair dados estruturados (declarativo)
+ repos = await tab.extract_all(
+ GitHubRepo,
+ scope='article.Box-row',
+ timeout=10,
)
- repo_description = await repo_description_element.text
- print(f"Descrição do repositório: {repo_description}")
+
+ for repo in repos:
+ print(f'{repo.name} ({repo.language}): {repo.description}')
+ print(repo.model_dump_json())
if __name__ == "__main__":
asyncio.run(main())
```
-Este exemplo estendido demonstra:
+Este exemplo demonstra:
-1. Criação e configuração de opções do navegador
-2. Definição de um caminho personalizado para o binário do Chrome
-3. Habilitação do modo headless para operação invisível
-4. Definição de sinalizadores adicionais do navegador
-5. Captura de tela (especialmente útil em modo headless) modo)
+1. Definição de um modelo tipado para dados de repositórios GitHub
+2. Configuração do modo headless para operação invisível
+3. Uso de `extract_all` para coletar múltiplos repositórios de uma vez
+4. Objetos totalmente tipados com autocomplete da IDE e serialização pydantic
??? info "Sobre as Opções do Chromium"
O método `options.add_argument()` permite que você passe qualquer argumento de linha de comando do Chromium para personalizar o comportamento do navegador. Existem centenas de opções disponíveis para controlar tudo, desde rede até comportamento de renderização.
@@ -231,10 +258,11 @@ O Pydoll depende de apenas alguns pacotes cuidadosamente selecionados:
```
python = "^3.10"
-websockets = "^13.1"
+websockets = "^14"
aiohttp = "^3.9.5"
-aiofiles = "^23.2.1"
-bs4 = "^0.0.2"
+aiofiles = "^25.1.0"
+pydantic = "^2.0"
+typing_extensions = "^4.14.0"
```
É só isso! Essa dependência mínima do Pydoll significa:
diff --git a/docs/zh/index.md b/docs/zh/index.md
index e1ba73f4..323fcf80 100644
--- a/docs/zh/index.md
+++ b/docs/zh/index.md
@@ -50,6 +50,7 @@ $ pip install git+https://github.com/autoscrape-labs/pydoll.git
- **强大的网络监控**: 轻松实现请求拦截、流量篡改与响应分析,完整掌控网络通信链路,轻松突破层层防护体系。
- **事件驱动架构**: 实时响应页面事件、网络请求与用户交互,构建能动态适应防护系统的智能自动化流。
- **直观的元素定位**: 使用符合人类直觉的定位方法 `find()` 和 `query()` ,面对动态加载的防护内容,定位依然精准。
+- **结构化提取**: 定义 [Pydantic](https://docs.pydantic.dev/) 模型,调用 `tab.extract()`,获取类型化和验证过的数据。无需逐元素手动查询。
- **强类型安全**: 完备的类型系统为复杂自动化场景提供更优IDE支持和更好地预防运行时报错。
@@ -57,9 +58,11 @@ $ pip install git+https://github.com/autoscrape-labs/pydoll.git
让我们以最优雅的方式,开启您的网页自动化之旅!🚀
-## 简单的例子上手
+## 快速入门
-让我们从一个实际案例开始。以下脚本将打开 Pydoll 的 GitHub 仓库并star:
+### 1. 有状态自动化与规避
+
+当您需要导航、绕过挑战或与动态UI交互时,Pydoll的命令式API默认以人性化的时序处理一切。
```python
import asyncio
@@ -69,14 +72,15 @@ async def main():
async with Chrome() as browser:
tab = await browser.start()
await tab.go_to('https://github.com/autoscrape-labs/pydoll')
-
+
+ # 查找元素并以人类般的时序进行交互
star_button = await tab.find(
tag_name='button',
timeout=5,
raise_exc=False
)
if not star_button:
- print("Ops! The button was not found.")
+ print("按钮未找到。")
return
await star_button.click()
@@ -85,102 +89,123 @@ async def main():
asyncio.run(main())
```
-此示例演示了如何导航到网站、等待元素出现并与之交互。您可以使用这样的模式来自动执行许多不同的 Web 任务。
+### 2. 结构化数据提取
-??? note "或者使用不带上下文管理器的..."
- 如果你不想要使用上下文管理器模式,你可以手动管理浏览器实例:
-
- ```python
- import asyncio
- from pydoll.browser.chromium import Chrome
-
- async def main():
- browser = Chrome()
+到达目标页面后,切换到声明式引擎。用模型定义您想要的数据,Pydoll会提取它——类型化、验证过、随时可用。
+
+```python
+import asyncio
+from pydoll.browser.chromium import Chrome
+from pydoll.extractor import ExtractionModel, Field
+
+class Quote(ExtractionModel):
+ text: str = Field(selector='.text', description='引用文本')
+ author: str = Field(selector='.author', description='作者')
+ tags: list[str] = Field(selector='.tag', description='标签')
+ year: int | None = Field(selector='.year', description='年份', default=None)
+
+async def extract_quotes():
+ async with Chrome() as browser:
tab = await browser.start()
- await tab.go_to('https://github.com/autoscrape-labs/pydoll')
-
- star_button = await tab.find(
- tag_name='button',
- timeout=5,
- raise_exc=False
- )
- if not star_button:
- print("Ops! The button was not found.")
- return
+ await tab.go_to('https://quotes.toscrape.com')
- await star_button.click()
- await asyncio.sleep(3)
- await browser.stop()
-
- asyncio.run(main())
- ```
-
- Note that when not using the context manager, you'll need to explicitly call `browser.stop()` to release resources.
+ quotes = await tab.extract_all(Quote, scope='.quote', timeout=5)
-## 补充例子: 自定义浏览器配置
+ for q in quotes:
+ print(f'{q.author}: {q.text}') # 完全类型化,IDE自动补全
+ print(q.tags) # list[str],不是原始元素
+ print(q.model_dump_json()) # 内置pydantic序列化
-对于更高级的使用场景,Pydoll 允许您使用 `ChromiumOptions` 类自定义浏览器配置。此功能在您需要执行以下操作时非常有用:
+asyncio.run(extract_quotes())
+```
-- 在无头模式下运行(无可见浏览器窗口)
-- 指定自定义浏览器可执行文件路径
-- 配置代理、用户代理或其他浏览器设置
-- 设置窗口尺寸或启动参数
+模型支持CSS/XPath自动检测、HTML属性提取、自定义转换函数和嵌套模型。
-以下示例展示了如何使用 Chrome 的自定义选项:
+??? note "嵌套模型、转换函数和属性提取"
+ ```python
+ from datetime import datetime
+ from pydoll.extractor import ExtractionModel, Field
-```python hl_lines="8-12 30-32 34-38"
+ def parse_date(raw: str) -> datetime:
+ return datetime.strptime(raw.strip(), '%B %d, %Y')
+
+ class Author(ExtractionModel):
+ name: str = Field(selector='.author-title')
+ born: datetime = Field(
+ selector='.author-born-date',
+ transform=parse_date,
+ )
+
+ class Article(ExtractionModel):
+ title: str = Field(selector='h1')
+ url: str = Field(selector='.source-link', attribute='href')
+ author: Author = Field(selector='.author-card', description='嵌套模型')
+
+ article = await tab.extract(Article, timeout=5)
+ article.author.born.year # int — 类型在整个链中保持一致
+ ```
+
+## 扩展示例:结合两种方式
+
+实际的抓取任务通常结合两种方式:命令式自动化用于导航和绕过挑战,然后声明式提取用于收集结构化数据。
+
+```python
import asyncio
-import os
+from typing import Optional
+
from pydoll.browser.chromium import Chrome
from pydoll.browser.options import ChromiumOptions
+from pydoll.extractor import ExtractionModel, Field
+
+
+class GitHubRepo(ExtractionModel):
+ name: str = Field(
+ selector='[itemprop="name"] a',
+ description='仓库名称',
+ )
+ description: Optional[str] = Field(
+ selector='[itemprop="description"]',
+ description='仓库描述',
+ default=None,
+ )
+ language: Optional[str] = Field(
+ selector='[itemprop="programmingLanguage"]',
+ description='主要编程语言',
+ default=None,
+ )
+
async def main():
options = ChromiumOptions()
- options.binary_location = '/usr/bin/google-chrome-stable'
options.add_argument('--headless=new')
- options.add_argument('--start-maximized')
- options.add_argument('--disable-notifications')
-
+
async with Chrome(options=options) as browser:
tab = await browser.start()
- await tab.go_to('https://github.com/autoscrape-labs/pydoll')
-
- star_button = await tab.find(
- tag_name='button',
- timeout=5,
- raise_exc=False
- )
- if not star_button:
- print("Ops! The button was not found.")
- return
- await star_button.click()
- await asyncio.sleep(3)
-
- screenshot_path = os.path.join(os.getcwd(), 'pydoll_repo.png')
- await tab.take_screenshot(path=screenshot_path)
- print(f"Screenshot saved to: {screenshot_path}")
+ # 1. 导航和交互(命令式)
+ await tab.go_to('https://github.com/autoscrape-labs')
- base64_screenshot = await tab.take_screenshot(as_base64=True)
-
- repo_description_element = await tab.find(
- class_name='f4.my-3'
+ # 2. 提取结构化数据(声明式)
+ repos = await tab.extract_all(
+ GitHubRepo,
+ scope='article.Box-row',
+ timeout=10,
)
- repo_description = await repo_description_element.text
- print(f"Repository description: {repo_description}")
+
+ for repo in repos:
+ print(f'{repo.name} ({repo.language}): {repo.description}')
+ print(repo.model_dump_json())
if __name__ == "__main__":
asyncio.run(main())
```
+此示例演示了:
-此扩展示例演示了:
-
-1. 创建和配置浏览器选项
-2. 设置自定义Chrome可执行程序路径
-3. 启用无头模式以实现无痕操作
-4. 设置其他浏览器命令行flags
-5. 屏幕截图(在无头模式下尤其有用)
+1. 为GitHub仓库数据定义类型化模型
+2. 配置无头模式以实现无痕操作
+3. 使用 `extract_all` 一次性收集多个仓库
+4. 获取完全类型化的对象,支持IDE自动补全和pydantic序列化
??? info "关于Chrome配置选项"
    `options.add_argument()` 方法允许您传递任何 Chromium 命令行参数来自定义浏览器行为。有数百个可用选项,可控制从网络到渲染行为的所有内容。
@@ -233,10 +258,11 @@ Pydoll仅依赖少量的核心库:
```
python = "^3.10"
-websockets = "^13.1"
+websockets = "^14"
aiohttp = "^3.9.5"
-aiofiles = "^23.2.1"
-bs4 = "^0.0.2"
+aiofiles = "^25.1.0"
+pydantic = "^2.0"
+typing_extensions = "^4.14.0"
```
这种极简依赖策略带来五大核心优势:
From 35f489840faecac84a4f1e9a1dc2ae8a2a6cd322 Mon Sep 17 00:00:00 2001
From: Thalison Fernandes
Date: Sun, 22 Mar 2026 17:54:52 -0300
Subject: [PATCH 18/21] docs(extractor): add structured extraction guide in en,
pt, zh
---
.../extraction/structured-extraction.md | 329 ++++++++++++++++++
docs/en/features/index.md | 6 +
.../extraction/structured-extraction.md | 329 ++++++++++++++++++
docs/pt/features/index.md | 6 +
.../extraction/structured-extraction.md | 329 ++++++++++++++++++
docs/zh/features/index.md | 6 +
mkdocs.yml | 2 +
7 files changed, 1007 insertions(+)
create mode 100644 docs/en/features/extraction/structured-extraction.md
create mode 100644 docs/pt/features/extraction/structured-extraction.md
create mode 100644 docs/zh/features/extraction/structured-extraction.md
diff --git a/docs/en/features/extraction/structured-extraction.md b/docs/en/features/extraction/structured-extraction.md
new file mode 100644
index 00000000..8cb43d50
--- /dev/null
+++ b/docs/en/features/extraction/structured-extraction.md
@@ -0,0 +1,329 @@
+# Structured Data Extraction
+
+Pydoll's extraction engine lets you define **what** you want from a page using typed models, and handles the **how** automatically. Instead of manually querying elements one by one, you declare a model with selectors and call `tab.extract()`. The result is a fully typed, validated Python object powered by [Pydantic](https://docs.pydantic.dev/).
+
+## Why Use Structured Extraction?
+
+Traditional scraping code tends to grow into a tangled mess of `find()` calls, `await element.text`, attribute reads, and manual type conversions scattered across dozens of lines. When the page changes, you hunt through that code to find which selector broke.
+
+With structured extraction, all your selectors live in one place (the model), the types are enforced automatically, and the output is a clean Pydantic object with IDE autocomplete and serialization built in.
+
+## Basic Usage
+
+### Defining a Model
+
+An extraction model is a class that inherits from `ExtractionModel`. Each field uses `Field()` to declare a CSS or XPath selector.
+
+```python
+from pydoll.extractor import ExtractionModel, Field
+
+class Quote(ExtractionModel):
+ text: str = Field(selector='.text', description='The quote text')
+ author: str = Field(selector='.author', description='Who said it')
+ tags: list[str] = Field(selector='.tag', description='Associated tags')
+```
+
+The `selector` parameter accepts both CSS selectors and XPath expressions. Pydoll auto-detects the type, exactly like `tab.query()`.
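+
+The auto-detection rule is simple enough to sketch. The following is a hypothetical illustration of the heuristic described later in this guide (XPath expressions start with `/` or `./`), not Pydoll's actual implementation:
+
```python
# Hypothetical sketch of the selector auto-detection heuristic described
# in this guide: expressions starting with '/' or './' are treated as
# XPath, everything else as CSS. The real engine may perform extra checks.
def looks_like_xpath(selector: str) -> bool:
    return selector.startswith(('/', './'))

print(looks_like_xpath('//h3[@class="title"]'))  # True  -> XPath
print(looks_like_xpath('.quote'))                # False -> CSS
```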
+
+### Extracting a Single Item
+
+Use `tab.extract()` to populate one model instance from the page:
+
+```python
+from pydoll.browser.chromium import Chrome
+
+async with Chrome() as browser:
+ tab = await browser.start()
+ await tab.go_to('https://example.com/article')
+
+ article = await tab.extract(Article)
+ print(article.title) # str, fully typed
+ print(article.model_dump()) # dict via pydantic
+```
+
+### Extracting Multiple Items
+
+Use `tab.extract_all()` with a `scope` selector that identifies the repeating container. Each match generates one model instance, with fields resolved relative to that container.
+
+```python
+quotes = await tab.extract_all(Quote, scope='.quote')
+
+for q in quotes:
+ print(f'{q.author}: {q.text}')
+ print(q.tags)
+```
+
+You can limit the number of results:
+
+```python
+top_5 = await tab.extract_all(Quote, scope='.quote', limit=5)
+```
+
+## Field Options
+
+The `Field()` function accepts the following parameters:
+
+| Parameter | Type | Description |
+|---------------|-------------------------|--------------------------------------------------------------|
+| `selector` | `str` or `None` | CSS or XPath selector (auto-detected) |
+| `attribute` | `str` or `None` | HTML attribute to read instead of inner text |
+| `description` | `str` or `None` | Semantic description of the field |
+| `default` | any value | Default value when the element is not found |
+| `transform` | callable or `None` | Post-processing function applied to the raw string |
+
+At least one of `selector` or `description` must be provided. Fields with only `description` (no selector) are reserved for future LLM-based extraction and are skipped by the current CSS engine.
+
+## Attribute Extraction
+
+By default, the engine reads the element's visible text (`innerText`). To read an HTML attribute instead, use the `attribute` parameter:
+
+```python
+class Article(ExtractionModel):
+ title: str = Field(selector='h1', description='Title')
+ published: str = Field(
+ selector='time.date',
+ attribute='datetime',
+ description='ISO publication date',
+ )
+ image_url: str = Field(
+ selector='.hero img',
+ attribute='src',
+ description='Hero image URL',
+ )
+ link: str = Field(
+ selector='a.source',
+ attribute='href',
+ description='Source link',
+ )
+ image_id: str = Field(
+ selector='.hero img',
+ attribute='data-id',
+ description='Custom data attribute',
+ )
+```
+
+Any HTML attribute works, including `data-*`, `aria-*`, `href`, `src`, `alt`, and custom attributes.
+
+## Transforms
+
+The `transform` parameter takes a callable that receives the raw string from the DOM and returns the desired type. This is where you convert strings to numbers, parse dates, or clean up formatting.
+
+```python
+from datetime import datetime
+
+def parse_price(raw: str) -> float:
+ return float(raw.replace('R$', '').replace('.', '').replace(',', '.').strip())
+
+def parse_date(raw: str) -> datetime:
+ return datetime.strptime(raw.strip(), '%B %d, %Y')
+
+class Product(ExtractionModel):
+ name: str = Field(selector='.name', description='Product name')
+ price: float = Field(
+ selector='.price',
+ description='Price in BRL',
+ transform=parse_price,
+ )
+ release: datetime = Field(
+ selector='.release-date',
+ description='Release date',
+ transform=parse_date,
+ )
+```
+
+The transform runs **before** Pydantic validation, so the field type should match what the transform returns.
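+
+Because a transform is just a plain function, you can unit-test it without launching a browser. The helpers from the example above run standalone:
+
```python
from datetime import datetime

# The transform helpers from the example above, runnable on their own.
def parse_price(raw: str) -> float:
    return float(raw.replace('R$', '').replace('.', '').replace(',', '.').strip())

def parse_date(raw: str) -> datetime:
    return datetime.strptime(raw.strip(), '%B %d, %Y')

print(parse_price('R$ 1.234,56'))         # 1234.56
print(parse_date('March 22, 2026').year)  # 2026
```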
+
+## Nested Models
+
+When a field's type is another `ExtractionModel`, the engine uses the field's selector to find a scope element, then extracts the nested model's fields within that scope.
+
+```python
+class Author(ExtractionModel):
+ name: str = Field(selector='.name', description='Author name')
+ avatar: str = Field(
+ selector='img.avatar',
+ attribute='src',
+ description='Avatar URL',
+ )
+ bio: str = Field(selector='.bio', description='Short bio')
+
+class Article(ExtractionModel):
+ title: str = Field(selector='h1', description='Title')
+ author: Author = Field(
+ selector='.author-card',
+ description='Author information',
+ )
+```
+
+The `.author-card` selector defines the scope. The `Author` fields (`.name`, `img.avatar`, `.bio`) are resolved **inside** that element, not from the full page. This prevents selector collisions when the page has multiple `.name` elements in different sections.
+
+### Lists of Nested Models
+
+You can also extract a list of nested models:
+
+```python
+class Contributor(ExtractionModel):
+ name: str = Field(selector='.name', description='Contributor name')
+ role: str = Field(selector='.role', description='Role')
+
+class Project(ExtractionModel):
+ title: str = Field(selector='h1', description='Project title')
+ contributors: list[Contributor] = Field(
+ selector='.contributor',
+ description='Project contributors',
+ )
+```
+
+Each `.contributor` element becomes the scope for one `Contributor` instance.
+
+## Optional Fields and Defaults
+
+Fields that might not be present on every page should use `Optional` with a `default`:
+
+```python
+from typing import Optional
+
+class Article(ExtractionModel):
+ title: str = Field(selector='h1', description='Title')
+ subtitle: Optional[str] = Field(
+ selector='.subtitle',
+ description='Optional subtitle',
+ default=None,
+ )
+ category: str = Field(
+ selector='.category',
+ description='Category with fallback',
+ default='uncategorized',
+ )
+```
+
+When the element is not found:
+
+- Fields **with** a default silently use that default value.
+- Fields **without** a default (required) raise `FieldExtractionFailed`.
+
+Both `typing.Optional[str]` and the PEP 604 syntax `str | None` are supported.
+
+## Timeout and Waiting
+
+The `timeout` parameter controls how long the engine waits for elements to appear, in seconds. This is propagated to every internal query, including nested models and list fields.
+
+```python
+# Wait up to 10 seconds for elements to appear
+article = await tab.extract(Article, timeout=10)
+
+# No waiting (default), elements must already be in the DOM
+article = await tab.extract(Article)
+
+# Also works with extract_all
+quotes = await tab.extract_all(Quote, scope='.quote', timeout=5)
+```
+
+This uses the same polling mechanism as `tab.query(timeout=...)`, so there is no need for manual `asyncio.sleep()` calls between navigation and extraction.
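+
+Conceptually, such a poll loop looks like the sketch below. This is a simplified illustration, assuming a `check` callable that returns the element or `None`; Pydoll's internal mechanism may differ in detail:
+
```python
import asyncio

# Simplified sketch of a poll-until-found loop: call `check` repeatedly
# until it returns a value or the deadline passes.
async def poll(check, timeout: float, interval: float = 0.1):
    deadline = asyncio.get_running_loop().time() + timeout
    while True:
        result = check()
        if result is not None:
            return result
        if asyncio.get_running_loop().time() >= deadline:
            return None
        await asyncio.sleep(interval)

# Simulate an element that appears on the third poll.
attempts = iter([None, None, 'element'])
print(asyncio.run(poll(lambda: next(attempts), timeout=2.0)))  # element
```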
+
+## Scoped Extraction
+
+The `scope` parameter limits extraction to a specific region of the page:
+
+```python
+# Extract only from the main article, ignoring sidebar/footer
+article = await tab.extract(Article, scope='#main-article')
+
+# extract_all requires scope (it defines the repeating container)
+quotes = await tab.extract_all(Quote, scope='.quote')
+```
+
+## XPath Selectors
+
+XPath expressions are auto-detected (they start with `/` or `./`) and work everywhere CSS selectors work:
+
+```python
+class SearchResult(ExtractionModel):
+ title: str = Field(
+ selector='//h3[@class="title"]',
+ description='Result title via XPath',
+ )
+ url: str = Field(
+ selector='.//a',
+ attribute='href',
+ description='Result URL',
+ )
+```
+
+## Error Handling
+
+The extraction engine raises specific exceptions that you can catch and handle:
+
+```python
+from pydoll.extractor import FieldExtractionFailed, InvalidExtractionModel
+
+# InvalidExtractionModel: raised at model definition time
+# when a Field has neither selector nor description
+try:
+ class BadModel(ExtractionModel):
+ field: str = Field() # no selector, no description
+except InvalidExtractionModel:
+ print('Invalid model definition')
+
+# FieldExtractionFailed: raised at extraction time
+# when a required field's element is not found
+try:
+ result = await tab.extract(MyModel)
+except FieldExtractionFailed as e:
+ print(f'Extraction failed: {e}')
+```
+
+For optional fields, extraction failures are silently handled and the default value is used. Only required fields (those without a `default`) raise exceptions.
+
+## Pydantic Integration
+
+`ExtractionModel` inherits from `pydantic.BaseModel`, so all Pydantic features work out of the box:
+
+```python
+article = await tab.extract(Article)
+
+# Serialization
+article.model_dump() # dict
+article.model_dump_json() # JSON string
+
+# JSON Schema (useful for API docs or LLM prompts)
+Article.model_json_schema()
+
+# Validation happens automatically
+# If a transform returns the wrong type, Pydantic raises ValidationError
+```
+
+You can use any Pydantic feature in your models: validators, field aliases, model configuration, and more. The extraction engine adds the selector/transform layer on top without interfering with Pydantic's behavior.
+
+## Complete Example
+
+Here is a complete, runnable example that extracts quotes from [quotes.toscrape.com](https://quotes.toscrape.com):
+
+```python
+import asyncio
+from pydoll.browser.chromium import Chrome
+from pydoll.extractor import ExtractionModel, Field
+
+class Quote(ExtractionModel):
+ text: str = Field(selector='.text', description='The quote text')
+ author: str = Field(selector='.author', description='Who said the quote')
+ tags: list[str] = Field(selector='.tag', description='Associated tags')
+
+async def main():
+ async with Chrome() as browser:
+ tab = await browser.start()
+ await tab.go_to('https://quotes.toscrape.com')
+
+ quotes = await tab.extract_all(Quote, scope='.quote', timeout=5)
+
+ print(f'Extracted {len(quotes)} quotes\n')
+ for q in quotes:
+ print(f'"{q.text}"')
+ print(f' by {q.author} | tags: {", ".join(q.tags)}\n')
+
+ # Pydantic serialization
+ for q in quotes:
+ print(q.model_dump_json())
+
+asyncio.run(main())
+```
diff --git a/docs/en/features/index.md b/docs/en/features/index.md
index 04101d9b..3f517714 100644
--- a/docs/en/features/index.md
+++ b/docs/en/features/index.md
@@ -18,6 +18,12 @@ Finding and interacting with page elements is the bread and butter of automation
**[Element Finding](element-finding.md)**: Master Pydoll's element location strategies, from the intuitive `find()` method that uses natural HTML attributes, to the powerful `query()` method for CSS selectors and XPath. You'll also learn about DOM traversal helpers that let you navigate the page structure efficiently.
+## Data Extraction
+
+Turn web pages into structured Python objects with typed models, automatic validation, and Pydantic serialization.
+
+**[Structured Extraction](extraction/structured-extraction.md)**: Define a Pydantic model with CSS/XPath selectors, call `tab.extract()`, and get a fully typed object back. Supports nested models, list fields, attribute extraction, custom transforms, optional fields with defaults, and configurable timeouts. No manual element-by-element querying required.
+
## Automation Capabilities
These are the features that bring your automation to life: simulating user interactions, keyboard control, handling file operations, working with iframes, and capturing visual content.
diff --git a/docs/pt/features/extraction/structured-extraction.md b/docs/pt/features/extraction/structured-extraction.md
new file mode 100644
index 00000000..ba51cc84
--- /dev/null
+++ b/docs/pt/features/extraction/structured-extraction.md
@@ -0,0 +1,329 @@
+# Extração Estruturada de Dados
+
+O motor de extração do Pydoll permite que você defina **o que** deseja de uma página usando modelos tipados, e cuida do **como** automaticamente. Em vez de consultar elementos manualmente um a um, você declara um modelo com seletores e chama `tab.extract()`. O resultado é um objeto Python totalmente tipado e validado, alimentado pelo [Pydantic](https://docs.pydantic.dev/).
+
+## Por Que Usar Extração Estruturada?
+
+Código de scraping tradicional tende a crescer em uma confusão de chamadas `find()`, `await element.text`, leitura de atributos e conversões manuais de tipo espalhadas por dezenas de linhas. Quando a página muda, você precisa caçar no código para encontrar qual seletor quebrou.
+
+Com extração estruturada, todos os seus seletores ficam em um único lugar (o modelo), os tipos são garantidos automaticamente, e a saída é um objeto Pydantic limpo com autocomplete da IDE e serialização embutida.
+
+## Uso Básico
+
+### Definindo um Modelo
+
+Um modelo de extração é uma classe que herda de `ExtractionModel`. Cada campo usa `Field()` para declarar um seletor CSS ou XPath.
+
+```python
+from pydoll.extractor import ExtractionModel, Field
+
+class Quote(ExtractionModel):
+ text: str = Field(selector='.text', description='The quote text')
+ author: str = Field(selector='.author', description='Who said it')
+ tags: list[str] = Field(selector='.tag', description='Associated tags')
+```
+
+O parâmetro `selector` aceita tanto seletores CSS quanto expressões XPath. O Pydoll auto-detecta o tipo, exatamente como o `tab.query()`.
+
+### Extraindo um Único Item
+
+Use `tab.extract()` para preencher uma instância do modelo a partir da página:
+
+```python
+from pydoll.browser.chromium import Chrome
+
+async with Chrome() as browser:
+ tab = await browser.start()
+ await tab.go_to('https://example.com/article')
+
+ article = await tab.extract(Article)
+ print(article.title) # str, fully typed
+ print(article.model_dump()) # dict via pydantic
+```
+
+### Extraindo Múltiplos Itens
+
+Use `tab.extract_all()` com um seletor `scope` que identifica o container repetido. Cada match gera uma instância do modelo, com os campos resolvidos relativamente àquele container.
+
+```python
+quotes = await tab.extract_all(Quote, scope='.quote')
+
+for q in quotes:
+ print(f'{q.author}: {q.text}')
+ print(q.tags)
+```
+
+Você pode limitar o número de resultados:
+
+```python
+top_5 = await tab.extract_all(Quote, scope='.quote', limit=5)
+```
+
+## Opções do Field
+
+A função `Field()` aceita os seguintes parâmetros:
+
+| Parâmetro | Tipo | Descrição |
+|---------------|-------------------------|--------------------------------------------------------------|
+| `selector` | `str` ou `None` | Seletor CSS ou XPath (auto-detectado) |
+| `attribute` | `str` ou `None` | Atributo HTML a ler em vez do texto interno |
+| `description` | `str` ou `None` | Descrição semântica do campo |
+| `default` | qualquer valor | Valor padrão quando o elemento não é encontrado |
+| `transform` | callable ou `None` | Função de pós-processamento aplicada à string bruta |
+
+Pelo menos um entre `selector` ou `description` deve ser fornecido. Campos com apenas `description` (sem selector) são reservados para futura extração baseada em LLM e são ignorados pelo motor CSS atual.
+
+## Extração de Atributos
+
+Por padrão, o motor lê o texto visível do elemento (`innerText`). Para ler um atributo HTML em vez disso, use o parâmetro `attribute`:
+
+```python
+class Article(ExtractionModel):
+ title: str = Field(selector='h1', description='Title')
+ published: str = Field(
+ selector='time.date',
+ attribute='datetime',
+ description='ISO publication date',
+ )
+ image_url: str = Field(
+ selector='.hero img',
+ attribute='src',
+ description='Hero image URL',
+ )
+ link: str = Field(
+ selector='a.source',
+ attribute='href',
+ description='Source link',
+ )
+ image_id: str = Field(
+ selector='.hero img',
+ attribute='data-id',
+ description='Custom data attribute',
+ )
+```
+
+Qualquer atributo HTML funciona, incluindo `data-*`, `aria-*`, `href`, `src`, `alt` e atributos customizados.
+
+## Transforms
+
+O parâmetro `transform` aceita um callable que recebe a string bruta do DOM e retorna o tipo desejado. É aqui que você converte strings para números, parseia datas ou limpa formatação.
+
+```python
+from datetime import datetime
+
+def parse_price(raw: str) -> float:
+ return float(raw.replace('R$', '').replace('.', '').replace(',', '.').strip())
+
+def parse_date(raw: str) -> datetime:
+ return datetime.strptime(raw.strip(), '%B %d, %Y')
+
+class Product(ExtractionModel):
+ name: str = Field(selector='.name', description='Product name')
+ price: float = Field(
+ selector='.price',
+ description='Price in BRL',
+ transform=parse_price,
+ )
+ release: datetime = Field(
+ selector='.release-date',
+ description='Release date',
+ transform=parse_date,
+ )
+```
+
+O transform executa **antes** da validação do Pydantic, então o tipo do campo deve corresponder ao que o transform retorna.
+
+## Modelos Aninhados
+
+Quando o tipo de um campo é outro `ExtractionModel`, o motor usa o seletor do campo para encontrar um elemento de escopo, e então extrai os campos do modelo aninhado dentro daquele escopo.
+
+```python
+class Author(ExtractionModel):
+ name: str = Field(selector='.name', description='Author name')
+ avatar: str = Field(
+ selector='img.avatar',
+ attribute='src',
+ description='Avatar URL',
+ )
+ bio: str = Field(selector='.bio', description='Short bio')
+
+class Article(ExtractionModel):
+ title: str = Field(selector='h1', description='Title')
+ author: Author = Field(
+ selector='.author-card',
+ description='Author information',
+ )
+```
+
+O seletor `.author-card` define o escopo. Os campos do `Author` (`.name`, `img.avatar`, `.bio`) são resolvidos **dentro** daquele elemento, não da página inteira. Isso previne colisões de seletores quando a página tem múltiplos elementos `.name` em seções diferentes.
+
+### Listas de Modelos Aninhados
+
+Você também pode extrair uma lista de modelos aninhados:
+
+```python
+class Contributor(ExtractionModel):
+ name: str = Field(selector='.name', description='Contributor name')
+ role: str = Field(selector='.role', description='Role')
+
+class Project(ExtractionModel):
+ title: str = Field(selector='h1', description='Project title')
+ contributors: list[Contributor] = Field(
+ selector='.contributor',
+ description='Project contributors',
+ )
+```
+
+Cada elemento `.contributor` se torna o escopo para uma instância de `Contributor`.
+
+## Campos Opcionais e Defaults
+
+Campos que podem não estar presentes em toda página devem usar `Optional` com um `default`:
+
+```python
+from typing import Optional
+
+class Article(ExtractionModel):
+ title: str = Field(selector='h1', description='Title')
+ subtitle: Optional[str] = Field(
+ selector='.subtitle',
+ description='Optional subtitle',
+ default=None,
+ )
+ category: str = Field(
+ selector='.category',
+ description='Category with fallback',
+ default='uncategorized',
+ )
+```
+
+Quando o elemento não é encontrado:
+
+- Campos **com** default usam silenciosamente aquele valor padrão.
+- Campos **sem** default (obrigatórios) levantam `FieldExtractionFailed`.
+
+Tanto `typing.Optional[str]` quanto a sintaxe PEP 604 `str | None` são suportados.
+
+## Timeout e Espera
+
+O parâmetro `timeout` controla quanto tempo o motor espera até os elementos aparecerem, em segundos. Ele é propagado para toda query interna, incluindo modelos aninhados e campos lista.
+
+```python
+# Wait up to 10 seconds for elements to appear
+article = await tab.extract(Article, timeout=10)
+
+# No waiting (default), elements must already be in the DOM
+article = await tab.extract(Article)
+
+# Also works with extract_all
+quotes = await tab.extract_all(Quote, scope='.quote', timeout=5)
+```
+
+Isso usa o mesmo mecanismo de polling que `tab.query(timeout=...)`, então não há necessidade de chamadas manuais `asyncio.sleep()` entre navegação e extração.
+
+## Extração com Escopo
+
+O parâmetro `scope` limita a extração a uma região específica da página:
+
+```python
+# Extract only from the main article, ignoring sidebar/footer
+article = await tab.extract(Article, scope='#main-article')
+
+# extract_all requires scope (it defines the repeating container)
+quotes = await tab.extract_all(Quote, scope='.quote')
+```
+
+## Seletores XPath
+
+Expressões XPath são auto-detectadas (começam com `/` ou `./`) e funcionam em todo lugar que seletores CSS funcionam:
+
+```python
+class SearchResult(ExtractionModel):
+ title: str = Field(
+ selector='//h3[@class="title"]',
+ description='Result title via XPath',
+ )
+ url: str = Field(
+ selector='.//a',
+ attribute='href',
+ description='Result URL',
+ )
+```
+
+## Tratamento de Erros
+
+O motor de extração levanta exceções específicas que você pode capturar e tratar:
+
+```python
+from pydoll.extractor import FieldExtractionFailed, InvalidExtractionModel
+
+# InvalidExtractionModel: raised at model definition time
+# when a Field has neither selector nor description
+try:
+ class BadModel(ExtractionModel):
+ field: str = Field() # no selector, no description
+except InvalidExtractionModel:
+ print('Invalid model definition')
+
+# FieldExtractionFailed: raised at extraction time
+# when a required field's element is not found
+try:
+ result = await tab.extract(MyModel)
+except FieldExtractionFailed as e:
+ print(f'Extraction failed: {e}')
+```
+
+Para campos opcionais, falhas de extração são tratadas silenciosamente e o valor default é utilizado. Apenas campos obrigatórios (aqueles sem `default`) levantam exceções.
+
+## Integração com Pydantic
+
+`ExtractionModel` herda de `pydantic.BaseModel`, então todas as funcionalidades do Pydantic funcionam imediatamente:
+
+```python
+article = await tab.extract(Article)
+
+# Serialization
+article.model_dump() # dict
+article.model_dump_json() # JSON string
+
+# JSON Schema (useful for API docs or LLM prompts)
+Article.model_json_schema()
+
+# Validation happens automatically
+# If a transform returns the wrong type, Pydantic raises ValidationError
+```
+
+Você pode usar qualquer funcionalidade do Pydantic nos seus modelos: validadores, aliases de campos, configuração de modelo e mais. O motor de extração adiciona a camada de seletor/transform por cima sem interferir no comportamento do Pydantic.
+
+## Exemplo Completo
+
+Aqui está um exemplo completo e executável que extrai citações do [quotes.toscrape.com](https://quotes.toscrape.com):
+
+```python
+import asyncio
+from pydoll.browser.chromium import Chrome
+from pydoll.extractor import ExtractionModel, Field
+
+class Quote(ExtractionModel):
+ text: str = Field(selector='.text', description='The quote text')
+ author: str = Field(selector='.author', description='Who said the quote')
+ tags: list[str] = Field(selector='.tag', description='Associated tags')
+
+async def main():
+ async with Chrome() as browser:
+ tab = await browser.start()
+ await tab.go_to('https://quotes.toscrape.com')
+
+ quotes = await tab.extract_all(Quote, scope='.quote', timeout=5)
+
+ print(f'Extracted {len(quotes)} quotes\n')
+ for q in quotes:
+ print(f'"{q.text}"')
+ print(f' by {q.author} | tags: {", ".join(q.tags)}\n')
+
+ # Pydantic serialization
+ for q in quotes:
+ print(q.model_dump_json())
+
+asyncio.run(main())
+```
diff --git a/docs/pt/features/index.md b/docs/pt/features/index.md
index 9f0a488d..8c9c6b79 100644
--- a/docs/pt/features/index.md
+++ b/docs/pt/features/index.md
@@ -18,6 +18,12 @@ Encontrar e interagir com elementos da página é o pão com manteiga da automa
**[Localização de Elementos](element-finding.md)**: Domine as estratégias de localização de elementos do Pydoll, desde o intuitivo método `find()` que usa atributos HTML naturais, até o poderoso método `query()` para seletores CSS e XPath. Você também aprenderá sobre auxiliares de travessia do DOM que permitem navegar pela estrutura da página eficientemente.
+## Extração de Dados
+
+Transforme páginas web em objetos Python estruturados com modelos tipados, validação automática e serialização Pydantic.
+
+**[Extração Estruturada](extraction/structured-extraction.md)**: Defina um modelo Pydantic com seletores CSS/XPath, chame `tab.extract()` e receba um objeto totalmente tipado. Suporta modelos aninhados, campos lista, extração de atributos, transforms customizados, campos opcionais com defaults e timeouts configuráveis. Sem necessidade de consulta manual elemento por elemento.
+
## Capacidades de Automação
Estas são as funcionalidades que dão vida à sua automação: simular interações do usuário, controle de teclado, lidar com operações de arquivo, trabalhar com iframes e capturar conteúdo visual.
diff --git a/docs/zh/features/extraction/structured-extraction.md b/docs/zh/features/extraction/structured-extraction.md
new file mode 100644
index 00000000..17442348
--- /dev/null
+++ b/docs/zh/features/extraction/structured-extraction.md
@@ -0,0 +1,329 @@
+# 结构化数据提取
+
+Pydoll 的提取引擎让您使用类型化模型定义想要从页面获取**什么**数据,并自动处理**如何**获取。无需逐个手动查询元素,您只需声明一个带有选择器的模型并调用 `tab.extract()`。结果是一个由 [Pydantic](https://docs.pydantic.dev/) 驱动的、完全类型化和验证过的 Python 对象。
+
+## 为什么使用结构化提取?
+
+传统的抓取代码往往会变成一堆散落在数十行中的 `find()` 调用、`await element.text`、属性读取和手动类型转换。当页面发生变化时,您需要在代码中逐行排查哪个选择器出了问题。
+
+使用结构化提取后,所有选择器都集中在一个地方(模型),类型会自动强制执行,输出是一个干净的 Pydantic 对象,内置 IDE 自动补全和序列化功能。
+
+## 基本用法
+
+### 定义模型
+
+提取模型是一个继承自 `ExtractionModel` 的类。每个字段使用 `Field()` 来声明 CSS 或 XPath 选择器。
+
+```python
+from pydoll.extractor import ExtractionModel, Field
+
+class Quote(ExtractionModel):
+ text: str = Field(selector='.text', description='The quote text')
+ author: str = Field(selector='.author', description='Who said it')
+ tags: list[str] = Field(selector='.tag', description='Associated tags')
+```
+
+`selector` 参数同时接受 CSS 选择器和 XPath 表达式。Pydoll 会自动检测类型,与 `tab.query()` 的行为完全一致。
+
+### 提取单个项目
+
+使用 `tab.extract()` 从页面填充一个模型实例:
+
+```python
+from pydoll.browser.chromium import Chrome
+
+async with Chrome() as browser:
+ tab = await browser.start()
+ await tab.go_to('https://example.com/article')
+
+ article = await tab.extract(Article)
+ print(article.title) # str, fully typed
+ print(article.model_dump()) # dict via pydantic
+```
+
+### 提取多个项目
+
+使用 `tab.extract_all()` 并配合 `scope` 选择器来标识重复的容器。每个匹配项生成一个模型实例,字段相对于该容器解析。
+
+```python
+quotes = await tab.extract_all(Quote, scope='.quote')
+
+for q in quotes:
+ print(f'{q.author}: {q.text}')
+ print(q.tags)
+```
+
+您可以限制结果数量:
+
+```python
+top_5 = await tab.extract_all(Quote, scope='.quote', limit=5)
+```
+
+## Field 选项
+
+`Field()` 函数接受以下参数:
+
+| 参数 | 类型 | 描述 |
+|---------------|-------------------------|--------------------------------------------------------------|
+| `selector` | `str` 或 `None` | CSS 或 XPath 选择器(自动检测) |
+| `attribute` | `str` 或 `None` | 要读取的 HTML 属性,而非内部文本 |
+| `description` | `str` 或 `None` | 字段的语义描述 |
+| `default` | 任意值 | 未找到元素时的默认值 |
+| `transform` | callable 或 `None` | 应用于原始字符串的后处理函数 |
+
+必须提供 `selector` 或 `description` 中的至少一个。仅有 `description`(无 selector)的字段保留用于未来基于 LLM 的提取,当前 CSS 引擎会跳过这些字段。
+
+## 属性提取
+
+默认情况下,引擎读取元素的可见文本(`innerText`)。要读取 HTML 属性,请使用 `attribute` 参数:
+
+```python
+class Article(ExtractionModel):
+ title: str = Field(selector='h1', description='Title')
+ published: str = Field(
+ selector='time.date',
+ attribute='datetime',
+ description='ISO publication date',
+ )
+ image_url: str = Field(
+ selector='.hero img',
+ attribute='src',
+ description='Hero image URL',
+ )
+ link: str = Field(
+ selector='a.source',
+ attribute='href',
+ description='Source link',
+ )
+ image_id: str = Field(
+ selector='.hero img',
+ attribute='data-id',
+ description='Custom data attribute',
+ )
+```
+
+Any HTML attribute works, including `data-*`, `aria-*`, `href`, `src`, `alt`, and custom attributes.
+
+## Transform functions
+
+The `transform` parameter accepts a callable that receives the raw string from the DOM and returns the desired type. This is where you convert strings to numbers, parse dates, or clean up formatting.
+
+```python
+from datetime import datetime
+
+def parse_price(raw: str) -> float:
+ return float(raw.replace('R$', '').replace('.', '').replace(',', '.').strip())
+
+def parse_date(raw: str) -> datetime:
+ return datetime.strptime(raw.strip(), '%B %d, %Y')
+
+class Product(ExtractionModel):
+ name: str = Field(selector='.name', description='Product name')
+ price: float = Field(
+ selector='.price',
+ description='Price in BRL',
+ transform=parse_price,
+ )
+ release: datetime = Field(
+ selector='.release-date',
+ description='Release date',
+ transform=parse_date,
+ )
+```
+
+Transform functions run **before** Pydantic validation, so the field's type annotation should match the transform's return value.
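Because transforms are plain callables with no dependency on the browser, they can be unit-tested in isolation. For instance, the `parse_price` function from the example above works standalone:

```python
def parse_price(raw: str) -> float:
    # Strip Brazilian currency formatting: 'R$ 1.234,56' -> 1234.56
    return float(raw.replace('R$', '').replace('.', '').replace(',', '.').strip())

assert parse_price('R$ 1.234,56') == 1234.56
assert parse_price('R$ 99,90') == 99.9
```

Testing transforms this way catches formatting edge cases without launching a browser.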
+
+## Nested models
+
+When a field's type is another `ExtractionModel`, the engine uses that field's selector to locate a scope element, then extracts the nested model's fields within that scope.
+
+```python
+class Author(ExtractionModel):
+ name: str = Field(selector='.name', description='Author name')
+ avatar: str = Field(
+ selector='img.avatar',
+ attribute='src',
+ description='Avatar URL',
+ )
+ bio: str = Field(selector='.bio', description='Short bio')
+
+class Article(ExtractionModel):
+ title: str = Field(selector='h1', description='Title')
+ author: Author = Field(
+ selector='.author-card',
+ description='Author information',
+ )
+```
+
+The `.author-card` selector defines the scope. The fields of `Author` (`.name`, `img.avatar`, `.bio`) are resolved **inside** that element, not against the whole page. This prevents selector collisions when the page has multiple `.name` elements in different regions.
+
+### Lists of nested models
+
+You can also extract lists of nested models:
+
+```python
+class Contributor(ExtractionModel):
+ name: str = Field(selector='.name', description='Contributor name')
+ role: str = Field(selector='.role', description='Role')
+
+class Project(ExtractionModel):
+ title: str = Field(selector='h1', description='Project title')
+ contributors: list[Contributor] = Field(
+ selector='.contributor',
+ description='Project contributors',
+ )
+```
+
+Each `.contributor` element becomes the scope for one `Contributor` instance.
+
+## Optional fields and defaults
+
+Fields that may not be present on every page should use `Optional` and a `default`:
+
+```python
+from typing import Optional
+
+class Article(ExtractionModel):
+ title: str = Field(selector='h1', description='Title')
+ subtitle: Optional[str] = Field(
+ selector='.subtitle',
+ description='Optional subtitle',
+ default=None,
+ )
+ category: str = Field(
+ selector='.category',
+ description='Category with fallback',
+ default='uncategorized',
+ )
+```
+
+When an element is not found:
+
+- Fields **with** a default silently use that default.
+- Fields **without** a default (required fields) raise `FieldExtractionFailed`.
+
+Both `typing.Optional[str]` and the PEP 604 syntax `str | None` are supported.
+
+## Timeouts and waiting
+
+The `timeout` parameter controls how long the engine waits for elements to appear, in seconds. It propagates to every internal query, including nested models and list fields.
+
+```python
+# Wait up to 10 seconds for elements to appear
+article = await tab.extract(Article, timeout=10)
+
+# No waiting (default), elements must already be in the DOM
+article = await tab.extract(Article)
+
+# Also works with extract_all
+quotes = await tab.extract_all(Quote, scope='.quote', timeout=5)
+```
+
+This uses the same polling mechanism as `tab.query(timeout=...)`, so no manual `asyncio.sleep()` calls are needed between navigation and extraction.
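The polling idea can be sketched in a few lines. This is an illustrative simplification (the `poll_until` name and structure are hypothetical), not Pydoll's internal implementation:

```python
import asyncio
import time

async def poll_until(get, timeout: float, interval: float = 0.1):
    # Re-check the DOM until a result appears or the deadline passes
    deadline = time.monotonic() + timeout
    while True:
        result = get()
        if result is not None:
            return result
        if time.monotonic() >= deadline:
            raise TimeoutError('element did not appear in time')
        await asyncio.sleep(interval)

# Example: the "element" appears on the third check
attempts = {'n': 0}

def fake_query():
    attempts['n'] += 1
    return 'element' if attempts['n'] >= 3 else None

print(asyncio.run(poll_until(fake_query, timeout=2)))  # element
```

The key point is that waiting happens inside the query itself, so callers never need to sleep explicitly.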
+
+## Scoped extraction
+
+The `scope` parameter restricts extraction to a specific region of the page:
+
+```python
+# Extract only from the main article, ignoring sidebar/footer
+article = await tab.extract(Article, scope='#main-article')
+
+# extract_all requires scope (it defines the repeating container)
+quotes = await tab.extract_all(Quote, scope='.quote')
+```
+
+## XPath selectors
+
+XPath expressions are detected automatically (they start with `/` or `./`) and work everywhere a CSS selector does:
+
+```python
+class SearchResult(ExtractionModel):
+ title: str = Field(
+ selector='//h3[@class="title"]',
+ description='Result title via XPath',
+ )
+ url: str = Field(
+ selector='.//a',
+ attribute='href',
+ description='Result URL',
+ )
+```
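The detection rule is simple enough to sketch in plain Python. The helper below is an illustrative heuristic based on the documented rule, not Pydoll's actual implementation:

```python
def looks_like_xpath(selector: str) -> bool:
    # Documented rule: XPath starts with '/' (absolute) or './' (relative)
    return selector.startswith('/') or selector.startswith('./')

assert looks_like_xpath('//h3[@class="title"]')
assert looks_like_xpath('./a')
assert looks_like_xpath('/html/body/div')
assert not looks_like_xpath('.quote')     # CSS class selector
assert not looks_like_xpath('a.source')   # CSS element selector
```

Because the two syntaxes never overlap under this rule, no explicit flag is needed to choose between them.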
+
+## Error handling
+
+The extraction engine raises specific exceptions that you can catch and handle:
+
+```python
+from pydoll.extractor import FieldExtractionFailed, InvalidExtractionModel
+
+# InvalidExtractionModel: raised at model definition time
+# when a Field has neither selector nor description
+try:
+ class BadModel(ExtractionModel):
+ field: str = Field() # no selector, no description
+except InvalidExtractionModel:
+ print('Invalid model definition')
+
+# FieldExtractionFailed: raised at extraction time
+# when a required field's element is not found
+try:
+ result = await tab.extract(MyModel)
+except FieldExtractionFailed as e:
+ print(f'Extraction failed: {e}')
+```
+
+For optional fields, extraction failures are handled silently and the default is used. Only required fields (those without a `default`) raise.
+
+## Pydantic integration
+
+`ExtractionModel` inherits from `pydantic.BaseModel`, so all Pydantic features work out of the box:
+
+```python
+article = await tab.extract(Article)
+
+# Serialization
+article.model_dump() # dict
+article.model_dump_json() # JSON string
+
+# JSON Schema (useful for API docs or LLM prompts)
+Article.model_json_schema()
+
+# Validation happens automatically
+# If a transform returns the wrong type, Pydantic raises ValidationError
+```
+
+You can use any Pydantic feature in your models: validators, field aliases, model config, and so on. The extraction engine adds a selector/transform layer on top and does not interfere with Pydantic's behavior.
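For instance, a standard Pydantic v2 `field_validator` works as usual. The sketch below uses a plain `BaseModel` to keep it browser-free; since `ExtractionModel` subclasses `BaseModel`, the same pattern should apply there:

```python
from pydantic import BaseModel, field_validator

class Quote(BaseModel):
    text: str
    author: str

    @field_validator('text')
    @classmethod
    def strip_curly_quotes(cls, v: str) -> str:
        # Remove surrounding curly quotation marks often present in scraped text
        return v.strip('\u201c\u201d')

q = Quote(text='\u201cHello\u201d', author='Someone')
assert q.text == 'Hello'
```

Validators run after extraction and any `transform`, so they are a good place for model-level cleanup rules.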
+
+## Complete example
+
+Here is a complete, runnable example that extracts quotes from [quotes.toscrape.com](https://quotes.toscrape.com):
+
+```python
+import asyncio
+from pydoll.browser.chromium import Chrome
+from pydoll.extractor import ExtractionModel, Field
+
+class Quote(ExtractionModel):
+ text: str = Field(selector='.text', description='The quote text')
+ author: str = Field(selector='.author', description='Who said the quote')
+ tags: list[str] = Field(selector='.tag', description='Associated tags')
+
+async def main():
+ async with Chrome() as browser:
+ tab = await browser.start()
+ await tab.go_to('https://quotes.toscrape.com')
+
+ quotes = await tab.extract_all(Quote, scope='.quote', timeout=5)
+
+ print(f'Extracted {len(quotes)} quotes\n')
+ for q in quotes:
+ print(f'"{q.text}"')
+ print(f' by {q.author} | tags: {", ".join(q.tags)}\n')
+
+ # Pydantic serialization
+ for q in quotes:
+ print(q.model_dump_json())
+
+asyncio.run(main())
+```
diff --git a/docs/zh/features/index.md b/docs/zh/features/index.md
index 1320bc3d..23ebac32 100644
--- a/docs/zh/features/index.md
+++ b/docs/zh/features/index.md
@@ -18,6 +18,12 @@
**[Element Finding](element-finding.md)**: Master Pydoll's element location strategies, from the intuitive `find()` method using natural HTML attributes to the powerful `query()` method for CSS selectors and XPath. You will also learn the DOM traversal helpers that let you navigate page structure efficiently.
+## Data Extraction
+
+Turn web pages into structured Python objects, with typed models, automatic validation, and Pydantic serialization.
+
+**[Structured Extraction](extraction/structured-extraction.md)**: Define a Pydantic model with CSS/XPath selectors, call `tab.extract()`, and get fully typed objects back. Supports nested models, list fields, attribute extraction, custom transform functions, optional fields with defaults, and configurable timeouts. No manual element-by-element querying.
+
## Automation Capabilities

These features bring your automation to life: simulating user interactions, keyboard control, handling file operations, working with iframes, and capturing visual content.
diff --git a/mkdocs.yml b/mkdocs.yml
index c2bef711..04e72e48 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -12,6 +12,8 @@ nav:
- Overview: features/index.md
- Core Concepts: features/core-concepts.md
- Element Finding: features/element-finding.md
+ - Data Extraction:
+ - Structured Extraction: features/extraction/structured-extraction.md
- Automation:
- Human-Like Interactions: features/automation/human-interactions.md
- Keyboard Control: features/automation/keyboard-control.md
From 16eb0efb37b681fce45ede3b9428290e545acce1 Mon Sep 17 00:00:00 2001
From: Thalison Fernandes
Date: Sun, 22 Mar 2026 17:57:53 -0300
Subject: [PATCH 19/21] fix(extractor): correct coroutine type annotation for
mypy
---
pydoll/extractor/engine.py | 9 ++++++---
1 file changed, 6 insertions(+), 3 deletions(-)
diff --git a/pydoll/extractor/engine.py b/pydoll/extractor/engine.py
index cd71394a..9b957675 100644
--- a/pydoll/extractor/engine.py
+++ b/pydoll/extractor/engine.py
@@ -5,6 +5,7 @@
import asyncio
import logging
import types
+from collections.abc import Coroutine
from typing import TYPE_CHECKING, Optional, TypeVar, Union, get_args, get_origin
from pydoll.elements.mixins.find_elements_mixin import FindElementsMixin
@@ -119,7 +120,9 @@ async def _extract_fields(
Dictionary of field name -> extracted value.
"""
field_names: list[str] = []
- tasks: list[asyncio.Task[Union[str, int, float, bool, list[str], object]]] = []
+ coroutines: list[
+ Coroutine[None, None, Union[str, int, float, bool, list[str], object]]
+ ] = []
for name, metadata in model.get_extraction_fields().items():
if not metadata.has_selector:
@@ -132,9 +135,9 @@ async def _extract_fields(
continue
field_names.append(name)
- tasks.append(self._extract_field(metadata, annotation, context, timeout))
+ coroutines.append(self._extract_field(metadata, annotation, context, timeout))
- results = await asyncio.gather(*tasks, return_exceptions=True)
+ results = await asyncio.gather(*coroutines, return_exceptions=True)
values: dict[str, Union[str, int, float, bool, list[str], object]] = {}
for name, result in zip(field_names, results):
From b4408b4403c31d0d5967971fcb5ada8a80e51b55 Mon Sep 17 00:00:00 2001
From: Thalison Fernandes
Date: Sun, 22 Mar 2026 18:28:08 -0300
Subject: [PATCH 20/21] fix(test): filter only DeprecationWarning in interval
deprecated test
---
tests/test_interactions/test_keyboard.py | 10 ++++++----
1 file changed, 6 insertions(+), 4 deletions(-)
diff --git a/tests/test_interactions/test_keyboard.py b/tests/test_interactions/test_keyboard.py
index 821b8561..d36e96e0 100644
--- a/tests/test_interactions/test_keyboard.py
+++ b/tests/test_interactions/test_keyboard.py
@@ -519,12 +519,14 @@ async def test_type_text_interval_deprecated_warning(self, keyboard_api):
import warnings
with warnings.catch_warnings(record=True) as w:
- warnings.simplefilter("always")
+ warnings.simplefilter("always", DeprecationWarning)
await keyboard_api.type_text("a", interval=0.1)
- assert len(w) == 1
- assert issubclass(w[0].category, DeprecationWarning)
- assert "interval" in str(w[0].message)
+ deprecation_warnings = [
+ x for x in w if issubclass(x.category, DeprecationWarning)
+ ]
+ assert len(deprecation_warnings) == 1
+ assert "interval" in str(deprecation_warnings[0].message)
@pytest.mark.asyncio
async def test_type_char_calls_focus(self, keyboard_api, mock_tab):
From 597a914622f0f569f898ce8abd09812f10cc83a1 Mon Sep 17 00:00:00 2001
From: Thalison Fernandes
Date: Sun, 22 Mar 2026 18:32:49 -0300
Subject: [PATCH 21/21] refactor(extractor): parallelize list field extraction
with asyncio.gather
---
pydoll/extractor/engine.py | 18 +++++++-----------
1 file changed, 7 insertions(+), 11 deletions(-)
diff --git a/pydoll/extractor/engine.py b/pydoll/extractor/engine.py
index 9b957675..0d33d86a 100644
--- a/pydoll/extractor/engine.py
+++ b/pydoll/extractor/engine.py
@@ -204,17 +204,13 @@ async def _extract_list_field(
inner_type = _get_inner_type(annotation)
if _is_extraction_model(inner_type):
- results: list[Union[str, int, float, bool, object]] = []
- for element in elements:
- field_values = await self._extract_fields(inner_type, element, timeout)
- results.append(_build_instance(inner_type, field_values))
- return results
-
- scalar_values: list[Union[str, int, float, bool, object]] = []
- for element in elements:
- raw = await _extract_value(element, metadata)
- scalar_values.append(_apply_transform(raw, metadata))
- return scalar_values
+ all_field_values = await asyncio.gather(
+ *(self._extract_fields(inner_type, el, timeout) for el in elements)
+ )
+ return [_build_instance(inner_type, fv) for fv in all_field_values]
+
+ all_raw = await asyncio.gather(*(_extract_value(el, metadata) for el in elements))
+ return [_apply_transform(raw, metadata) for raw in all_raw]
async def _extract_nested_model(
self,