
Commit 103f61e

✨ Add Regex compiler & Deprecate selectolax (#26)

* ⚡️ regex performance
* ⚡️ Improve duck-typing on the processor
* ✨ add _UNSET improving code readability
* ✏️ fix typos
* ♻️ refactor processor's default configuration
* ♻️ Refactor default configuration on selectors
* ✏️ fix typo mistakes
* ✅ add tests for selectolax
* ✏️ fix: typo mistake: List[...] -> list[...]
* 🗑️ deprecate selectolax for immaturity
* 🎨 Apply format with black
* perf: remove inheriting from base protocol

---------

Co-authored-by: Sadegh Yazdani
1 parent deee8b4 commit 103f61e

18 files changed: +432 −222 lines changed

fastcrawler/core/app.py

Lines changed: 6 additions & 3 deletions

```diff
@@ -6,7 +6,7 @@
 class FastCrawler:
-    """ The client interface to start all crawlers.
+    """The client interface to start all crawlers.
     Initilize all crawlers

@@ -18,12 +18,15 @@ class FastCrawler:
     app.start()
     """
+
     crawlers: List[Crawler]

     def __init__(self, crawlers: List[Crawler] | Crawler):
-        """ Initilize FastCrawler with defined crawlers"""
+        """Initilize FastCrawler with defined crawlers"""
         if isinstance(crawlers, Crawler):
-            self.crawlers = [crawlers, ]
+            self.crawlers = [
+                crawlers,
+            ]
         else:
             self.crawlers = crawlers
```
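The constructor now normalizes a single `Crawler` into a one-element list, so both call styles below leave `app.crawlers` as a list. A minimal sketch, assuming `FastCrawler` and `Crawler` are importable as the docstring's `app.start()` example suggests (import paths and the `Crawler()` constructor are assumptions, not shown in this diff):

```python
# Sketch only: import paths and Crawler's constructor are assumptions.
from fastcrawler import FastCrawler
from fastcrawler.core import Crawler

crawler = Crawler()

app = FastCrawler(crawlers=crawler)    # a single crawler gets wrapped...
assert app.crawlers == [crawler]

app = FastCrawler(crawlers=[crawler])  # ...while a list is stored as-is
assert app.crawlers == [crawler]

app.start()  # start all registered crawlers
```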

fastcrawler/exceptions.py

Lines changed: 14 additions & 0 deletions

```diff
@@ -4,6 +4,7 @@ class BaseModelError(Exception):


 class NoCrawlerFoundError(BaseModelError):
     """No crawler is found in starting application"""
+
     def __init__(self):
         super().__init__(self, self.__doc__)

@@ -23,3 +24,16 @@ def __init__(self, model):
             "\nfrom fastcrawler import BaseModel"
         )
         super().__init__(self.message)
+
+
+class ProcessorNotSupported(BaseModelError):
+    def __init__(self, model):
+        self.model = model
+        self.message = (
+            f"The provided processor {model} is not supported.\n"
+            "To support the process, please explictly map the processor"
+            "inside the XPATH/CSS/Base selector, as a method called 'interface_mapper'"
+            "\nWe support full duck typing which means you can inject whatever"
+            "you need."
+        )
+        super().__init__(self.message)
```
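The new `ProcessorNotSupported` error steers users toward the duck-typed `interface_mapper` hook named in its message. A small sketch of raising and catching it; the `UnknownProcessor` class is hypothetical, only the exception itself comes from this diff:

```python
from fastcrawler.exceptions import ProcessorNotSupported


class UnknownProcessor:
    """A hypothetical processor the selectors don't know how to map."""


try:
    # A selector that cannot map this processor would raise:
    raise ProcessorNotSupported(UnknownProcessor())
except ProcessorNotSupported as exc:
    print(exc.message)  # points to the 'interface_mapper' escape hatch
```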

fastcrawler/parsers/__init__.py

Lines changed: 10 additions & 3 deletions

```diff
@@ -1,16 +1,23 @@
 from .html import HTMLParser
 from .json import JsonParser
+from .processors.lxml import LxmlProcessor
+
+# from .processors.modest import ModestProcessor
 from .pydantic import BaseModel
 from .selectors.css import CSSField
-from .selectors.xpath import XPATHField
 from .selectors.regex import RegexField
+from .selectors.xpath import XPATHField

 __all__ = [
+    # Selectors
     "XPATHField",
     "BaseModel",
     "CSSField",
     "RegexField",
-
+    # Parsers
     "JsonParser",
-    "HTMLParser"
+    "HTMLParser",
+    # Processors
+    "ModestProcessor",
+    "LxmlProcessor",
 ]
```
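With `RegexField` now re-exported next to the other selectors, a model can mix selector types. A sketch under the assumption that `RegexField` takes a pattern keyword the way `XPATHField` takes a query; neither signature is shown in this diff:

```python
from fastcrawler.parsers import BaseModel, RegexField, XPATHField


class Product(BaseModel):
    # Keyword names are assumptions; the diff only shows the exports.
    title: str = XPATHField(query="//h1/text()")
    price: str = RegexField(regex=r"\$\d+\.\d{2}")
```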

fastcrawler/parsers/base.py

Lines changed: 10 additions & 7 deletions

```diff
@@ -4,10 +4,13 @@


 class ParserProtocol(Protocol):
-    def __init__(self, scraped_data: Any): ...
-    """Initilize the parser with the given data (html/json/etc)"""
-    def parse(self, model: Any) -> Any: ...
-    """
-    Parse the saved data, with given model, which should be a pydantic model
-    imported from fastcrawler library
-    """
+    def __init__(self, scraped_data: Any):
+        """Initilize the parser with the given data (html/json/etc)"""
+        ...
+
+    def parse(self, model: Any) -> Any:
+        """
+        Parse the saved data, with given model, which should be a pydantic model
+        imported from fastcrawler library
+        """
+        ...
```
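Per the "remove inheriting from base protocol" item in the commit message, parsers now satisfy `ParserProtocol` structurally rather than by inheritance (see the html.py and json.py diffs below). A minimal sketch of what that buys; the `TextParser` class here is hypothetical:

```python
from typing import Any

from fastcrawler.parsers.base import ParserProtocol


class TextParser:  # no base class required
    def __init__(self, scraped_data: str):
        self.scraped_data = scraped_data

    def parse(self, model: Any) -> Any:
        return model


def run(parser: ParserProtocol) -> None:
    parser.parse(model=None)


run(TextParser("<html></html>"))  # accepted: the shape matches the protocol
```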

fastcrawler/parsers/html.py

Lines changed: 19 additions & 23 deletions

```diff
@@ -1,18 +1,16 @@
-from typing import List, Type
+from typing import Type

 from pydantic import ValidationError
 from pydantic_core import Url

-from fastcrawler.exceptions import (ParserInvalidModelType,
-                                    ParserValidationError)
+from fastcrawler.exceptions import ParserInvalidModelType, ParserValidationError

-from .base import ParserProtocol
 from .pydantic import BaseModel, BaseModelType, URLs
-from .selectors.base import BaseSelector
-from .utils import get_inner_model, get_selector
+from .selectors.base import BaseSelector, get_selector
+from .utils import get_inner_model


-class HTMLParser(ParserProtocol):
+class HTMLParser:
     """
     HTMLParser first initiate the scraped data, then it parses a given HTML document
     based on the specified model. Using Pydantic model with XPATHField or CSSField.
@@ -27,6 +25,7 @@ class HTMLParser(ParserProtocol):
     # parse it later!
     html_parser.parse(a pydantic model built with XPATHField or CSSField)
     """
+
     def __init__(self, scraped_data: str):
         """
         Initiate the HTML file in memory, so it can be parsed later
@@ -40,36 +39,33 @@ def parse(self, model: Type[BaseModelType]) -> BaseModelType:
         """
         Parse using the pydantic model
         """
-        if hasattr(model, "__mro__") and BaseModel in model.__mro__:  # type: ignore
+        if issubclass(model, BaseModel):  # type: ignore
             data = {}
             for field_name, field in model.model_fields.items():
-                field_selector = get_selector(field)
-                if field_selector:
-                    data[field_name] = field_selector.resolve(
+                fastcrawler_selector = get_selector(field)
+                if fastcrawler_selector:
+                    data[field_name] = fastcrawler_selector.resolve(
                         scraped_data=self.scraped_data,
-                        model=get_inner_model(model, field_name)
+                        model=get_inner_model(
+                            model, field_name
+                        ),  # TODO: check if pydantic returns the model data type
                     )

             if hasattr(
-                model.Config, "url_resolver",
+                model.Config,
+                "url_resolver",
             ) and issubclass(model.Config.url_resolver.__class__, BaseSelector):
-                urls: List[Url] = model.Config.url_resolver.resolve(  # type: ignore
+                urls: list[Url] = model.Config.url_resolver.resolve(  # type: ignore
                     self.scraped_data,
-                    model=None
+                    model=None,
                 )
-                if urls:
-                    self.resolver = URLs(
-                        urls=urls
-                    )
-                else:
-                    self.resolver = URLs()
+                self.resolver = URLs(urls=urls or [])

             try:
                 self.data: BaseModelType | None = model.model_validate(data)
+                return self.data
             except ValidationError as error:
                 raise ParserValidationError(error.errors()) from error

-            return self.data
-
         else:
             raise ParserInvalidModelType(model=model)
```
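A usage sketch of the reworked `parse` flow; note that `return self.data` moved inside the `try`, so a successful parse now returns the validated model directly. The `query` keyword on `XPATHField` is an assumption consistent with the class docstring:

```python
from fastcrawler.parsers import BaseModel, HTMLParser, XPATHField


class Page(BaseModel):
    heading: str = XPATHField(query="//h1/text()")  # keyword name is assumed


html_parser = HTMLParser("<html><h1>Hello</h1></html>")
page = html_parser.parse(Page)  # returns the validated Page instance
print(page.heading)             # "Hello"
```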

fastcrawler/parsers/json.py

Lines changed: 8 additions & 6 deletions

```diff
@@ -3,14 +3,11 @@
 from pydantic import ValidationError
 from pydantic_core import Url

-from fastcrawler.exceptions import (ParserInvalidModelType,
-                                    ParserValidationError)
+from fastcrawler.exceptions import ParserInvalidModelType, ParserValidationError
 from fastcrawler.parsers.pydantic import BaseModel, BaseModelType, URLs

-from .base import ParserProtocol

-
-class JsonParser(ParserProtocol):
+class JsonParser:
     """
     HTMLParser first initiate the scraped data, then it parses a given HTML document
     based on the specified model. Using Pydantic model with XPATHField or CSSField.
@@ -25,6 +22,7 @@ class JsonParser(ParserProtocol):
     # parse it later!
     html_parser.parse(a pydantic model built with XPATHField or CSSField)
     """
+
     data = None

     def __init__(self, scraped_data: dict):
@@ -49,7 +47,11 @@ def parse(self, model: Type[BaseModelType]) -> BaseModelType:
         current_address: dict = self.scraped_data.copy()
         for address in model.Config.url_resolver.split("."):
             current_address = current_address.get(address)  # type: ignore
-        self.resolver = URLs(urls=[Url(current_address), ])
+        self.resolver = URLs(
+            urls=[
+                Url(current_address),
+            ]
+        )
         try:
             self.data = model.model_validate(self.data)
         except ValidationError as error:
```
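`JsonParser` walks a dotted `url_resolver` path through the payload to build its `URLs` resolver, as the loop above shows. A sketch with an illustrative model; the field and key names here are made up:

```python
from fastcrawler.parsers import BaseModel, JsonParser


class ApiPage(BaseModel):
    total: int  # illustrative field mapped straight from the JSON payload

    class Config:
        url_resolver = "pagination.next"  # dotted path into the payload


parser = JsonParser({"total": 42, "pagination": {"next": "https://example.com/p/2"}})
page = parser.parse(ApiPage)
print(parser.resolver.urls)  # [Url('https://example.com/p/2')]
```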

fastcrawler/parsers/processors/base.py

Lines changed: 7 additions & 5 deletions

```diff
@@ -1,4 +1,4 @@
-from typing import List, Protocol
+from typing import Protocol


 class ElementInterface(Protocol):
@@ -35,16 +35,18 @@ def to_string(result: ElementInterface) -> str:

     @staticmethod
     def from_string_by_xpath(
-        string: str, query: str
-    ) -> List[ElementInterface] | ElementInterface | None:
+        string: str,
+        query: str,
+    ) -> list[ElementInterface] | ElementInterface | None:
         """
         Resolves a HTML string by XPATH
         """

     @staticmethod
     def from_string_by_css(
-        string: str, query: str
-    ) -> List[ElementInterface] | ElementInterface | None:
+        string: str,
+        query: str,
+    ) -> list[ElementInterface] | ElementInterface | None:
         """
         Resolves a HTML string by CSS
         """
```
fastcrawler/parsers/processors/lxml.py

Lines changed: 9 additions & 10 deletions

```diff
@@ -1,13 +1,10 @@
 # pylint: disable=c-extension-no-member
-from typing import List

 from lxml import etree  # type: ignore[attr-defined]
 from lxml import html as lxml_html  # type: ignore[attr-defined]

-from .base import ProcessorInterface

-
-class LxmlProcessor(ProcessorInterface):
+class LxmlProcessor:
     base_element = etree.ElementBase

     @staticmethod
@@ -20,22 +17,24 @@ def to_string(result: etree.ElementBase) -> str:

     @staticmethod
     def from_string_by_xpath(
-        string: str, query: str
-    ) -> etree.ElementBase | List[etree.ElementBase] | None:
+        string: str,
+        query: str,
+    ) -> etree.ElementBase | list[etree.ElementBase] | None:
         """
         Resolves a HTML string by XPATH
         """
         tree = lxml_html.fromstring(string)
-        results: List[etree.ElementBase] = tree.xpath(query)
+        results: list[etree.ElementBase] = tree.xpath(query)
         return results

     @staticmethod
     def from_string_by_css(
-        string: str, query: str
-    ) -> etree.ElementBase | List[etree.ElementBase] | None:
+        string: str,
+        query: str,
+    ) -> etree.ElementBase | list[etree.ElementBase] | None:
         """
         Resolves a HTML string by CSS
         """
         tree = lxml_html.fromstring(string)
-        results: List[etree.ElementBase] = tree.cssselect(query)
+        results: list[etree.ElementBase] = tree.cssselect(query)
         return results
```
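A quick sketch of the processor in use. `fromstring`, `xpath`, and `cssselect` are standard lxml APIs, with CSS selection additionally requiring the `cssselect` package:

```python
from fastcrawler.parsers import LxmlProcessor

html = "<ul><li>first</li><li>second</li></ul>"

items = LxmlProcessor.from_string_by_xpath(html, "//li")
print([el.text for el in items])  # ['first', 'second']

# CSS selection goes through lxml's cssselect (needs the cssselect package):
same_items = LxmlProcessor.from_string_by_css(html, "li")
```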
fastcrawler/parsers/processors/modest.py

Lines changed: 37 additions & 0 deletions

```diff
@@ -0,0 +1,37 @@
+# NOTE: This library is not matured yet to be used
+
+
+# from selectolax.parser import HTMLParser, Node
+
+# from .base import ElementInterface
+
+
+# class ModestProcessor:
+#     base_element = Node
+
+#     @staticmethod
+#     def to_string(result: Node) -> str:
+#         """
+#         Resolves a result to string, by getting the inner html,
+#         This method is used to iterate over HTML elements to resolve inner pydantic models
+#         """
+#         return result.html
+
+#     @staticmethod
+#     def from_string_by_xpath(
+#         string: str, query: str
+#     ) -> list[ElementInterface] | ElementInterface | None:
+#         """
+#         Resolves a HTML string by XPATH
+#         """
+#         raise NotImplementedError("XPATH is not supported in selectolax")
+
+#     @staticmethod
+#     def from_string_by_css(
+#         string: str, query: str
+#     ) -> list[ElementInterface] | ElementInterface | None:
+#         """
+#         Resolves a HTML string by CSS
+#         """
+#         results = HTMLParser(string).css(query)
+#         return results
```

fastcrawler/parsers/pydantic.py

Lines changed: 15 additions & 5 deletions

```diff
@@ -1,23 +1,33 @@
-from typing import TYPE_CHECKING, List, TypeVar, Union
+from typing import TYPE_CHECKING, TypeVar, Union

 from pydantic import AnyUrl
 from pydantic import BaseModel as _BaseModel

 if TYPE_CHECKING:
-    from fastcrawler.parsers.selectors.base import \
-        BaseSelector  # pragma: no cover
+    from fastcrawler.parsers.selectors.base import BaseSelector  # pragma: no cover
+
+
+class MappedAttr(_BaseModel):
+    is_property: bool
+    attr_name: str
+
+
+class MappedResult(_BaseModel):
+    get: MappedAttr
+    text: MappedAttr


 class BaseModel(_BaseModel):
     """
     Custom basemodel created from Pydantic :)
     """
+
     class Config:
         url_resolver: Union["BaseSelector", str]


 class URLs(BaseModel):
-    urls: List[AnyUrl] = []
+    urls: list[AnyUrl] = []


-BaseModelType = TypeVar('BaseModelType', bound=BaseModel)
+BaseModelType = TypeVar("BaseModelType", bound=BaseModel)
```
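`MappedAttr` and `MappedResult` look like the data carriers for the `interface_mapper` duck-typing referenced in `ProcessorNotSupported`; this diff never shows them in use, so the wiring below is purely an assumption:

```python
from fastcrawler.parsers.pydantic import MappedAttr, MappedResult

# Hypothetical mapping: describe where a processor's element exposes
# `get` and `text`, and whether each is a property or a method.
element_map = MappedResult(
    get=MappedAttr(is_property=False, attr_name="get"),   # element.get(...)
    text=MappedAttr(is_property=True, attr_name="text"),  # element.text
)
print(element_map.text.attr_name)  # "text"
```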
