|
18 | 18 | from .chunking_strategy import ChunkingStrategy, RegexChunking |
19 | 19 |
|
20 | 20 | from .markdown_generation_strategy import MarkdownGenerationStrategy, DefaultMarkdownGenerator |
21 | | -from .content_scraping_strategy import ContentScrapingStrategy, WebScrapingStrategy, LXMLWebScrapingStrategy |
| 21 | +from .content_scraping_strategy import ContentScrapingStrategy, LXMLWebScrapingStrategy |
22 | 22 | from .deep_crawling import DeepCrawlStrategy |
23 | 23 |
|
24 | 24 | from .cache_context import CacheMode |
25 | 25 | from .proxy_strategy import ProxyRotationStrategy |
26 | 26 |
|
27 | | -from typing import Union, List |
| 27 | +from typing import Union, List, Callable |
28 | 28 | import inspect |
29 | 29 | from typing import Any, Dict, Optional |
30 | 30 | from enum import Enum |
31 | 31 |
|
| 32 | +# Type alias for URL matching |
| 33 | +UrlMatcher = Union[str, Callable[[str], bool], List[Union[str, Callable[[str], bool]]]] |
| 34 | + |
| 35 | +class MatchMode(Enum): |
| 36 | + OR = "or" |
| 37 | + AND = "and" |
| 38 | + |
32 | 39 | # from .proxy_strategy import ProxyConfig |
33 | 40 |
|
34 | 41 |
|
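The new `UrlMatcher` alias accepts a glob-style string, a predicate over the URL, or a list mixing both; `MatchMode` controls how a list is combined. A minimal sketch of the three shapes (the pattern and lambda values are illustrative, not from this diff):

```python
from typing import Callable

# Glob-style string, tested with fnmatch in is_match() further down
glob_matcher: str = "*://docs.example.com/*"

# Predicate over the raw URL string
fn_matcher: Callable[[str], bool] = lambda url: url.startswith("https://")

# Mixed list, combined per MatchMode (OR by default; AND requires all to pass)
mixed_matcher = [glob_matcher, fn_matcher]
```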
@@ -383,6 +390,8 @@ class BrowserConfig: |
383 | 390 | light_mode (bool): Disables certain background features for performance gains. Default: False. |
384 | 391 | extra_args (list): Additional command-line arguments passed to the browser. |
385 | 392 | Default: []. |
| 393 | + enable_stealth (bool): If True, applies playwright-stealth to bypass basic bot detection. |
| 394 | +            Cannot be combined with browser_mode="builtin". Default: False.
386 | 395 | """ |
387 | 396 |
|
388 | 397 | def __init__( |
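A minimal usage sketch for the new flag, assuming `BrowserConfig` is importable from the package root; the actual playwright-stealth patching happens in the browser layer, outside this diff:

```python
from crawl4ai import BrowserConfig  # assumes the package-root re-export

# Stealth on a regular headless Chromium instance
config = BrowserConfig(
    browser_type="chromium",
    headless=True,
    enable_stealth=True,  # applies playwright-stealth per the docstring above
)
```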
@@ -423,6 +432,7 @@ def __init__( |
423 | 432 | extra_args: list = None, |
424 | 433 | debugging_port: int = 9222, |
425 | 434 | host: str = "localhost", |
| 435 | + enable_stealth: bool = False, |
426 | 436 | ): |
427 | 437 | self.browser_type = browser_type |
428 | 438 | self.headless = headless |
@@ -463,6 +473,7 @@ def __init__( |
463 | 473 | self.verbose = verbose |
464 | 474 | self.debugging_port = debugging_port |
465 | 475 | self.host = host |
| 476 | + self.enable_stealth = enable_stealth |
466 | 477 |
|
467 | 478 | fa_user_agenr_generator = ValidUAGenerator() |
468 | 479 | if self.user_agent_mode == "random": |
@@ -494,6 +505,13 @@ def __init__( |
494 | 505 | # If persistent context is requested, ensure managed browser is enabled |
495 | 506 | if self.use_persistent_context: |
496 | 507 | self.use_managed_browser = True |
| 508 | + |
| 509 | + # Validate stealth configuration |
| 510 | + if self.enable_stealth and self.use_managed_browser and self.browser_mode == "builtin": |
| 511 | + raise ValueError( |
| 512 | + "enable_stealth cannot be used with browser_mode='builtin'. " |
| 513 | + "Stealth mode requires a dedicated browser instance." |
| 514 | + ) |
497 | 515 |
|
498 | 516 | @staticmethod |
499 | 517 | def from_kwargs(kwargs: dict) -> "BrowserConfig": |
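The guard above only rejects the three-way combination of stealth, a managed browser, and the builtin mode. A sketch of both sides of the check, assuming `browser_mode` is an existing `BrowserConfig` parameter as the validation implies:

```python
# Rejected: all three conditions of the guard hold
try:
    BrowserConfig(
        enable_stealth=True,
        use_managed_browser=True,
        browser_mode="builtin",
    )
except ValueError as e:
    print(e)  # "enable_stealth cannot be used with browser_mode='builtin'. ..."

# Accepted: stealth with a managed browser in a non-builtin mode
# ("dedicated" is an assumed non-builtin mode value for illustration)
ok = BrowserConfig(enable_stealth=True, use_managed_browser=True, browser_mode="dedicated")
```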
@@ -530,6 +548,7 @@ def from_kwargs(kwargs: dict) -> "BrowserConfig": |
530 | 548 | extra_args=kwargs.get("extra_args", []), |
531 | 549 | debugging_port=kwargs.get("debugging_port", 9222), |
532 | 550 | host=kwargs.get("host", "localhost"), |
| 551 | + enable_stealth=kwargs.get("enable_stealth", False), |
533 | 552 | ) |
534 | 553 |
|
535 | 554 | def to_dict(self): |
@@ -564,6 +583,7 @@ def to_dict(self): |
564 | 583 | "verbose": self.verbose, |
565 | 584 | "debugging_port": self.debugging_port, |
566 | 585 | "host": self.host, |
| 586 | + "enable_stealth": self.enable_stealth, |
567 | 587 | } |
568 | 588 |
|
569 | 589 |
|
@@ -862,7 +882,7 @@ class CrawlerRunConfig(): |
862 | 882 | parser_type (str): Type of parser to use for HTML parsing. |
863 | 883 | Default: "lxml". |
864 | 884 | scraping_strategy (ContentScrapingStrategy): Scraping strategy to use. |
865 | | - Default: WebScrapingStrategy. |
| 885 | + Default: LXMLWebScrapingStrategy. |
866 | 886 | proxy_config (ProxyConfig or dict or None): Detailed proxy configuration, e.g. {"server": "...", "username": "..."}. |
867 | 887 | If None, no additional proxy config. Default: None. |
868 | 888 |
|
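With `WebScrapingStrategy` dropped from the imports above, `LXMLWebScrapingStrategy` is now the documented default. Spelling that default out explicitly (the import path assumes a package-root re-export):

```python
from crawl4ai import CrawlerRunConfig, LXMLWebScrapingStrategy

# Equivalent to omitting scraping_strategy entirely under the new default
run_cfg = CrawlerRunConfig(scraping_strategy=LXMLWebScrapingStrategy())
```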
@@ -1113,6 +1133,9 @@ def __init__( |
1113 | 1133 | link_preview_config: Union[LinkPreviewConfig, Dict[str, Any]] = None, |
1114 | 1134 | # Virtual Scroll Parameters |
1115 | 1135 | virtual_scroll_config: Union[VirtualScrollConfig, Dict[str, Any]] = None, |
| 1136 | + # URL Matching Parameters |
| 1137 | + url_matcher: Optional[UrlMatcher] = None, |
| 1138 | + match_mode: MatchMode = MatchMode.OR, |
1116 | 1139 | # Experimental Parameters |
1117 | 1140 | experimental: Dict[str, Any] = None, |
1118 | 1141 | ): |
@@ -1266,6 +1289,10 @@ def __init__( |
1266 | 1289 | else: |
1267 | 1290 | raise ValueError("virtual_scroll_config must be VirtualScrollConfig object or dict") |
1268 | 1291 |
|
| 1292 | + # URL Matching Parameters |
| 1293 | + self.url_matcher = url_matcher |
| 1294 | + self.match_mode = match_mode |
| 1295 | + |
1269 | 1296 | # Experimental Parameters |
1270 | 1297 | self.experimental = experimental or {} |
1271 | 1298 |
|
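The two attributes stored here drive `is_match()` in the next hunk. A sketch of per-URL configs, with illustrative patterns:

```python
# OR (default): use this config when the URL looks like docs OR ends in .pdf
docs_cfg = CrawlerRunConfig(
    url_matcher=["*docs*", lambda u: u.endswith(".pdf")],
)

# AND: every matcher in the list must pass
api_cfg = CrawlerRunConfig(
    url_matcher=["https://*", lambda u: "/api/" in u],
    match_mode=MatchMode.AND,
)
```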
@@ -1321,6 +1348,51 @@ def _compile_c4a_script(self): |
1321 | 1348 | if "compilation error" not in str(e).lower(): |
1322 | 1349 | raise ValueError(f"Failed to compile C4A script: {str(e)}") |
1323 | 1350 | raise |
| 1351 | + |
| 1352 | + def is_match(self, url: str) -> bool: |
| 1353 | + """Check if this config matches the given URL. |
| 1354 | + |
| 1355 | + Args: |
| 1356 | + url: The URL to check against this config's matcher |
| 1357 | + |
| 1358 | + Returns: |
| 1359 | + bool: True if this config should be used for the URL or if no matcher is set. |
| 1360 | + """ |
| 1361 | + if self.url_matcher is None: |
| 1362 | + return True |
| 1363 | + |
| 1364 | + if callable(self.url_matcher): |
| 1365 | + # Single function matcher |
| 1366 | + return self.url_matcher(url) |
| 1367 | + |
| 1368 | + elif isinstance(self.url_matcher, str): |
| 1369 | + # Single pattern string |
| 1370 | + from fnmatch import fnmatch |
| 1371 | + return fnmatch(url, self.url_matcher) |
| 1372 | + |
| 1373 | + elif isinstance(self.url_matcher, list): |
| 1374 | + # List of mixed matchers |
| 1375 | + if not self.url_matcher: # Empty list |
| 1376 | + return False |
| 1377 | + |
| 1378 | + results = [] |
| 1379 | + for matcher in self.url_matcher: |
| 1380 | + if callable(matcher): |
| 1381 | + results.append(matcher(url)) |
| 1382 | + elif isinstance(matcher, str): |
| 1383 | + from fnmatch import fnmatch |
| 1384 | + results.append(fnmatch(url, matcher)) |
| 1385 | + else: |
| 1386 | + # Skip invalid matchers |
| 1387 | + continue |
| 1388 | + |
| 1389 | + # Apply match mode logic |
| 1390 | + if self.match_mode == MatchMode.OR: |
| 1391 | + return any(results) if results else False |
| 1392 | + else: # AND mode |
| 1393 | + return all(results) if results else False |
| 1394 | + |
| 1395 | + return False |
1324 | 1396 |
|
1325 | 1397 |
|
1326 | 1398 | def __getattr__(self, name): |
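How the branches above play out under the default OR mode (URLs are illustrative):

```python
cfg = CrawlerRunConfig(
    url_matcher=["*python.org*", lambda u: u.startswith("https://")],
)

cfg.is_match("https://docs.python.org/3/")  # True: both matchers pass
cfg.is_match("http://python.org")           # True: the glob passes; OR needs only one
cfg.is_match("http://example.com")          # False: no matcher passes

# No matcher set: is_match() is a match-all, so the config applies to every URL
CrawlerRunConfig().is_match("http://example.com")  # True
```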
@@ -1443,6 +1515,9 @@ def from_kwargs(kwargs: dict) -> "CrawlerRunConfig": |
1443 | 1515 | # Link Extraction Parameters |
1444 | 1516 | link_preview_config=kwargs.get("link_preview_config"), |
1445 | 1517 | url=kwargs.get("url"), |
| 1518 | + # URL Matching Parameters |
| 1519 | + url_matcher=kwargs.get("url_matcher"), |
| 1520 | + match_mode=kwargs.get("match_mode", MatchMode.OR), |
1446 | 1521 | # Experimental Parameters |
1447 | 1522 | experimental=kwargs.get("experimental"), |
1448 | 1523 | ) |
@@ -1540,6 +1615,8 @@ def to_dict(self): |
1540 | 1615 | "deep_crawl_strategy": self.deep_crawl_strategy, |
1541 | 1616 | "link_preview_config": self.link_preview_config.to_dict() if self.link_preview_config else None, |
1542 | 1617 | "url": self.url, |
| 1618 | + "url_matcher": self.url_matcher, |
| 1619 | + "match_mode": self.match_mode, |
1543 | 1620 | "experimental": self.experimental, |
1544 | 1621 | } |
1545 | 1622 |
|
|