16 changes: 16 additions & 0 deletions CHANGELOG.md
@@ -8,13 +8,29 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## [Unreleased]

### Added
- **🔐 Claude Code Provider**: New LLM provider using Claude Code CLI authentication
  - Enables LLM extraction using existing Claude Code subscriptions without API keys
  - Integrated with LiteLLM as a custom provider via the `claude-code/` prefix
  - Supports models: `claude-sonnet-4-20250514`, `claude-opus-4-20250514`, `claude-haiku-3-5-latest`
  - Async and sync completion with automatic event loop handling
  - New optional dependency: `pip install crawl4ai[claude-code]`
  - Provider: `crawl4ai/providers/claude_code_provider.py`
  - Example: `examples/claude_code_extraction.py`
  - Contributor: [@chansearrington](https://github.com/chansearrington)

- **🔒 HTTPS Preservation for Internal Links**: New `preserve_https_for_internal_links` configuration flag
  - Maintains HTTPS scheme for internal links even when servers redirect to HTTP
  - Prevents security downgrades during deep crawling
  - Useful for security-conscious crawling and sites supporting both protocols
  - Fully backward compatible with opt-in flag (default: `False`)
  - Fixes issue #1410 where HTTPS URLs were being downgraded to HTTP
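
  A minimal standalone sketch of the behavior this flag enables (not the library's internal implementation): when a page was fetched over HTTPS, internal links that resolve to plain HTTP are upgraded back to HTTPS, while external links are left alone.

  ```python
  from urllib.parse import urlparse, urlunparse

  def preserve_https(base_url: str, link: str) -> str:
      """Upgrade an internal http:// link back to https:// when the
      page itself was fetched over HTTPS. Illustrative sketch only."""
      base = urlparse(base_url)
      target = urlparse(link)
      # Only touch internal links (same host) that downgraded to http.
      if base.scheme == "https" and target.scheme == "http" and target.netloc == base.netloc:
          target = target._replace(scheme="https")
      return urlunparse(target)

  print(preserve_https("https://example.com/a", "http://example.com/b"))
  # -> https://example.com/b
  ```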

### Improved
- **📊 LLM Verbose Logging**: Now logs provider and model name when `verbose=True`
  - Shows which LLM provider/model is being used for each extraction
  - Helps debug configuration issues and track usage across providers
  - Example output: `[LOG] LLM Provider: claude-code | Model: claude-sonnet-4-20250514`

## [0.7.3] - 2025-08-09

### Added
96 changes: 96 additions & 0 deletions CONTRIBUTING.md
@@ -0,0 +1,96 @@
# Contributing to Crawl4AI

Thank you for your interest in contributing to Crawl4AI!

## Getting Started

1. Fork the repository
2. Clone your fork
3. Install in development mode: `pip install -e ".[all]"`
4. Run tests: `pytest tests/`

## Contributing New LLM Providers

### 1. Create Provider Class

Create `crawl4ai/providers/your_provider.py`:

```python
from litellm import CustomLLM
from litellm.types.utils import ModelResponse

class YourProvider(CustomLLM):
    async def acompletion(self, model, messages, **kwargs) -> ModelResponse:
        # Implement async completion
        pass

    def completion(self, model, messages, **kwargs) -> ModelResponse:
        # Implement sync completion
        pass
```

### 2. Register Provider

Update `crawl4ai/providers/__init__.py` to register your provider:

```python
from .your_provider import YourProvider

def register_your_provider():
    import litellm
    # Append rather than overwrite so other custom providers stay registered.
    litellm.custom_provider_map = (litellm.custom_provider_map or []) + [
        {"provider": "your-provider", "custom_handler": YourProvider()}
    ]
```

### 3. Add Optional Dependency

Update `pyproject.toml` with your SDK dependency:

```toml
[project.optional-dependencies]
your-provider = ["your-sdk>=1.0.0"]
```
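
Since the SDK is an optional dependency, registration should degrade gracefully when it is missing. A minimal sketch of the guarded-import pattern (the `your_sdk` module name is illustrative):

```python
import importlib

def try_register(sdk_name: str = "your_sdk") -> bool:
    """Register the provider only if its optional SDK is installed."""
    try:
        importlib.import_module(sdk_name)  # hypothetical optional dependency
    except ImportError:
        # SDK missing: skip registration instead of crashing at import time.
        return False
    # ... build the provider and register it with litellm here ...
    return True
```

This mirrors how `crawl4ai/providers/__init__.py` skips the Claude Code provider when `claude-agent-sdk` is not installed.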

### 4. Write Tests

Add tests at:
- `tests/unit/test_your_provider.py` - Unit tests for the provider
- `tests/integration/test_your_integration.py` - Integration tests

### 5. Add Documentation

- Update `docs/md_v2/extraction/llm-strategies.md` with a provider example
- Consider creating a dedicated doc page at `docs/md_v2/extraction/your-provider.md`

## Pull Request Process

1. Ensure all tests pass: `pytest tests/`
2. Update documentation as needed
3. Add a CHANGELOG.md entry under `[Unreleased]`
4. Submit PR against the `main` branch

## Code Style

- Follow PEP 8
- Use type hints where possible
- Add docstrings to public functions and classes
- Keep functions focused and testable

## Testing

```bash
# Run all tests
pytest tests/

# Run specific test file
pytest tests/unit/test_your_provider.py -v

# Run with coverage
pytest tests/ --cov=crawl4ai
```

## Questions?

Open an issue for discussion before starting major changes.
47 changes: 47 additions & 0 deletions README.md
@@ -293,6 +293,7 @@ pip install -e ".[torch]" # With PyTorch features
pip install -e ".[transformer]" # With Transformer features
pip install -e ".[cosine]" # With cosine similarity features
pip install -e ".[sync]" # With synchronous crawling (Selenium)
pip install -e ".[claude-code]" # With Claude Code provider (no API keys needed)
pip install -e ".[all]" # Install all optional features
```

@@ -517,6 +518,52 @@ if __name__ == "__main__":

</details>

<details>
<summary>🔐 <strong>Using Claude Code Provider (No API Keys Required)</strong></summary>

Use your Claude Code CLI subscription for LLM extraction without managing API keys:

```python
import asyncio
from crawl4ai import AsyncWebCrawler, LLMConfig, CrawlerRunConfig
from crawl4ai import LLMExtractionStrategy

async def main():
    # Claude Code uses local CLI authentication - no API key needed!
    llm_config = LLMConfig(provider="claude-code/claude-sonnet-4-20250514")

    strategy = LLMExtractionStrategy(
        llm_config=llm_config,
        instruction="Extract all product names and prices as JSON"
    )

    config = CrawlerRunConfig(extraction_strategy=strategy)

    async with AsyncWebCrawler() as crawler:
        result = await crawler.arun(url="https://example.com", config=config)
        print(result.extracted_content)

asyncio.run(main())
```

**Installation:**
```bash
pip install crawl4ai[claude-code]
```

**Prerequisites:**
- Claude Code CLI installed: `npm install -g @anthropic-ai/claude-code`
- CLI authenticated: run `claude login`

**Supported Models:**
| Model | Provider String | Use Case |
|-------|-----------------|----------|
| Sonnet 4 | `claude-code/claude-sonnet-4-20250514` | Balanced (recommended) |
| Opus 4 | `claude-code/claude-opus-4-20250514` | Most capable |
| Haiku 3.5 | `claude-code/claude-haiku-3-5-latest` | Fastest |

</details>

<details>
<summary>🤖 <strong>Using Your Own Browser with a Custom User Profile</strong></summary>

4 changes: 4 additions & 0 deletions crawl4ai/__init__.py
@@ -1,6 +1,10 @@
# __init__.py
import warnings

# Register custom LLM providers with LiteLLM
from .providers import register_custom_providers
register_custom_providers()

from .async_webcrawler import AsyncWebCrawler, CacheMode
# MODIFIED: Add SeedingConfig and VirtualScrollConfig here
from .async_configs import BrowserConfig, CrawlerRunConfig, HTTPCrawlerConfig, LLMConfig, ProxyConfig, GeolocationConfig, SeedingConfig, VirtualScrollConfig, LinkPreviewConfig, MatchMode
1 change: 1 addition & 0 deletions crawl4ai/config.py
@@ -36,6 +36,7 @@
    "anthropic": os.getenv("ANTHROPIC_API_KEY"),
    "gemini": os.getenv("GEMINI_API_KEY"),
    "deepseek": os.getenv("DEEPSEEK_API_KEY"),
    "claude-code": "no-token-needed",  # Uses local Claude Code CLI auth
}

# Chunk token threshold
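
The token map above gives `claude-code` a sentinel value instead of an environment variable, since authentication lives in the local CLI. A standalone sketch of the lookup (the map and helper names here are illustrative, not the library's):

```python
import os

# Stand-in for the provider token map shown above: most providers read an
# API key from the environment, but claude-code needs none.
TOKENS = {
    "anthropic": os.getenv("ANTHROPIC_API_KEY"),
    "claude-code": "no-token-needed",  # local Claude Code CLI auth
}

def resolve_token(provider: str):
    # Unknown providers resolve to None rather than raising.
    return TOKENS.get(provider)

print(resolve_token("claude-code"))
# -> no-token-needed
```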
13 changes: 12 additions & 1 deletion crawl4ai/extraction_strategy.py
@@ -616,7 +616,12 @@ def extract(self, url: str, ix: int, html: str) -> List[Dict[str, Any]]:
        A list of extracted blocks or chunks.
        """
        if self.verbose:
            # print("[LOG] Extracting blocks from URL:", url)
            # Log which LLM provider/model is being used
            provider_str = self.llm_config.provider if self.llm_config else "unknown"
            provider_parts = provider_str.split('/')
            provider_name = provider_parts[0] if provider_parts else "unknown"
            model_name = provider_parts[1] if len(provider_parts) > 1 else provider_str
            print(f"[LOG] LLM Provider: {provider_name} | Model: {model_name}")
            print(f"[LOG] Call LLM for {url} - block index: {ix}")

        variable_values = {
@@ -817,6 +822,12 @@ async def aextract(self, url: str, ix: int, html: str) -> List[Dict[str, Any]]:
        from .utils import aperform_completion_with_backoff

        if self.verbose:
            # Log which LLM provider/model is being used
            provider_str = self.llm_config.provider if self.llm_config else "unknown"
            provider_parts = provider_str.split('/')
            provider_name = provider_parts[0] if provider_parts else "unknown"
            model_name = provider_parts[1] if len(provider_parts) > 1 else provider_str
            print(f"[LOG] LLM Provider: {provider_name} | Model: {model_name}")
            print(f"[LOG] Call LLM for {url} - block index: {ix}")

        variable_values = {
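
The provider-string parsing used in both branches above can be exercised standalone:

```python
def split_provider(provider_str: str) -> tuple:
    """Split a 'provider/model' string the way the verbose logging above
    does. Standalone sketch, not the library's function."""
    parts = provider_str.split('/')
    name = parts[0] if parts else "unknown"
    # A bare provider string with no '/' is echoed back as the model name.
    model = parts[1] if len(parts) > 1 else provider_str
    return name, model

print(split_provider("claude-code/claude-sonnet-4-20250514"))
# -> ('claude-code', 'claude-sonnet-4-20250514')
```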
47 changes: 47 additions & 0 deletions crawl4ai/providers/__init__.py
@@ -0,0 +1,47 @@
"""
Custom LLM providers for Crawl4AI.

This module provides custom LLM provider integrations beyond what LiteLLM
offers out of the box.
"""

_providers_registered = False


def register_custom_providers():
    """
    Register custom LLM providers with LiteLLM.

    This function registers all custom providers defined in this package
    with LiteLLM's custom_provider_map. It is idempotent - calling it
    multiple times has no additional effect.

    Currently registered providers:
    - claude-code: Uses Claude Code CLI for LLM completions (requires local auth)
    """
    global _providers_registered
    if _providers_registered:
        return

    import litellm

    # Initialize custom_provider_map if it doesn't exist
    if litellm.custom_provider_map is None:
        litellm.custom_provider_map = []

    # Try to register Claude Code provider (optional dependency)
    try:
        from .claude_code_provider import ClaudeCodeProvider

        # Check if already registered
        existing_providers = [p.get("provider") for p in litellm.custom_provider_map]
        if "claude-code" not in existing_providers:
            litellm.custom_provider_map.append({
                "provider": "claude-code",
                "custom_handler": ClaudeCodeProvider()
            })
    except ImportError:
        # claude-agent-sdk not installed, skip registration
        pass

    _providers_registered = True
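
The idempotence guard above can be demonstrated with a plain list standing in for `litellm.custom_provider_map` (names here are illustrative):

```python
_registered = False
registry = []  # stand-in for litellm.custom_provider_map

def register(handler) -> None:
    """Idempotent registration: repeated calls add nothing new."""
    global _registered
    if _registered:
        return
    names = [entry["provider"] for entry in registry]
    if "claude-code" not in names:
        registry.append({"provider": "claude-code", "custom_handler": handler})
    _registered = True

register(object())
register(object())  # second call is a no-op
print(len(registry))
# -> 1
```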