diff --git a/CHANGELOG.md b/CHANGELOG.md
index ce63516f9..207508c1f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -8,6 +8,16 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## [Unreleased]
### Added
+- **🔐 Claude Code Provider**: New LLM provider using Claude Code CLI authentication
+ - Enables LLM extraction using existing Claude Code subscriptions without API keys
+ - Integrated with LiteLLM as custom provider via `claude-code/` prefix
+  - Supports models: `claude-sonnet-4-20250514`, `claude-opus-4-20250514`, `claude-3-5-haiku-latest`
+ - Async and sync completion with automatic event loop handling
+ - New optional dependency: `pip install crawl4ai[claude-code]`
+ - Provider: `crawl4ai/providers/claude_code_provider.py`
+ - Example: `examples/claude_code_extraction.py`
+ - Contributor: [@chansearrington](https://github.com/chansearrington)
+
- **🔒 HTTPS Preservation for Internal Links**: New `preserve_https_for_internal_links` configuration flag
- Maintains HTTPS scheme for internal links even when servers redirect to HTTP
- Prevents security downgrades during deep crawling
@@ -15,6 +25,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Fully backward compatible with opt-in flag (default: `False`)
- Fixes issue #1410 where HTTPS URLs were being downgraded to HTTP
+### Improved
+- **📊 LLM Verbose Logging**: Now logs provider and model name when `verbose=True`
+ - Shows which LLM provider/model is being used for each extraction
+ - Helps debug configuration issues and track usage across providers
+ - Example output: `[LOG] LLM Provider: claude-code | Model: claude-sonnet-4-20250514`
+
## [0.7.3] - 2025-08-09
### Added
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
new file mode 100644
index 000000000..574b2516a
--- /dev/null
+++ b/CONTRIBUTING.md
@@ -0,0 +1,96 @@
+# Contributing to Crawl4AI
+
+Thank you for your interest in contributing to Crawl4AI!
+
+## Getting Started
+
+1. Fork the repository
+2. Clone your fork
+3. Install in development mode: `pip install -e ".[all]"`
+4. Run tests: `pytest tests/`
+
+## Contributing New LLM Providers
+
+### 1. Create Provider Class
+
+Create `crawl4ai/providers/your_provider.py`:
+
+```python
+from litellm import CustomLLM
+from litellm.types.utils import ModelResponse
+
+class YourProvider(CustomLLM):
+ async def acompletion(self, model, messages, **kwargs) -> ModelResponse:
+ # Implement async completion
+ pass
+
+ def completion(self, model, messages, **kwargs) -> ModelResponse:
+ # Implement sync completion
+ pass
+```
+
+### 2. Register Provider
+
+Update `crawl4ai/providers/__init__.py` to register your provider:
+
+```python
+from .your_provider import YourProvider
+
+def register_your_provider():
+ import litellm
+ your_provider = YourProvider()
+    litellm.custom_provider_map.append(
+        {"provider": "your-provider", "custom_handler": your_provider}
+    )
+```
+
+### 3. Add Optional Dependency
+
+Update `pyproject.toml` with your SDK dependency:
+
+```toml
+[project.optional-dependencies]
+your-provider = ["your-sdk>=1.0.0"]
+```
+
+### 4. Write Tests
+
+Add tests at:
+- `tests/unit/test_your_provider.py` - Unit tests for the provider
+- `tests/integration/test_your_integration.py` - Integration tests
+
+### 5. Add Documentation
+
+- Update `docs/md_v2/extraction/llm-strategies.md` with a provider example
+- Consider creating a dedicated doc page at `docs/md_v2/extraction/your-provider.md`
+
+## Pull Request Process
+
+1. Ensure all tests pass: `pytest tests/`
+2. Update documentation as needed
+3. Add a CHANGELOG.md entry under `[Unreleased]`
+4. Submit PR against the `main` branch
+
+## Code Style
+
+- Follow PEP 8
+- Use type hints where possible
+- Add docstrings to public functions and classes
+- Keep functions focused and testable
+
+## Testing
+
+```bash
+# Run all tests
+pytest tests/
+
+# Run specific test file
+pytest tests/unit/test_your_provider.py -v
+
+# Run with coverage
+pytest tests/ --cov=crawl4ai
+```
+
+## Questions?
+
+Open an issue for discussion before starting major changes.
diff --git a/README.md b/README.md
index 7535e6bd5..5c94ee95d 100644
--- a/README.md
+++ b/README.md
@@ -293,6 +293,7 @@ pip install -e ".[torch]" # With PyTorch features
pip install -e ".[transformer]" # With Transformer features
pip install -e ".[cosine]" # With cosine similarity features
pip install -e ".[sync]" # With synchronous crawling (Selenium)
+pip install -e ".[claude-code]" # With Claude Code provider (no API keys needed)
pip install -e ".[all]" # Install all optional features
```
@@ -517,6 +518,52 @@ if __name__ == "__main__":
+<details>
+<summary>🔐 <strong>Using Claude Code Provider (No API Keys Required)</strong></summary>
+
+Use your Claude Code CLI subscription for LLM extraction without managing API keys:
+
+```python
+import asyncio
+from crawl4ai import AsyncWebCrawler, LLMConfig, CrawlerRunConfig
+from crawl4ai import LLMExtractionStrategy
+
+async def main():
+    # Claude Code uses local CLI authentication - no API key needed!
+    llm_config = LLMConfig(provider="claude-code/claude-sonnet-4-20250514")
+
+    strategy = LLMExtractionStrategy(
+        llm_config=llm_config,
+        instruction="Extract all product names and prices as JSON"
+    )
+
+    config = CrawlerRunConfig(extraction_strategy=strategy)
+
+    async with AsyncWebCrawler() as crawler:
+        result = await crawler.arun(url="https://example.com", config=config)
+        print(result.extracted_content)
+
+asyncio.run(main())
+```
+
+**Installation:**
+```bash
+pip install crawl4ai[claude-code]
+```
+
+**Prerequisites:**
+- Claude Code CLI installed: `npm install -g @anthropic-ai/claude-code`
+- CLI authenticated: run `claude login`
+
+**Supported Models:**
+| Model | Provider String | Use Case |
+|-------|-----------------|----------|
+| Sonnet 4 | `claude-code/claude-sonnet-4-20250514` | Balanced (recommended) |
+| Opus 4 | `claude-code/claude-opus-4-20250514` | Most capable |
+| Haiku 3.5 | `claude-code/claude-3-5-haiku-latest` | Fastest |
+
+</details>
+
🤖 Using Your own Browser with Custom User Profile
diff --git a/crawl4ai/__init__.py b/crawl4ai/__init__.py
index af35e6a0e..f4b6e8ecd 100644
--- a/crawl4ai/__init__.py
+++ b/crawl4ai/__init__.py
@@ -1,6 +1,10 @@
# __init__.py
import warnings
+# Register custom LLM providers with LiteLLM
+from .providers import register_custom_providers
+register_custom_providers()
+
from .async_webcrawler import AsyncWebCrawler, CacheMode
# MODIFIED: Add SeedingConfig and VirtualScrollConfig here
from .async_configs import BrowserConfig, CrawlerRunConfig, HTTPCrawlerConfig, LLMConfig, ProxyConfig, GeolocationConfig, SeedingConfig, VirtualScrollConfig, LinkPreviewConfig, MatchMode
diff --git a/crawl4ai/config.py b/crawl4ai/config.py
index 08f56b832..9e0d2f546 100644
--- a/crawl4ai/config.py
+++ b/crawl4ai/config.py
@@ -36,6 +36,7 @@
"anthropic": os.getenv("ANTHROPIC_API_KEY"),
"gemini": os.getenv("GEMINI_API_KEY"),
"deepseek": os.getenv("DEEPSEEK_API_KEY"),
+ "claude-code": "no-token-needed", # Uses local Claude Code CLI auth
}
# Chunk token threshold
diff --git a/crawl4ai/extraction_strategy.py b/crawl4ai/extraction_strategy.py
index 7033e3800..6a46fdce1 100644
--- a/crawl4ai/extraction_strategy.py
+++ b/crawl4ai/extraction_strategy.py
@@ -616,7 +616,12 @@ def extract(self, url: str, ix: int, html: str) -> List[Dict[str, Any]]:
A list of extracted blocks or chunks.
"""
if self.verbose:
- # print("[LOG] Extracting blocks from URL:", url)
+            # Log which LLM provider/model is being used
+            provider_str = (self.llm_config.provider if self.llm_config else None) or "unknown"
+            # Split on the first "/" only so model ids that contain "/" stay intact
+            # (e.g. "huggingface/meta-llama/Llama-3-8B" -> model "meta-llama/Llama-3-8B").
+            provider_name, _sep, model_name = provider_str.partition('/')
+            print(f"[LOG] LLM Provider: {provider_name} | Model: {model_name or provider_str}")
print(f"[LOG] Call LLM for {url} - block index: {ix}")
variable_values = {
@@ -817,6 +822,12 @@ async def aextract(self, url: str, ix: int, html: str) -> List[Dict[str, Any]]:
from .utils import aperform_completion_with_backoff
if self.verbose:
+            # Log which LLM provider/model is being used
+            provider_str = (self.llm_config.provider if self.llm_config else None) or "unknown"
+            # Split on the first "/" only so model ids that contain "/" stay intact
+            # (e.g. "huggingface/meta-llama/Llama-3-8B" -> model "meta-llama/Llama-3-8B").
+            provider_name, _sep, model_name = provider_str.partition('/')
+            print(f"[LOG] LLM Provider: {provider_name} | Model: {model_name or provider_str}")
print(f"[LOG] Call LLM for {url} - block index: {ix}")
variable_values = {
diff --git a/crawl4ai/providers/__init__.py b/crawl4ai/providers/__init__.py
new file mode 100644
index 000000000..7e56ed196
--- /dev/null
+++ b/crawl4ai/providers/__init__.py
@@ -0,0 +1,47 @@
+"""
+Custom LLM providers for Crawl4AI.
+
+This module provides custom LLM provider integrations beyond what LiteLLM
+offers out of the box.
+"""
+
+_providers_registered = False
+
+
+def register_custom_providers():
+    """
+    Register Crawl4AI's custom LLM providers with LiteLLM.
+
+    Adds a handler entry to ``litellm.custom_provider_map`` for each provider
+    in this package. Idempotent: a module-level flag short-circuits repeat
+    calls and an existing "claude-code" entry is never duplicated.
+
+    Currently registered providers:
+    - claude-code: Uses Claude Code CLI for LLM completions (requires local auth)
+    """
+    global _providers_registered
+    if _providers_registered:
+        return
+
+    import logging
+    import litellm
+
+    # Tolerate both a missing attribute and an explicit None.
+    if getattr(litellm, "custom_provider_map", None) is None:
+        litellm.custom_provider_map = []
+
+    try:
+        # The provider module needs only LiteLLM at import time (the SDK is
+        # imported lazily on first use), so a failure here points at LiteLLM.
+        from .claude_code_provider import ClaudeCodeProvider
+    except ImportError as exc:
+        # Best-effort: leave the provider unregistered, but record why.
+        logging.getLogger(__name__).debug("claude-code provider not registered: %s", exc)
+    else:
+        registered = {entry.get("provider") for entry in litellm.custom_provider_map}
+        if "claude-code" not in registered:
+            litellm.custom_provider_map.append(
+                {"provider": "claude-code", "custom_handler": ClaudeCodeProvider()}
+            )
+
+    _providers_registered = True
diff --git a/crawl4ai/providers/claude_code_provider.py b/crawl4ai/providers/claude_code_provider.py
new file mode 100644
index 000000000..d8bbba78a
--- /dev/null
+++ b/crawl4ai/providers/claude_code_provider.py
@@ -0,0 +1,369 @@
+"""
+Claude Code/Agent SDK provider for LiteLLM integration.
+
+This provider enables using Claude Code CLI authentication for LLM completions,
+allowing users with Claude Code subscriptions to leverage their existing auth
+without needing separate API keys.
+
+IMPORTANT: Uses LOCAL Claude Code CLI authentication.
+Each user must have their own Claude Code CLI installed and authenticated.
+
+Usage:
+ >>> from crawl4ai.async_configs import LLMConfig
+ >>> config = LLMConfig(provider="claude-code/claude-sonnet-4-20250514")
+
+Supported models:
+ - claude-code/claude-sonnet-4-20250514 (recommended)
+ - claude-code/claude-opus-4-20250514
+    - claude-code/claude-3-5-haiku-latest
+
+Requirements:
+ - Claude Code CLI: npm install -g @anthropic-ai/claude-code
+ - Authenticated: run `claude login`
+ - SDK: pip install crawl4ai[claude-code]
+
+Exceptions:
+ - ClaudeCodeError: Base exception for all provider errors
+ - ClaudeCodeSDKError: SDK not installed
+ - ClaudeCodeAuthenticationError: CLI auth failed
+ - ClaudeCodeConnectionError: Connection to service failed
+
+See Also:
+ - LLMConfig: Configuration class for LLM providers
+ - https://docs.anthropic.com/claude-code/
+
+.. versionadded:: 0.7.9
+"""
+import asyncio
+import logging
+import time
+from typing import Any, Dict, List, Optional, Tuple
+
+from litellm import CustomLLM
+from litellm.types.utils import Choices, Message, ModelResponse, Usage
+
+logger = logging.getLogger(__name__)
+
+
+class ClaudeCodeError(Exception):
+    """Root of the exception hierarchy for the Claude Code provider.
+    Every error type the provider module defines derives from this class."""
+
+
+class ClaudeCodeSDKError(ClaudeCodeError):
+    """Signals that the claude-agent-sdk package could not be imported.
+
+    Recovery:
+        1. Install: pip install crawl4ai[claude-code]
+        2. Or directly: pip install claude-agent-sdk
+
+    Raised by the provider when its lazy SDK import fails."""
+
+
+class ClaudeCodeAuthenticationError(ClaudeCodeError):
+    """Represents a Claude Code CLI authentication failure.
+
+    Recovery:
+        1. Install CLI: npm install -g @anthropic-ai/claude-code
+        2. Authenticate: claude login
+        3. Verify: run `claude` in terminal
+
+    NOTE(review): no raise site is visible in this module - confirm usage."""
+
+
+class ClaudeCodeConnectionError(ClaudeCodeError):
+    """Signals that the Claude Code service could not be reached.
+    Wraps a ConnectionError raised while streaming the SDK response."""
+
+
+class ClaudeCodeProvider(CustomLLM):
+ """
+ Custom LiteLLM provider for Claude Code/Agent SDK.
+
+ This provider wraps the Claude Agent SDK to provide LLM completions
+ using the user's local Claude Code CLI authentication.
+ """
+
+    def _extract_model(self, model: str) -> str:
+        """
+        Strip the provider prefix from a LiteLLM model string.
+
+        Args:
+            model: Provider string like "claude-code/claude-sonnet-4-20250514"
+
+        Returns:
+            Everything after the first "/" (e.g. "claude-sonnet-4-20250514"),
+            or ``model`` unchanged when it contains no "/".
+        """
+        _prefix, slash, remainder = model.partition("/")
+        return remainder if slash else model
+
+    def _convert_messages_to_prompt(self, messages: List[Dict]) -> str:
+        """
+        Convert LiteLLM message format to single prompt string.
+
+        The Claude Agent SDK expects a single prompt string, not a messages
+        array, so each message becomes one paragraph: system and assistant
+        turns are prefixed with "System: " / "Assistant: ", user turns are
+        emitted verbatim, and messages with any other role are skipped.
+
+        Args:
+            messages: List of message dicts with 'role' and 'content' keys.
+                'role' defaults to "user"; missing or None 'content' is
+                treated as an empty string.
+
+        Returns:
+            Paragraphs joined by blank lines, or "" when messages is empty.
+
+        Raises:
+            TypeError: If a message is not a dict or its content is neither
+                None nor a string (multimodal content is unsupported).
+        """
+        if not messages:
+            return ""
+
+        # Text prepended to the content for each supported role.
+        role_prefixes = {"system": "System: ", "user": "", "assistant": "Assistant: "}
+        parts = []
+        for i, msg in enumerate(messages):
+            if not isinstance(msg, dict):
+                raise TypeError(
+                    f"Message at index {i} must be a dict, got {type(msg).__name__}"
+                )
+
+            # Normalize None up front so it never renders as the text "None".
+            content = "" if msg.get("content") is None else msg["content"]
+            if not isinstance(content, str):
+                raise TypeError(
+                    f"Message at index {i} has non-string content (type: {type(content).__name__}). "
+                    "Multimodal content is not supported by claude-code provider."
+                )
+
+            prefix = role_prefixes.get(msg.get("role", "user"))
+            if prefix is not None:
+                parts.append(f"{prefix}{content}")
+
+        return "\n\n".join(parts)
+
+    async def _collect_response(
+        self, prompt: str, model: str
+    ) -> Tuple[str, Optional[Dict[str, Any]]]:
+        """
+        Call Claude Agent SDK and collect the full response.
+
+        Args:
+            prompt: The prompt string to send
+            model: The model name to use
+
+        Returns:
+            Tuple of (response_text, usage_info). usage_info is None when no
+            result message arrived; response_text may be empty, which is
+            logged as a warning rather than raised.
+
+        Raises:
+            ClaudeCodeSDKError: If claude-agent-sdk is not installed
+            ClaudeCodeConnectionError: If the SDK raises ConnectionError
+            ClaudeCodeError: If the SDK raises any other exception
+        """
+        try:
+            from claude_agent_sdk import (
+                AssistantMessage,
+                ClaudeAgentOptions,
+                ResultMessage,
+                TextBlock,
+                query,
+            )
+        except ImportError as e:
+            raise ClaudeCodeSDKError(
+                "The claude-agent-sdk package is not installed.\n\n"
+                "To fix this:\n"
+                " 1. Install: pip install crawl4ai[claude-code]\n"
+                " 2. Or directly: pip install claude-agent-sdk\n\n"
+                "Docs: https://github.com/anthropics/claude-code"
+            ) from e
+
+        options = ClaudeAgentOptions(
+            model=model,
+            max_turns=1,  # Single turn for extraction tasks
+            allowed_tools=[],  # No tools needed for text completion
+        )
+
+        logger.debug("Sending prompt to Claude Code: model=%s, prompt_length=%s", model, len(prompt))
+
+        collected_text = []
+        usage_info = None
+
+        try:
+            async for message in query(prompt=prompt, options=options):
+                if isinstance(message, AssistantMessage):
+                    collected_text.extend(
+                        block.text for block in message.content if isinstance(block, TextBlock)
+                    )
+                elif isinstance(message, ResultMessage):
+                    usage = message.usage or {}  # tolerate a missing usage payload
+                    usage_info = {
+                        "session_id": message.session_id,
+                        "duration_ms": message.duration_ms,
+                        "input_tokens": usage.get("input_tokens", 0),
+                        "output_tokens": usage.get("output_tokens", 0),
+                    }
+                else:
+                    logger.debug("Received unexpected message type: %s", type(message).__name__)
+        except ConnectionError as e:
+            raise ClaudeCodeConnectionError(
+                f"Failed to connect to Claude Code service: {e}\n\n"
+                "To fix this:\n"
+                " 1. Check Claude Code CLI is running: claude --version\n"
+                " 2. Re-authenticate if needed: claude login\n"
+                " 3. Verify it works: claude 'Hello'"
+            ) from e
+        except ImportError:
+            # Let import failures raised while streaming surface unwrapped.
+            raise
+        except Exception as e:
+            raise ClaudeCodeError(
+                f"Claude Code SDK error: {e}. "
+                "Verify your Claude Code CLI is properly installed and authenticated "
+                "by running 'claude' in your terminal."
+            ) from e
+
+        response_text = "".join(collected_text)
+
+        logger.debug("Received response: length=%s, usage=%s", len(response_text), usage_info)
+
+        # Warn if response is empty (but don't fail - the SDK may have valid reasons)
+        if not response_text.strip():
+            logger.warning(
+                "Claude Code returned an empty response. This may indicate an "
+                "authentication issue, rate limiting, or API error."
+            )
+
+        return response_text, usage_info
+
+    async def acompletion(
+        self,
+        model: str,
+        messages: List[Dict],
+        api_base: Optional[str] = None,
+        api_key: Optional[str] = None,
+        optional_params: Optional[Dict] = None,
+        timeout: Optional[float] = None,
+        client=None,
+        **kwargs,
+    ) -> ModelResponse:
+        """
+        Async completion using Claude Code SDK.
+
+        Args:
+            model: Model string like "claude-code/claude-sonnet-4-20250514"
+            messages: List of message dicts with 'role' and 'content'
+            api_base: Not used (Claude Code uses local auth)
+            api_key: Not used (Claude Code uses local auth)
+            optional_params: Additional parameters (currently unused)
+            timeout: Request timeout (not currently enforced - SDK handles timeouts)
+            client: HTTP client (not used)
+            **kwargs: Additional arguments (ignored)
+
+        Returns:
+            LiteLLM ModelResponse with a single "stop" choice. Token counts
+            default to 0 and the id to "claude-code-<unix time>" when the
+            SDK reports no usage or no session id.
+        """
+        model_name = self._extract_model(model)
+        prompt = self._convert_messages_to_prompt(messages)
+
+        logger.info("Claude Code completion: model=%s, messages=%s", model_name, len(messages))
+
+        response_text, usage_info = await self._collect_response(prompt, model_name)
+        usage = usage_info or {}
+
+        input_tokens = usage.get("input_tokens", 0)
+        output_tokens = usage.get("output_tokens", 0)
+
+        created = int(time.time())
+        # Fall back whenever the session id is absent or None, not only when usage is missing.
+        response_id = usage.get("session_id") or f"claude-code-{created}"
+
+        return ModelResponse(
+            id=response_id,
+            choices=[
+                Choices(
+                    message=Message(role="assistant", content=response_text),
+                    index=0,
+                    finish_reason="stop",
+                )
+            ],
+            created=created,
+            model=model_name,
+            usage=Usage(
+                prompt_tokens=input_tokens,
+                completion_tokens=output_tokens,
+                total_tokens=input_tokens + output_tokens,
+            ),
+            object="chat.completion",
+        )
+
+    def completion(
+        self,
+        model: str,
+        messages: List[Dict],
+        api_base: Optional[str] = None,
+        api_key: Optional[str] = None,
+        optional_params: Optional[Dict] = None,
+        timeout: Optional[float] = None,
+        client=None,
+        **kwargs,
+    ) -> ModelResponse:
+        """
+        Sync completion - runs the async version to completion.
+
+        With no event loop running in the calling thread, the coroutine is
+        driven by ``asyncio.run`` directly. When a loop is already running
+        (``asyncio.run`` would raise there), it is driven on a fresh loop
+        in a worker thread while the caller blocks on the result.
+
+        Args:
+            Same as acompletion
+
+        Returns:
+            LiteLLM ModelResponse object
+
+        Raises:
+            ClaudeCodeError: Propagated unchanged from acompletion on either
+                path, so callers can still catch the specific subclasses.
+                On the worker-thread path any other exception is wrapped
+                in ClaudeCodeError (chained via __cause__).
+        """
+
+        def run_in_new_loop() -> ModelResponse:
+            # Create the coroutine in the thread that runs it.
+            return asyncio.run(
+                self.acompletion(
+                    model=model,
+                    messages=messages,
+                    api_base=api_base,
+                    api_key=api_key,
+                    optional_params=optional_params,
+                    timeout=timeout,
+                    client=client,
+                    **kwargs,
+                )
+            )
+
+        try:
+            # get_running_loop raises RuntimeError only when no loop is running.
+            asyncio.get_running_loop()
+        except RuntimeError:
+            return run_in_new_loop()
+
+        # We're in an async context, need to use a new thread
+        import concurrent.futures
+
+        with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
+            try:
+                return executor.submit(run_in_new_loop).result()
+            except ClaudeCodeError:
+                # Keep SDK/connection subclasses intact for callers.
+                raise
+            except Exception as e:
+                raise ClaudeCodeError(f"Claude Code completion failed: {e}") from e
diff --git a/docs/md_v2/extraction/claude-code-provider.md b/docs/md_v2/extraction/claude-code-provider.md
new file mode 100644
index 000000000..266a6faf4
--- /dev/null
+++ b/docs/md_v2/extraction/claude-code-provider.md
@@ -0,0 +1,177 @@
+# Claude Code Provider
+
+Use your Claude Code CLI subscription for LLM-powered web extraction without managing API keys.
+
+## Overview
+
+The Claude Code provider enables Crawl4AI users with Claude Code subscriptions to leverage their existing authentication for LLM extraction:
+
+- **No API Keys Required**: Uses local Claude Code CLI authentication
+- **Familiar Models**: Access Claude Sonnet, Opus, and Haiku models
+- **Seamless Integration**: Works with all existing `LLMExtractionStrategy` features
+
+## Prerequisites
+
+1. **Claude Code CLI** installed and authenticated:
+ ```bash
+ npm install -g @anthropic-ai/claude-code
+ claude login
+ ```
+
+2. **Crawl4AI with claude-code extra**:
+ ```bash
+ pip install crawl4ai[claude-code]
+ ```
+
+## Quick Start
+
+```python
+import asyncio
+from crawl4ai import AsyncWebCrawler, LLMConfig, CrawlerRunConfig
+from crawl4ai import LLMExtractionStrategy
+
+async def main():
+ # No API token needed - uses local Claude Code authentication
+ llm_config = LLMConfig(provider="claude-code/claude-sonnet-4-20250514")
+
+ strategy = LLMExtractionStrategy(
+ llm_config=llm_config,
+ instruction="Extract article titles and summaries as JSON"
+ )
+
+ config = CrawlerRunConfig(extraction_strategy=strategy)
+
+ async with AsyncWebCrawler() as crawler:
+ result = await crawler.arun(
+ url="https://news.ycombinator.com",
+ config=config
+ )
+ print(result.extracted_content)
+
+if __name__ == "__main__":
+ asyncio.run(main())
+```
+
+## Supported Models
+
+| Model ID | Description | Use Case |
+|----------|-------------|----------|
+| `claude-code/claude-3-5-haiku-latest` | Fastest, most economical | Quick extractions, high volume |
+| `claude-code/claude-sonnet-4-20250514` | Balanced performance | **Recommended default** |
+| `claude-code/claude-opus-4-20250514` | Most capable | Complex reasoning tasks |
+
+## Configuration
+
+The Claude Code provider works with standard `LLMConfig`:
+
+```python
+llm_config = LLMConfig(
+ provider="claude-code/claude-sonnet-4-20250514"
+ # api_token is optional (uses "no-token-needed" internally)
+)
+```
+
+## Examples
+
+### Schema-Based Extraction
+
+```python
+from pydantic import BaseModel
+
+class Product(BaseModel):
+ name: str
+ price: str
+ rating: float
+
+strategy = LLMExtractionStrategy(
+ llm_config=LLMConfig(provider="claude-code/claude-sonnet-4-20250514"),
+ schema=Product.model_json_schema(),
+ extraction_type="schema",
+ instruction="Extract all products"
+)
+```
+
+### With Chunking for Large Pages
+
+```python
+strategy = LLMExtractionStrategy(
+ llm_config=LLMConfig(provider="claude-code/claude-sonnet-4-20250514"),
+ instruction="Extract all article summaries",
+ chunk_token_threshold=2000,
+ overlap_rate=0.1,
+ apply_chunking=True
+)
+```
+
+### Verbose Mode with Model Logging
+
+```python
+strategy = LLMExtractionStrategy(
+ llm_config=LLMConfig(provider="claude-code/claude-sonnet-4-20250514"),
+ instruction="Extract main content",
+ verbose=True # Logs which provider/model is being used
+)
+```
+
+Output:
+```
+[LOG] LLM Provider: claude-code | Model: claude-sonnet-4-20250514
+[LOG] Call LLM for https://example.com - block index: 0
+```
+
+## Comparison with API Providers
+
+| Feature | Claude Code | Anthropic API |
+|---------|-------------|---------------|
+| Authentication | Local CLI | API Key |
+| Billing | Subscription | Per-token |
+| Setup | CLI login once | Environment variable |
+
+## Docker Deployment
+
+When running Crawl4AI in Docker, mount your Claude Code credentials:
+
+```bash
+docker run -d \
+ -p 11235:11235 \
+ -v /path/to/.claude:/home/appuser/.claude:rw \
+ crawl4ai-claude:latest
+```
+
+Set the provider via environment variable:
+
+```bash
+-e LLM_PROVIDER=claude-code/claude-sonnet-4-20250514
+```
+
+## Troubleshooting
+
+### "claude-agent-sdk is not installed"
+
+```bash
+pip install crawl4ai[claude-code]
+```
+
+### "Failed to connect to Claude Code service"
+
+```bash
+claude --version # Check installation
+claude login # Re-authenticate
+```
+
+### Empty Responses
+
+Verify CLI works: `claude "Hello"`
+
+### Permission Denied (Docker)
+
+Ensure the credentials directory is readable and writable by the container user (uid 999), since it is mounted `:rw`:
+
+```bash
+sudo chown -R 999:999 /path/to/.claude
+```
+
+## See Also
+
+- [LLM Extraction Strategies](./llm-strategies.md)
+- [Claude Code Documentation](https://docs.anthropic.com/claude-code/)
diff --git a/docs/md_v2/extraction/llm-strategies.md b/docs/md_v2/extraction/llm-strategies.md
index df948a9eb..a0071b86c 100644
--- a/docs/md_v2/extraction/llm-strategies.md
+++ b/docs/md_v2/extraction/llm-strategies.md
@@ -32,7 +32,18 @@ Crawl4AI uses a “provider string” (e.g., `"openai/gpt-4o"`, `"ollama/llama2.
- **`api_token`**: If needed (for OpenAI, HuggingFace, etc.); local models or Ollama might not require it.
- **`base_url`** (optional): If your provider has a custom endpoint.
-This means you **aren’t locked** into a single LLM vendor. Switch or experiment easily.
+This means you **aren't locked** into a single LLM vendor. Switch or experiment easily.
+
+### Claude Code Provider (No API Keys)
+
+If you have a Claude Code subscription, use your local CLI authentication:
+
+```python
+llm_config = LLMConfig(provider="claude-code/claude-sonnet-4-20250514")
+# No api_token needed - uses local Claude Code CLI auth
+```
+
+See [Claude Code Provider](./claude-code-provider.md) for full documentation.
---
diff --git a/examples/claude_code_error_handling.py b/examples/claude_code_error_handling.py
new file mode 100644
index 000000000..dcb35036a
--- /dev/null
+++ b/examples/claude_code_error_handling.py
@@ -0,0 +1,71 @@
+"""
+Example: Robust Error Handling with Claude Code Provider
+
+Demonstrates proper error handling patterns for production applications.
+"""
+
+import asyncio
+from crawl4ai import AsyncWebCrawler
+from crawl4ai.async_configs import LLMConfig, CrawlerRunConfig
+from crawl4ai.extraction_strategy import LLMExtractionStrategy
+
+
+async def extraction_with_retry(url: str, max_retries: int = 3):
+    """Extract content with automatic retry on transient failures.
+
+    Returns the extracted content of the first successful crawl. A missing
+    SDK is re-raised at once; other Claude Code errors are retried with
+    exponential backoff, and RuntimeError is raised once attempts run out.
+    """
+    from crawl4ai.providers.claude_code_provider import (
+        ClaudeCodeError,
+        ClaudeCodeConnectionError,
+        ClaudeCodeSDKError,
+    )
+
+    llm_config = LLMConfig(provider="claude-code/claude-sonnet-4-20250514")
+    strategy = LLMExtractionStrategy(
+        llm_config=llm_config,
+        instruction="Extract the main content as a summary"
+    )
+    run_config = CrawlerRunConfig(extraction_strategy=strategy)
+
+    last_error = None
+    for attempt in range(max_retries):
+        try:
+            async with AsyncWebCrawler() as crawler:
+                result = await crawler.arun(url=url, config=run_config)
+            if result.success:
+                return result.extracted_content
+            print(f"Crawl failed: {result.error_message}")
+
+        except ClaudeCodeSDKError as e:
+            # SDK not installed - no point retrying
+            print(f"SDK Error (no retry): {e}")
+            raise
+
+        except ClaudeCodeError as e:
+            # Connection and other provider errors may be transient.
+            last_error = e
+            kind = "Connection" if isinstance(e, ClaudeCodeConnectionError) else "Claude Code"
+            print(f"{kind} error (attempt {attempt + 1}/{max_retries}): {e}")
+            # Back off only when another attempt will actually follow.
+            if attempt + 1 < max_retries:
+                await asyncio.sleep(2 ** attempt)
+
+    raise RuntimeError(f"Max retries ({max_retries}) exceeded") from last_error
+
+
+async def main():
+    """Run the retry example against example.com and print the outcome."""
+    print("Claude Code Error Handling Example", "=" * 50, sep="\n")
+
+    try:
+        summary = await extraction_with_retry("https://example.com")
+        print(f"\nSuccess! Extracted content:\n{summary[:500]}...")
+    except Exception as exc:
+        print(f"\nFailed: {exc}")
+
+
+if __name__ == "__main__":
+ asyncio.run(main())
diff --git a/examples/claude_code_extraction.py b/examples/claude_code_extraction.py
new file mode 100644
index 000000000..650dc08f0
--- /dev/null
+++ b/examples/claude_code_extraction.py
@@ -0,0 +1,166 @@
+"""
+Example: Using Claude Code Provider for LLM Extraction
+
+Demonstrates using your Claude Code CLI subscription for web content extraction
+with Crawl4AI.
+
+Prerequisites:
+ - Claude Code CLI installed and authenticated (run `claude` in terminal)
+ - pip install crawl4ai[claude-code]
+
+Usage:
+ python examples/claude_code_extraction.py
+"""
+
+import asyncio
+from crawl4ai import AsyncWebCrawler
+from crawl4ai.async_configs import LLMConfig, CrawlerRunConfig
+from crawl4ai.extraction_strategy import LLMExtractionStrategy
+
+
+async def basic_extraction():
+    """Example 1: pull article titles and point counts from Hacker News."""
+    rule = "=" * 60
+    print(rule)
+    print("Example 1: Basic Extraction with Claude Code")
+    print(rule)
+
+    # Claude Code authenticates through the local CLI, so no API token is set.
+    run_config = CrawlerRunConfig(
+        extraction_strategy=LLMExtractionStrategy(
+            llm_config=LLMConfig(
+                provider="claude-code/claude-sonnet-4-20250514"  # Recommended model
+            ),
+            instruction="Extract all article titles and their point counts as a JSON array",
+        )
+    )
+
+    async with AsyncWebCrawler() as crawler:
+        result = await crawler.arun(
+            url="https://news.ycombinator.com",
+            config=run_config,
+        )
+
+        if not result.success:
+            print(f"\nExtraction failed: {result.error_message}")
+            return
+
+        print("\nExtracted Content:")
+        print(result.extracted_content[:500])  # First 500 chars
+
+
+async def model_comparison():
+    """Run one product-extraction prompt through several Claude models and print each result."""
+    print("\n" + "=" * 60)
+    print("Example 2: Model Comparison")
+    print("=" * 60)
+
+    models = [
+        ("claude-code/claude-3-5-haiku-latest", "Haiku (fastest, cheapest)"),
+        ("claude-code/claude-sonnet-4-20250514", "Sonnet (balanced)"),
+        # Uncomment to test Opus (slowest, most capable):
+        # ("claude-code/claude-opus-4-20250514", "Opus (most capable)"),
+    ]
+
+    html = "<div><h1>Product: Widget Pro</h1><p>Price: $99.99</p><p>Rating: 4.5 stars</p></div>"
+
+    for provider, description in models:
+        print(f"\nTesting {description}...")
+
+        llm_config = LLMConfig(provider=provider)
+        strategy = LLMExtractionStrategy(
+            llm_config=llm_config,
+            instruction="Extract product name, price, and rating as JSON"
+        )
+
+        result = await strategy.aextract(
+            url="https://example.com",
+            ix=0,
+            html=html
+        )
+
+        print(f" Result: {result}")
+
+
+async def structured_extraction():
+    """Extract structured data (title, author, date per post) with a JSON schema."""
+    print("\n" + "=" * 60)
+    print("Example 3: Structured Extraction with Schema")
+    print("=" * 60)
+
+    llm_config = LLMConfig(
+        provider="claude-code/claude-sonnet-4-20250514"
+    )
+
+    # Define expected schema
+    schema = {
+        "type": "array",
+        "items": {
+            "type": "object",
+            "properties": {
+                "title": {"type": "string"},
+                "author": {"type": "string"},
+                "date": {"type": "string"}
+            }
+        }
+    }
+
+    strategy = LLMExtractionStrategy(
+        llm_config=llm_config,
+        instruction="Extract all blog posts with their titles, authors, and dates",
+        schema=schema,
+        extraction_type="schema"
+    )
+
+    # Inline sample page with two posts exercising the title/author/date schema.
+    html = """
+    <html>
+    <body>
+        <article>
+            <h2>First Post</h2>
+            <span class="author">John Doe</span>
+            <time>2024-01-15</time>
+        </article>
+        <article>
+            <h2>Second Post</h2>
+            <span class="author">Jane Smith</span>
+            <time>2024-01-16</time>
+        </article>
+    </body>
+    </html>
+    """
+
+    result = await strategy.aextract(
+        url="https://example.com",
+        ix=0,
+        html=html
+    )
+    print("\nExtracted Posts:")
+    print(result)
+
+
+async def main():
+    """Run every example in order, printing setup hints if one of them fails."""
+    rule = "=" * 60
+    print("\nClaude Code Provider Examples for Crawl4AI")
+    print(rule)
+    print("Using your Claude Code subscription for LLM extraction")
+    print("No API keys required - uses local Claude Code authentication")
+    print(rule)
+
+    try:
+        for example in (basic_extraction, model_comparison, structured_extraction):
+            await example()
+    except ImportError as e:
+        print(f"\nError: {e}")
+        print("\nMake sure to install the claude-code extras:")
+        print(" pip install crawl4ai[claude-code]")
+    except Exception as e:
+        print(f"\nError: {e}")
+        print("\nMake sure Claude Code CLI is installed and authenticated:")
+        print(" 1. Install: npm install -g @anthropic-ai/claude-code")
+        print(" 2. Authenticate: claude login")
+
+
+if __name__ == "__main__":
+ asyncio.run(main())
diff --git a/mkdocs.yml b/mkdocs.yml
index ef23b4551..00d4f99f6 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -58,6 +58,7 @@ nav:
- Extraction:
- "LLM-Free Strategies": "extraction/no-llm-strategies.md"
- "LLM Strategies": "extraction/llm-strategies.md"
+ - "Claude Code Provider": "extraction/claude-code-provider.md"
- "Clustering Strategies": "extraction/clustring-strategies.md"
- "Chunking": "extraction/chunking.md"
- API Reference:
diff --git a/pyproject.toml b/pyproject.toml
index 06d1e4ab0..d4342045f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -59,11 +59,12 @@ classifiers = [
]
[project.optional-dependencies]
-pdf = ["pypdf"]
+pdf = ["pypdf"]
torch = ["torch", "nltk", "scikit-learn"]
transformer = ["transformers", "tokenizers", "sentence-transformers"]
cosine = ["torch", "transformers", "nltk", "sentence-transformers"]
sync = ["selenium"]
+claude-code = ["claude-agent-sdk>=0.1.0"]
all = [
"pypdf",
"torch",
@@ -72,7 +73,8 @@ all = [
"transformers",
"tokenizers",
"sentence-transformers",
- "selenium"
+ "selenium",
+ "claude-agent-sdk>=0.1.0"
]
[project.scripts]
diff --git a/tests/integration/__init__.py b/tests/integration/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/tests/integration/test_claude_code_integration.py b/tests/integration/test_claude_code_integration.py
new file mode 100644
index 000000000..76b8782cc
--- /dev/null
+++ b/tests/integration/test_claude_code_integration.py
@@ -0,0 +1,173 @@
+"""
+Integration tests for Claude Code Provider.
+
+These tests require Claude Code CLI to be installed and authenticated,
+AND the claude-agent-sdk package to be installed.
+They will be skipped if either is not available.
+"""
+import pytest
+import shutil
+import asyncio
+
+# Check if claude-agent-sdk is installed
+def _sdk_available():
+ try:
+ import claude_agent_sdk
+ return True
+ except ImportError:
+ return False
+
+# Mark for tests that need both CLI and SDK
+requires_claude_code = pytest.mark.skipif(
+ not shutil.which("claude") or not _sdk_available(),
+ reason="Claude Code CLI or claude-agent-sdk not installed"
+)
+
+
+class TestClaudeCodeIntegration:
+ """Integration tests with real Claude Code CLI."""
+
+ @requires_claude_code
+ @pytest.mark.asyncio
+ async def test_basic_completion_with_claude_code(self):
+ """Test basic completion using Claude Code provider."""
+ from crawl4ai.providers.claude_code_provider import ClaudeCodeProvider
+ from litellm.types.utils import ModelResponse
+
+ provider = ClaudeCodeProvider()
+
+ # Simple prompt that should get a quick response
+ result = await provider.acompletion(
+ model="claude-code/claude-haiku-3-5-latest", # Use Haiku for speed
+ messages=[{"role": "user", "content": "Reply with only the word 'hello'"}]
+ )
+
+ assert isinstance(result, ModelResponse)
+ assert result.choices[0].message.content is not None
+ assert len(result.choices[0].message.content) > 0
+
+ def test_llm_config_with_claude_code(self):
+ """Test LLMConfig works with claude-code provider."""
+ from crawl4ai.async_configs import LLMConfig
+
+ config = LLMConfig(provider="claude-code/claude-haiku-3-5-latest")
+
+ assert config.provider == "claude-code/claude-haiku-3-5-latest"
+ assert config.api_token == "no-token-needed"
+
+ @requires_claude_code
+ @pytest.mark.asyncio
+ async def test_extraction_strategy_with_claude_code(self):
+ """Test LLMExtractionStrategy with Claude Code provider."""
+ from crawl4ai.async_configs import LLMConfig
+ from crawl4ai.extraction_strategy import LLMExtractionStrategy
+
+ config = LLMConfig(provider="claude-code/claude-haiku-3-5-latest")
+
+ strategy = LLMExtractionStrategy(
+ llm_config=config,
+ instruction="Extract the main heading text"
+ )
+
+ # Simple HTML to extract from
+ html = "Test Heading
Some content
"
+
+ # Run extraction
+ result = await strategy.aextract(
+ url="https://example.com",
+ ix=0,
+ html=html
+ )
+
+ # Should get some result (exact format depends on extraction)
+ assert result is not None
+
+ @requires_claude_code
+ @pytest.mark.asyncio
+ async def test_model_selection_sonnet(self):
+ """Test that Sonnet model can be used."""
+ from crawl4ai.providers.claude_code_provider import ClaudeCodeProvider
+ from litellm.types.utils import ModelResponse
+
+ provider = ClaudeCodeProvider()
+
+ result = await provider.acompletion(
+ model="claude-code/claude-sonnet-4-20250514",
+ messages=[{"role": "user", "content": "Say 'test' only"}]
+ )
+
+ assert isinstance(result, ModelResponse)
+ assert result.model == "claude-sonnet-4-20250514"
+
+ @requires_claude_code
+ @pytest.mark.asyncio
+ async def test_system_prompt_handling(self):
+ """Test that system prompts are properly included."""
+ from crawl4ai.providers.claude_code_provider import ClaudeCodeProvider
+
+ provider = ClaudeCodeProvider()
+
+ result = await provider.acompletion(
+ model="claude-code/claude-haiku-3-5-latest",
+ messages=[
+ {"role": "system", "content": "You only respond with the word 'CONFIRMED'"},
+ {"role": "user", "content": "Hello"}
+ ]
+ )
+
+ # System prompt should influence the response
+ assert result.choices[0].message.content is not None
+
+
+class TestBackwardCompatibility:
+ """Test that existing providers still work alongside claude-code."""
+
+ def test_existing_providers_still_in_config(self):
+ """Ensure existing providers weren't removed."""
+ from crawl4ai.config import PROVIDER_MODELS_PREFIXES
+
+ # Original providers should still exist
+ assert "ollama" in PROVIDER_MODELS_PREFIXES
+ assert "openai" in PROVIDER_MODELS_PREFIXES
+ assert "anthropic" in PROVIDER_MODELS_PREFIXES
+ assert "gemini" in PROVIDER_MODELS_PREFIXES
+ assert "deepseek" in PROVIDER_MODELS_PREFIXES
+
+ # New provider should also exist
+ assert "claude-code" in PROVIDER_MODELS_PREFIXES
+
+ def test_llm_config_still_works_with_openai(self):
+ """LLMConfig should still work with OpenAI provider."""
+ from crawl4ai.async_configs import LLMConfig
+ import os
+
+ # Set a dummy key for testing
+ original = os.environ.get("OPENAI_API_KEY")
+ os.environ["OPENAI_API_KEY"] = "test-key"
+
+ try:
+ config = LLMConfig(provider="openai/gpt-4o")
+ assert config.provider == "openai/gpt-4o"
+ finally:
+ if original:
+ os.environ["OPENAI_API_KEY"] = original
+ else:
+ os.environ.pop("OPENAI_API_KEY", None)
+
+ def test_llm_config_still_works_with_anthropic(self):
+ """LLMConfig should still work with Anthropic provider."""
+ from crawl4ai.async_configs import LLMConfig
+ import os
+
+ # Set a dummy key for testing
+ original = os.environ.get("ANTHROPIC_API_KEY")
+ os.environ["ANTHROPIC_API_KEY"] = "test-key"
+
+ try:
+ config = LLMConfig(provider="anthropic/claude-3-5-sonnet-20240620")
+ assert config.provider == "anthropic/claude-3-5-sonnet-20240620"
+ finally:
+ if original:
+ os.environ["ANTHROPIC_API_KEY"] = original
+ else:
+ os.environ.pop("ANTHROPIC_API_KEY", None)
diff --git a/tests/unit/test_claude_code_provider.py b/tests/unit/test_claude_code_provider.py
new file mode 100644
index 000000000..1b0f020bf
--- /dev/null
+++ b/tests/unit/test_claude_code_provider.py
@@ -0,0 +1,261 @@
+"""
+Unit tests for Claude Code Provider (written test-first, TDD)
+
+These tests were written BEFORE the implementation to follow TDD methodology.
+They failed initially (RED phase) and should pass now that the provider is implemented.
+"""
+import pytest
+from unittest.mock import AsyncMock, MagicMock, patch
+import asyncio
+
+
+class TestClaudeCodeProviderImport:
+ """Test that the provider module can be imported."""
+
+ def test_provider_module_exists(self):
+ """The provider module should exist and be importable."""
+ from crawl4ai.providers import claude_code_provider
+ assert claude_code_provider is not None
+
+ def test_provider_class_exists(self):
+ """The ClaudeCodeProvider class should exist."""
+ from crawl4ai.providers.claude_code_provider import ClaudeCodeProvider
+ assert ClaudeCodeProvider is not None
+
+ def test_provider_inherits_from_custom_llm(self):
+ """ClaudeCodeProvider should inherit from LiteLLM's CustomLLM."""
+ from crawl4ai.providers.claude_code_provider import ClaudeCodeProvider
+ from litellm import CustomLLM
+ assert issubclass(ClaudeCodeProvider, CustomLLM)
+
+
+class TestModelExtraction:
+ """Test model name extraction from provider string."""
+
+ def test_extracts_model_from_provider_string(self):
+ """Should extract 'claude-sonnet-4' from 'claude-code/claude-sonnet-4'."""
+ from crawl4ai.providers.claude_code_provider import ClaudeCodeProvider
+ provider = ClaudeCodeProvider()
+
+ result = provider._extract_model("claude-code/claude-sonnet-4-20250514")
+ assert result == "claude-sonnet-4-20250514"
+
+ def test_extracts_model_with_opus(self):
+ """Should extract opus model correctly."""
+ from crawl4ai.providers.claude_code_provider import ClaudeCodeProvider
+ provider = ClaudeCodeProvider()
+
+ result = provider._extract_model("claude-code/claude-opus-4-20250514")
+ assert result == "claude-opus-4-20250514"
+
+ def test_extracts_model_with_haiku(self):
+ """Should extract haiku model correctly."""
+ from crawl4ai.providers.claude_code_provider import ClaudeCodeProvider
+ provider = ClaudeCodeProvider()
+
+ result = provider._extract_model("claude-code/claude-haiku-3-5-latest")
+ assert result == "claude-haiku-3-5-latest"
+
+ def test_handles_model_without_prefix(self):
+ """Should return model as-is if no prefix."""
+ from crawl4ai.providers.claude_code_provider import ClaudeCodeProvider
+ provider = ClaudeCodeProvider()
+
+ result = provider._extract_model("claude-sonnet-4-20250514")
+ assert result == "claude-sonnet-4-20250514"
+
+
+class TestMessageConversion:
+ """Test conversion from LiteLLM message format to prompt string."""
+
+ def test_single_user_message(self):
+ """Should convert single user message to prompt."""
+ from crawl4ai.providers.claude_code_provider import ClaudeCodeProvider
+ provider = ClaudeCodeProvider()
+
+ messages = [{"role": "user", "content": "Hello"}]
+ result = provider._convert_messages_to_prompt(messages)
+
+ assert "Hello" in result
+
+ def test_system_and_user_messages(self):
+ """Should include both system and user messages."""
+ from crawl4ai.providers.claude_code_provider import ClaudeCodeProvider
+ provider = ClaudeCodeProvider()
+
+ messages = [
+ {"role": "system", "content": "You are helpful"},
+ {"role": "user", "content": "Hello"}
+ ]
+ result = provider._convert_messages_to_prompt(messages)
+
+ assert "You are helpful" in result
+ assert "Hello" in result
+
+ def test_multi_turn_conversation(self):
+ """Should handle multi-turn conversations."""
+ from crawl4ai.providers.claude_code_provider import ClaudeCodeProvider
+ provider = ClaudeCodeProvider()
+
+ messages = [
+ {"role": "system", "content": "Be helpful"},
+ {"role": "user", "content": "Hello"},
+ {"role": "assistant", "content": "Hi there"},
+ {"role": "user", "content": "How are you?"}
+ ]
+ result = provider._convert_messages_to_prompt(messages)
+
+ assert "Be helpful" in result
+ assert "Hello" in result
+ assert "Hi there" in result
+ assert "How are you?" in result
+
+ def test_empty_messages(self):
+ """Should handle empty message list."""
+ from crawl4ai.providers.claude_code_provider import ClaudeCodeProvider
+ provider = ClaudeCodeProvider()
+
+ messages = []
+ result = provider._convert_messages_to_prompt(messages)
+
+ assert result == ""
+
+
+class TestCompletionMethods:
+ """Test completion and acompletion methods."""
+
+ @pytest.mark.asyncio
+ async def test_acompletion_returns_model_response(self):
+ """acompletion should return a LiteLLM ModelResponse."""
+ from crawl4ai.providers.claude_code_provider import ClaudeCodeProvider
+ from litellm.types.utils import ModelResponse
+
+ provider = ClaudeCodeProvider()
+
+ # Mock the _collect_response method instead of the SDK directly
+ async def mock_collect_response(prompt, model):
+ return ("Test response", {"session_id": "test-session", "input_tokens": 10, "output_tokens": 5})
+
+ with patch.object(provider, '_collect_response', mock_collect_response):
+ result = await provider.acompletion(
+ model="claude-code/claude-sonnet-4-20250514",
+ messages=[{"role": "user", "content": "Hello"}]
+ )
+
+ assert isinstance(result, ModelResponse)
+
+ @pytest.mark.asyncio
+ async def test_acompletion_extracts_text_content(self):
+ """acompletion should extract text from Claude SDK response."""
+ from crawl4ai.providers.claude_code_provider import ClaudeCodeProvider
+
+ provider = ClaudeCodeProvider()
+
+ async def mock_collect_response(prompt, model):
+ return ("The capital of France is Paris.", {"session_id": "test-session", "input_tokens": 10, "output_tokens": 5})
+
+ with patch.object(provider, '_collect_response', mock_collect_response):
+ result = await provider.acompletion(
+ model="claude-code/claude-sonnet-4-20250514",
+ messages=[{"role": "user", "content": "What is the capital of France?"}]
+ )
+
+ assert "Paris" in result.choices[0].message.content
+
+ def test_completion_calls_acompletion(self):
+ """Sync completion should internally call acompletion."""
+ from crawl4ai.providers.claude_code_provider import ClaudeCodeProvider
+
+ provider = ClaudeCodeProvider()
+
+ with patch.object(provider, 'acompletion', new_callable=AsyncMock) as mock_acompletion:
+ mock_response = MagicMock()
+ mock_acompletion.return_value = mock_response
+
+ result = provider.completion(
+ model="claude-code/claude-sonnet-4-20250514",
+ messages=[{"role": "user", "content": "Hello"}]
+ )
+
+ mock_acompletion.assert_called_once()
+
+
+class TestErrorHandling:
+ """Test error handling scenarios."""
+
+ @pytest.mark.asyncio
+ async def test_import_error_when_sdk_not_installed(self):
+ """Should raise ClaudeCodeSDKError with helpful message if SDK not installed."""
+ from crawl4ai.providers.claude_code_provider import (
+ ClaudeCodeProvider,
+ ClaudeCodeSDKError,
+ )
+
+ provider = ClaudeCodeProvider()
+
+ with patch.dict('sys.modules', {'claude_agent_sdk': None}):
+ with pytest.raises(ClaudeCodeSDKError) as exc_info:
+ await provider.acompletion(
+ model="claude-code/claude-sonnet-4-20250514",
+ messages=[{"role": "user", "content": "Hello"}]
+ )
+
+ assert "claude-agent-sdk" in str(exc_info.value).lower() or \
+ "pip install" in str(exc_info.value).lower()
+
+
+class TestProviderRegistration:
+ """Test that the provider is registered correctly with LiteLLM."""
+
+ def test_registration_function_exists(self):
+ """register_custom_providers function should exist."""
+ from crawl4ai.providers import register_custom_providers
+ assert callable(register_custom_providers)
+
+ def test_provider_registered_after_import(self):
+ """Provider should be registered after calling register_custom_providers."""
+ import litellm
+ from crawl4ai.providers import register_custom_providers
+
+ # Call registration
+ register_custom_providers()
+
+ # Check if claude-code is in custom_provider_map
+ providers = [p.get("provider") for p in (litellm.custom_provider_map or [])]
+ assert "claude-code" in providers
+
+
+class TestConfigIntegration:
+ """Test integration with Crawl4AI's config system."""
+
+ def test_provider_prefix_in_config(self):
+ """claude-code should be in PROVIDER_MODELS_PREFIXES."""
+ from crawl4ai.config import PROVIDER_MODELS_PREFIXES
+
+ assert "claude-code" in PROVIDER_MODELS_PREFIXES
+
+ def test_provider_prefix_value_is_no_token_needed(self):
+ """claude-code should require no token (uses local auth)."""
+ from crawl4ai.config import PROVIDER_MODELS_PREFIXES
+
+ assert PROVIDER_MODELS_PREFIXES.get("claude-code") == "no-token-needed"
+
+
+class TestLLMConfigIntegration:
+ """Test integration with LLMConfig class."""
+
+ def test_llm_config_accepts_claude_code_provider(self):
+ """LLMConfig should accept claude-code provider without error."""
+ from crawl4ai.async_configs import LLMConfig
+
+ config = LLMConfig(provider="claude-code/claude-sonnet-4-20250514")
+
+ assert config.provider == "claude-code/claude-sonnet-4-20250514"
+
+ def test_llm_config_resolves_no_token_needed(self):
+ """LLMConfig should resolve api_token to 'no-token-needed' for claude-code."""
+ from crawl4ai.async_configs import LLMConfig
+
+ config = LLMConfig(provider="claude-code/claude-sonnet-4-20250514")
+
+ assert config.api_token == "no-token-needed"