Merged · 40 commits
b4df352
feat(proxy): add Anthropic Messages API endpoint for Claude Code comp…
FammasMaz Dec 10, 2025
7e229f4
feat(anthropic): add extended thinking support to /v1/messages endpoint
FammasMaz Dec 12, 2025
7aea08e
feat(anthropic): force high thinking budget for Opus models by default
FammasMaz Dec 12, 2025
05d89a2
fix: ensure max_tokens exceeds thinking budget and improve error hand…
FammasMaz Dec 13, 2025
e35f3f0
fix(anthropic): properly close all content blocks in streaming wrapper
FammasMaz Dec 14, 2025
4ec92ec
fix(anthropic): add missing uuid import for /v1/messages endpoint
FammasMaz Dec 14, 2025
b70efdf
fix(anthropic): always set custom_reasoning_budget when thinking is e…
FammasMaz Dec 14, 2025
4bd879b
feat(openai): auto-enable full thinking budget for Opus
FammasMaz Dec 14, 2025
758b4b5
fix(anthropic): add missing JSONResponse import for non-streaming res…
FammasMaz Dec 14, 2025
f2d7288
fix(anthropic): ensure message_start is sent before message_stop in s…
FammasMaz Dec 15, 2025
de88557
feat: add /context endpoint for anthropic routes
FammasMaz Dec 16, 2025
beed0bc
Revert "feat(openai): auto-enable full thinking budget for Opus"
FammasMaz Dec 19, 2025
2c93a68
Revert "fix(anthropic): always set custom_reasoning_budget when think…
FammasMaz Dec 19, 2025
b19526c
refactor: Move Anthropic translation layer to rotator_library
FammasMaz Dec 20, 2025
d91f98b
fix(anthropic): improve model detection and document thinking budget
FammasMaz Dec 20, 2025
16c889f
fix(anthropic): handle images in tool results for Claude Code
FammasMaz Dec 22, 2025
545d0d5
fix(anthropic): force Claude thinking budget and interleaved hint
FammasMaz Dec 31, 2025
765df7a
fix(anthropic): read thinking budget from client request
FammasMaz Dec 31, 2025
5af1f10
fix(anthropic): handle thinking toggle for text-only assistant messages
FammasMaz Jan 1, 2026
0bb8a52
fix(anthropic): strengthen interleaved thinking hint
FammasMaz Jan 1, 2026
991a8e3
fix(antigravity): remove unreachable is_claude condition in thinking …
FammasMaz Jan 1, 2026
354ac17
fix(antigravity): add debug logging for non-data URL images
FammasMaz Jan 1, 2026
b81ca57
fix(anthropic): correct cache token handling in usage responses
FammasMaz Jan 2, 2026
97ef2d1
feat(anthropic): add 5 translation improvements from reference
FammasMaz Jan 2, 2026
dc19691
fix(antigravity): make interleaved thinking hint more explicit
FammasMaz Jan 2, 2026
5a8258c
fix(antigravity): reject requests exceeding Claude's 64K max_tokens l…
FammasMaz Jan 5, 2026
bbc1060
experimental: try to be more explicit about must think instruction
FammasMaz Jan 5, 2026
3fc1436
Merge origin/dev into feature/anthropic-endpoints
FammasMaz Jan 8, 2026
d4ad8af
feat(anthropic): respect explicit thinking_budget from Anthropic routes
FammasMaz Jan 8, 2026
9d568fe
feat(anthropic): always use max thinking budget (31999) for Claude
FammasMaz Jan 8, 2026
67ffea5
fix(anthropic): inject [Continue] for fresh thinking turn when histor…
FammasMaz Jan 8, 2026
b7b5d07
fix(token-count): include Antigravity preprompt tokens in count
FammasMaz Jan 8, 2026
4aa703f
Merge remote-tracking branch 'origin/dev' into feature/anthropic-endp…
FammasMaz Jan 8, 2026
9d4799e
Merge origin/dev into feature/anthropic-endpoints
FammasMaz Jan 9, 2026
49d2e47
fix(antigravity): remove stale interleaved thinking references
FammasMaz Jan 10, 2026
aa88eb3
Merge origin/dev into feature/anthropic-endpoints
Mirrowel Jan 15, 2026
8e10a66
refactor(rotator_library): 🔨 standardize thinking budget mapping and …
Mirrowel Jan 15, 2026
d9f2ddb
feat(logging): ✨ implement nested transaction logging for anthropic c…
Mirrowel Jan 15, 2026
6d9f9cc
fix(anthropic-compat): 🐛 handle null tool_calls in streaming delta
Mirrowel Jan 15, 2026
1798e75
docs: 📚 document anthropic api compatibility layer and client usage
Mirrowel Jan 15, 2026
181 changes: 180 additions & 1 deletion src/proxy_app/main.py
@@ -1,4 +1,5 @@
import time
import uuid

# Phase 1: Minimal imports for arg parsing and TUI
import asyncio
@@ -99,7 +100,7 @@
from contextlib import asynccontextmanager
from fastapi import FastAPI, Request, HTTPException, Depends
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import StreamingResponse
from fastapi.responses import StreamingResponse, JSONResponse
from fastapi.security import APIKeyHeader

print(" → Loading core dependencies...")
@@ -213,6 +214,13 @@ class EnrichedModelList(BaseModel):
data: List[EnrichedModelCard]


# --- Anthropic API Models (imported from library) ---
from rotator_library.anthropic_compat import (
AnthropicMessagesRequest,
AnthropicCountTokensRequest,
)


# Calculate total loading time
_elapsed = time.time() - _start_time
print(
@@ -664,6 +672,27 @@ async def verify_api_key(auth: str = Depends(api_key_header)):
return auth


# --- Anthropic API Key Header ---
anthropic_api_key_header = APIKeyHeader(name="x-api-key", auto_error=False)


async def verify_anthropic_api_key(
x_api_key: str = Depends(anthropic_api_key_header),
auth: str = Depends(api_key_header),
):
"""
Dependency to verify API key for Anthropic endpoints.
Accepts either x-api-key header (Anthropic style) or Authorization Bearer (OpenAI style).
"""
# Check x-api-key first (Anthropic style)
if x_api_key and x_api_key == PROXY_API_KEY:
return x_api_key
# Fall back to Bearer token (OpenAI style)
if auth and auth == f"Bearer {PROXY_API_KEY}":
return auth
raise HTTPException(status_code=401, detail="Invalid or missing API Key")
Comment on lines +703 to +709
Copilot AI Dec 19, 2025
When PROXY_API_KEY is not set or empty (open access mode), this function will always raise an HTTPException because neither condition will match. This is inconsistent with verify_api_key at line 794, which allows access when PROXY_API_KEY is not set. Consider adding a check similar to line 794 to allow open access mode.

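A minimal sketch of the adjustment the comment suggests, assuming open-access mode is signalled by an unset or empty PROXY_API_KEY, mirroring verify_api_key. This is hypothetical and not part of this diff:

# Hypothetical fix sketched from the review comment above; not in this PR.
async def verify_anthropic_api_key(
    x_api_key: str = Depends(anthropic_api_key_header),
    auth: str = Depends(api_key_header),
):
    # Open-access mode: allow requests when no proxy key is configured,
    # matching the behaviour of verify_api_key.
    if not PROXY_API_KEY:
        return None
    if x_api_key and x_api_key == PROXY_API_KEY:
        return x_api_key
    if auth and auth == f"Bearer {PROXY_API_KEY}":
        return auth
    raise HTTPException(status_code=401, detail="Invalid or missing API Key")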


async def streaming_response_wrapper(
request: Request,
request_data: dict,
@@ -963,6 +992,156 @@ async def chat_completions(
raise HTTPException(status_code=500, detail=str(e))


# --- Anthropic Messages API Endpoint ---
@app.post("/v1/messages")
async def anthropic_messages(
request: Request,
body: AnthropicMessagesRequest,
client: RotatingClient = Depends(get_rotating_client),
_=Depends(verify_anthropic_api_key),
):
"""
Anthropic-compatible Messages API endpoint.

Accepts requests in Anthropic's format and returns responses in Anthropic's format.
Internally translates to OpenAI format for processing via LiteLLM.

This endpoint is compatible with Claude Code and other Anthropic API clients.
"""
# Initialize logger if enabled
logger = DetailedLogger() if ENABLE_REQUEST_LOGGING else None

try:
# Log the request to console
log_request_to_console(
url=str(request.url),
headers=dict(request.headers),
client_info=(
request.client.host if request.client else "unknown",
request.client.port if request.client else 0,
),
request_data=body.model_dump(exclude_none=True),
)

# Use the library method to handle the request
result = await client.anthropic_messages(body, raw_request=request)

if body.stream:
# Streaming response
return StreamingResponse(
result,
media_type="text/event-stream",
headers={
"Cache-Control": "no-cache",
"Connection": "keep-alive",
"X-Accel-Buffering": "no",
},
)
else:
# Non-streaming response
if logger:
logger.log_final_response(
status_code=200,
headers=None,
body=result,
)
return JSONResponse(content=result)

except (
litellm.InvalidRequestError,
ValueError,
litellm.ContextWindowExceededError,
) as e:
error_response = {
"type": "error",
"error": {"type": "invalid_request_error", "message": str(e)},
}
raise HTTPException(status_code=400, detail=error_response)
except litellm.AuthenticationError as e:
error_response = {
"type": "error",
"error": {"type": "authentication_error", "message": str(e)},
}
raise HTTPException(status_code=401, detail=error_response)
except litellm.RateLimitError as e:
error_response = {
"type": "error",
"error": {"type": "rate_limit_error", "message": str(e)},
}
raise HTTPException(status_code=429, detail=error_response)
except (litellm.ServiceUnavailableError, litellm.APIConnectionError) as e:
error_response = {
"type": "error",
"error": {"type": "api_error", "message": str(e)},
}
raise HTTPException(status_code=503, detail=error_response)
except litellm.Timeout as e:
error_response = {
"type": "error",
"error": {"type": "api_error", "message": f"Request timed out: {str(e)}"},
}
raise HTTPException(status_code=504, detail=error_response)
except Exception as e:
logging.error(f"Anthropic messages endpoint error: {e}")
if logger:
logger.log_final_response(
status_code=500,
headers=None,
body={"error": str(e)},
)
error_response = {
"type": "error",
"error": {"type": "api_error", "message": str(e)},
}
raise HTTPException(status_code=500, detail=error_response)


# --- Anthropic Count Tokens Endpoint ---
@app.post("/v1/messages/count_tokens")
async def anthropic_count_tokens(
request: Request,
body: AnthropicCountTokensRequest,
client: RotatingClient = Depends(get_rotating_client),
_=Depends(verify_anthropic_api_key),
):
"""
Anthropic-compatible count_tokens endpoint.

Counts the number of tokens that would be used by a Messages API request.
This is useful for estimating costs and managing context windows.

Accepts requests in Anthropic's format and returns token count in Anthropic's format.
"""
try:
# Use the library method to handle the request
result = await client.anthropic_count_tokens(body)
return JSONResponse(content=result)

except (
litellm.InvalidRequestError,
ValueError,
litellm.ContextWindowExceededError,
) as e:
error_response = {
"type": "error",
"error": {"type": "invalid_request_error", "message": str(e)},
}
raise HTTPException(status_code=400, detail=error_response)
except litellm.AuthenticationError as e:
error_response = {
"type": "error",
"error": {"type": "authentication_error", "message": str(e)},
}
raise HTTPException(status_code=401, detail=error_response)
except Exception as e:
logging.error(f"Anthropic count_tokens endpoint error: {e}")
error_response = {
"type": "error",
"error": {"type": "api_error", "message": str(e)},
}
raise HTTPException(status_code=500, detail=error_response)


@app.post("/v1/embeddings")
async def embeddings(
request: Request,
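The two Anthropic endpoints added above can be exercised with any HTTP client. A minimal sketch follows; the host, port, API key, and model id are all illustrative, not from the diff:

import httpx

BASE = "http://localhost:8000"           # illustrative; wherever the proxy listens
HEADERS = {"x-api-key": "sk-proxy-key"}  # Bearer auth works too, per verify_anthropic_api_key

# Anthropic-format chat request against the proxy's /v1/messages endpoint.
msg = httpx.post(
    f"{BASE}/v1/messages",
    headers=HEADERS,
    json={
        "model": "claude-sonnet-4",      # illustrative model id
        "max_tokens": 256,
        "messages": [{"role": "user", "content": "Hello"}],
    },
)
print(msg.json())

# Token estimate for the same payload via /v1/messages/count_tokens.
count = httpx.post(
    f"{BASE}/v1/messages/count_tokens",
    headers=HEADERS,
    json={
        "model": "claude-sonnet-4",
        "messages": [{"role": "user", "content": "Hello"}],
    },
)
print(count.json())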
8 changes: 7 additions & 1 deletion src/rotator_library/__init__.py
@@ -8,18 +8,20 @@
from .providers import PROVIDER_PLUGINS
from .providers.provider_interface import ProviderInterface
from .model_info_service import ModelInfoService, ModelInfo, ModelMetadata
from . import anthropic_compat

__all__ = [
"RotatingClient",
"PROVIDER_PLUGINS",
"ModelInfoService",
"ModelInfo",
"ModelMetadata",
"anthropic_compat",
]


def __getattr__(name):
"""Lazy-load PROVIDER_PLUGINS and ModelInfoService to speed up module import."""
"""Lazy-load PROVIDER_PLUGINS, ModelInfoService, and anthropic_compat to speed up module import."""
if name == "PROVIDER_PLUGINS":
from .providers import PROVIDER_PLUGINS

@@ -36,4 +38,8 @@ def __getattr__(name):
from .model_info_service import ModelMetadata

return ModelMetadata
if name == "anthropic_compat":
from . import anthropic_compat

return anthropic_compat
raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
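Note that the same hunk also shows a top-level from . import anthropic_compat; if that import is unconditional, the lazy __getattr__ branch for anthropic_compat is effectively shadowed. Either way, a quick sanity check of the binding (illustrative, assumes the package is importable):

import sys
import rotator_library

# Whether resolved eagerly at import time or lazily via __getattr__,
# the attribute should end up bound to the real submodule.
compat = rotator_library.anthropic_compat
assert compat is sys.modules["rotator_library.anthropic_compat"]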
67 changes: 67 additions & 0 deletions src/rotator_library/anthropic_compat/__init__.py
@@ -0,0 +1,67 @@
"""
Anthropic API compatibility module for rotator_library.

This module provides format translation between Anthropic's Messages API
and OpenAI's Chat Completions API, enabling any OpenAI-compatible provider
to work with Anthropic clients like Claude Code.

Usage:
from rotator_library.anthropic_compat import (
AnthropicMessagesRequest,
AnthropicMessagesResponse,
translate_anthropic_request,
openai_to_anthropic_response,
anthropic_streaming_wrapper,
)
"""

from .models import (
AnthropicTextBlock,
AnthropicImageSource,
AnthropicImageBlock,
AnthropicToolUseBlock,
AnthropicToolResultBlock,
AnthropicMessage,
AnthropicTool,
AnthropicThinkingConfig,
AnthropicMessagesRequest,
AnthropicUsage,
AnthropicMessagesResponse,
AnthropicCountTokensRequest,
AnthropicCountTokensResponse,
)

from .translator import (
anthropic_to_openai_messages,
anthropic_to_openai_tools,
anthropic_to_openai_tool_choice,
openai_to_anthropic_response,
translate_anthropic_request,
)

from .streaming import anthropic_streaming_wrapper

__all__ = [
# Models
"AnthropicTextBlock",
"AnthropicImageSource",
"AnthropicImageBlock",
"AnthropicToolUseBlock",
"AnthropicToolResultBlock",
"AnthropicMessage",
"AnthropicTool",
"AnthropicThinkingConfig",
"AnthropicMessagesRequest",
"AnthropicUsage",
"AnthropicMessagesResponse",
"AnthropicCountTokensRequest",
"AnthropicCountTokensResponse",
# Translator functions
"anthropic_to_openai_messages",
"anthropic_to_openai_tools",
"anthropic_to_openai_tool_choice",
"openai_to_anthropic_response",
"translate_anthropic_request",
# Streaming
"anthropic_streaming_wrapper",
]
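For orientation, a rough sketch of the message-shape mapping that anthropic_to_openai_messages and its siblings perform. The field correspondences below are inferred from the two public API formats, not from translator.py, which this diff does not show:

# Illustrative only: Anthropic's top-level `system` and content-block lists
# versus OpenAI's flat chat messages.
anthropic_request = {
    "model": "claude-sonnet-4",
    "system": "You are terse.",
    "max_tokens": 128,
    "messages": [
        {"role": "user", "content": [{"type": "text", "text": "Hi"}]},
    ],
}

openai_equivalent = {
    "model": anthropic_request["model"],
    "max_tokens": anthropic_request["max_tokens"],
    # The top-level system prompt becomes a leading system message...
    "messages": [{"role": "system", "content": anthropic_request["system"]}]
    + [
        # ...and text content blocks are flattened to plain strings.
        {
            "role": m["role"],
            "content": "".join(
                b["text"] for b in m["content"] if b["type"] == "text"
            ),
        }
        for m in anthropic_request["messages"]
    ],
}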