18 commits
18f1dc6
feat(proxy): add Anthropic Messages API endpoint for Claude Code comp…
FammasMaz Dec 10, 2025
eabb999
feat(anthropic): add extended thinking support to /v1/messages endpoint
FammasMaz Dec 12, 2025
fcd33de
feat(anthropic): force high thinking budget for Opus models by default
FammasMaz Dec 12, 2025
4236c19
fix: ensure max_tokens exceeds thinking budget and improve error hand…
FammasMaz Dec 13, 2025
bea58d4
fix(anthropic): properly close all content blocks in streaming wrapper
FammasMaz Dec 14, 2025
a615953
fix(anthropic): add missing uuid import for /v1/messages endpoint
FammasMaz Dec 14, 2025
fca4277
fix(anthropic): always set custom_reasoning_budget when thinking is e…
FammasMaz Dec 14, 2025
63a0d86
feat(openai): auto-enable full thinking budget for Opus
FammasMaz Dec 14, 2025
32c3f21
fix(anthropic): add missing JSONResponse import for non-streaming res…
FammasMaz Dec 14, 2025
7a01e79
fix(anthropic): ensure message_start is sent before message_stop in s…
FammasMaz Dec 15, 2025
1227d8c
feat: add /context endpoint for anthropic routes
FammasMaz Dec 16, 2025
72b612c
Revert "feat(openai): auto-enable full thinking budget for Opus"
FammasMaz Dec 19, 2025
aef492b
Revert "fix(anthropic): always set custom_reasoning_budget when think…
FammasMaz Dec 19, 2025
0e81ead
feat(anthropic): Add Pydantic models and translator for Anthropic format
tiesworkman1025 Dec 19, 2025
bd477ce
feat(anthropic): Add streaming wrapper and module exports
tiesworkman1025 Dec 19, 2025
7396891
feat(client): Add anthropic_messages() and anthropic_count_tokens() m…
tiesworkman1025 Dec 19, 2025
ef25099
feat(exports): Export Anthropic compatibility models from rotator_lib…
tiesworkman1025 Dec 19, 2025
9d30ea6
refactor(main): Use library for Anthropic format translation
tiesworkman1025 Dec 19, 2025
298 changes: 297 additions & 1 deletion src/proxy_app/main.py
@@ -1,4 +1,5 @@
import time
import uuid

# Phase 1: Minimal imports for arg parsing and TUI
import asyncio
@@ -99,7 +100,7 @@
from contextlib import asynccontextmanager
from fastapi import FastAPI, Request, HTTPException, Depends
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import StreamingResponse
from fastapi.responses import StreamingResponse, JSONResponse
from fastapi.security import APIKeyHeader

print(" → Loading core dependencies...")
@@ -124,6 +125,18 @@
from rotator_library.credential_manager import CredentialManager
from rotator_library.background_refresher import BackgroundRefresher
from rotator_library.model_info_service import init_model_info_service
from rotator_library.anthropic_compat import (
AnthropicMessagesRequest,
AnthropicMessagesResponse,
AnthropicCountTokensRequest,
AnthropicCountTokensResponse,
anthropic_streaming_wrapper,
anthropic_to_openai_messages,
anthropic_to_openai_tools,
anthropic_to_openai_tool_choice,
openai_to_anthropic_response,
translate_anthropic_request,
)
from proxy_app.request_logger import log_request_to_console
from proxy_app.batch_manager import EmbeddingBatcher
from proxy_app.detailed_logger import DetailedLogger
@@ -214,6 +227,9 @@ class EnrichedModelList(BaseModel):
data: List[EnrichedModelCard]


# Anthropic API Models are imported from rotator_library.anthropic_compat


# Calculate total loading time
_elapsed = time.time() - _start_time
print(
@@ -665,6 +681,33 @@ async def verify_api_key(auth: str = Depends(api_key_header)):
return auth


# --- Anthropic API Key Header ---
anthropic_api_key_header = APIKeyHeader(name="x-api-key", auto_error=False)


async def verify_anthropic_api_key(
x_api_key: str = Depends(anthropic_api_key_header),
auth: str = Depends(api_key_header),
):
"""
Dependency to verify API key for Anthropic endpoints.
Accepts either x-api-key header (Anthropic style) or Authorization Bearer (OpenAI style).
"""
# If PROXY_API_KEY is not set or empty, skip verification (open access)
if not PROXY_API_KEY:
return auth or x_api_key
# Check x-api-key first (Anthropic style)
if x_api_key and x_api_key == PROXY_API_KEY:
return x_api_key
# Fall back to Bearer token (OpenAI style)
if auth and auth == f"Bearer {PROXY_API_KEY}":
return auth
raise HTTPException(status_code=401, detail="Invalid or missing API Key")
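

# Illustration only (not part of the PR diff): a minimal client-side sketch of
# the two accepted auth styles. PROXY_URL, KEY, and the model name below are
# placeholder assumptions; KEY must equal PROXY_API_KEY on the proxy to pass.
import httpx

PROXY_URL = "http://localhost:8000/v1/messages"  # placeholder
KEY = "my-proxy-key"  # placeholder
payload = {
    "model": "claude-sonnet-4",  # placeholder model name
    "max_tokens": 128,
    "messages": [{"role": "user", "content": "ping"}],
}
# Anthropic-style header, as Claude Code sends it:
httpx.post(PROXY_URL, json=payload, headers={"x-api-key": KEY})
# OpenAI-style header, accepted as a fallback:
httpx.post(PROXY_URL, json=payload, headers={"Authorization": f"Bearer {KEY}"})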


# Format translation functions are now in rotator_library.anthropic_compat


async def streaming_response_wrapper(
request: Request,
request_data: dict,
@@ -967,6 +1010,259 @@ async def chat_completions(
raise HTTPException(status_code=500, detail=str(e))


# --- Anthropic Messages API Endpoint ---
@app.post("/v1/messages")
async def anthropic_messages(
request: Request,
body: AnthropicMessagesRequest,
client: RotatingClient = Depends(get_rotating_client),
_=Depends(verify_anthropic_api_key),
):
"""
Anthropic-compatible Messages API endpoint.

Accepts requests in Anthropic's format and returns responses in Anthropic's format.
Internally translates to OpenAI format for processing via LiteLLM.

This endpoint is compatible with Claude Code and other Anthropic API clients.
"""
request_id = f"msg_{uuid.uuid4().hex[:24]}"
original_model = body.model

# Initialize logger if enabled
logger = DetailedLogger() if ENABLE_REQUEST_LOGGING else None

try:
# Convert Anthropic request to OpenAI format
anthropic_request = body.model_dump(exclude_none=True)

openai_messages = anthropic_to_openai_messages(
anthropic_request.get("messages", []), anthropic_request.get("system")
)

openai_tools = anthropic_to_openai_tools(anthropic_request.get("tools"))
openai_tool_choice = anthropic_to_openai_tool_choice(
anthropic_request.get("tool_choice")
)

# Build OpenAI-compatible request
openai_request = {
"model": body.model,
"messages": openai_messages,
"max_tokens": body.max_tokens,
"stream": body.stream or False,
}

if body.temperature is not None:
openai_request["temperature"] = body.temperature
if body.top_p is not None:
openai_request["top_p"] = body.top_p
if body.stop_sequences:
openai_request["stop"] = body.stop_sequences
if openai_tools:
openai_request["tools"] = openai_tools
if openai_tool_choice:
openai_request["tool_choice"] = openai_tool_choice

# Handle Anthropic thinking config -> reasoning_effort translation
if body.thinking:
if body.thinking.type == "enabled":
# Map budget_tokens to reasoning_effort level
# Default to "medium" if enabled but budget not specified
budget = body.thinking.budget_tokens or 10000
if budget >= 32000:
openai_request["reasoning_effort"] = "high"
openai_request["custom_reasoning_budget"] = True
elif budget >= 10000:
openai_request["reasoning_effort"] = "high"
elif budget >= 5000:
openai_request["reasoning_effort"] = "medium"
else:
openai_request["reasoning_effort"] = "low"
elif body.thinking.type == "disabled":
openai_request["reasoning_effort"] = "disable"
elif "opus" in body.model.lower():
# Force high thinking for Opus models when no thinking config is provided
# Opus 4.5 always uses the -thinking variant, so we want maximum thinking budget
# Without this, the backend defaults to thinkingBudget: -1 (auto) instead of high
openai_request["reasoning_effort"] = "high"
openai_request["custom_reasoning_budget"] = True
Comment on lines +1036 to +1089
Contributor Author

Ah, it seems past-me got a bit ahead of himself! I specifically created translate_anthropic_request() in the library to handle this translation, but then proceeded to duplicate the logic here manually.

The thinking budget mapping logic (lines 1067-1089) is nearly identical to what I put in translator.py (lines 283-304). This could lead to drift if one is updated without the other.

I should either:

  1. Use translate_anthropic_request() directly (a hedged sketch follows below), or
  2. Document why the endpoint needs different handling than the library method.

log_request_to_console(
url=str(request.url),
headers=dict(request.headers),
client_info=(
request.client.host if request.client else "unknown",
request.client.port if request.client else 0,
),
request_data=openai_request,
)

if body.stream:
# Streaming response - acompletion returns a generator for streaming
response_generator = client.acompletion(request=request, **openai_request)

return StreamingResponse(
anthropic_streaming_wrapper(
request, response_generator, original_model, request_id
),
media_type="text/event-stream",
headers={
"Cache-Control": "no-cache",
"Connection": "keep-alive",
"X-Accel-Buffering": "no",
},
)
else:
# Non-streaming response
response = await client.acompletion(request=request, **openai_request)

# Convert OpenAI response to Anthropic format
openai_response = (
response.model_dump()
if hasattr(response, "model_dump")
else dict(response)
)
anthropic_response = openai_to_anthropic_response(
openai_response, original_model
)

# Override the ID with our request ID
anthropic_response["id"] = request_id

if logger:
logger.log_final_response(
status_code=200,
headers=None,
body=anthropic_response,
)

return JSONResponse(content=anthropic_response)

except (
litellm.InvalidRequestError,
ValueError,
litellm.ContextWindowExceededError,
) as e:
error_response = {
"type": "error",
"error": {"type": "invalid_request_error", "message": str(e)},
}
raise HTTPException(status_code=400, detail=error_response)
except litellm.AuthenticationError as e:
error_response = {
"type": "error",
"error": {"type": "authentication_error", "message": str(e)},
}
raise HTTPException(status_code=401, detail=error_response)
except litellm.RateLimitError as e:
error_response = {
"type": "error",
"error": {"type": "rate_limit_error", "message": str(e)},
}
raise HTTPException(status_code=429, detail=error_response)
except (litellm.ServiceUnavailableError, litellm.APIConnectionError) as e:
error_response = {
"type": "error",
"error": {"type": "api_error", "message": str(e)},
}
raise HTTPException(status_code=503, detail=error_response)
except litellm.Timeout as e:
error_response = {
"type": "error",
"error": {"type": "api_error", "message": f"Request timed out: {str(e)}"},
}
raise HTTPException(status_code=504, detail=error_response)
except Exception as e:
logging.error(f"Anthropic messages endpoint error: {e}")
if logger:
logger.log_final_response(
status_code=500,
headers=None,
body={"error": str(e)},
)
error_response = {
"type": "error",
"error": {"type": "api_error", "message": str(e)},
}
raise HTTPException(status_code=500, detail=error_response)
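

# Illustration only (not part of the PR diff): a request exercising the
# thinking translation above. URL, key, and model name are placeholder
# assumptions. Per the mapping, budget_tokens >= 32000 becomes
# reasoning_effort="high" plus custom_reasoning_budget=True, and max_tokens
# is kept above the thinking budget (see commit 4236c19).
import httpx

resp = httpx.post(
    "http://localhost:8000/v1/messages",  # placeholder proxy URL
    headers={"x-api-key": "my-proxy-key"},  # placeholder key
    json={
        "model": "claude-opus-4",  # placeholder; an "opus" model also hits the default-high branch
        "max_tokens": 64000,  # must exceed budget_tokens
        "thinking": {"type": "enabled", "budget_tokens": 32000},
        "messages": [{"role": "user", "content": "Think hard about this."}],
    },
)
print(resp.json()["content"])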


# --- Anthropic Count Tokens Endpoint ---
@app.post("/v1/messages/count_tokens")
async def anthropic_count_tokens(
request: Request,
body: AnthropicCountTokensRequest,
client: RotatingClient = Depends(get_rotating_client),
_=Depends(verify_anthropic_api_key),
):
"""
Anthropic-compatible count_tokens endpoint.

Counts the number of tokens that would be used by a Messages API request.
This is useful for estimating costs and managing context windows.

Accepts requests in Anthropic's format and returns token count in Anthropic's format.
"""
try:
# Convert Anthropic request to OpenAI format for token counting
anthropic_request = body.model_dump(exclude_none=True)

openai_messages = anthropic_to_openai_messages(
anthropic_request.get("messages", []), anthropic_request.get("system")
)

# Count tokens for messages
message_tokens = client.token_count(
model=body.model,
messages=openai_messages,
)

# Count tokens for tools if present
tool_tokens = 0
if body.tools:
# Tools add tokens based on their definitions
# Convert to JSON string and count tokens for tool definitions
openai_tools = anthropic_to_openai_tools(
[tool.model_dump() for tool in body.tools]
)
if openai_tools:
# Serialize tools to count their token contribution
tools_text = json.dumps(openai_tools)
tool_tokens = client.token_count(
model=body.model,
text=tools_text,
)

total_tokens = message_tokens + tool_tokens

return JSONResponse(content={"input_tokens": total_tokens})

except (
litellm.InvalidRequestError,
ValueError,
litellm.ContextWindowExceededError,
) as e:
error_response = {
"type": "error",
"error": {"type": "invalid_request_error", "message": str(e)},
}
raise HTTPException(status_code=400, detail=error_response)
except litellm.AuthenticationError as e:
error_response = {
"type": "error",
"error": {"type": "authentication_error", "message": str(e)},
}
raise HTTPException(status_code=401, detail=error_response)
except Exception as e:
logging.error(f"Anthropic count_tokens endpoint error: {e}")
error_response = {
"type": "error",
"error": {"type": "api_error", "message": str(e)},
}
raise HTTPException(status_code=500, detail=error_response)
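

# Illustration only (not part of the PR diff): the endpoint returns a single
# input_tokens figure summing message tokens and, when tools are supplied,
# the tokens of their serialized definitions. URL and key are placeholders.
import httpx

resp = httpx.post(
    "http://localhost:8000/v1/messages/count_tokens",  # placeholder proxy URL
    headers={"x-api-key": "my-proxy-key"},  # placeholder key
    json={
        "model": "claude-sonnet-4",  # placeholder
        "messages": [{"role": "user", "content": "How many tokens is this?"}],
    },
)
print(resp.json())  # e.g. {"input_tokens": 14}; exact count depends on the tokenizer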


@app.post("/v1/embeddings")
async def embeddings(
request: Request,
30 changes: 29 additions & 1 deletion src/rotator_library/__init__.py
@@ -8,18 +8,29 @@
from .providers import PROVIDER_PLUGINS
from .providers.provider_interface import ProviderInterface
from .model_info_service import ModelInfoService, ModelInfo, ModelMetadata
from .anthropic_compat import (
AnthropicMessagesRequest,
AnthropicMessagesResponse,
AnthropicCountTokensRequest,
AnthropicCountTokensResponse,
)

__all__ = [
"RotatingClient",
"PROVIDER_PLUGINS",
"ModelInfoService",
"ModelInfo",
"ModelMetadata",
# Anthropic compatibility
"AnthropicMessagesRequest",
"AnthropicMessagesResponse",
"AnthropicCountTokensRequest",
"AnthropicCountTokensResponse",
]


def __getattr__(name):
"""Lazy-load PROVIDER_PLUGINS and ModelInfoService to speed up module import."""
"""Lazy-load PROVIDER_PLUGINS, ModelInfoService, and Anthropic compat to speed up module import."""
if name == "PROVIDER_PLUGINS":
from .providers import PROVIDER_PLUGINS

@@ -36,4 +47,21 @@ def __getattr__(name):
from .model_info_service import ModelMetadata

return ModelMetadata
# Anthropic compatibility models
if name == "AnthropicMessagesRequest":
from .anthropic_compat import AnthropicMessagesRequest

return AnthropicMessagesRequest
if name == "AnthropicMessagesResponse":
from .anthropic_compat import AnthropicMessagesResponse

return AnthropicMessagesResponse
if name == "AnthropicCountTokensRequest":
from .anthropic_compat import AnthropicCountTokensRequest

return AnthropicCountTokensRequest
if name == "AnthropicCountTokensResponse":
from .anthropic_compat import AnthropicCountTokensResponse

return AnthropicCountTokensResponse
raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
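

# Usage note (illustration, not part of the PR diff): with a PEP 562 module
# __getattr__ as above, names import lazily on first attribute access, so a
# plain `import rotator_library` stays fast. Note that the eager
# `from .anthropic_compat import (...)` added at the top of this file already
# binds these names, so the lazy branches only matter if that import goes away.
import rotator_library  # would not touch anthropic_compat yet (absent the eager import)

req_model = rotator_library.AnthropicMessagesRequest  # attribute access triggers the lazy import
# from-imports resolve through module __getattr__ as well (Python 3.7+):
from rotator_library import AnthropicCountTokensRequest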