Commit 7f882b4
Implement multiple PR features: newer OpenAI models, enhanced conversion, passthrough mode, Docker improvements
This commit implements changes from PRs fuergaosi233#35, fuergaosi233#24, fuergaosi233#39, fuergaosi233#37, and fuergaosi233#32:

## PR fuergaosi233#35 + fuergaosi233#24: OpenAI Newer Models Support (o1, o3, o4, gpt-5)

- Add `is_newer_openai_model()` method to ModelManager to detect o1, o3, o4, gpt-5
- Add `is_o3_model()` method for specific o3 model detection
- Use `max_completion_tokens` instead of `max_tokens` for newer models
- Enforce `temperature=1` for newer OpenAI reasoning models
- Update test-connection endpoint to handle newer models correctly
- Fix compatibility with OpenAI's newer model API requirements

## PR fuergaosi233#39: Enhanced Conversion & Error Handling

- Add comprehensive input validation to request converter
- Validate Claude request structure before processing
- Add enhanced error handling in response converter
- Validate OpenAI response structure with detailed error messages
- Add prompt cache support for non-streaming responses
- Extract and include `cache_read_input_tokens` from prompt_tokens_details
- Improve error messages for debugging

## PR fuergaosi233#37: Multi-Tenant Passthrough Mode

- Add passthrough mode support for multi-tenant deployments
- Make OPENAI_API_KEY optional - enable passthrough mode when not set
- Add per-request API key support via `openai-api-key` header
- Update OpenAIClient to accept per-request API keys
- Add API key format validation (sk- prefix, minimum length)
- Update endpoints to extract and validate OpenAI API keys from headers
- Support both proxy mode (server key) and passthrough mode (user keys)
- Enable separate billing per tenant in passthrough mode

## PR fuergaosi233#32: Docker Improvements

- Add comprehensive .dockerignore file
- Exclude development files, tests, and sensitive data from Docker builds
- Reduce Docker image size by excluding unnecessary files
- Improve build performance with proper ignore patterns

## Additional Improvements

- Better error messages and logging throughout
- Enhanced API key validation
- Improved request/response handling

All changes maintain backward compatibility with existing deployments.
1 parent e2dd942 commit 7f882b4
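The `is_newer_openai_model()` and `is_o3_model()` helpers described above live in ModelManager, whose diff is not among the files shown below. A minimal sketch of what that detection might look like, assuming simple name-prefix matching (the class shape and prefix tuple are assumptions, not the committed code):

```python
# Hypothetical sketch only: model_manager.py is not shown in this commit view.
# The prefix tuple and method bodies are assumptions based on the commit message.
class ModelManager:
    # Newer OpenAI reasoning models that need max_completion_tokens and temperature=1
    _NEWER_PREFIXES = ("o1", "o3", "o4", "gpt-5")

    def is_newer_openai_model(self, model: str) -> bool:
        """Detect o1/o3/o4/gpt-5 family models by name prefix."""
        return model.lower().startswith(self._NEWER_PREFIXES)

    def is_o3_model(self, model: str) -> bool:
        """Detect specifically the o3 family."""
        return model.lower().startswith("o3")
```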

File tree

7 files changed (+331, -60 lines)

.dockerignore

Lines changed: 73 additions & 0 deletions
@@ -0,0 +1,73 @@
+# Git files
+.git
+.gitignore
+.gitattributes
+
+# Python cache
+__pycache__
+*.py[cod]
+*$py.class
+*.so
+.Python
+
+# Virtual environments
+venv/
+env/
+ENV/
+.venv
+
+# IDE
+.vscode/
+.idea/
+*.swp
+*.swo
+*~
+
+# Environment files (keep .env.example but exclude actual .env)
+.env
+.env.local
+.env.*.local
+
+# Test and development
+tests/
+*.pytest_cache/
+.coverage
+htmlcov/
+.tox/
+dist/
+build/
+*.egg-info/
+
+# Documentation
+docs/
+*.md
+!README.md
+
+# Logs and databases
+*.log
+*.db
+*.sqlite
+*.sqlite3
+usage_tracking.db
+
+# Temporary files
+*.tmp
+*.temp
+.DS_Store
+Thumbs.db
+
+# CI/CD
+.github/
+.gitlab-ci.yml
+.travis.yml
+
+# Docker files (no need to include in image)
+Dockerfile*
+docker-compose*.yml
+.dockerignore
+
+# Configuration modes
+modes.json
+
+# Scripts (unless needed at runtime)
+scripts/

src/api/endpoints.py

Lines changed: 74 additions & 25 deletions
@@ -53,37 +53,72 @@
     # Configure per-model clients for hybrid deployments
     openai_client.configure_per_model_clients(config)
 
-async def validate_api_key(x_api_key: Optional[str] = Header(None), authorization: Optional[str] = Header(None)):
-    """Validate the client's API key from either x-api-key header or Authorization header."""
+async def validate_and_extract_api_key(
+    x_api_key: Optional[str] = Header(None),
+    authorization: Optional[str] = Header(None),
+    openai_api_key: Optional[str] = Header(None, alias="openai-api-key")
+) -> Optional[str]:
+    """
+    Validate and extract API keys based on operating mode.
+
+    Returns:
+        OpenAI API key to use for the request (None in proxy mode)
+
+    Raises:
+        HTTPException: If validation fails
+    """
     client_api_key = None
+    openai_key = None
 
-    # Extract API key from headers
+    # Extract Anthropic API key from headers (for Claude client validation)
     if x_api_key:
         client_api_key = x_api_key
         logger.debug(f"API key from x-api-key header: {client_api_key[:10]}...")
     elif authorization and authorization.startswith("Bearer "):
         client_api_key = authorization.replace("Bearer ", "")
         logger.debug(f"API key from Authorization header: {client_api_key[:10]}...")
 
-    # Skip validation if ANTHROPIC_API_KEY is not set in the environment
-    if not config.anthropic_api_key:
-        logger.debug("ANTHROPIC_API_KEY not set, skipping client validation")
-        return
+    # Extract OpenAI API key from headers (for passthrough mode)
+    if openai_api_key:
+        openai_key = openai_api_key
+        logger.debug(f"OpenAI API key from header: {openai_key[:10]}...")
+
+    # Passthrough mode: Require OpenAI API key from user
+    if config.passthrough_mode:
+        if not openai_key:
+            raise HTTPException(
+                status_code=401,
+                detail="Passthrough mode: Please provide your OpenAI API key via 'openai-api-key' header"
+            )
 
-    logger.debug(f"Expected ANTHROPIC_API_KEY: {config.anthropic_api_key}")
+        # Validate OpenAI API key format
+        if not config.validate_api_key(openai_key):
+            raise HTTPException(
+                status_code=401,
+                detail="Invalid OpenAI API key format. Key must start with 'sk-' and be at least 20 characters"
+            )
 
-    # Validate the client API key
-    if not client_api_key or not config.validate_client_api_key(client_api_key):
-        logger.warning(f"Invalid API key provided by client. Expected: {config.anthropic_api_key}, Got: {client_api_key[:10] if client_api_key else 'None'}...")
-        raise HTTPException(
-            status_code=401,
-            detail="Invalid API key. Please provide a valid Anthropic API key."
-        )
+        logger.debug("Passthrough mode: OpenAI API key validated")
+        return openai_key
 
-    logger.debug("API key validation passed")
+    # Proxy mode: Validate Anthropic client key if configured
+    if config.anthropic_api_key:
+        if not client_api_key or not config.validate_client_api_key(client_api_key):
+            logger.warning(f"Invalid API key provided by client. Expected: {config.anthropic_api_key}, Got: {client_api_key[:10] if client_api_key else 'None'}...")
+            raise HTTPException(
+                status_code=401,
+                detail="Invalid API key. Please provide a valid Anthropic API key."
+            )
+        logger.debug("Proxy mode: Anthropic API key validation passed")
+
+    return None  # Proxy mode: use server-configured API key
 
 @router.post("/v1/messages")
-async def create_message(request: ClaudeMessagesRequest, http_request: Request, _: None = Depends(validate_api_key)):
+async def create_message(
+    request: ClaudeMessagesRequest,
+    http_request: Request,
+    openai_api_key: Optional[str] = Depends(validate_and_extract_api_key)
+):
     request_start_time = time.time()
     request_id = str(uuid.uuid4())
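With `validate_and_extract_api_key` wired in as a dependency, a passthrough-mode tenant supplies their own OpenAI key on every call. A hedged usage sketch follows; the proxy address, port, and model name are assumptions, and only the `openai-api-key` header and the `/v1/messages` route come from the diff above:

```python
import requests

# Assumed local proxy address; adjust host/port for your deployment.
resp = requests.post(
    "http://localhost:8082/v1/messages",
    headers={
        # Passthrough mode: each tenant sends their own OpenAI key,
        # which keeps billing separate per tenant.
        "openai-api-key": "sk-your-own-openai-key",
        "content-type": "application/json",
    },
    json={
        "model": "claude-3-5-sonnet-20241022",  # example model name
        "max_tokens": 128,
        "messages": [{"role": "user", "content": "ping"}],
    },
)
print(resp.status_code, resp.json().get("usage"))
```

In proxy mode the same request works without the `openai-api-key` header: the dependency returns None and the server falls back to its configured key.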

@@ -204,7 +239,7 @@ async def create_message(request: ClaudeMessagesRequest, http_request: Request,
         logger.debug(f"Starting streaming response for request_id: {request_id}")
         try:
             openai_stream = openai_client.create_chat_completion_stream(
-                openai_request, request_id, config
+                openai_request, request_id, config, api_key=openai_api_key
             )
             logger.debug(f"OpenAI stream created for request_id: {request_id}")
             return StreamingResponse(
@@ -251,7 +286,7 @@ async def create_message(request: ClaudeMessagesRequest, http_request: Request,
         # Non-streaming response
         logger.debug(f"Starting non-streaming response for request_id: {request_id}")
         openai_response = await openai_client.create_chat_completion(
-            openai_request, request_id, config
+            openai_request, request_id, config, api_key=openai_api_key
         )
         logger.debug(f"OpenAI response received for request_id: {request_id}")

@@ -383,7 +418,10 @@ async def create_message(request: ClaudeMessagesRequest, http_request: Request,
 
 
 @router.post("/v1/messages/count_tokens")
-async def count_tokens(request: ClaudeTokenCountRequest, _: None = Depends(validate_api_key)):
+async def count_tokens(
+    request: ClaudeTokenCountRequest,
+    openai_api_key: Optional[str] = Depends(validate_and_extract_api_key)
+):
     try:
         # For token counting, we'll use a simple estimation
         # In a real implementation, you might want to use tiktoken or similar
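The comment above leaves precise counting as future work. A minimal sketch of the tiktoken-based approach it alludes to (not part of this commit; the default model name and fallback encoding are assumptions):

```python
import tiktoken

def estimate_tokens(text: str, model: str = "gpt-4o") -> int:
    """Count tokens with tiktoken, falling back to a generic encoding."""
    try:
        encoding = tiktoken.encoding_for_model(model)
    except KeyError:
        # Unknown model names raise KeyError; fall back to a common encoding.
        encoding = tiktoken.get_encoding("cl100k_base")
    return len(encoding.encode(text))

print(estimate_tokens("Hello, world"))
```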
@@ -437,12 +475,23 @@ async def test_connection():
     """Test API connectivity to OpenAI"""
     try:
         # Simple test request to verify API connectivity
+        # Check if the test model is a newer OpenAI model
+        is_newer_model = model_manager.is_newer_openai_model(config.small_model)
+
+        test_request = {
+            "model": config.small_model,
+            "messages": [{"role": "user", "content": "Hello"}],
+        }
+
+        # Newer OpenAI models (o1, o3, o4, gpt-5) require max_completion_tokens and temperature=1
+        if is_newer_model:
+            test_request["max_completion_tokens"] = 200
+            test_request["temperature"] = 1
+        else:
+            test_request["max_tokens"] = 5
+
         test_response = await openai_client.create_chat_completion(
-            {
-                "model": config.small_model,
-                "messages": [{"role": "user", "content": "Hello"}],
-                "max_tokens": 5,
-            },
+            test_request,
             config=config
         )

src/conversion/request_converter.py

Lines changed: 34 additions & 7 deletions
@@ -127,7 +127,20 @@ def _apply_reasoning_config(
 def convert_claude_to_openai(
     claude_request: ClaudeMessagesRequest, model_manager
 ) -> Dict[str, Any]:
-    """Convert Claude API request format to OpenAI format."""
+    """Convert Claude API request format to OpenAI format with enhanced validation."""
+
+    # Validate input request
+    if not claude_request:
+        raise ValueError("Claude request cannot be None")
+
+    if not claude_request.messages:
+        raise ValueError("Claude request must contain at least one message")
+
+    if not isinstance(claude_request.messages, list):
+        raise ValueError("Claude request messages must be a list")
+
+    if claude_request.max_tokens < 1:
+        raise ValueError(f"max_tokens must be at least 1, got {claude_request.max_tokens}")
 
     # Parse model name and extract reasoning configuration
     openai_model, reasoning_config = model_manager.parse_and_map_model(claude_request.model)
@@ -192,17 +205,31 @@ def convert_claude_to_openai(
         i += 1
 
     # Build OpenAI request
+    # Check if this is a newer OpenAI model (o1, o3, o4, gpt-5)
+    is_newer_model = model_manager.is_newer_openai_model(openai_model)
+
+    # Calculate token limit
+    token_limit = min(
+        max(claude_request.max_tokens, config.min_tokens_limit),
+        config.max_tokens_limit,
+    )
+
     openai_request = {
         "model": openai_model,
         "messages": openai_messages,
-        "max_tokens": min(
-            max(claude_request.max_tokens, config.min_tokens_limit),
-            config.max_tokens_limit,
-        ),
-        "temperature": claude_request.temperature,
         "stream": claude_request.stream,
     }
-    logger.debug(f"Converted request: model={openai_model}, messages={len(openai_messages)}, max_tokens={openai_request['max_tokens']}")
+
+    # Newer OpenAI models (o1, o3, o4, gpt-5) require max_completion_tokens instead of max_tokens
+    if is_newer_model:
+        openai_request["max_completion_tokens"] = token_limit
+        # Newer reasoning models require temperature=1
+        openai_request["temperature"] = 1
+        logger.debug(f"Converted request (newer model): model={openai_model}, messages={len(openai_messages)}, max_completion_tokens={token_limit}, temperature=1")
+    else:
+        openai_request["max_tokens"] = token_limit
+        openai_request["temperature"] = claude_request.temperature
+        logger.debug(f"Converted request: model={openai_model}, messages={len(openai_messages)}, max_tokens={token_limit}")
     # Add optional parameters
     if claude_request.stop_sequences:
         openai_request["stop"] = claude_request.stop_sequences

src/conversion/response_converter.py

Lines changed: 40 additions & 10 deletions
@@ -8,14 +8,36 @@
 def convert_openai_to_claude_response(
     openai_response: dict, original_request: ClaudeMessagesRequest
 ) -> dict:
-    """Convert OpenAI response to Claude format."""
+    """Convert OpenAI response to Claude format with enhanced error handling."""
 
-    # Extract response data
+    # Validate response structure
+    if not isinstance(openai_response, dict):
+        raise HTTPException(
+            status_code=500,
+            detail="Invalid OpenAI response format: expected dictionary"
+        )
+
+    # Extract response data with validation
     choices = openai_response.get("choices", [])
     if not choices:
-        raise HTTPException(status_code=500, detail="No choices in OpenAI response")
+        raise HTTPException(
+            status_code=500,
+            detail="No choices in OpenAI response"
+        )
+
+    if not isinstance(choices, list):
+        raise HTTPException(
+            status_code=500,
+            detail="Invalid choices format in OpenAI response"
+        )
 
     choice = choices[0]
+    if not isinstance(choice, dict):
+        raise HTTPException(
+            status_code=500,
+            detail="Invalid choice format in OpenAI response"
+        )
+
     message = choice.get("message", {})
 
     # Build Claude content blocks
@@ -58,7 +80,20 @@ def convert_openai_to_claude_response(
         "function_call": Constants.STOP_TOOL_USE,
     }.get(finish_reason, Constants.STOP_END_TURN)
 
-    # Build Claude response
+    # Build Claude response with prompt cache support
+    usage = openai_response.get("usage", {})
+    usage_data = {
+        "input_tokens": usage.get("prompt_tokens", 0),
+        "output_tokens": usage.get("completion_tokens", 0),
+    }
+
+    # Add prompt cache tokens if available (OpenAI prompt caching)
+    prompt_tokens_details = usage.get("prompt_tokens_details", {})
+    if prompt_tokens_details:
+        cache_read_input_tokens = prompt_tokens_details.get("cached_tokens", 0)
+        if cache_read_input_tokens > 0:
+            usage_data["cache_read_input_tokens"] = cache_read_input_tokens
+
     claude_response = {
         "id": openai_response.get("id", f"msg_{uuid.uuid4()}"),
         "type": "message",
@@ -67,12 +102,7 @@ def convert_openai_to_claude_response(
         "content": content_blocks,
         "stop_reason": stop_reason,
         "stop_sequence": None,
-        "usage": {
-            "input_tokens": openai_response.get("usage", {}).get("prompt_tokens", 0),
-            "output_tokens": openai_response.get("usage", {}).get(
-                "completion_tokens", 0
-            ),
-        },
+        "usage": usage_data,
     }
 
     return claude_response
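The usage mapping above can be traced end to end with a fabricated payload. A small sketch (values invented for illustration; the field names match the diff):

```python
# Fabricated OpenAI-style usage payload, for illustration only.
usage = {
    "prompt_tokens": 1200,
    "completion_tokens": 150,
    "prompt_tokens_details": {"cached_tokens": 1024},
}

usage_data = {
    "input_tokens": usage.get("prompt_tokens", 0),
    "output_tokens": usage.get("completion_tokens", 0),
}
cached = usage.get("prompt_tokens_details", {}).get("cached_tokens", 0)
if cached > 0:
    usage_data["cache_read_input_tokens"] = cached

print(usage_data)
# {'input_tokens': 1200, 'output_tokens': 150, 'cache_read_input_tokens': 1024}
```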
