Commit 01ed43c

committed
fixes
1 parent 5dd83c1 commit 01ed43c

File tree

2 files changed: +101 -2 lines changed


optillm/plugins/proxy/client.py

Lines changed: 9 additions & 2 deletions
@@ -47,6 +47,13 @@ def client(self):
                 api_version="2024-02-01",
                 max_retries=0  # Disable client retries - we handle them
             )
+        elif 'generativelanguage.googleapis.com' in self.base_url:
+            # Google AI client - create custom client to avoid "models/" prefix
+            from optillm.plugins.proxy.google_client import GoogleAIClient
+            self._client = GoogleAIClient(
+                api_key=self.api_key,
+                base_url=self.base_url
+            )
         else:
             # Standard OpenAI-compatible client
             self._client = OpenAI(
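
The new branch only changes which client object gets constructed for a provider. As a self-contained sketch of the same dispatch (a hypothetical standalone helper, not the actual client property in client.py), assuming only the base_url check and the max_retries=0 setting visible in the diff:

from openai import OpenAI
from optillm.plugins.proxy.google_client import GoogleAIClient

def pick_client(base_url: str, api_key: str):
    # Google AI endpoints get the custom client so the model name is sent
    # without a "models/" prefix.
    if 'generativelanguage.googleapis.com' in base_url:
        return GoogleAIClient(api_key=api_key, base_url=base_url)
    # Everything else stays on the standard OpenAI-compatible client;
    # retries are handled by the proxy itself.
    return OpenAI(api_key=api_key, base_url=base_url, max_retries=0)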
@@ -215,8 +222,8 @@ def create(self, **kwargs):
 
             attempted_providers.add(provider)
 
-            # Try to acquire a slot for this provider (with short timeout to try next provider quickly)
-            slot_timeout = 0.5  # Don't wait too long for a single provider
+            # Try to acquire a slot for this provider (with reasonable timeout for queueing)
+            slot_timeout = 10.0  # Wait up to 10 seconds for provider to become available
             if not provider.acquire_slot(timeout=slot_timeout):
                 logger.debug(f"Provider {provider.name} at max capacity, trying next provider")
                 errors.append((provider.name, "At max concurrent requests"))
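
The behavioral change depends on the semantics of provider.acquire_slot(timeout=...): with 10.0 instead of 0.5, a request now queues for up to ten seconds on a busy provider before the proxy records "At max concurrent requests" and tries the next one. A minimal sketch of those slot semantics, assuming a semaphore-per-provider design (not the actual Provider implementation):

import threading

class ProviderSlots:
    """Sketch of per-provider concurrency limiting (assumed design)."""

    def __init__(self, max_concurrent: int):
        self._sem = threading.Semaphore(max_concurrent)

    def acquire_slot(self, timeout: float) -> bool:
        # Block up to `timeout` seconds for a free slot; False tells the
        # caller to fall through to the next provider.
        return self._sem.acquire(timeout=timeout)

    def release_slot(self) -> None:
        self._sem.release()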
optillm/plugins/proxy/google_client.py (new file)

Lines changed: 92 additions & 0 deletions

@@ -0,0 +1,92 @@
"""
Custom Google AI client that doesn't add "models/" prefix to model names
"""
import requests
import json
from typing import Dict, List, Any


class GoogleAIClient:
    """Custom client for Google AI that bypasses OpenAI client's model name prefix behavior"""

    def __init__(self, api_key: str, base_url: str):
        self.api_key = api_key
        self.base_url = base_url.rstrip('/')
        self.chat = self.Chat(self)
        self.models = self.Models(self)

    class Chat:
        def __init__(self, client):
            self.client = client
            self.completions = self.Completions(client)

        class Completions:
            def __init__(self, client):
                self.client = client

            def create(self, model: str, messages: List[Dict[str, str]], **kwargs) -> Any:
                """Create chat completion without adding models/ prefix to model name"""
                url = f"{self.client.base_url}/chat/completions"

                headers = {
                    "Content-Type": "application/json",
                    "Authorization": f"Bearer {self.client.api_key}"
                }

                # Build request data - use model name directly without "models/" prefix
                data = {
                    "model": model,  # Use exactly as provided - no prefix!
                    "messages": messages,
                    **kwargs
                }

                # Make direct HTTP request to bypass OpenAI client behavior
                response = requests.post(url, headers=headers, json=data, timeout=kwargs.get('timeout', 30))

                if response.status_code != 200:
                    error_text = response.text
                    raise Exception(f"HTTP {response.status_code}: {error_text}")

                # Parse response and return OpenAI-compatible object
                result = response.json()

                # Create a simple object that has the attributes expected by the proxy
                class CompletionResponse:
                    def __init__(self, data):
                        self._data = data
                        self.choices = data.get('choices', [])
                        self.usage = data.get('usage', {})
                        self.model = data.get('model', model)

                    def model_dump(self):
                        return self._data

                    def __getitem__(self, key):
                        return self._data[key]

                    def get(self, key, default=None):
                        return self._data.get(key, default)

                return CompletionResponse(result)

    class Models:
        def __init__(self, client):
            self.client = client

        def list(self):
            """Simple models list for health checking"""
            url = f"{self.client.base_url}/models"
            headers = {
                "Authorization": f"Bearer {self.client.api_key}"
            }

            try:
                response = requests.get(url, headers=headers, timeout=5)
                if response.status_code == 200:
                    return response.json()
                else:
                    # Return a mock response if health check fails
                    return {"data": [{"id": "gemma-3-4b-it"}]}
            except:
                # Return a mock response if health check fails
                return {"data": [{"id": "gemma-3-4b-it"}]}
