
Commit 4a8f10c

fixes

committed · 1 parent 23cbb91 · commit 4a8f10c

5 files changed: +71 -18 lines changed

optillm/__init__.py

Lines changed: 1 addition & 1 deletion
@@ -1,5 +1,5 @@
 # Version information
-__version__ = "0.2.3"
+__version__ = "0.2.4"

 # Import from server module
 from .server import (

optillm/plugins/proxy/README.md

Lines changed: 53 additions & 11 deletions
@@ -55,11 +55,11 @@ optillm
 optillm --port 8000
 ```

-> **Note**: The `--approach proxy` flag is not currently supported. Use the model prefix method below.
+> **Note**: The `--approach proxy` flag is not currently supported in the command-line interface.

 ### 3. Usage Examples

-#### Using Model Prefix (Currently the only working method)
+#### Method 1: Using Model Prefix
 ```bash
 # Use "proxy-" prefix to activate the proxy plugin
 curl -X POST http://localhost:8000/v1/chat/completions \
@@ -68,16 +68,26 @@ curl -X POST http://localhost:8000/v1/chat/completions \
     "model": "proxy-gpt-4",
     "messages": [{"role": "user", "content": "Hello"}]
   }'
+```

-# The proxy will:
-# 1. Route to one of your configured providers
-# 2. Apply model mapping if configured
-# 3. Handle failover automatically
+#### Method 2: Using extra_body (Recommended for SDK usage)
+```bash
+# Use extra_body parameter
+curl -X POST http://localhost:8000/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "gpt-4",
+    "messages": [{"role": "user", "content": "Hello"}],
+    "extra_body": {
+      "optillm_approach": "proxy"
+    }
+  }'
 ```

-> **Known Issues**:
-> - `--approach proxy` flag: Not supported in command-line interface
-> - `extra_body` method: Currently broken due to parsing bug in server code
+Both methods will:
+- Route to one of your configured providers
+- Apply model mapping if configured
+- Handle failover automatically

 #### Combined Approaches
 ```bash
@@ -90,7 +100,20 @@ curl -X POST http://localhost:8000/v1/chat/completions \
   }'
 ```

-> **Note**: The proxy wrapping functionality (`proxy_wrap`) is currently not accessible via the working model prefix method. This would require the `extra_body` approach which is currently broken.
+#### Proxy Wrapping Other Approaches
+```bash
+# Use proxy to wrap MOA approach
+curl -X POST http://localhost:8000/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "gpt-4",
+    "messages": [{"role": "user", "content": "Solve this problem"}],
+    "extra_body": {
+      "optillm_approach": "proxy",
+      "proxy_wrap": "moa"
+    }
+  }'
+```

 ## Configuration Reference

@@ -314,11 +337,30 @@ client = OpenAI(
     api_key="dummy" # Can be any string when using proxy
 )

-# Use proxy with model prefix (currently the only working method)
+# Method 1: Use proxy with model prefix
 response = client.chat.completions.create(
     model="proxy-gpt-4", # Use "proxy-" prefix
     messages=[{"role": "user", "content": "Hello"}]
 )
+
+# Method 2: Use extra_body (recommended)
+response = client.chat.completions.create(
+    model="gpt-4",
+    messages=[{"role": "user", "content": "Hello"}],
+    extra_body={
+        "optillm_approach": "proxy"
+    }
+)
+
+# Method 3: Proxy wrapping another approach
+response = client.chat.completions.create(
+    model="gpt-4",
+    messages=[{"role": "user", "content": "Hello"}],
+    extra_body={
+        "optillm_approach": "proxy",
+        "proxy_wrap": "moa"
+    }
+)
 ```

 ### With LangChain

optillm/plugins/proxy/client.py

Lines changed: 12 additions & 3 deletions
@@ -114,6 +114,15 @@ class _Completions:
     def __init__(self, proxy_client):
         self.proxy_client = proxy_client

+    def _filter_kwargs(self, kwargs: dict) -> dict:
+        """Filter out OptiLLM-specific parameters that shouldn't be sent to providers"""
+        optillm_params = {
+            'optillm_approach', 'proxy_wrap', 'wrapped_approach', 'wrap',
+            'mcts_simulations', 'mcts_exploration', 'mcts_depth',
+            'best_of_n', 'rstar_max_depth', 'rstar_num_rollouts', 'rstar_c'
+        }
+        return {k: v for k, v in kwargs.items() if k not in optillm_params}
+
     def create(self, **kwargs):
         """Create completion with load balancing and failover"""
         model = kwargs.get('model', 'unknown')
@@ -145,8 +154,8 @@ def create(self, **kwargs):
             attempted_providers.add(provider)

             try:
-                # Map model name if needed
-                request_kwargs = kwargs.copy()
+                # Map model name if needed and filter out OptiLLM-specific parameters
+                request_kwargs = self._filter_kwargs(kwargs.copy())
                 request_kwargs['model'] = provider.map_model(model)

                 # Track timing
@@ -177,7 +186,7 @@ def create(self, **kwargs):
         if self.proxy_client.fallback_client:
             logger.warning("All proxy providers failed, using fallback client")
             try:
-                return self.proxy_client.fallback_client.chat.completions.create(**kwargs)
+                return self.proxy_client.fallback_client.chat.completions.create(**self._filter_kwargs(kwargs))
             except Exception as e:
                 errors.append(("fallback_client", str(e)))

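The effect of the new `_filter_kwargs` helper can be reproduced in isolation. The request dict below is a hypothetical example and only a subset of the filtered parameter names is shown; the real method lives on `_Completions` as in the diff above.

```python
# Standalone sketch (hypothetical values) of the filtering step added in this commit:
# OptiLLM-only control parameters are dropped before the request reaches a provider.
kwargs = {
    "model": "gpt-4",
    "messages": [{"role": "user", "content": "Hello"}],
    "temperature": 0.7,           # regular OpenAI-compatible parameter, kept
    "optillm_approach": "proxy",  # OptiLLM control parameter, stripped
    "proxy_wrap": "moa",          # OptiLLM control parameter, stripped
}

optillm_params = {"optillm_approach", "proxy_wrap", "wrapped_approach", "wrap"}

# Same dict comprehension as _filter_kwargs
forwarded = {k: v for k, v in kwargs.items() if k not in optillm_params}
print(forwarded)
# {'model': 'gpt-4', 'messages': [{'role': 'user', 'content': 'Hello'}], 'temperature': 0.7}
```
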
optillm/server.py

Lines changed: 4 additions & 2 deletions
@@ -992,12 +992,14 @@ def main():
     global request_batcher
     global conversation_logger
     # Call this function at the start of main()
+
+    # Load plugins first so they're available in argument parser
+    load_plugins()
+
     args = parse_args()
     # Update server_config with all argument values
     server_config.update(vars(args))

-    load_plugins()
-
     port = server_config['port']

     # Initialize request batcher if batch mode is enabled

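The reordering works because argument parsing can only offer plugin-provided options once the plugins have been discovered. A minimal sketch of that dependency, using a hypothetical registry and flag rather than optillm's actual parser setup:

```python
import argparse

loaded_plugins = []  # hypothetical plugin registry filled by load_plugins()

def load_plugins():
    # optillm discovers plugin modules here; this sketch just registers one name
    loaded_plugins.append("proxy")

def parse_args(argv=None):
    parser = argparse.ArgumentParser()
    # the valid --approach choices depend on which plugins were already loaded
    parser.add_argument("--approach", choices=["none"] + loaded_plugins, default="none")
    parser.add_argument("--port", type=int, default=8000)
    return parser.parse_args(argv)

load_plugins()                              # must run first, as in this commit
args = parse_args(["--approach", "proxy"])
print(args.approach)                        # accepted only because the plugin was loaded
```
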
pyproject.toml

Lines changed: 1 addition & 1 deletion
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

 [project]
 name = "optillm"
-version = "0.2.3"
+version = "0.2.4"
 description = "An optimizing inference proxy for LLMs."
 readme = "README.md"
 license = "Apache-2.0"
