Commit dba3950

Merge pull request #268 from algorithmicsuperintelligence/fix-stream-bug
Fix stream bug
2 parents a5e3588 + af30a03, commit dba3950

3 files changed: +21 -6 lines changed

optillm/__init__.py

Lines changed: 1 addition & 1 deletion

@@ -1,5 +1,5 @@
 # Version information
-__version__ = "0.3.4"
+__version__ = "0.3.5"
 
 # Import from server module
 from .server import (

optillm/plugins/proxy_plugin.py

Lines changed: 19 additions & 4 deletions

@@ -120,12 +120,17 @@ def run(system_prompt: str, initial_query: str, client, model: str,
 
         if not config.get('providers'):
             logger.warning("No providers configured, falling back to original client")
+            # Strip stream parameter to force complete response
+            api_config = dict(request_config or {})
+            api_config.pop('stream', None)
+
             response = client.chat.completions.create(
                 model=model,
                 messages=[
                     {"role": "system", "content": system_prompt},
                     {"role": "user", "content": initial_query}
-                ]
+                ],
+                **api_config
             )
             # Return full response dict to preserve all usage information
             response_dict = response.model_dump() if hasattr(response, 'model_dump') else response
@@ -204,12 +209,17 @@ def run(system_prompt: str, initial_query: str, client, model: str,
         if not supports_system_messages:
             logger.info(f"Using fallback message formatting for {model} (no system message support)")
 
+        # Strip stream parameter to force complete response
+        # server.py will handle converting to SSE streaming format if needed
+        api_config = dict(request_config or {})
+        api_config.pop('stream', None)
+
         response = proxy_client.chat.completions.create(
             model=model,
             messages=messages,
-            **(request_config or {})
+            **api_config
         )
-
+
         # Return full response dict to preserve all usage information
         response_dict = response.model_dump() if hasattr(response, 'model_dump') else response
         return response_dict, 0
@@ -218,12 +228,17 @@ def run(system_prompt: str, initial_query: str, client, model: str,
         logger.error(f"Proxy plugin error: {e}", exc_info=True)
         # Fallback to original client
         logger.info("Falling back to original client")
+        # Strip stream parameter to force complete response
+        api_config = dict(request_config or {})
+        api_config.pop('stream', None)
+
         response = client.chat.completions.create(
             model=model,
             messages=[
                 {"role": "system", "content": system_prompt},
                 {"role": "user", "content": initial_query}
-            ]
+            ],
+            **api_config
        )
         # Return full response dict to preserve all usage information
         response_dict = response.model_dump() if hasattr(response, 'model_dump') else response
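
The same pattern is applied in all three fallback paths above: copy request_config, drop its stream key, and pass the remaining options through so the fallback call always returns a complete (non-streamed) response, which server.py can then re-wrap as SSE if the original request asked for streaming. A minimal standalone sketch of that pattern follows; the call_fallback helper and its signature are illustrative only, not part of this diff:

# Illustrative sketch of the stream-stripping fallback pattern; "client" is any
# OpenAI-compatible client and call_fallback is a hypothetical helper, not code
# from this repository.
def call_fallback(client, model, system_prompt, initial_query, request_config=None):
    # Copy the per-request config so the caller's dict is not mutated,
    # then remove 'stream' to force a complete (non-streamed) response.
    api_config = dict(request_config or {})
    api_config.pop('stream', None)

    response = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": initial_query},
        ],
        **api_config,
    )
    # Return a plain dict to preserve usage/token accounting downstream.
    return response.model_dump() if hasattr(response, 'model_dump') else response

Copying the dict before popping keeps the caller's request_config untouched, so a retried or re-dispatched request still sees its original stream setting.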

pyproject.toml

Lines changed: 1 addition & 1 deletion

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "optillm"
-version = "0.3.4"
+version = "0.3.5"
 description = "An optimizing inference proxy for LLMs."
 readme = "README.md"
 license = "Apache-2.0"
