@@ -120,12 +120,17 @@ def run(system_prompt: str, initial_query: str, client, model: str,
 
     if not config.get('providers'):
         logger.warning("No providers configured, falling back to original client")
+        # Strip stream parameter to force complete response
+        api_config = dict(request_config or {})
+        api_config.pop('stream', None)
+
         response = client.chat.completions.create(
             model=model,
             messages=[
                 {"role": "system", "content": system_prompt},
                 {"role": "user", "content": initial_query}
-            ]
+            ],
+            **api_config
         )
         # Return full response dict to preserve all usage information
         response_dict = response.model_dump() if hasattr(response, 'model_dump') else response
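
All three call sites touched by this commit use the same idiom: copy `request_config` (which may be `None`) before popping `stream`, so the caller's dict is never mutated and the API call always returns a complete, non-streamed response. A minimal sketch of the idiom in isolation:

```python
# Minimal sketch of the stream-stripping idiom from this commit.
# `request_config` may be None, or a dict the caller still holds a
# reference to, so it is copied before mutation.
def strip_stream(request_config):
    api_config = dict(request_config or {})
    api_config.pop('stream', None)  # force a complete (non-streamed) response
    return api_config

assert strip_stream({'stream': True, 'temperature': 0.2}) == {'temperature': 0.2}
assert strip_stream(None) == {}
```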
@@ -204,12 +209,17 @@ def run(system_prompt: str, initial_query: str, client, model: str,
         if not supports_system_messages:
             logger.info(f"Using fallback message formatting for {model} (no system message support)")
 
+        # Strip stream parameter to force complete response
+        # server.py will handle converting to SSE streaming format if needed
+        api_config = dict(request_config or {})
+        api_config.pop('stream', None)
+
         response = proxy_client.chat.completions.create(
             model=model,
             messages=messages,
-            **(request_config or {})
+            **api_config
         )
-
+
         # Return full response dict to preserve all usage information
         response_dict = response.model_dump() if hasattr(response, 'model_dump') else response
         return response_dict, 0
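
The new comment defers re-streaming to server.py, which is not part of this diff. For orientation only, a hedged sketch of what that conversion could look like, assuming the OpenAI-style chunk schema and `data:`-framed SSE wire format (the function name `to_sse_events` is hypothetical):

```python
import json

# Hypothetical sketch only: re-emit a complete chat completion as a
# minimal OpenAI-style SSE stream. The real logic lives in server.py,
# outside this diff.
def to_sse_events(response_dict):
    content = response_dict["choices"][0]["message"]["content"]
    chunk = {
        "id": response_dict.get("id"),
        "object": "chat.completion.chunk",
        "choices": [
            {"index": 0, "delta": {"content": content}, "finish_reason": "stop"}
        ],
    }
    yield f"data: {json.dumps(chunk)}\n\n"
    yield "data: [DONE]\n\n"
```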
@@ -218,12 +228,17 @@ def run(system_prompt: str, initial_query: str, client, model: str,
         logger.error(f"Proxy plugin error: {e}", exc_info=True)
         # Fallback to original client
         logger.info("Falling back to original client")
+        # Strip stream parameter to force complete response
+        api_config = dict(request_config or {})
+        api_config.pop('stream', None)
+
         response = client.chat.completions.create(
             model=model,
             messages=[
                 {"role": "system", "content": system_prompt},
                 {"role": "user", "content": initial_query}
-            ]
+            ],
+            **api_config
        )
         # Return full response dict to preserve all usage information
         response_dict = response.model_dump() if hasattr(response, 'model_dump') else response
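
Each path ends with the same compatibility shim: the `openai>=1.0` SDK returns pydantic models that expose `model_dump()`, while test doubles or clients that already return plain dicts do not, so the `hasattr` check covers both. A short sketch with a hypothetical stand-in response:

```python
# FakeResponse is a hypothetical stand-in for the pydantic model returned
# by the openai>=1.0 SDK; plain dicts pass through the shim unchanged.
class FakeResponse:
    def model_dump(self):
        return {"choices": [], "usage": {"total_tokens": 0}}

for response in (FakeResponse(), {"choices": []}):
    response_dict = response.model_dump() if hasattr(response, 'model_dump') else response
    assert isinstance(response_dict, dict)
```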