@@ -246,11 +246,10 @@ def execute_single_approach(approach, system_prompt, initial_query, client, model):
         if hasattr(request, 'json'):
             data = request.get_json()
             messages = data.get('messages', [])
-            # Copy all parameters except 'model' and 'messages'
+            # Copy all parameters except 'stream', 'model', 'n' and 'messages'
             kwargs = {k: v for k, v in data.items()
-                      if k not in ['model', 'messages', 'optillm_approach']}
+                      if k not in ['model', 'messages', 'stream', 'n', 'optillm_approach']}
         response = none_approach(original_messages=messages, client=client, model=model, **kwargs)
-
         # For none approach, we return the response and a token count of 0
         # since the full token count is already in the response
         return response, 0
@@ -369,6 +368,22 @@ def generate_streaming_response(final_response, model):
     # Yield the final message to indicate the stream has ended
     yield "data: [DONE]\n\n"
 
+def extract_contents(response_obj):
+    contents = []
+    # Handle both single response and list of responses
+    responses = response_obj if isinstance(response_obj, list) else [response_obj]
+
+    for response in responses:
+        # Extract content from first choice if it exists
+        if (response.get('choices') and
+            len(response['choices']) > 0 and
+            response['choices'][0].get('message') and
+            response['choices'][0]['message'].get('content')):
+            contents.append(response['choices'][0]['message']['content'])
+
+    # Return single string if only one content, otherwise return list
+    return contents[0] if len(contents) == 1 else contents
+
 def parse_conversation(messages):
     system_prompt = ""
     conversation = []
@@ -523,8 +538,13 @@ def proxy():
             result = responses
         else:
             result, completion_tokens = execute_single_approach(approaches[0], system_prompt, initial_query, client, model)
+
         logger.debug(f'Direct proxy response: {result}')
-        return jsonify(result), 200
+
+        if stream:
+            return Response(generate_streaming_response(extract_contents(result), model), content_type='text/event-stream')
+        else:
+            return jsonify(result), 200
 
     elif operation == 'AND' or operation == 'OR':
         if contains_none:
@@ -545,7 +565,7 @@ def proxy():
         messages = tagged_conversation_to_messages(response)
         if messages:  # Only take the last message if we have any
             response = messages[-1]['content']
-    
+
     if stream:
         return Response(generate_streaming_response(response, model), content_type='text/event-stream')
     else:
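
The patched direct-proxy path now feeds extract_contents(result) into generate_streaming_response, so it helps to see how the helper normalizes the two shapes it can receive. Below is a minimal standalone sketch; the response dicts are made-up OpenAI-style payloads for illustration, not actual API output.

# Standalone sketch of the new helper with hypothetical payloads.
def extract_contents(response_obj):
    contents = []
    # Handle both single response and list of responses
    responses = response_obj if isinstance(response_obj, list) else [response_obj]
    for response in responses:
        # Extract content from first choice if it exists
        if (response.get('choices') and
            len(response['choices']) > 0 and
            response['choices'][0].get('message') and
            response['choices'][0]['message'].get('content')):
            contents.append(response['choices'][0]['message']['content'])
    # Return single string if only one content, otherwise return list
    return contents[0] if len(contents) == 1 else contents

# Hypothetical chat-completion payloads in dict form.
single = {'choices': [{'message': {'content': 'hello'}}]}
batch = [
    {'choices': [{'message': {'content': 'first'}}]},
    {'choices': [{'message': {'content': 'second'}}]},
]

assert extract_contents(single) == 'hello'              # one response -> plain string
assert extract_contents(batch) == ['first', 'second']   # several responses -> list of strings

With exactly one extracted message the streaming branch therefore receives a bare string, matching what the non-proxy path passes to generate_streaming_response; with zero or several it receives a list.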