Skip to content

Commit d540ac2

Browse files
committed
Fix streaming response
- yield response to allow clients that expect streaming
1 parent ed00688 commit d540ac2

File tree

1 file changed: +25 additions, -5 deletions

optillm.py

Lines changed: 25 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -246,11 +246,10 @@ def execute_single_approach(approach, system_prompt, initial_query, client, mode
246246
if hasattr(request, 'json'):
247247
data = request.get_json()
248248
messages = data.get('messages', [])
249-
# Copy all parameters except 'model' and 'messages'
249+
# Copy all parameters except 'stream', 'model' , 'n' and 'messages'
250250
kwargs = {k: v for k, v in data.items()
251-
if k not in ['model', 'messages', 'optillm_approach']}
251+
if k not in ['model', 'messages', 'stream', 'n', 'optillm_approach']}
252252
response = none_approach(original_messages=messages, client=client, model=model, **kwargs)
253-
254253
# For none approach, we return the response and a token count of 0
255254
# since the full token count is already in the response
256255
return response, 0
@@ -369,6 +368,22 @@ def generate_streaming_response(final_response, model):
369368
# Yield the final message to indicate the stream has ended
370369
yield "data: [DONE]\n\n"
371370

371+
def extract_contents(response_obj):
    """Pull the assistant message content out of one or more chat responses.

    Accepts either a single response dict or a list of response dicts in the
    OpenAI chat-completions shape. Returns the content string directly when
    exactly one content was found; otherwise returns a list of the contents
    (possibly empty).
    """
    # Normalise the input to a list so a single code path handles both shapes.
    batch = [response_obj] if not isinstance(response_obj, list) else response_obj

    extracted = []
    for resp in batch:
        # Guard clauses: skip any response missing a usable first choice.
        choices = resp.get('choices')
        if not choices:
            continue
        message = choices[0].get('message')
        if not message:
            continue
        content = message.get('content')
        if content:
            extracted.append(content)

    # A lone content comes back unwrapped, mirroring the original contract.
    return extracted[0] if len(extracted) == 1 else extracted
386+
372387
def parse_conversation(messages):
373388
system_prompt = ""
374389
conversation = []
@@ -523,8 +538,13 @@ def proxy():
523538
result = responses
524539
else:
525540
result, completion_tokens = execute_single_approach(approaches[0], system_prompt, initial_query, client, model)
541+
526542
logger.debug(f'Direct proxy response: {result}')
527-
return jsonify(result), 200
543+
544+
if stream:
545+
return Response(generate_streaming_response(extract_contents(result), model), content_type='text/event-stream')
546+
else :
547+
return jsonify(result), 200
528548

529549
elif operation == 'AND' or operation == 'OR':
530550
if contains_none:
@@ -545,7 +565,7 @@ def proxy():
545565
messages = tagged_conversation_to_messages(response)
546566
if messages: # Only take the last message if we have any
547567
response = messages[-1]['content']
548-
568+
549569
if stream:
550570
return Response(generate_streaming_response(response, model), content_type='text/event-stream')
551571
else:

0 commit comments

Comments
 (0)