Commit fd23db3

fix

1 parent d1baf93
2 files changed (+25, -3 lines)

optillm/plugins/proxy_plugin.py (9 additions, 3 deletions)
```diff
@@ -54,7 +54,9 @@ def run(system_prompt: str, initial_query: str, client, model: str,
                 {"role": "user", "content": initial_query}
             ]
         )
-        return response.choices[0].message.content, response.usage.completion_tokens
+        # Return full response dict to preserve all usage information
+        response_dict = response.model_dump() if hasattr(response, 'model_dump') else response
+        return response_dict, 0

     # Create or reuse proxy client to maintain state (important for round-robin)
     config_key = str(config)  # Simple config-based cache key
@@ -128,7 +130,9 @@ def run(system_prompt: str, initial_query: str, client, model: str,
             **(request_config or {})
         )

-        return response.choices[0].message.content, response.usage.completion_tokens
+        # Return full response dict to preserve all usage information
+        response_dict = response.model_dump() if hasattr(response, 'model_dump') else response
+        return response_dict, 0

     except Exception as e:
         logger.error(f"Proxy plugin error: {e}", exc_info=True)
@@ -141,4 +145,6 @@ def run(system_prompt: str, initial_query: str, client, model: str,
                 {"role": "user", "content": initial_query}
             ]
         )
-        return response.choices[0].message.content, response.usage.completion_tokens
+        # Return full response dict to preserve all usage information
+        response_dict = response.model_dump() if hasattr(response, 'model_dump') else response
+        return response_dict, 0
```
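For background on the `model_dump()` guard repeated in each hunk, here is a minimal, hypothetical sketch (not part of the commit, helper name invented): responses from openai>=1.0 clients are Pydantic v2 models that expose `model_dump()`, while a mocked or already-serialized backend may hand back a plain dict, so the plugin normalizes both cases.

```python
from typing import Any

def to_response_dict(response: Any) -> dict:
    """Hypothetical helper mirroring the pattern in the diff above.

    openai>=1.0 response objects are Pydantic v2 models, so model_dump()
    serializes them, including the full usage block, to a plain dict.
    A backend that already returned a dict passes through unchanged.
    """
    return response.model_dump() if hasattr(response, "model_dump") else response
```

Returning the whole dict instead of `(content, completion_tokens)` keeps fields such as `prompt_tokens` and `total_tokens` available downstream, which is why the second tuple element becomes a placeholder `0`.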

optillm/server.py (16 additions, 0 deletions)
```diff
@@ -788,6 +788,22 @@ def proxy():

         # Handle non-none approaches with n attempts
         response, completion_tokens = execute_n_times(n, approaches, operation, system_prompt, initial_query, client, model, request_config, request_id)
+
+        # Check if the response is a full dict (like from proxy plugin or none approach)
+        if operation == 'SINGLE' and isinstance(response, dict) and 'choices' in response and 'usage' in response:
+            # This is a full response dict, return it directly
+            if conversation_logger and request_id:
+                conversation_logger.log_final_response(request_id, response)
+                conversation_logger.finalize_conversation(request_id)
+
+            if stream:
+                if request_id:
+                    logger.info(f'Request {request_id}: Completed (streaming response)')
+                return Response(generate_streaming_response(extract_contents(response), model), content_type='text/event-stream')
+            else:
+                if request_id:
+                    logger.info(f'Request {request_id}: Completed')
+                return jsonify(response), 200

     except Exception as e:
         # Log error to conversation logger if enabled
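To illustrate the new SINGLE-operation branch, a self-contained sketch under assumptions: only the `choices`/`usage` detection heuristic is taken from the diff; the helper name and sample payload are invented.

```python
def is_full_response_dict(response) -> bool:
    # Same heuristic as the new branch in proxy(): a plugin that already
    # built a complete chat-completion payload is returned to the client
    # as-is, so its usage block survives intact.
    return isinstance(response, dict) and "choices" in response and "usage" in response

# Invented example payload, shaped like what the proxy plugin now returns:
sample = {
    "choices": [{"message": {"role": "assistant", "content": "Hello!"}}],
    "usage": {"prompt_tokens": 12, "completion_tokens": 2, "total_tokens": 14},
}

assert is_full_response_dict(sample)        # passed through via jsonify(response)
assert not is_full_response_dict("Hello!")  # plain strings keep the old path
```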
