11from __future__ import annotations
22
33import json
4+ import datetime
45import time
56from typing import Any , Dict , List
67
7- from flask import Blueprint , Response , current_app , jsonify , make_response , request
8+ from flask import Blueprint , Response , current_app , jsonify , make_response , request , stream_with_context
89
910from .config import BASE_INSTRUCTIONS
1011from .http import build_cors_headers
@@ -160,7 +161,8 @@ def ollama_chat() -> Response:
160161 upstream .status_code ,
161162 )
162163
163- created_at = str (int (time .time () * 1000 ))
164+ created_at = datetime .datetime .utcnow ().strftime ("%Y-%m-%dT%H:%M:%SZ" )
165+ model_out = normalize_model_name (model )
164166
165167 if stream_req :
166168 def _gen ():
@@ -169,6 +171,7 @@ def _gen():
169171 think_closed = False
170172 saw_any_summary = False
171173 pending_summary_paragraph = False
174+ full_parts : List [str ] = []
172175 try :
173176 for raw_line in upstream .iter_lines (decode_unicode = False ):
174177 if not raw_line :
@@ -196,31 +199,134 @@ def _gen():
196199 delta_txt = evt .get ("delta" ) or ""
197200 if compat == "o3" :
198201 if kind == "response.reasoning_summary_text.delta" and pending_summary_paragraph :
199- yield json .dumps ({"message" : {"role" : "assistant" , "content" : "\n " }}) + "\n "
202+ yield (
203+ json .dumps (
204+ {
205+ "model" : model_out ,
206+ "created_at" : created_at ,
207+ "message" : {"role" : "assistant" , "content" : "\n " },
208+ "done" : False ,
209+ }
210+ )
211+ + "\n "
212+ )
213+ full_parts .append ("\n " )
200214 pending_summary_paragraph = False
215+ if delta_txt :
216+ yield (
217+ json .dumps (
218+ {
219+ "model" : model_out ,
220+ "created_at" : created_at ,
221+ "message" : {"role" : "assistant" , "content" : delta_txt },
222+ "done" : False ,
223+ }
224+ )
225+ + "\n "
226+ )
227+ full_parts .append (delta_txt )
201228 elif compat == "think-tags" :
202229 if not think_open and not think_closed :
203- yield json .dumps ({"message" : {"role" : "assistant" , "content" : "<think>" }}) + "\n "
230+ yield (
231+ json .dumps (
232+ {
233+ "model" : model_out ,
234+ "created_at" : created_at ,
235+ "message" : {"role" : "assistant" , "content" : "<think>" },
236+ "done" : False ,
237+ }
238+ )
239+ + "\n "
240+ )
241+ full_parts .append ("<think>" )
204242 think_open = True
205243 if think_open and not think_closed :
206244 if kind == "response.reasoning_summary_text.delta" and pending_summary_paragraph :
207- yield json .dumps ({"message" : {"role" : "assistant" , "content" : "\n " }}) + "\n "
245+ yield (
246+ json .dumps (
247+ {
248+ "model" : model_out ,
249+ "created_at" : created_at ,
250+ "message" : {"role" : "assistant" , "content" : "\n " },
251+ "done" : False ,
252+ }
253+ )
254+ + "\n "
255+ )
256+ full_parts .append ("\n " )
208257 pending_summary_paragraph = False
258+ if delta_txt :
259+ yield (
260+ json .dumps (
261+ {
262+ "model" : model_out ,
263+ "created_at" : created_at ,
264+ "message" : {"role" : "assistant" , "content" : delta_txt },
265+ "done" : False ,
266+ }
267+ )
268+ + "\n "
269+ )
270+ full_parts .append (delta_txt )
209271 else :
210272 pass
211273 elif kind == "response.output_text.delta" :
212274 delta = evt .get ("delta" ) or ""
213275 if compat == "think-tags" and think_open and not think_closed :
214- yield json .dumps ({"message" : {"role" : "assistant" , "content" : "</think>" }}) + "\n "
276+ yield (
277+ json .dumps (
278+ {
279+ "model" : model_out ,
280+ "created_at" : created_at ,
281+ "message" : {"role" : "assistant" , "content" : "</think>" },
282+ "done" : False ,
283+ }
284+ )
285+ + "\n "
286+ )
287+ full_parts .append ("</think>" )
215288 think_open = False
216289 think_closed = True
217- yield json .dumps ({"message" : {"role" : "assistant" , "content" : delta }}) + "\n "
290+ if delta :
291+ yield (
292+ json .dumps (
293+ {
294+ "model" : model_out ,
295+ "created_at" : created_at ,
296+ "message" : {"role" : "assistant" , "content" : delta },
297+ "done" : False ,
298+ }
299+ )
300+ + "\n "
301+ )
302+ full_parts .append (delta )
218303 elif kind == "response.completed" :
219304 break
220305 finally :
221306 upstream .close ()
307+ if compat == "think-tags" and think_open and not think_closed :
308+ yield (
309+ json .dumps (
310+ {
311+ "model" : model_out ,
312+ "created_at" : created_at ,
313+ "message" : {"role" : "assistant" , "content" : "</think>" },
314+ "done" : False ,
315+ }
316+ )
317+ + "\n "
318+ )
319+ full_parts .append ("</think>" )
320+ done_obj = {
321+ "model" : model_out ,
322+ "created_at" : created_at ,
323+ "message" : {"role" : "assistant" , "content" : "" .join (full_parts )},
324+ "done" : True ,
325+ }
326+ done_obj .update (_OLLAMA_FAKE_EVAL )
327+ yield json .dumps (done_obj ) + "\n "
222328 resp = current_app .response_class (
223- _gen (),
329+ stream_with_context ( _gen () ),
224330 status = 200 ,
225331 mimetype = "application/x-ndjson" ,
226332 )
@@ -296,4 +402,3 @@ def _gen():
296402 for k , v in build_cors_headers ().items ():
297403 resp .headers .setdefault (k , v )
298404 return resp
299-
0 commit comments