Skip to content

Commit 2ed6dbe

Browse files
committed
fix ollama regression
1 parent 554ec53 commit 2ed6dbe

File tree

1 file changed

+114
-9
lines changed


chatmock/routes_ollama.py

Lines changed: 114 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,11 @@
11
from __future__ import annotations
22

33
import json
4+
import datetime
45
import time
56
from typing import Any, Dict, List
67

7-
from flask import Blueprint, Response, current_app, jsonify, make_response, request
8+
from flask import Blueprint, Response, current_app, jsonify, make_response, request, stream_with_context
89

910
from .config import BASE_INSTRUCTIONS
1011
from .http import build_cors_headers
@@ -160,7 +161,8 @@ def ollama_chat() -> Response:
160161
upstream.status_code,
161162
)
162163

163-
created_at = str(int(time.time() * 1000))
164+
created_at = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ")
165+
model_out = normalize_model_name(model)
164166

165167
if stream_req:
166168
def _gen():
@@ -169,6 +171,7 @@ def _gen():
169171
think_closed = False
170172
saw_any_summary = False
171173
pending_summary_paragraph = False
174+
full_parts: List[str] = []
172175
try:
173176
for raw_line in upstream.iter_lines(decode_unicode=False):
174177
if not raw_line:
@@ -196,31 +199,134 @@ def _gen():
196199
delta_txt = evt.get("delta") or ""
197200
if compat == "o3":
198201
if kind == "response.reasoning_summary_text.delta" and pending_summary_paragraph:
199-
yield json.dumps({"message": {"role": "assistant", "content": "\n"}}) + "\n"
202+
yield (
203+
json.dumps(
204+
{
205+
"model": model_out,
206+
"created_at": created_at,
207+
"message": {"role": "assistant", "content": "\n"},
208+
"done": False,
209+
}
210+
)
211+
+ "\n"
212+
)
213+
full_parts.append("\n")
200214
pending_summary_paragraph = False
215+
if delta_txt:
216+
yield (
217+
json.dumps(
218+
{
219+
"model": model_out,
220+
"created_at": created_at,
221+
"message": {"role": "assistant", "content": delta_txt},
222+
"done": False,
223+
}
224+
)
225+
+ "\n"
226+
)
227+
full_parts.append(delta_txt)
201228
elif compat == "think-tags":
202229
if not think_open and not think_closed:
203-
yield json.dumps({"message": {"role": "assistant", "content": "<think>"}}) + "\n"
230+
yield (
231+
json.dumps(
232+
{
233+
"model": model_out,
234+
"created_at": created_at,
235+
"message": {"role": "assistant", "content": "<think>"},
236+
"done": False,
237+
}
238+
)
239+
+ "\n"
240+
)
241+
full_parts.append("<think>")
204242
think_open = True
205243
if think_open and not think_closed:
206244
if kind == "response.reasoning_summary_text.delta" and pending_summary_paragraph:
207-
yield json.dumps({"message": {"role": "assistant", "content": "\n"}}) + "\n"
245+
yield (
246+
json.dumps(
247+
{
248+
"model": model_out,
249+
"created_at": created_at,
250+
"message": {"role": "assistant", "content": "\n"},
251+
"done": False,
252+
}
253+
)
254+
+ "\n"
255+
)
256+
full_parts.append("\n")
208257
pending_summary_paragraph = False
258+
if delta_txt:
259+
yield (
260+
json.dumps(
261+
{
262+
"model": model_out,
263+
"created_at": created_at,
264+
"message": {"role": "assistant", "content": delta_txt},
265+
"done": False,
266+
}
267+
)
268+
+ "\n"
269+
)
270+
full_parts.append(delta_txt)
209271
else:
210272
pass
211273
elif kind == "response.output_text.delta":
212274
delta = evt.get("delta") or ""
213275
if compat == "think-tags" and think_open and not think_closed:
214-
yield json.dumps({"message": {"role": "assistant", "content": "</think>"}}) + "\n"
276+
yield (
277+
json.dumps(
278+
{
279+
"model": model_out,
280+
"created_at": created_at,
281+
"message": {"role": "assistant", "content": "</think>"},
282+
"done": False,
283+
}
284+
)
285+
+ "\n"
286+
)
287+
full_parts.append("</think>")
215288
think_open = False
216289
think_closed = True
217-
yield json.dumps({"message": {"role": "assistant", "content": delta}}) + "\n"
290+
if delta:
291+
yield (
292+
json.dumps(
293+
{
294+
"model": model_out,
295+
"created_at": created_at,
296+
"message": {"role": "assistant", "content": delta},
297+
"done": False,
298+
}
299+
)
300+
+ "\n"
301+
)
302+
full_parts.append(delta)
218303
elif kind == "response.completed":
219304
break
220305
finally:
221306
upstream.close()
307+
if compat == "think-tags" and think_open and not think_closed:
308+
yield (
309+
json.dumps(
310+
{
311+
"model": model_out,
312+
"created_at": created_at,
313+
"message": {"role": "assistant", "content": "</think>"},
314+
"done": False,
315+
}
316+
)
317+
+ "\n"
318+
)
319+
full_parts.append("</think>")
320+
done_obj = {
321+
"model": model_out,
322+
"created_at": created_at,
323+
"message": {"role": "assistant", "content": "".join(full_parts)},
324+
"done": True,
325+
}
326+
done_obj.update(_OLLAMA_FAKE_EVAL)
327+
yield json.dumps(done_obj) + "\n"
222328
resp = current_app.response_class(
223-
_gen(),
329+
stream_with_context(_gen()),
224330
status=200,
225331
mimetype="application/x-ndjson",
226332
)
@@ -296,4 +402,3 @@ def _gen():
296402
for k, v in build_cors_headers().items():
297403
resp.headers.setdefault(k, v)
298404
return resp
299-

0 commit comments

Comments
 (0)