fix ollama regression

RayBytes · RayBytes · commit 2ed6dbe2d4eb · 2025-08-19T21:26:47.000+05:00
diff --git a/chatmock/routes_ollama.py b/chatmock/routes_ollama.py
@@ -1,10 +1,11 @@
 from __future__ import annotations
 
 import json
+import datetime
 import time
 from typing import Any, Dict, List
 
-from flask import Blueprint, Response, current_app, jsonify, make_response, request
+from flask import Blueprint, Response, current_app, jsonify, make_response, request, stream_with_context
 
 from .config import BASE_INSTRUCTIONS
 from .http import build_cors_headers
@@ -160,7 +161,8 @@ def ollama_chat() -> Response:
             upstream.status_code,
         )
 
-    created_at = str(int(time.time() * 1000))
+    created_at = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ")
+    model_out = normalize_model_name(model)
 
     if stream_req:
         def _gen():
@@ -169,6 +171,7 @@ def _gen():
             think_closed = False
             saw_any_summary = False
             pending_summary_paragraph = False
+            full_parts: List[str] = []
             try:
                 for raw_line in upstream.iter_lines(decode_unicode=False):
                     if not raw_line:
@@ -196,31 +199,134 @@ def _gen():
                         delta_txt = evt.get("delta") or ""
                         if compat == "o3":
                             if kind == "response.reasoning_summary_text.delta" and pending_summary_paragraph:
-                                yield json.dumps({"message": {"role": "assistant", "content": "\n"}}) + "\n"
+                                yield (
+                                    json.dumps(
+                                        {
+                                            "model": model_out,
+                                            "created_at": created_at,
+                                            "message": {"role": "assistant", "content": "\n"},
+                                            "done": False,
+                                        }
+                                    )
+                                    + "\n"
+                                )
+                                full_parts.append("\n")
                                 pending_summary_paragraph = False
+                            if delta_txt:
+                                yield (
+                                    json.dumps(
+                                        {
+                                            "model": model_out,
+                                            "created_at": created_at,
+                                            "message": {"role": "assistant", "content": delta_txt},
+                                            "done": False,
+                                        }
+                                    )
+                                    + "\n"
+                                )
+                                full_parts.append(delta_txt)
                         elif compat == "think-tags":
                             if not think_open and not think_closed:
-                                yield json.dumps({"message": {"role": "assistant", "content": "<think>"}}) + "\n"
+                                yield (
+                                    json.dumps(
+                                        {
+                                            "model": model_out,
+                                            "created_at": created_at,
+                                            "message": {"role": "assistant", "content": "<think>"},
+                                            "done": False,
+                                        }
+                                    )
+                                    + "\n"
+                                )
+                                full_parts.append("<think>")
                                 think_open = True
                             if think_open and not think_closed:
                                 if kind == "response.reasoning_summary_text.delta" and pending_summary_paragraph:
-                                    yield json.dumps({"message": {"role": "assistant", "content": "\n"}}) + "\n"
+                                    yield (
+                                        json.dumps(
+                                            {
+                                                "model": model_out,
+                                                "created_at": created_at,
+                                                "message": {"role": "assistant", "content": "\n"},
+                                                "done": False,
+                                            }
+                                        )
+                                        + "\n"
+                                    )
+                                    full_parts.append("\n")
                                     pending_summary_paragraph = False
+                                if delta_txt:
+                                    yield (
+                                        json.dumps(
+                                            {
+                                                "model": model_out,
+                                                "created_at": created_at,
+                                                "message": {"role": "assistant", "content": delta_txt},
+                                                "done": False,
+                                            }
+                                        )
+                                        + "\n"
+                                    )
+                                    full_parts.append(delta_txt)
                         else:
                             pass
                     elif kind == "response.output_text.delta":
                         delta = evt.get("delta") or ""
                         if compat == "think-tags" and think_open and not think_closed:
-                            yield json.dumps({"message": {"role": "assistant", "content": "</think>"}}) + "\n"
+                            yield (
+                                json.dumps(
+                                    {
+                                        "model": model_out,
+                                        "created_at": created_at,
+                                        "message": {"role": "assistant", "content": "</think>"},
+                                        "done": False,
+                                    }
+                                )
+                                + "\n"
+                            )
+                            full_parts.append("</think>")
                             think_open = False
                             think_closed = True
-                        yield json.dumps({"message": {"role": "assistant", "content": delta}}) + "\n"
+                        if delta:
+                            yield (
+                                json.dumps(
+                                    {
+                                        "model": model_out,
+                                        "created_at": created_at,
+                                        "message": {"role": "assistant", "content": delta},
+                                        "done": False,
+                                    }
+                                )
+                                + "\n"
+                            )
+                            full_parts.append(delta)
                     elif kind == "response.completed":
                         break
             finally:
                 upstream.close()
+                if compat == "think-tags" and think_open and not think_closed:
+                    yield (
+                        json.dumps(
+                            {
+                                "model": model_out,
+                                "created_at": created_at,
+                                "message": {"role": "assistant", "content": "</think>"},
+                                "done": False,
+                            }
+                        )
+                        + "\n"
+                    )
+                    full_parts.append("</think>")
+                done_obj = {
+                    "model": model_out,
+                    "created_at": created_at,
+                    "message": {"role": "assistant", "content": "".join(full_parts)},
+                    "done": True,
+                }
+                done_obj.update(_OLLAMA_FAKE_EVAL)
+                yield json.dumps(done_obj) + "\n"
         resp = current_app.response_class(
-            _gen(),
+            stream_with_context(_gen()),
             status=200,
             mimetype="application/x-ndjson",
         )
@@ -296,4 +402,3 @@ def _gen():
     for k, v in build_cors_headers().items():
         resp.headers.setdefault(k, v)
     return resp
-