Skip to content

Commit e6429f6

Browse files
feat(ollama_chat/transformation.py): handle thinking content on streaming for ollama chat models
Parse the output correctly into 'reasoning_content'
1 parent 90bd89c commit e6429f6

File tree

2 files changed

+45
-13
lines changed

2 files changed

+45
-13
lines changed

litellm/llms/ollama/chat/transformation.py

Lines changed: 45 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -437,6 +437,9 @@ def get_model_response_iterator(
437437

438438

439439
class OllamaChatCompletionResponseIterator(BaseModelResponseIterator):
440+
started_reasoning_content: bool = False
441+
finished_reasoning_content: bool = False
442+
440443
def _is_function_call_complete(self, function_args: Union[str, dict]) -> bool:
441444
if isinstance(function_args, dict):
442445
return True
@@ -490,8 +493,49 @@ def chunk_parser(self, chunk: dict) -> ModelResponseStream:
490493
if is_function_call_complete:
491494
tool_call["id"] = str(uuid.uuid4())
492495

496+
# PROCESS REASONING CONTENT
497+
reasoning_content: Optional[str] = None
498+
content: Optional[str] = None
499+
if chunk["message"].get("thinking") is not None:
500+
if self.started_reasoning_content is False:
501+
reasoning_content = chunk["message"].get("thinking")
502+
self.started_reasoning_content = True
503+
elif self.finished_reasoning_content is False:
504+
reasoning_content = chunk["message"].get("thinking")
505+
self.finished_reasoning_content = True
506+
elif chunk["message"].get("content") is not None:
507+
if "<think>" in chunk["message"].get("content"):
508+
reasoning_content = (
509+
chunk["message"].get("content").replace("<think>", "")
510+
)
511+
512+
self.started_reasoning_content = True
513+
514+
if (
515+
"</think>" in chunk["message"].get("content")
516+
and self.started_reasoning_content
517+
):
518+
reasoning_content = chunk["message"].get("content")
519+
remaining_content = (
520+
chunk["message"].get("content").split("</think>")
521+
)
522+
if len(remaining_content) > 1:
523+
content = remaining_content[1]
524+
self.finished_reasoning_content = True
525+
526+
if (
527+
self.started_reasoning_content is True
528+
and self.finished_reasoning_content is False
529+
):
530+
reasoning_content = (
531+
chunk["message"].get("content").replace("<think>", "")
532+
)
533+
else:
534+
content = chunk["message"].get("content")
535+
493536
delta = Delta(
494-
content=chunk["message"].get("content", ""),
537+
content=content,
538+
reasoning_content=reasoning_content,
495539
tool_calls=tool_calls,
496540
)
497541

litellm/proxy/_new_secret_config.yaml

Lines changed: 0 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -21,15 +21,3 @@ model_list:
2121

2222
router_settings:
2323
model_group_alias: {"my-fake-gpt-4": "fake-openai-endpoint"}
24-
25-
litellm_settings:
26-
callbacks: ["otel"]
27-
cache: true
28-
cache_params:
29-
type: redis
30-
ttl: 600
31-
supported_call_types: ["acompletion", "completion"]
32-
33-
model_group_settings:
34-
forward_client_headers_to_llm_api:
35-
- fake-openai-endpoint

0 commit comments

Comments (0)