Commit 359780b

Merge pull request #14121 from BerriAI/litellm_dev_08_31_2025_p1
VLLM - handle output parsing for Responses API output + Ollama - add unified 'thinking' param support (via `reasoning_content`)
2 parents 6d36219 + 82d6c4d commit 359780b
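For context, a minimal usage sketch of the Ollama side of this change, assuming a local Ollama server and a thinking-capable model (the model name `ollama_chat/deepseek-r1` and the prompt are placeholders, not part of this PR): passing `reasoning_effort` should switch on Ollama's `think` mode, and the model's thinking should come back on `reasoning_content`.

```python
# Hedged sketch of the new Ollama 'thinking' support; model name and prompt are
# placeholders, and the exact output depends on the local model.
import litellm

response = litellm.completion(
    model="ollama_chat/deepseek-r1",  # placeholder thinking-capable model
    messages=[{"role": "user", "content": "What is 17 * 23?"}],
    reasoning_effort="low",  # mapped to Ollama's `think` option by this PR
)

message = response.choices[0].message
print(message.reasoning_content)  # model's thinking, remapped from Ollama's 'thinking' field
print(message.content)            # final answer, with any <think> block stripped out
```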

File tree

7 files changed: +443, -61 lines changed

litellm/llms/ollama/chat/transformation.py

Lines changed: 64 additions & 6 deletions
@@ -137,6 +137,7 @@ def get_supported_openai_params(self, model: str):
             "tool_choice",
             "functions",
             "response_format",
+            "reasoning_effort",
         ]
 
     def map_openai_params(
@@ -175,6 +176,8 @@ def map_openai_params(
                 if value.get("json_schema") and value["json_schema"].get("schema"):
                     optional_params["format"] = value["json_schema"]["schema"]
             ### FUNCTION CALLING LOGIC ###
+            if param == "reasoning_effort" and value is not None:
+                optional_params["think"] = True
             if param == "tools":
                 ## CHECK IF MODEL SUPPORTS TOOL CALLING ##
                 try:
@@ -212,9 +215,9 @@ def map_openai_params(
                     litellm.add_function_to_prompt = (
                         True  # so that main.py adds the function call to the prompt
                     )
-                    optional_params[
-                        "functions_unsupported_model"
-                    ] = non_default_params.get("functions")
+                    optional_params["functions_unsupported_model"] = (
+                        non_default_params.get("functions")
+                    )
                 non_default_params.pop("tool_choice", None)  # causes ollama requests to hang
                 non_default_params.pop("functions", None)  # causes ollama requests to hang
         return optional_params
@@ -346,11 +349,31 @@ def transform_response(
 
         ## RESPONSE OBJECT
         model_response.choices[0].finish_reason = "stop"
+        response_json_message = response_json.get("message")
+        if response_json_message is not None:
+            if "thinking" in response_json_message:
+                # remap 'thinking' to 'reasoning_content'
+                response_json_message["reasoning_content"] = response_json_message[
+                    "thinking"
+                ]
+                del response_json_message["thinking"]
+            elif response_json_message.get("content") is not None:
+                # parse reasoning content from content
+                from litellm.litellm_core_utils.llm_response_utils.convert_dict_to_response import (
+                    _parse_content_for_reasoning,
+                )
+
+                reasoning_content, content = _parse_content_for_reasoning(
+                    response_json_message["content"]
+                )
+                response_json_message["reasoning_content"] = reasoning_content
+                response_json_message["content"] = content
+
         if (
             request_data.get("format", "") == "json"
             and litellm_params.get("function_name") is not None
         ):
-            function_call = json.loads(response_json["message"]["content"])
+            function_call = json.loads(response_json_message["content"])
             message = litellm.Message(
                 content=None,
                 tool_calls=[
@@ -367,11 +390,13 @@ def transform_response(
                         "type": "function",
                     }
                 ],
+                reasoning_content=response_json_message.get("reasoning_content"),
             )
             model_response.choices[0].message = message  # type: ignore
             model_response.choices[0].finish_reason = "tool_calls"
         else:
-            _message = litellm.Message(**response_json["message"])
+
+            _message = litellm.Message(**response_json_message)
             model_response.choices[0].message = _message  # type: ignore
         model_response.created = int(time.time())
         model_response.model = "ollama_chat/" + model
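The non-streaming path above leans on litellm's `_parse_content_for_reasoning` helper imported from `convert_dict_to_response.py`. The real implementation lives there; as a rough standalone sketch of the assumed behavior (split a leading `<think>...</think>` block into reasoning and the remaining content), not the actual library code:

```python
# Approximate stand-in for _parse_content_for_reasoning, for illustration only;
# the real litellm helper may differ in edge-case handling.
import re
from typing import Optional, Tuple

def parse_content_for_reasoning_sketch(text: Optional[str]) -> Tuple[Optional[str], Optional[str]]:
    if text is None:
        return None, None
    match = re.match(r"\s*<think>(.*?)</think>\s*(.*)", text, re.DOTALL)
    if match:
        # reasoning inside the tags, visible answer after them
        return match.group(1).strip(), match.group(2)
    return None, text

print(parse_content_for_reasoning_sketch("<think>thinking text</think>Final answer."))
# ('thinking text', 'Final answer.')
```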
@@ -412,6 +437,9 @@ def get_model_response_iterator(
 
 
 class OllamaChatCompletionResponseIterator(BaseModelResponseIterator):
+    started_reasoning_content: bool = False
+    finished_reasoning_content: bool = False
+
     def _is_function_call_complete(self, function_args: Union[str, dict]) -> bool:
         if isinstance(function_args, dict):
             return True
@@ -465,8 +493,38 @@ def chunk_parser(self, chunk: dict) -> ModelResponseStream:
             if is_function_call_complete:
                 tool_call["id"] = str(uuid.uuid4())
 
+        # PROCESS REASONING CONTENT
+        reasoning_content: Optional[str] = None
+        content: Optional[str] = None
+        if chunk["message"].get("thinking") is not None:
+            if self.started_reasoning_content is False:
+                reasoning_content = chunk["message"].get("thinking")
+                self.started_reasoning_content = True
+            elif self.finished_reasoning_content is False:
+                reasoning_content = chunk["message"].get("thinking")
+                self.finished_reasoning_content = True
+        elif chunk["message"].get("content") is not None:
+            message_content = chunk["message"].get("content")
+            if "<think>" in message_content:
+                message_content = message_content.replace("<think>", "")
+
+                self.started_reasoning_content = True
+
+            if "</think>" in message_content and self.started_reasoning_content:
+                message_content = message_content.replace("</think>", "")
+                self.finished_reasoning_content = True
+
+            if (
+                self.started_reasoning_content
+                and not self.finished_reasoning_content
+            ):
+                reasoning_content = message_content
+            else:
+                content = message_content
+
         delta = Delta(
-            content=chunk["message"].get("content", ""),
+            content=content,
+            reasoning_content=reasoning_content,
             tool_calls=tool_calls,
         )
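The streaming iterator keeps two booleans so that tokens between `<think>` and `</think>` flow to `reasoning_content`, while everything after the closing tag goes to `content`. A small simulation of that gating logic (the token stream below is invented for illustration; real Ollama chunks carry more fields):

```python
# Simulates the started/finished flag gating used by OllamaChatCompletionResponseIterator.
# The token stream is made up for illustration.
started, finished = False, False

def route(piece: str):
    global started, finished
    if "<think>" in piece:
        piece = piece.replace("<think>", "")
        started = True
    if "</think>" in piece and started:
        piece = piece.replace("</think>", "")
        finished = True
    field = "reasoning_content" if (started and not finished) else "content"
    return field, piece

for token in ["<think>Let me", " multiply.", "</think>", "17 * 23 = 391"]:
    print(route(token))
# ('reasoning_content', 'Let me')  ('reasoning_content', ' multiply.')
# ('content', '')                  ('content', '17 * 23 = 391')
```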

litellm/llms/ollama/completion/transformation.py

Lines changed: 83 additions & 19 deletions
@@ -19,13 +19,13 @@
 from litellm.secret_managers.main import get_secret_str
 from litellm.types.llms.openai import AllMessageValues, ChatCompletionUsageBlock
 from litellm.types.utils import (
+    Delta,
     GenericStreamingChunk,
     ModelInfoBase,
     ModelResponse,
     ModelResponseStream,
     ProviderField,
     StreamingChoices,
-    Delta,
 )
 
 from ..common_utils import OllamaError, _convert_image
@@ -92,9 +92,9 @@ class OllamaConfig(BaseConfig):
     repeat_penalty: Optional[float] = None
     temperature: Optional[float] = None
     seed: Optional[int] = None
-    stop: Optional[
-        list
-    ] = None  # stop is a list based on this - https://github.com/ollama/ollama/pull/442
+    stop: Optional[list] = (
+        None  # stop is a list based on this - https://github.com/ollama/ollama/pull/442
+    )
     tfs_z: Optional[float] = None
     num_predict: Optional[int] = None
     top_k: Optional[int] = None
@@ -154,6 +154,7 @@ def get_supported_openai_params(self, model: str):
             "stop",
             "response_format",
             "max_completion_tokens",
+            "reasoning_effort",
         ]
 
     def map_openai_params(
@@ -166,19 +167,21 @@ def map_openai_params(
         for param, value in non_default_params.items():
             if param == "max_tokens" or param == "max_completion_tokens":
                 optional_params["num_predict"] = value
-            if param == "stream":
+            elif param == "stream":
                 optional_params["stream"] = value
-            if param == "temperature":
+            elif param == "temperature":
                 optional_params["temperature"] = value
-            if param == "seed":
+            elif param == "seed":
                 optional_params["seed"] = value
-            if param == "top_p":
+            elif param == "top_p":
                 optional_params["top_p"] = value
-            if param == "frequency_penalty":
+            elif param == "frequency_penalty":
                 optional_params["frequency_penalty"] = value
-            if param == "stop":
+            elif param == "stop":
                 optional_params["stop"] = value
-            if param == "response_format" and isinstance(value, dict):
+            elif param == "reasoning_effort" and value is not None:
+                optional_params["think"] = True
+            elif param == "response_format" and isinstance(value, dict):
                 if value["type"] == "json_object":
                     optional_params["format"] = "json"
                 elif value["type"] == "json_schema":
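Restating the parameter mapping in isolation: with this change, OpenAI-style params are translated into Ollama options roughly as below. This is a simplified mirror of the elif chain for illustration, not the actual litellm method, and it only covers a couple of the handled params.

```python
# Simplified mirror of the param mapping touched in this hunk; real litellm handles
# many more cases (stop, response_format, drop_params, ...).
def map_params_sketch(non_default_params: dict) -> dict:
    optional_params: dict = {}
    for param, value in non_default_params.items():
        if param in ("max_tokens", "max_completion_tokens"):
            optional_params["num_predict"] = value
        elif param == "temperature":
            optional_params["temperature"] = value
        elif param == "reasoning_effort" and value is not None:
            # any non-null reasoning_effort just switches Ollama's think mode on
            optional_params["think"] = True
    return optional_params

print(map_params_sketch({"max_completion_tokens": 256, "reasoning_effort": "low"}))
# {'num_predict': 256, 'think': True}
```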
@@ -258,12 +261,17 @@ def transform_response(
         api_key: Optional[str] = None,
         json_mode: Optional[bool] = None,
     ) -> ModelResponse:
+        from litellm.litellm_core_utils.llm_response_utils.convert_dict_to_response import (
+            _parse_content_for_reasoning,
+        )
+
         response_json = raw_response.json()
         ## RESPONSE OBJECT
         model_response.choices[0].finish_reason = "stop"
         if request_data.get("format", "") == "json":
             # Check if response field exists and is not empty before parsing JSON
             response_text = response_json.get("response", "")
+
             if not response_text or not response_text.strip():
                 # Handle empty response gracefully - set empty content
                 message = litellm.Message(content="")
@@ -288,7 +296,9 @@ def transform_response(
                                 "id": f"call_{str(uuid.uuid4())}",
                                 "function": {
                                     "name": function_call["name"],
-                                    "arguments": json.dumps(function_call["arguments"]),
+                                    "arguments": json.dumps(
+                                        function_call["arguments"]
+                                    ),
                                 },
                                 "type": "function",
                             }
@@ -305,11 +315,26 @@ def transform_response(
                     model_response.choices[0].finish_reason = "stop"
             except json.JSONDecodeError:
                 # If JSON parsing fails, treat as regular text response
-                message = litellm.Message(content=response_text)
+                ## output parse reasoning content from response_text
+                reasoning_content: Optional[str] = None
+                content: Optional[str] = None
+                if response_text is not None:
+                    reasoning_content, content = _parse_content_for_reasoning(
+                        response_text
+                    )
+                message = litellm.Message(
+                    content=content, reasoning_content=reasoning_content
+                )
                 model_response.choices[0].message = message  # type: ignore
                 model_response.choices[0].finish_reason = "stop"
         else:
-            model_response.choices[0].message.content = response_json["response"]  # type: ignore
+            response_text = response_json.get("response", "")
+            content = None
+            reasoning_content = None
+            if response_text is not None:
+                reasoning_content, content = _parse_content_for_reasoning(response_text)
+            model_response.choices[0].message.content = content  # type: ignore
+            model_response.choices[0].message.reasoning_content = reasoning_content  # type: ignore
         model_response.created = int(time.time())
         model_response.model = "ollama/" + model
         _prompt = request_data.get("prompt", "")
@@ -434,12 +459,21 @@ def get_model_response_iterator(
 
 
 class OllamaTextCompletionResponseIterator(BaseModelResponseIterator):
+    def __init__(
+        self, streaming_response, sync_stream: bool, json_mode: Optional[bool] = False
+    ):
+        super().__init__(streaming_response, sync_stream, json_mode)
+        self.started_reasoning_content: bool = False
+        self.finished_reasoning_content: bool = False
+
     def _handle_string_chunk(
         self, str_line: str
     ) -> Union[GenericStreamingChunk, ModelResponseStream]:
         return self.chunk_parser(json.loads(str_line))
 
-    def chunk_parser(self, chunk: dict) -> Union[GenericStreamingChunk, ModelResponseStream]:
+    def chunk_parser(
+        self, chunk: dict
+    ) -> Union[GenericStreamingChunk, ModelResponseStream]:
         try:
             if "error" in chunk:
                 raise Exception(f"Ollama Error - {chunk}")
@@ -469,12 +503,42 @@ def chunk_parser(self, chunk: dict) -> Union[GenericStreamingChunk, ModelResponseStream]:
                 )
             elif chunk["response"]:
                 text = chunk["response"]
-                return GenericStreamingChunk(
-                    text=text,
-                    is_finished=is_finished,
-                    finish_reason="stop",
+                reasoning_content: Optional[str] = None
+                content: Optional[str] = None
+                if text is not None:
+                    if "<think>" in text:
+                        text = text.replace("<think>", "")
+                        self.started_reasoning_content = True
+                    elif "</think>" in text:
+                        text = text.replace("</think>", "")
+                        self.finished_reasoning_content = True
+
+                    if (
+                        self.started_reasoning_content
+                        and not self.finished_reasoning_content
+                    ):
+                        reasoning_content = text
+                    else:
+                        content = text
+
+                return ModelResponseStream(
+                    choices=[
+                        StreamingChoices(
+                            index=0,
+                            delta=Delta(
+                                reasoning_content=reasoning_content, content=content
+                            ),
+                        )
+                    ],
+                    finish_reason=finish_reason,
                     usage=None,
                 )
+                # return GenericStreamingChunk(
+                #     text=text,
+                #     is_finished=is_finished,
+                #     finish_reason="stop",
+                #     usage=None,
+                # )
             elif "thinking" in chunk and not chunk["response"]:
                 # Return reasoning content as ModelResponseStream so UIs can render it
                 thinking_content = chunk.get("thinking") or ""
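With the iterator now emitting `ModelResponseStream` deltas, a streaming caller can watch reasoning and answer tokens arrive separately. A hedged consumption sketch (the model name is a placeholder; whether a given delta actually carries `reasoning_content` depends on the local model and this PR's parsing):

```python
# Streaming sketch: collect reasoning vs. answer tokens from an Ollama model via litellm.
import litellm

stream = litellm.completion(
    model="ollama/deepseek-r1",  # placeholder thinking-capable model
    messages=[{"role": "user", "content": "Briefly: why is the sky blue?"}],
    reasoning_effort="low",
    stream=True,
)

reasoning_parts, answer_parts = [], []
for chunk in stream:
    delta = chunk.choices[0].delta
    if getattr(delta, "reasoning_content", None):
        reasoning_parts.append(delta.reasoning_content)
    if delta.content:
        answer_parts.append(delta.content)

print("reasoning:", "".join(reasoning_parts))
print("answer:", "".join(answer_parts))
```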
File renamed without changes.

litellm/proxy/_experimental/out/onboarding.html

Lines changed: 0 additions & 1 deletion
This file was deleted.
