diff --git a/litellm/llms/ollama/chat/transformation.py b/litellm/llms/ollama/chat/transformation.py
index 3b755e79330c..f8d2643f5d5b 100644
--- a/litellm/llms/ollama/chat/transformation.py
+++ b/litellm/llms/ollama/chat/transformation.py
@@ -476,7 +476,7 @@ def _is_function_call_complete(self, function_args: Union[str, dict]) -> bool:
except Exception:
return False
- def chunk_parser(self, chunk: dict) -> ModelResponseStream:
+ def chunk_parser(self, chunk: dict) -> ModelResponseStream: # noqa: PLR0915
try:
"""
Expected chunk format:
@@ -521,8 +521,8 @@ def chunk_parser(self, chunk: dict) -> ModelResponseStream:
tool_call["id"] = str(uuid.uuid4())
# PROCESS REASONING CONTENT
- reasoning_content: Optional[str] = None
- content: Optional[str] = None
+ reasoning_content: str = ""
+ content: str = ""
if chunk["message"].get("thinking") is not None:
if self.started_reasoning_content is False:
reasoning_content = chunk["message"].get("thinking")
@@ -532,21 +532,39 @@ def chunk_parser(self, chunk: dict) -> ModelResponseStream:
self.finished_reasoning_content = True
elif chunk["message"].get("content") is not None:
message_content = chunk["message"].get("content")
- if "" in message_content:
- message_content = message_content.replace("", "")
+ # Check if both tags are present (complete reasoning block in single chunk)
+ has_start_tag = "<think>" in message_content
+ has_end_tag = "</think>" in message_content
+
+ if has_start_tag and has_end_tag:
+ # Complete reasoning block in single chunk
+ import re
+ # Extract reasoning content
+ match = re.search(r'<think>(.*?)</think>', message_content)
+ if match:
+ reasoning_content = match.group(1)
+ # Remove think tags and normalize whitespace
+ content = re.sub(r'\s*<think>.*?</think>\s*', ' ', message_content).strip()
self.started_reasoning_content = True
-
- if "" in message_content and self.started_reasoning_content:
- message_content = message_content.replace("", "")
self.finished_reasoning_content = True
-
- if (
- self.started_reasoning_content
- and not self.finished_reasoning_content
- ):
+ elif has_start_tag:
+ # Start of reasoning - split content before and after
+ parts = message_content.split("<think>", 1)
+ content = parts[0] if len(parts) > 0 else ""
+ reasoning_content = parts[1] if len(parts) > 1 else ""
+ self.started_reasoning_content = True
+ elif has_end_tag and self.started_reasoning_content:
+ # End of reasoning - split content before and after
+ parts = message_content.split("</think>", 1)
+ reasoning_content = parts[0] if len(parts) > 0 else ""
+ content = parts[1] if len(parts) > 1 else ""
+ self.finished_reasoning_content = True
+ elif self.started_reasoning_content and not self.finished_reasoning_content:
+ # Middle of reasoning
reasoning_content = message_content
else:
+ # Regular content
content = message_content
delta = Delta(
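
The four branches above form a small state machine over the `<think>` markers. As a reviewer aid, here is a minimal standalone sketch of the same splitting logic; `split_think_chunk` and its tuple return are illustrative only, not the real iterator, which keeps the two flags on `self`:

```python
import re
from typing import Tuple

def split_think_chunk(text: str, started: bool, finished: bool) -> Tuple[str, str, bool, bool]:
    """Return (content, reasoning, started, finished) for one streamed chunk."""
    has_start = "<think>" in text
    has_end = "</think>" in text
    if has_start and has_end:
        # Complete <think>...</think> block in a single chunk.
        match = re.search(r'<think>(.*?)</think>', text)
        reasoning = match.group(1) if match else ""
        content = re.sub(r'\s*<think>.*?</think>\s*', ' ', text).strip()
        return content, reasoning, True, True
    if has_start:
        # Start of reasoning: text before the tag is content, after is reasoning.
        before, after = text.split("<think>", 1)
        return before, after, True, finished
    if has_end and started:
        # End of reasoning: text before the tag is reasoning, after is content.
        before, after = text.split("</think>", 1)
        return after, before, started, True
    if started and not finished:
        # Middle of reasoning: everything is reasoning.
        return "", text, started, finished
    # Regular content outside any reasoning block.
    return text, "", started, finished

# e.g. split_think_chunk("world <think>I'm thinking", False, False)
# -> ("world ", "I'm thinking", True, False)
```
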
diff --git a/tests/test_litellm/llms/ollama/test_ollama_chunk_parser.py b/tests/test_litellm/llms/ollama/test_ollama_chunk_parser.py
new file mode 100644
index 000000000000..72d589473e3e
--- /dev/null
+++ b/tests/test_litellm/llms/ollama/test_ollama_chunk_parser.py
@@ -0,0 +1,144 @@
+import pytest
+from unittest.mock import patch
+from litellm.llms.ollama.chat.transformation import OllamaChatCompletionResponseIterator
+from litellm.types.utils import Delta, StreamingChoices
+
+# Lightweight stand-in for ModelResponseStream, used where tests patch construct_empty_chunk
+class MockModelResponseStream:
+ def __init__(self, choices, model, object_type, system_fingerprint, usage=None, **kwargs):
+ self.choices = choices
+ self.model = model
+ self.object = object_type
+ self.system_fingerprint = system_fingerprint
+ self.usage = usage
+ for key, value in kwargs.items():
+ setattr(self, key, value)
+
+@pytest.fixture
+def mock_iterator():
+ """Fixture to create a mock OllamaChatCompletionResponseIterator."""
+ iterator = OllamaChatCompletionResponseIterator(
+ streaming_response=iter([]),
+ sync_stream=True,
+ json_mode=False
+ )
+ return iterator
+
+def test_full_think_block_in_one_chunk(mock_iterator):
+ """Test case where a complete ... block is in a single chunk."""
+ chunk = {"message": {"content": "This is a thought."}, "done": False, "model": "test-model"}
+ with patch("litellm.llms.ollama.chat.transformation.uuid.uuid4", return_value="1234"):
+ result = mock_iterator.chunk_parser(chunk)
+ assert result.choices[0].delta.content == ""
+ assert result.choices[0].delta.reasoning_content == "This is a thought."
+ assert mock_iterator.started_reasoning_content
+ assert mock_iterator.finished_reasoning_content
+
+def test_think_tags_split_across_chunks(mock_iterator):
+ """Test case where and tags are in separate chunks."""
+ chunk1 = {"message": {"content": "This is a thought."}, "done": False, "model": "test-model"}
+ chunk2 = {"message": {"content": " And it continues."}, "done": True, "model": "test-model"}
+
+ with patch("litellm.llms.ollama.chat.transformation.uuid.uuid4", return_value="1234"):
+ result1 = mock_iterator.chunk_parser(chunk1)
+ assert result1.choices[0].delta.reasoning_content == "This is a thought."
+ assert mock_iterator.started_reasoning_content
+ assert not mock_iterator.finished_reasoning_content
+
+ result2 = mock_iterator.chunk_parser(chunk2)
+ assert result2.choices[0].delta.reasoning_content == " And it continues."
+ assert mock_iterator.started_reasoning_content
+ assert mock_iterator.finished_reasoning_content
+
+def test_content_before_and_after_think_tag(mock_iterator):
+ """Test case where there is content before and after the ... block"""
+ chunk = {"message": {"content": "Here is a preamble. This is a thought. Here is a postamble."}, "done": True, "model": "test-model"}
+
+ with patch("litellm.llms.ollama.chat.transformation.uuid.uuid4", return_value="1234"):
+ result = mock_iterator.chunk_parser(chunk)
+
+ assert result.choices[0].delta.content == "Here is a preamble. Here is a postamble."
+ assert result.choices[0].delta.reasoning_content == "This is a thought."
+ assert mock_iterator.started_reasoning_content
+ assert mock_iterator.finished_reasoning_content
+
+@patch('litellm.llms.ollama.chat.transformation.OllamaChatCompletionResponseIterator.construct_empty_chunk', create=True)
+def test_whitespace_chunks(mock_construct_empty_chunk, mock_iterator):
+ """Test case where chunks contain only whitespace."""
+ mock_construct_empty_chunk.return_value = MockModelResponseStream(
+ choices=[StreamingChoices(index=0, delta=Delta(content="", reasoning_content=None, role="assistant", tool_calls=None), finish_reason=None)],
+ model="test-model",
+ object_type="chat.completion.chunk",
+ system_fingerprint=None
+ )
+ chunk1 = {"message": {"content": " "}, "done": False, "model": "test-model"}
+ chunk2 = {"message": {"content": "\n\n"}, "done": True, "model": "test-model"}
+
+ result1 = mock_iterator.chunk_parser(chunk1)
+ assert result1.choices[0].delta.content == " "
+ assert result1.choices[0].delta.reasoning_content == ""
+
+ result2 = mock_iterator.chunk_parser(chunk2)
+ assert result2.choices[0].delta.content == "\n\n"
+ assert result2.choices[0].delta.reasoning_content == ""
+
+def test_content_before_think_tag(mock_iterator):
+ """Test case where there is regular content before the tag in the same chunk."""
+ chunk = {"message": {"content": "Regular content starting thought"}, "done": False, "model": "test-model"}
+
+ with patch("litellm.llms.ollama.chat.transformation.uuid.uuid4", return_value="1234"):
+ result = mock_iterator.chunk_parser(chunk)
+
+ assert result.choices[0].delta.content == "Regular content "
+ assert result.choices[0].delta.reasoning_content == "starting thought"
+ assert mock_iterator.started_reasoning_content
+ assert not mock_iterator.finished_reasoning_content
+
+def test_content_after_think_end_tag(mock_iterator):
+ """Test case where there is regular content after the tag in the same chunk."""
+ # First start the reasoning
+ chunk1 = {"message": {"content": "This is a thought"}, "done": False, "model": "test-model"}
+ with patch("litellm.llms.ollama.chat.transformation.uuid.uuid4", return_value="1234"):
+ mock_iterator.chunk_parser(chunk1)
+
+ # Then end it with content after
+ chunk2 = {"message": {"content": " continued. More regular content"}, "done": True, "model": "test-model"}
+ with patch("litellm.llms.ollama.chat.transformation.uuid.uuid4", return_value="1234"):
+ result = mock_iterator.chunk_parser(chunk2)
+
+ assert result.choices[0].delta.reasoning_content == " continued."
+ assert result.choices[0].delta.content == " More regular content"
+ assert mock_iterator.started_reasoning_content
+ assert mock_iterator.finished_reasoning_content
+
+def test_mixed_content_across_multiple_chunks(mock_iterator):
+ """Test case with mixed content and reasoning across multiple chunks."""
+ chunk1 = {"message": {"content": "Hello "}, "done": False, "model": "test-model"}
+ chunk2 = {"message": {"content": "world I'm thinking"}, "done": False, "model": "test-model"}
+ chunk3 = {"message": {"content": " about this and "}, "done": False, "model": "test-model"}
+ chunk4 = {"message": {"content": "continuing."}, "done": True, "model": "test-model"}
+
+ with patch("litellm.llms.ollama.chat.transformation.uuid.uuid4", return_value="1234"):
+ # Chunk 1: Regular content before any reasoning
+ result1 = mock_iterator.chunk_parser(chunk1)
+ assert result1.choices[0].delta.content == "Hello "
+ assert result1.choices[0].delta.reasoning_content == ""
+ assert not mock_iterator.started_reasoning_content
+
+ # Chunk 2: Content before and start of reasoning
+ result2 = mock_iterator.chunk_parser(chunk2)
+ assert result2.choices[0].delta.content == "world "
+ assert result2.choices[0].delta.reasoning_content == "I'm thinking"
+ assert mock_iterator.started_reasoning_content
+ assert not mock_iterator.finished_reasoning_content
+
+ # Chunk 3: End of reasoning and content after
+ result3 = mock_iterator.chunk_parser(chunk3)
+ assert result3.choices[0].delta.reasoning_content == " about this"
+ assert result3.choices[0].delta.content == " and "
+ assert mock_iterator.finished_reasoning_content
+
+ # Chunk 4: Regular content after reasoning finished
+ result4 = mock_iterator.chunk_parser(chunk4)
+ assert result4.choices[0].delta.content == "continuing."
+ assert result4.choices[0].delta.reasoning_content == ""
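
End to end, the parser can be driven the same way the tests do. A minimal sketch, assuming the import path used above and the same minimal chunk shape the tests rely on:

```python
from litellm.llms.ollama.chat.transformation import OllamaChatCompletionResponseIterator

chunks = [
    {"message": {"content": "Hello <think>pondering"}, "done": False, "model": "test-model"},
    {"message": {"content": " deeply</think> world"}, "done": True, "model": "test-model"},
]

iterator = OllamaChatCompletionResponseIterator(
    streaming_response=iter([]), sync_stream=True, json_mode=False
)

# Accumulate the two streams separately, as a streaming client would.
content, reasoning = "", ""
for chunk in chunks:
    delta = iterator.chunk_parser(chunk).choices[0].delta
    content += delta.content or ""
    reasoning += delta.reasoning_content or ""

print(content)    # "Hello  world"
print(reasoning)  # "pondering deeply"
```
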