diff --git a/litellm/llms/ollama/chat/transformation.py b/litellm/llms/ollama/chat/transformation.py
index 3b755e79330c..f8d2643f5d5b 100644
--- a/litellm/llms/ollama/chat/transformation.py
+++ b/litellm/llms/ollama/chat/transformation.py
@@ -476,7 +476,7 @@ def _is_function_call_complete(self, function_args: Union[str, dict]) -> bool:
         except Exception:
             return False
 
-    def chunk_parser(self, chunk: dict) -> ModelResponseStream:
+    def chunk_parser(self, chunk: dict) -> ModelResponseStream:  # noqa: PLR0915
         try:
             """
             Expected chunk format:
@@ -521,8 +521,8 @@ def chunk_parser(self, chunk: dict) -> ModelResponseStream:
                     tool_call["id"] = str(uuid.uuid4())
 
             # PROCESS REASONING CONTENT
-            reasoning_content: Optional[str] = None
-            content: Optional[str] = None
+            reasoning_content: str = ""
+            content: str = ""
             if chunk["message"].get("thinking") is not None:
                 if self.started_reasoning_content is False:
                     reasoning_content = chunk["message"].get("thinking")
@@ -532,21 +532,39 @@ def chunk_parser(self, chunk: dict) -> ModelResponseStream:
                     self.finished_reasoning_content = True
             elif chunk["message"].get("content") is not None:
                 message_content = chunk["message"].get("content")
-                if "<think>" in message_content:
-                    message_content = message_content.replace("<think>", "")
+                # Check if both tags are present (complete reasoning block in single chunk)
+                has_start_tag = "<think>" in message_content
+                has_end_tag = "</think>" in message_content
+
+                if has_start_tag and has_end_tag:
+                    # Complete reasoning block in single chunk
+                    import re
+
+                    # Extract reasoning content
+                    match = re.search(r'<think>(.*?)</think>', message_content)
+                    if match:
+                        reasoning_content = match.group(1)
+                    # Remove think tags and normalize whitespace
+                    content = re.sub(r'\s*<think>.*?</think>\s*', ' ', message_content).strip()
                     self.started_reasoning_content = True
-
-                if "</think>" in message_content and self.started_reasoning_content:
-                    message_content = message_content.replace("</think>", "")
                     self.finished_reasoning_content = True
-
-                if (
-                    self.started_reasoning_content
-                    and not self.finished_reasoning_content
-                ):
+                elif has_start_tag:
+                    # Start of reasoning - split content before and after
+                    parts = message_content.split("<think>", 1)
+                    content = parts[0] if len(parts) > 0 else ""
+                    reasoning_content = parts[1] if len(parts) > 1 else ""
+                    self.started_reasoning_content = True
+                elif has_end_tag and self.started_reasoning_content:
+                    # End of reasoning - split content before and after
+                    parts = message_content.split("</think>", 1)
+                    reasoning_content = parts[0] if len(parts) > 0 else ""
+                    content = parts[1] if len(parts) > 1 else ""
+                    self.finished_reasoning_content = True
+                elif self.started_reasoning_content and not self.finished_reasoning_content:
+                    # Middle of reasoning
                     reasoning_content = message_content
                 else:
+                    # Regular content
                     content = message_content
 
             delta = Delta(
diff --git a/tests/test_litellm/llms/ollama/test_ollama_chunk_parser.py b/tests/test_litellm/llms/ollama/test_ollama_chunk_parser.py
new file mode 100644
index 000000000000..72d589473e3e
--- /dev/null
+++ b/tests/test_litellm/llms/ollama/test_ollama_chunk_parser.py
@@ -0,0 +1,144 @@
+import pytest
+from unittest.mock import patch
+from litellm.llms.ollama.chat.transformation import OllamaChatCompletionResponseIterator
+from litellm.types.utils import Delta, StreamingChoices
+
+# Mock the Delta and other necessary classes if they are not directly importable or need special setup
+class MockModelResponseStream:
+    def __init__(self, choices, model, object_type, system_fingerprint, usage=None, **kwargs):
+        self.choices = choices
+        self.model = model
+        self.object = object_type
+        self.system_fingerprint = system_fingerprint
+        self.usage = usage
+        for key, value in kwargs.items():
+            setattr(self, key, value)
+
+@pytest.fixture
+def mock_iterator():
+    """Fixture to create a mock OllamaChatCompletionResponseIterator."""
+    iterator = OllamaChatCompletionResponseIterator(
+        streaming_response=iter([]),
+        sync_stream=True,
+        json_mode=False,
+    )
+    return iterator
+
+def test_full_think_block_in_one_chunk(mock_iterator):
+    """Test case where a complete <think>...</think> block is in a single chunk."""
+    chunk = {"message": {"content": "<think>This is a thought.</think>"}, "done": False, "model": "test-model"}
+    with patch("litellm.llms.ollama.chat.transformation.uuid.uuid4", return_value="1234"):
+        result = mock_iterator.chunk_parser(chunk)
+    assert result.choices[0].delta.content == ""
+    assert result.choices[0].delta.reasoning_content == "This is a thought."
+    assert mock_iterator.started_reasoning_content
+    assert mock_iterator.finished_reasoning_content
+
+def test_think_tags_split_across_chunks(mock_iterator):
+    """Test case where <think> and </think> tags are in separate chunks."""
+    chunk1 = {"message": {"content": "<think>This is a thought."}, "done": False, "model": "test-model"}
+    chunk2 = {"message": {"content": " And it continues.</think>"}, "done": True, "model": "test-model"}
+
+    with patch("litellm.llms.ollama.chat.transformation.uuid.uuid4", return_value="1234"):
+        result1 = mock_iterator.chunk_parser(chunk1)
+        assert result1.choices[0].delta.reasoning_content == "This is a thought."
+        assert mock_iterator.started_reasoning_content
+        assert not mock_iterator.finished_reasoning_content
+
+        result2 = mock_iterator.chunk_parser(chunk2)
+        assert result2.choices[0].delta.reasoning_content == " And it continues."
+        assert mock_iterator.started_reasoning_content
+        assert mock_iterator.finished_reasoning_content
+
+def test_content_before_and_after_think_tag(mock_iterator):
+    """Test case where there is content before and after the <think>...</think> block."""
+    chunk = {"message": {"content": "Here is a preamble. <think>This is a thought.</think> Here is a postamble."}, "done": True, "model": "test-model"}
+
+    with patch("litellm.llms.ollama.chat.transformation.uuid.uuid4", return_value="1234"):
+        result = mock_iterator.chunk_parser(chunk)
+
+    assert result.choices[0].delta.content == "Here is a preamble. Here is a postamble."
+    assert result.choices[0].delta.reasoning_content == "This is a thought."
+    assert mock_iterator.started_reasoning_content
+    assert mock_iterator.finished_reasoning_content
+
+@patch('litellm.llms.ollama.chat.transformation.OllamaChatCompletionResponseIterator.construct_empty_chunk', create=True)
+def test_whitespace_chunks(mock_construct_empty_chunk, mock_iterator):
+    """Test case where chunks contain only whitespace."""
+    mock_construct_empty_chunk.return_value = MockModelResponseStream(
+        choices=[StreamingChoices(index=0, delta=Delta(content="", reasoning_content=None, role="assistant", tool_calls=None), finish_reason=None)],
+        model="test-model",
+        object_type="chat.completion.chunk",
+        system_fingerprint=None,
+    )
+    chunk1 = {"message": {"content": " "}, "done": False, "model": "test-model"}
+    chunk2 = {"message": {"content": "\n\n"}, "done": True, "model": "test-model"}
+
+    result1 = mock_iterator.chunk_parser(chunk1)
+    assert result1.choices[0].delta.content == " "
+    assert result1.choices[0].delta.reasoning_content == ""
+
+    result2 = mock_iterator.chunk_parser(chunk2)
+    assert result2.choices[0].delta.content == "\n\n"
+    assert result2.choices[0].delta.reasoning_content == ""
+
+def test_content_before_think_tag(mock_iterator):
+    """Test case where there is regular content before the <think> tag in the same chunk."""
+    chunk = {"message": {"content": "Regular content <think>starting thought"}, "done": False, "model": "test-model"}
+
+    with patch("litellm.llms.ollama.chat.transformation.uuid.uuid4", return_value="1234"):
+        result = mock_iterator.chunk_parser(chunk)
+
+    assert result.choices[0].delta.content == "Regular content "
+    assert result.choices[0].delta.reasoning_content == "starting thought"
+    assert mock_iterator.started_reasoning_content
+    assert not mock_iterator.finished_reasoning_content
+
+def test_content_after_think_end_tag(mock_iterator):
+    """Test case where there is regular content after the </think> tag in the same chunk."""
+    # First start the reasoning
+    chunk1 = {"message": {"content": "<think>This is a thought"}, "done": False, "model": "test-model"}
+    with patch("litellm.llms.ollama.chat.transformation.uuid.uuid4", return_value="1234"):
+        mock_iterator.chunk_parser(chunk1)
+
+    # Then end it with content after
+    chunk2 = {"message": {"content": " continued.</think> More regular content"}, "done": True, "model": "test-model"}
+    with patch("litellm.llms.ollama.chat.transformation.uuid.uuid4", return_value="1234"):
+        result = mock_iterator.chunk_parser(chunk2)
+
+    assert result.choices[0].delta.reasoning_content == " continued."
+    assert result.choices[0].delta.content == " More regular content"
+    assert mock_iterator.started_reasoning_content
+    assert mock_iterator.finished_reasoning_content
+
+def test_mixed_content_across_multiple_chunks(mock_iterator):
+    """Test case with mixed content and reasoning across multiple chunks."""
+    chunk1 = {"message": {"content": "Hello "}, "done": False, "model": "test-model"}
+    chunk2 = {"message": {"content": "world <think>I'm thinking"}, "done": False, "model": "test-model"}
+    chunk3 = {"message": {"content": " about this</think> and "}, "done": False, "model": "test-model"}
+    chunk4 = {"message": {"content": "continuing."}, "done": True, "model": "test-model"}
+
+    with patch("litellm.llms.ollama.chat.transformation.uuid.uuid4", return_value="1234"):
+        # Chunk 1: Regular content before any reasoning
+        result1 = mock_iterator.chunk_parser(chunk1)
+        assert result1.choices[0].delta.content == "Hello "
+        assert result1.choices[0].delta.reasoning_content == ""
+        assert not mock_iterator.started_reasoning_content
+
+        # Chunk 2: Content before and start of reasoning
+        result2 = mock_iterator.chunk_parser(chunk2)
+        assert result2.choices[0].delta.content == "world "
+        assert result2.choices[0].delta.reasoning_content == "I'm thinking"
+        assert mock_iterator.started_reasoning_content
+        assert not mock_iterator.finished_reasoning_content
+
+        # Chunk 3: End of reasoning and content after
+        result3 = mock_iterator.chunk_parser(chunk3)
+        assert result3.choices[0].delta.reasoning_content == " about this"
+        assert result3.choices[0].delta.content == " and "
+        assert mock_iterator.finished_reasoning_content
+
+        # Chunk 4: Regular content after reasoning finished
+        result4 = mock_iterator.chunk_parser(chunk4)
+        assert result4.choices[0].delta.content == "continuing."
+        assert result4.choices[0].delta.reasoning_content == ""
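For reference, here is a minimal standalone sketch of the four-way tag state machine this patch adds to `chunk_parser`, isolated from the iterator plumbing so its behavior on a simulated stream is easy to trace. The `ThinkTagSplitter` class and the driver loop are illustrative only and are not part of the patch; the branch logic mirrors the diff above.

```python
import re
from typing import Tuple


class ThinkTagSplitter:
    """Illustrative sketch of the per-chunk <think> tag state machine (not in the patch)."""

    def __init__(self) -> None:
        self.started = False   # set once "<think>" has been seen
        self.finished = False  # set once "</think>" has been seen

    def split(self, message_content: str) -> Tuple[str, str]:
        """Return (content, reasoning_content) for a single streamed chunk."""
        content, reasoning = "", ""
        has_start = "<think>" in message_content
        has_end = "</think>" in message_content

        if has_start and has_end:
            # Complete reasoning block in a single chunk
            match = re.search(r"<think>(.*?)</think>", message_content)
            if match:
                reasoning = match.group(1)
            # Drop the tagged block and normalize surrounding whitespace
            content = re.sub(r"\s*<think>.*?</think>\s*", " ", message_content).strip()
            self.started = self.finished = True
        elif has_start:
            # Start of reasoning: text before the tag is content, after it is reasoning
            content, reasoning = message_content.split("<think>", 1)
            self.started = True
        elif has_end and self.started:
            # End of reasoning: text before the tag is reasoning, after it is content
            reasoning, content = message_content.split("</think>", 1)
            self.finished = True
        elif self.started and not self.finished:
            # Middle of an open reasoning block
            reasoning = message_content
        else:
            # Regular content
            content = message_content
        return content, reasoning


splitter = ThinkTagSplitter()
for chunk in ["Hello ", "world <think>I'm thinking", " about this</think> and ", "continuing."]:
    print(splitter.split(chunk))
# ('Hello ', '') -> ('world ', "I'm thinking") -> (' and ', ' about this') -> ('continuing.', '')
```

The driver run reproduces the expectations of `test_mixed_content_across_multiple_chunks`: content before `<think>` and after `</think>` is emitted as regular content in the same chunk, while everything between the tags streams out as `reasoning_content`.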