Ignore empty text deltas when streaming gpt-oss via Ollama (#3216)

DouweM · web-flow · commit 7f439680c42b · 2025-10-22T00:36:19.000Z
diff --git a/pydantic_ai_slim/pydantic_ai/models/groq.py b/pydantic_ai_slim/pydantic_ai/models/groq.py
@@ -330,7 +330,7 @@ def _process_response(self, response: chat.ChatCompletion) -> ModelResponse:
                 if call_part and return_part:  # pragma: no branch
                     items.append(call_part)
                     items.append(return_part)
-        if choice.message.content is not None:
+        if choice.message.content:
             # NOTE: The `<think>` tag is only present if `groq_reasoning_format` is set to `raw`.
             items.extend(split_content_into_text_and_thinking(choice.message.content, self.profile.thinking_tags))
         if choice.message.tool_calls is not None:
@@ -563,7 +563,7 @@ async def _get_event_iterator(self) -> AsyncIterator[ModelResponseStreamEvent]:
 
                 # Handle the text part of the response
                 content = choice.delta.content
-                if content is not None:
+                if content:
                     maybe_event = self._parts_manager.handle_text_delta(
                         vendor_part_id='content',
                         content=content,
diff --git a/pydantic_ai_slim/pydantic_ai/models/huggingface.py b/pydantic_ai_slim/pydantic_ai/models/huggingface.py
@@ -277,7 +277,7 @@ def _process_response(self, response: ChatCompletionOutput) -> ModelResponse:
 
         items: list[ModelResponsePart] = []
 
-        if content is not None:
+        if content:
             items.extend(split_content_into_text_and_thinking(content, self.profile.thinking_tags))
         if tool_calls is not None:
             for c in tool_calls:
@@ -482,7 +482,7 @@ async def _get_event_iterator(self) -> AsyncIterator[ModelResponseStreamEvent]:
 
             # Handle the text part of the response
             content = choice.delta.content
-            if content is not None:
+            if content:
                 maybe_event = self._parts_manager.handle_text_delta(
                     vendor_part_id='content',
                     content=content,
diff --git a/pydantic_ai_slim/pydantic_ai/models/openai.py b/pydantic_ai_slim/pydantic_ai/models/openai.py
@@ -559,24 +559,7 @@ def _process_response(self, response: chat.ChatCompletion | str) -> ModelRespons
         # - https://openrouter.ai/docs/use-cases/reasoning-tokens#preserving-reasoning-blocks
         # If you need this, please file an issue.
 
-        vendor_details: dict[str, Any] = {}
-
-        # Add logprobs to vendor_details if available
-        if choice.logprobs is not None and choice.logprobs.content:
-            # Convert logprobs to a serializable format
-            vendor_details['logprobs'] = [
-                {
-                    'token': lp.token,
-                    'bytes': lp.bytes,
-                    'logprob': lp.logprob,
-                    'top_logprobs': [
-                        {'token': tlp.token, 'bytes': tlp.bytes, 'logprob': tlp.logprob} for tlp in lp.top_logprobs
-                    ],
-                }
-                for lp in choice.logprobs.content
-            ]
-
-        if choice.message.content is not None:
+        if choice.message.content:
             items.extend(
                 (replace(part, id='content', provider_name=self.system) if isinstance(part, ThinkingPart) else part)
                 for part in split_content_into_text_and_thinking(choice.message.content, self.profile.thinking_tags)
@@ -594,6 +577,23 @@ def _process_response(self, response: chat.ChatCompletion | str) -> ModelRespons
                 part.tool_call_id = _guard_tool_call_id(part)
                 items.append(part)
 
+        vendor_details: dict[str, Any] = {}
+
+        # Add logprobs to vendor_details if available
+        if choice.logprobs is not None and choice.logprobs.content:
+            # Convert logprobs to a serializable format
+            vendor_details['logprobs'] = [
+                {
+                    'token': lp.token,
+                    'bytes': lp.bytes,
+                    'logprob': lp.logprob,
+                    'top_logprobs': [
+                        {'token': tlp.token, 'bytes': tlp.bytes, 'logprob': tlp.logprob} for tlp in lp.top_logprobs
+                    ],
+                }
+                for lp in choice.logprobs.content
+            ]
+
         raw_finish_reason = choice.finish_reason
         vendor_details['finish_reason'] = raw_finish_reason
         finish_reason = _CHAT_FINISH_REASON_MAP.get(raw_finish_reason)
@@ -1616,21 +1616,6 @@ async def _get_event_iterator(self) -> AsyncIterator[ModelResponseStreamEvent]:
                 self.provider_details = {'finish_reason': raw_finish_reason}
                 self.finish_reason = _CHAT_FINISH_REASON_MAP.get(raw_finish_reason)
 
-            # Handle the text part of the response
-            content = choice.delta.content
-            if content is not None:
-                maybe_event = self._parts_manager.handle_text_delta(
-                    vendor_part_id='content',
-                    content=content,
-                    thinking_tags=self._model_profile.thinking_tags,
-                    ignore_leading_whitespace=self._model_profile.ignore_streamed_leading_whitespace,
-                )
-                if maybe_event is not None:  # pragma: no branch
-                    if isinstance(maybe_event, PartStartEvent) and isinstance(maybe_event.part, ThinkingPart):
-                        maybe_event.part.id = 'content'
-                        maybe_event.part.provider_name = self.provider_name
-                    yield maybe_event
-
             # The `reasoning_content` field is only present in DeepSeek models.
             # https://api-docs.deepseek.com/guides/reasoning_model
             if reasoning_content := getattr(choice.delta, 'reasoning_content', None):
@@ -1652,6 +1637,21 @@ async def _get_event_iterator(self) -> AsyncIterator[ModelResponseStreamEvent]:
                     provider_name=self.provider_name,
                 )
 
+            # Handle the text part of the response
+            content = choice.delta.content
+            if content:
+                maybe_event = self._parts_manager.handle_text_delta(
+                    vendor_part_id='content',
+                    content=content,
+                    thinking_tags=self._model_profile.thinking_tags,
+                    ignore_leading_whitespace=self._model_profile.ignore_streamed_leading_whitespace,
+                )
+                if maybe_event is not None:  # pragma: no branch
+                    if isinstance(maybe_event, PartStartEvent) and isinstance(maybe_event.part, ThinkingPart):
+                        maybe_event.part.id = 'content'
+                        maybe_event.part.provider_name = self.provider_name
+                    yield maybe_event
+
             for dtc in choice.delta.tool_calls or []:
                 maybe_event = self._parts_manager.handle_tool_call_delta(
                     vendor_part_id=dtc.index,
diff --git a/tests/models/test_deepseek.py b/tests/models/test_deepseek.py
@@ -1,24 +1,17 @@
 from __future__ import annotations as _annotations
 
-from typing import Any
-
 import pytest
-from dirty_equals import IsListOrTuple
 from inline_snapshot import snapshot
 
 from pydantic_ai import (
     Agent,
-    FinalResultEvent,
     ModelRequest,
     ModelResponse,
-    PartDeltaEvent,
-    PartStartEvent,
     TextPart,
-    TextPartDelta,
     ThinkingPart,
-    ThinkingPartDelta,
     UserPromptPart,
 )
+from pydantic_ai.run import AgentRunResult, AgentRunResultEvent
 from pydantic_ai.usage import RequestUsage
 
 from ..conftest import IsDatetime, IsStr, try_import
@@ -71,27 +64,42 @@ async def test_deepseek_model_thinking_stream(allow_model_requests: None, deepse
     deepseek_model = OpenAIChatModel('deepseek-reasoner', provider=DeepSeekProvider(api_key=deepseek_api_key))
     agent = Agent(model=deepseek_model)
 
-    event_parts: list[Any] = []
-    async with agent.iter(user_prompt='Hello') as agent_run:
-        async for node in agent_run:
-            if Agent.is_model_request_node(node) or Agent.is_call_tools_node(node):
-                async with node.stream(agent_run.ctx) as request_stream:
-                    async for event in request_stream:
-                        event_parts.append(event)
+    result: AgentRunResult | None = None
+    async for event in agent.run_stream_events(user_prompt='How do I cross the street?'):
+        if isinstance(event, AgentRunResultEvent):
+            result = event.result
 
-    assert event_parts == IsListOrTuple(
-        positions={
-            0: snapshot(
-                PartStartEvent(
-                    index=0, part=ThinkingPart(content='H', id='reasoning_content', provider_name='deepseek')
-                )
+    assert result is not None
+    assert result.all_messages() == snapshot(
+        [
+            ModelRequest(
+                parts=[
+                    UserPromptPart(
+                        content='How do I cross the street?',
+                        timestamp=IsDatetime(),
+                    )
+                ]
+            ),
+            ModelResponse(
+                parts=[
+                    ThinkingPart(
+                        content=IsStr(),
+                        id='reasoning_content',
+                        provider_name='deepseek',
+                    ),
+                    TextPart(content='Hello there! 😊 How can I help you today?'),
+                ],
+                usage=RequestUsage(
+                    input_tokens=6,
+                    output_tokens=212,
+                    details={'prompt_cache_hit_tokens': 0, 'prompt_cache_miss_tokens': 6, 'reasoning_tokens': 198},
+                ),
+                model_name='deepseek-reasoner',
+                timestamp=IsDatetime(),
+                provider_name='deepseek',
+                provider_details={'finish_reason': 'stop'},
+                provider_response_id='33be18fc-3842-486c-8c29-dd8e578f7f20',
+                finish_reason='stop',
             ),
-            1: snapshot(PartDeltaEvent(index=0, delta=ThinkingPartDelta(content_delta='mm', provider_name='deepseek'))),
-            2: snapshot(PartDeltaEvent(index=0, delta=ThinkingPartDelta(content_delta=',', provider_name='deepseek'))),
-            198: snapshot(PartStartEvent(index=1, part=TextPart(content='Hello'))),
-            199: snapshot(FinalResultEvent(tool_name=None, tool_call_id=None)),
-            200: snapshot(PartDeltaEvent(index=1, delta=TextPartDelta(content_delta=' there'))),
-            201: snapshot(PartDeltaEvent(index=1, delta=TextPartDelta(content_delta='!'))),
-        },
-        length=211,
+        ]
     )
diff --git a/tests/models/test_groq.py b/tests/models/test_groq.py
@@ -5160,7 +5160,6 @@ async def get_something_by_name(name: str) -> str:
             ),
             ModelResponse(
                 parts=[
-                    TextPart(content=''),
                     ThinkingPart(
                         content="""\
 The user requests to call the tool with non-existent parameters to test error handling. We need to call the function "get_something_by_name" with wrong parameters. The function expects a single argument object with "name". Non-existent parameters means we could provide a wrong key, or missing name. Let's provide an object with wrong key "nonexistent": "value". That should cause error. So we call the function with {"nonexistent": "test"}.
@@ -5205,7 +5204,6 @@ async def get_something_by_name(name: str) -> str:
             ),
             ModelResponse(
                 parts=[
-                    TextPart(content=''),
                     ThinkingPart(content='We need to call with correct param: name. Use a placeholder name.'),
                     ToolCallPart(
                         tool_name='get_something_by_name',
diff --git a/tests/models/test_huggingface.py b/tests/models/test_huggingface.py
@@ -9,7 +9,6 @@
 from unittest.mock import Mock
 
 import pytest
-from dirty_equals import IsListOrTuple
 from inline_snapshot import snapshot
 from typing_extensions import TypedDict
 
@@ -18,26 +17,22 @@
     AudioUrl,
     BinaryContent,
     DocumentUrl,
-    FinalResultEvent,
     ImageUrl,
     ModelRequest,
     ModelResponse,
     ModelRetry,
-    PartDeltaEvent,
-    PartStartEvent,
     RetryPromptPart,
     SystemPromptPart,
     TextPart,
-    TextPartDelta,
     ThinkingPart,
-    ThinkingPartDelta,
     ToolCallPart,
     ToolReturnPart,
     UserPromptPart,
     VideoUrl,
 )
 from pydantic_ai.exceptions import ModelHTTPError
 from pydantic_ai.result import RunUsage
+from pydantic_ai.run import AgentRunResult, AgentRunResultEvent
 from pydantic_ai.settings import ModelSettings
 from pydantic_ai.tools import RunContext
 from pydantic_ai.usage import RequestUsage
@@ -978,35 +973,32 @@ async def test_hf_model_thinking_part_iter(allow_model_requests: None, huggingfa
     )
     agent = Agent(m)
 
-    event_parts: list[Any] = []
-    async with agent.iter(user_prompt='How do I cross the street?') as agent_run:
-        async for node in agent_run:
-            if Agent.is_model_request_node(node) or Agent.is_call_tools_node(node):
-                async with node.stream(agent_run.ctx) as request_stream:
-                    async for event in request_stream:
-                        event_parts.append(event)
-
-    assert event_parts == snapshot(
-        IsListOrTuple(
-            positions={
-                0: PartStartEvent(index=0, part=ThinkingPart(content='')),
-                1: PartDeltaEvent(index=0, delta=ThinkingPartDelta(content_delta='\n')),
-                2: PartDeltaEvent(index=0, delta=ThinkingPartDelta(content_delta='Okay')),
-                3: PartDeltaEvent(index=0, delta=ThinkingPartDelta(content_delta=',')),
-                4: PartDeltaEvent(index=0, delta=ThinkingPartDelta(content_delta=' the')),
-                5: PartDeltaEvent(index=0, delta=ThinkingPartDelta(content_delta=' user')),
-                6: PartDeltaEvent(index=0, delta=ThinkingPartDelta(content_delta=' is')),
-                7: PartDeltaEvent(index=0, delta=ThinkingPartDelta(content_delta=' asking')),
-                413: PartStartEvent(index=1, part=TextPart(content='Cross')),
-                414: FinalResultEvent(tool_name=None, tool_call_id=None),
-                415: PartDeltaEvent(index=1, delta=TextPartDelta(content_delta='ing')),
-                416: PartDeltaEvent(index=1, delta=TextPartDelta(content_delta=' the')),
-                417: PartDeltaEvent(index=1, delta=TextPartDelta(content_delta=' street')),
-                418: PartDeltaEvent(index=1, delta=TextPartDelta(content_delta=' safely')),
-                419: PartDeltaEvent(index=1, delta=TextPartDelta(content_delta=' requires')),
-                420: PartDeltaEvent(index=1, delta=TextPartDelta(content_delta=' attent')),
-                421: PartDeltaEvent(index=1, delta=TextPartDelta(content_delta='iveness')),
-            },
-            length=1062,
-        )
+    result: AgentRunResult | None = None
+    async for event in agent.run_stream_events(user_prompt='How do I cross the street?'):
+        if isinstance(event, AgentRunResultEvent):
+            result = event.result
+
+    assert result is not None
+    assert result.all_messages() == snapshot(
+        [
+            ModelRequest(
+                parts=[
+                    UserPromptPart(
+                        content='How do I cross the street?',
+                        timestamp=IsDatetime(),
+                    )
+                ]
+            ),
+            ModelResponse(
+                parts=[
+                    ThinkingPart(content=IsStr()),
+                    TextPart(content=IsStr()),
+                ],
+                model_name='Qwen/Qwen3-235B-A22B',
+                timestamp=IsDatetime(),
+                provider_name='huggingface',
+                provider_details={'finish_reason': 'stop'},
+                provider_response_id='chatcmpl-357f347a3f5d4897b36a128fb4e4cf7b',
+            ),
+        ]
     )
diff --git a/tests/models/test_openai.py b/tests/models/test_openai.py
@@ -583,7 +583,7 @@ async def test_stream_tool_call_with_empty_text(allow_model_requests: None):
         chunk([]),
     ]
     mock_client = MockOpenAI.create_mock_stream(stream)
-    m = OpenAIChatModel('qwen3', provider=OllamaProvider(openai_client=mock_client))
+    m = OpenAIChatModel('gpt-oss:20b', provider=OllamaProvider(openai_client=mock_client))
     agent = Agent(m, output_type=[str, MyTypedDict])
 
     async with agent.run_stream('') as result:
diff --git a/tests/test_temporal.py b/tests/test_temporal.py
@@ -1132,9 +1132,8 @@ async def test_temporal_agent_run_stream_events(allow_model_requests: None):
     events = [event async for event in simple_temporal_agent.run_stream_events('What is the capital of Mexico?')]
     assert events == snapshot(
         [
-            PartStartEvent(index=0, part=TextPart(content='')),
+            PartStartEvent(index=0, part=TextPart(content='The')),
             FinalResultEvent(tool_name=None, tool_call_id=None),
-            PartDeltaEvent(index=0, delta=TextPartDelta(content_delta='The')),
             PartDeltaEvent(index=0, delta=TextPartDelta(content_delta=' capital')),
             PartDeltaEvent(index=0, delta=TextPartDelta(content_delta=' of')),
             PartDeltaEvent(index=0, delta=TextPartDelta(content_delta=' Mexico')),

Original file line number	Diff line number	Diff line change
`@@ -583,7 +583,7 @@ async def test_stream_tool_call_with_empty_text(allow_model_requests: None):`
`583`	`583`	`chunk([]),`
`584`	`584`	`]`
`585`	`585`	`mock_client = MockOpenAI.create_mock_stream(stream)`
`586`		`- m = OpenAIChatModel('qwen3', provider=OllamaProvider(openai_client=mock_client))`
	`586`	`+ m = OpenAIChatModel('gpt-oss:20b', provider=OllamaProvider(openai_client=mock_client))`
`587`	`587`	`agent = Agent(m, output_type=[str, MyTypedDict])`
`588`	`588`
`589`	`589`	`async with agent.run_stream('') as result:`