Commit dc3b0da
Ignore leading whitespace when streaming from Qwen or DeepSeek (#2554)
1 parent f25a4e1 · commit dc3b0da

26 files changed: +380 -27 lines
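For context, the failure mode this commit addresses: models such as Qwen3 (via Ollama) or DeepSeek R1 can emit `<think>\n</think>\n\n` or an empty text part ahead of their tool calls; once the thinking tags are stripped, the remaining whitespace-only text delta opened a `TextPart`, which `run_stream` could treat as the start of a final `str` result. A minimal sketch of the scenario, assuming a local Ollama server (the model name, URL, and prompt are illustrative):

import asyncio

from pydantic_ai import Agent
from pydantic_ai.models.openai import OpenAIModel
from pydantic_ai.providers.ollama import OllamaProvider

# Illustrative setup: Qwen3 served locally through Ollama.
model = OpenAIModel('qwen3', provider=OllamaProvider(base_url='http://localhost:11434/v1'))
agent = Agent(model)  # plain `str` output, so a stray leading text part could "win"


async def main():
    async with agent.run_stream('What is the capital of France?') as result:
        # Before this commit, a whitespace-only first delta (the residue of a
        # stripped `<think>\n</think>\n\n` block) could be treated as the start
        # of the final text result; with this change it is ignored.
        async for text in result.stream_text():
            print(text)


asyncio.run(main())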

docs/api/providers.md

Lines changed: 2 additions & 0 deletions
@@ -35,3 +35,5 @@
 ::: pydantic_ai.providers.huggingface.HuggingFaceProvider
 
 ::: pydantic_ai.providers.moonshotai.MoonshotAIProvider
+
+::: pydantic_ai.providers.ollama.OllamaProvider

docs/models/openai.md

Lines changed: 7 additions & 4 deletions
@@ -238,6 +238,8 @@ To use [Ollama](https://ollama.com/), you must first download the Ollama client,
 
 You must also ensure the Ollama server is running when trying to make requests to it. For more information, please see the [Ollama documentation](https://github.com/ollama/ollama/tree/main/docs).
 
+You can then use the model with the [`OllamaProvider`][pydantic_ai.providers.ollama.OllamaProvider].
+
 #### Example local usage
 
 With `ollama` installed, you can run the server with the model you want to use:
@@ -255,7 +257,7 @@ from pydantic import BaseModel
 
 from pydantic_ai import Agent
 from pydantic_ai.models.openai import OpenAIModel
-from pydantic_ai.providers.openai import OpenAIProvider
+from pydantic_ai.providers.ollama import OllamaProvider
 
 
 class CityLocation(BaseModel):
@@ -264,7 +266,8 @@ class CityLocation(BaseModel):
 
 
 ollama_model = OpenAIModel(
-    model_name='llama3.2', provider=OpenAIProvider(base_url='http://localhost:11434/v1')
+    model_name='llama3.2',
+    provider=OllamaProvider(base_url='http://localhost:11434/v1'),
 )
 agent = Agent(ollama_model, output_type=CityLocation)
 
@@ -282,11 +285,11 @@ from pydantic import BaseModel
 
 from pydantic_ai import Agent
 from pydantic_ai.models.openai import OpenAIModel
-from pydantic_ai.providers.openai import OpenAIProvider
+from pydantic_ai.providers.ollama import OllamaProvider
 
 ollama_model = OpenAIModel(
     model_name='qwen2.5-coder:7b',  # (1)!
-    provider=OpenAIProvider(base_url='http://192.168.1.74:11434/v1'),  # (2)!
+    provider=OllamaProvider(base_url='http://192.168.1.74:11434/v1'),  # (2)!
 )
 
pydantic_ai_slim/pydantic_ai/_parts_manager.py

Lines changed: 5 additions & 4 deletions
@@ -72,6 +72,7 @@ def handle_text_delta(
         vendor_part_id: VendorId | None,
         content: str,
         thinking_tags: tuple[str, str] | None = None,
+        ignore_leading_whitespace: bool = False,
     ) -> ModelResponseStreamEvent | None:
         """Handle incoming text content, creating or updating a TextPart in the manager as appropriate.
 
@@ -85,6 +86,7 @@ def handle_text_delta(
                 a TextPart.
             content: The text content to append to the appropriate TextPart.
             thinking_tags: If provided, will handle content between the thinking tags as thinking parts.
+            ignore_leading_whitespace: If True, will ignore leading whitespace in the content.
 
         Returns:
             - A `PartStartEvent` if a new part was created.
@@ -128,10 +130,9 @@ def handle_text_delta(
                 return self.handle_thinking_delta(vendor_part_id=vendor_part_id, content='')
 
         if existing_text_part_and_index is None:
-            # If the first text delta is all whitespace, don't emit a new part yet.
-            # This is a workaround for models that emit `<think>\n</think>\n\n` ahead of tool calls (e.g. Ollama + Qwen3),
-            # which we don't want to end up treating as a final result.
-            if content.isspace():
+            # This is a workaround for models that emit `<think>\n</think>\n\n` or an empty text part ahead of tool calls (e.g. Ollama + Qwen3),
+            # which we don't want to end up treating as a final result when using `run_stream` with `str` as a valid `output_type`.
+            if ignore_leading_whitespace and (len(content) == 0 or content.isspace()):
                 return None
 
             # There is no existing text part that should be updated, so create a new one
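A minimal sketch of the new `handle_text_delta` contract, assuming the manager class in this module is `ModelResponsePartsManager` and can be constructed without arguments (both hold in the current source, but are not shown in this diff):

from pydantic_ai._parts_manager import ModelResponsePartsManager

manager = ModelResponsePartsManager()

# With the flag set, an empty or whitespace-only first delta is swallowed
# instead of opening a TextPart that could be mistaken for a final result.
event = manager.handle_text_delta(
    vendor_part_id='content',
    content='\n\n',
    ignore_leading_whitespace=True,
)
assert event is None

# Substantive content still creates a TextPart and emits a PartStartEvent.
event = manager.handle_text_delta(
    vendor_part_id='content',
    content='Paris',
    ignore_leading_whitespace=True,
)
assert event is not None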

pydantic_ai_slim/pydantic_ai/models/bedrock.py

Lines changed: 1 addition & 1 deletion
@@ -648,7 +648,7 @@ async def _get_event_iterator(self) -> AsyncIterator[ModelResponseStreamEvent]:
                     )
                 if 'text' in delta:
                     maybe_event = self._parts_manager.handle_text_delta(vendor_part_id=index, content=delta['text'])
-                    if maybe_event is not None:
+                    if maybe_event is not None:  # pragma: no branch
                         yield maybe_event
                 if 'toolUse' in delta:
                     tool_use = delta['toolUse']

pydantic_ai_slim/pydantic_ai/models/groq.py

Lines changed: 1 addition & 0 deletions
@@ -457,6 +457,7 @@ async def _get_event_iterator(self) -> AsyncIterator[ModelResponseStreamEvent]:
                     vendor_part_id='content',
                     content=content,
                     thinking_tags=self._model_profile.thinking_tags,
+                    ignore_leading_whitespace=self._model_profile.ignore_streamed_leading_whitespace,
                 )
                 if maybe_event is not None:  # pragma: no branch
                     yield maybe_event

pydantic_ai_slim/pydantic_ai/models/huggingface.py

Lines changed: 9 additions & 2 deletions
@@ -35,7 +35,7 @@
     UserPromptPart,
     VideoUrl,
 )
-from ..profiles import ModelProfile
+from ..profiles import ModelProfile, ModelProfileSpec
 from ..providers import Provider, infer_provider
 from ..settings import ModelSettings
 from ..tools import ToolDefinition
@@ -121,20 +121,26 @@ def __init__(
         model_name: str,
         *,
         provider: Literal['huggingface'] | Provider[AsyncInferenceClient] = 'huggingface',
+        profile: ModelProfileSpec | None = None,
+        settings: ModelSettings | None = None,
     ):
         """Initialize a Hugging Face model.
 
         Args:
             model_name: The name of the Model to use. You can browse available models [here](https://huggingface.co/models?pipeline_tag=text-generation&inference_provider=all&sort=trending).
             provider: The provider to use for Hugging Face Inference Providers. Can be either the string 'huggingface' or an
                 instance of `Provider[AsyncInferenceClient]`. If not provided, the other parameters will be used.
+            profile: The model profile to use. Defaults to a profile picked by the provider based on the model name.
+            settings: Model-specific settings that will be used as defaults for this model.
         """
         self._model_name = model_name
         self._provider = provider
         if isinstance(provider, str):
             provider = infer_provider(provider)
         self.client = provider.client
 
+        super().__init__(settings=settings, profile=profile or provider.model_profile)
+
     async def request(
         self,
         messages: list[ModelMessage],
@@ -444,11 +450,12 @@ async def _get_event_iterator(self) -> AsyncIterator[ModelResponseStreamEvent]:
 
             # Handle the text part of the response
             content = choice.delta.content
-            if content:
+            if content is not None:
                 maybe_event = self._parts_manager.handle_text_delta(
                     vendor_part_id='content',
                     content=content,
                     thinking_tags=self._model_profile.thinking_tags,
+                    ignore_leading_whitespace=self._model_profile.ignore_streamed_leading_whitespace,
                 )
                 if maybe_event is not None:  # pragma: no branch
                     yield maybe_event
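The new `profile` parameter also means a caller can force the whitespace workaround for any Hugging Face model without waiting for a provider default; a hedged sketch (the model name is illustrative, and an HF token is presumably still needed at request time):

from pydantic_ai.models.huggingface import HuggingFaceModel
from pydantic_ai.profiles import ModelProfile

# Illustrative model name; any text-generation model on the Hub would do.
model = HuggingFaceModel(
    'Qwen/Qwen2.5-72B-Instruct',
    profile=ModelProfile(ignore_streamed_leading_whitespace=True),
)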

pydantic_ai_slim/pydantic_ai/models/openai.py

Lines changed: 3 additions & 1 deletion
@@ -217,6 +217,7 @@ def __init__(
             'together',
             'heroku',
             'github',
+            'ollama',
         ]
         | Provider[AsyncOpenAI] = 'openai',
         profile: ModelProfileSpec | None = None,
@@ -1094,11 +1095,12 @@ async def _get_event_iterator(self) -> AsyncIterator[ModelResponseStreamEvent]:
 
             # Handle the text part of the response
             content = choice.delta.content
-            if content:
+            if content is not None:
                 maybe_event = self._parts_manager.handle_text_delta(
                     vendor_part_id='content',
                     content=content,
                     thinking_tags=self._model_profile.thinking_tags,
+                    ignore_leading_whitespace=self._model_profile.ignore_streamed_leading_whitespace,
                 )
                 if maybe_event is not None:  # pragma: no branch
                     yield maybe_event
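With `'ollama'` added to the provider literals, `OpenAIModel` now accepts the Ollama provider by name as well as by instance; a sketch mirroring the docs example above (the local URL is assumed):

from pydantic_ai.models.openai import OpenAIModel
from pydantic_ai.providers.ollama import OllamaProvider

model = OpenAIModel(
    'llama3.2',
    provider=OllamaProvider(base_url='http://localhost:11434/v1'),
)
# provider='ollama' should also work now, resolving through infer_provider.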

pydantic_ai_slim/pydantic_ai/profiles/__init__.py

Lines changed: 10 additions & 1 deletion
@@ -20,7 +20,7 @@
 
 @dataclass
 class ModelProfile:
-    """Describes how requests to a specific model or family of models need to be constructed to get the best results, independent of the model and provider classes used."""
+    """Describes how requests to and responses from specific models or families of models need to be constructed and processed to get the best results, independent of the model and provider classes used."""
 
     supports_tools: bool = True
     """Whether the model supports tools."""
@@ -46,6 +46,15 @@ class ModelProfile:
     thinking_tags: tuple[str, str] = ('<think>', '</think>')
     """The tags used to indicate thinking parts in the model's output. Defaults to ('<think>', '</think>')."""
 
+    ignore_streamed_leading_whitespace: bool = False
+    """Whether to ignore leading whitespace when streaming a response.
+
+    This is a workaround for models that emit `<think>\n</think>\n\n` or an empty text part ahead of tool calls (e.g. Ollama + Qwen3),
+    which we don't want to end up treating as a final result when using `run_stream` with `str` as a valid `output_type`.
+
+    This is currently only used by `OpenAIModel`, `HuggingFaceModel`, and `GroqModel`.
+    """
+
     @classmethod
     def from_profile(cls, profile: ModelProfile | None) -> Self:
         """Build a ModelProfile subclass instance from a ModelProfile instance."""

pydantic_ai_slim/pydantic_ai/profiles/deepseek.py

Lines changed: 1 addition & 1 deletion
@@ -5,4 +5,4 @@
 
 def deepseek_model_profile(model_name: str) -> ModelProfile | None:
     """Get the model profile for a DeepSeek model."""
-    return None
+    return ModelProfile(ignore_streamed_leading_whitespace='r1' in model_name)
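The DeepSeek heuristic is a bare substring check, so it keys off 'r1' anywhere in the model name; a quick illustration with hypothetical names:

from pydantic_ai.profiles.deepseek import deepseek_model_profile

assert deepseek_model_profile('deepseek-r1').ignore_streamed_leading_whitespace
assert not deepseek_model_profile('deepseek-chat').ignore_streamed_leading_whitespace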

pydantic_ai_slim/pydantic_ai/profiles/moonshotai.py

Lines changed: 1 addition & 1 deletion
@@ -5,4 +5,4 @@
 
 def moonshotai_model_profile(model_name: str) -> ModelProfile | None:
     """Get the model profile for a MoonshotAI model."""
-    return None
+    return ModelProfile(ignore_streamed_leading_whitespace=True)
