
Commit d40fd5a

Copilot and mdrxy authored
feat(ollama): warn on empty load responses (#32161)
## Problem

When using `ChatOllama` with `create_react_agent`, agents would sometimes terminate prematurely with empty responses when Ollama returned `done_reason: 'load'` responses with no content. This caused agents to return empty `AIMessage` objects instead of actual generated text.

```python
from langchain_ollama import ChatOllama
from langgraph.prebuilt import create_react_agent
from langchain_core.messages import HumanMessage

llm = ChatOllama(model='qwen2.5:7b', temperature=0)
agent = create_react_agent(model=llm, tools=[])
result = agent.invoke(HumanMessage('Hello'), {"configurable": {"thread_id": "1"}})
# Before fix: AIMessage(content='', response_metadata={'done_reason': 'load'})
# Expected: AIMessage with actual generated content
```

## Root Cause

The `_iterate_over_stream` and `_aiterate_over_stream` methods treated any response with `done: True` as final, regardless of `done_reason`. When Ollama returns `done_reason: 'load'` with empty content, it indicates the model was loaded but no actual generation occurred; this should not be considered a complete response.

## Solution

Modified the streaming logic to skip responses when:
- `done: True`
- `done_reason: 'load'`
- content is empty or contains only whitespace

This ensures agents only receive actual generated content while preserving backward compatibility for load responses that do contain content.

## Changes

- **`_iterate_over_stream`**: Skip empty load responses instead of yielding them
- **`_aiterate_over_stream`**: Apply the same fix to async streaming
- **Tests**: Added test cases covering the edge cases

## Testing

All scenarios now work correctly:
- ✅ Empty load responses are skipped (fixes the original issue)
- ✅ Load responses with actual content are preserved (backward compatibility)
- ✅ Normal stop responses work unchanged
- ✅ Streaming behavior preserved
- ✅ `create_react_agent` integration fixed

Fixes #31482.

---

💡 You can make Copilot smarter by setting up custom instructions, customizing its development environment, and configuring Model Context Protocol (MCP) servers. Learn more in the [Copilot coding agent tips](https://gh.io/copilot-coding-agent-tips) docs.

---

Co-authored-by: copilot-swe-agent[bot] <[email protected]>
Co-authored-by: mdrxy <[email protected]>
Co-authored-by: Mason Daugherty <[email protected]>
1 parent 116b758 commit d40fd5a
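
For orientation before the diff, the skip rule described under "Solution" reduces to a small predicate over a raw Ollama stream chunk. Below is a minimal, self-contained sketch of that rule; the helper name `is_empty_load_chunk` is illustrative and not part of `langchain-ollama`.

```python
# Minimal sketch of the skip rule described above. `is_empty_load_chunk` is a
# hypothetical helper name, not an actual langchain-ollama function.
from collections.abc import Mapping
from typing import Any


def is_empty_load_chunk(chunk: Mapping[str, Any]) -> bool:
    """Return True for a final chunk that only reports a model load and carries no text."""
    content = chunk.get("message", {}).get("content", "")
    return (
        chunk.get("done") is True
        and chunk.get("done_reason") == "load"
        and not content.strip()
    )


# A bare load acknowledgement is skipped...
assert is_empty_load_chunk(
    {"done": True, "done_reason": "load", "message": {"role": "assistant", "content": ""}}
)
# ...while a load chunk that carries real text is kept (backward compatibility).
assert not is_empty_load_chunk(
    {"done": True, "done_reason": "load", "message": {"role": "assistant", "content": "Hi"}}
)
```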

File tree

3 files changed: +179 additions, -16 deletions


libs/partners/ollama/langchain_ollama/chat_models.py

Lines changed: 47 additions & 12 deletions
```diff
@@ -4,6 +4,7 @@
 
 import ast
 import json
+import logging
 from collections.abc import AsyncIterator, Iterator, Mapping, Sequence
 from operator import itemgetter
 from typing import (
@@ -58,6 +59,8 @@
 
 from ._utils import validate_model
 
+log = logging.getLogger(__name__)
+
 
 def _get_usage_metadata_from_generation_info(
     generation_info: Optional[Mapping[str, Any]],
@@ -837,6 +840,28 @@ def _iterate_over_stream(
         reasoning = kwargs.get("reasoning", self.reasoning)
         for stream_resp in self._create_chat_stream(messages, stop, **kwargs):
             if not isinstance(stream_resp, str):
+                content = (
+                    stream_resp["message"]["content"]
+                    if "message" in stream_resp and "content" in stream_resp["message"]
+                    else ""
+                )
+
+                # Warn and skip responses with done_reason: 'load' and empty content
+                # These indicate the model was loaded but no actual generation occurred
+                is_load_response_with_empty_content = (
+                    stream_resp.get("done") is True
+                    and stream_resp.get("done_reason") == "load"
+                    and not content.strip()
+                )
+
+                if is_load_response_with_empty_content:
+                    log.warning(
+                        "Ollama returned empty response with done_reason='load'. "
+                        "This typically indicates the model was loaded but no content "
+                        "was generated. Skipping this response."
+                    )
+                    continue
+
                 if stream_resp.get("done") is True:
                     generation_info = dict(stream_resp)
                     if "model" in generation_info:
@@ -845,12 +870,6 @@ def _iterate_over_stream(
                 else:
                     generation_info = None
 
-                content = (
-                    stream_resp["message"]["content"]
-                    if "message" in stream_resp and "content" in stream_resp["message"]
-                    else ""
-                )
-
                 additional_kwargs = {}
                 if (
                     reasoning
@@ -897,6 +916,28 @@ async def _aiterate_over_stream(
         reasoning = kwargs.get("reasoning", self.reasoning)
         async for stream_resp in self._acreate_chat_stream(messages, stop, **kwargs):
             if not isinstance(stream_resp, str):
+                content = (
+                    stream_resp["message"]["content"]
+                    if "message" in stream_resp and "content" in stream_resp["message"]
+                    else ""
+                )
+
+                # Warn and skip responses with done_reason: 'load' and empty content
+                # These indicate the model was loaded but no actual generation occurred
+                is_load_response_with_empty_content = (
+                    stream_resp.get("done") is True
+                    and stream_resp.get("done_reason") == "load"
+                    and not content.strip()
+                )
+
+                if is_load_response_with_empty_content:
+                    log.warning(
+                        "Ollama returned empty response with done_reason='load'. "
+                        "This typically indicates the model was loaded but no content "
+                        "was generated. Skipping this response."
+                    )
+                    continue
+
                 if stream_resp.get("done") is True:
                     generation_info = dict(stream_resp)
                     if "model" in generation_info:
@@ -905,12 +946,6 @@ async def _aiterate_over_stream(
                 else:
                     generation_info = None
 
-                content = (
-                    stream_resp["message"]["content"]
-                    if "message" in stream_resp and "content" in stream_resp["message"]
-                    else ""
-                )
-
                 additional_kwargs = {}
                 if (
                     reasoning
```
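
Downstream, the only visible effect of a filtered load chunk is the module-level warning. A hypothetical usage sketch follows; it assumes a local Ollama server with the `qwen2.5:7b` model pulled (both assumptions mirror the example in the commit description).

```python
# Hypothetical usage sketch: assumes a running local Ollama server with the
# `qwen2.5:7b` model pulled (neither is guaranteed by this commit).
import logging

from langchain_ollama import ChatOllama

# Surface warnings from langchain_ollama.chat_models on the console.
logging.basicConfig(level=logging.WARNING)

llm = ChatOllama(model="qwen2.5:7b", temperature=0)
result = llm.invoke("Hello")

# If the server emits a bare done_reason='load' chunk, you would now expect a
# warning like the following instead of an empty AIMessage:
#   Ollama returned empty response with done_reason='load'. This typically
#   indicates the model was loaded but no content was generated. Skipping this
#   response.
print(result.content)
```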

libs/partners/ollama/tests/unit_tests/test_chat_models.py

Lines changed: 130 additions & 2 deletions
```diff
@@ -1,15 +1,16 @@
 """Test chat model integration."""
 
 import json
+import logging
 from collections.abc import Generator
 from contextlib import contextmanager
 from typing import Any
-from unittest.mock import patch
+from unittest.mock import MagicMock, patch
 
 import pytest
 from httpx import Client, Request, Response
 from langchain_core.exceptions import OutputParserException
-from langchain_core.messages import ChatMessage
+from langchain_core.messages import ChatMessage, HumanMessage
 from langchain_tests.unit_tests import ChatModelUnitTests
 
 from langchain_ollama.chat_models import (
@@ -140,3 +141,130 @@ def test_parse_json_string_skip_returns_input_on_failure() -> None:
         skip=True,
     )
     assert result == malformed_string
+
+
+def test_load_response_with_empty_content_is_skipped(
+    caplog: pytest.LogCaptureFixture,
+) -> None:
+    """Test that load responses with empty content log a warning and are skipped."""
+    load_only_response = [
+        {
+            "model": "test-model",
+            "created_at": "2025-01-01T00:00:00.000000000Z",
+            "done": True,
+            "done_reason": "load",
+            "message": {"role": "assistant", "content": ""},
+        }
+    ]
+
+    with patch("langchain_ollama.chat_models.Client") as mock_client_class:
+        mock_client = MagicMock()
+        mock_client_class.return_value = mock_client
+        mock_client.chat.return_value = load_only_response
+
+        llm = ChatOllama(model="test-model")
+
+        with (
+            caplog.at_level(logging.WARNING),
+            pytest.raises(ValueError, match="No data received from Ollama stream"),
+        ):
+            llm.invoke([HumanMessage("Hello")])
+
+        assert "Ollama returned empty response with done_reason='load'" in caplog.text
+
+
+def test_load_response_with_whitespace_content_is_skipped(
+    caplog: pytest.LogCaptureFixture,
+) -> None:
+    """Test load responses w/ only whitespace content log a warning and are skipped."""
+    load_whitespace_response = [
+        {
+            "model": "test-model",
+            "created_at": "2025-01-01T00:00:00.000000000Z",
+            "done": True,
+            "done_reason": "load",
+            "message": {"role": "assistant", "content": " \n \t "},
+        }
+    ]
+
+    with patch("langchain_ollama.chat_models.Client") as mock_client_class:
+        mock_client = MagicMock()
+        mock_client_class.return_value = mock_client
+        mock_client.chat.return_value = load_whitespace_response
+
+        llm = ChatOllama(model="test-model")
+
+        with (
+            caplog.at_level(logging.WARNING),
+            pytest.raises(ValueError, match="No data received from Ollama stream"),
+        ):
+            llm.invoke([HumanMessage("Hello")])
+        assert "Ollama returned empty response with done_reason='load'" in caplog.text
+
+
+def test_load_followed_by_content_response(
+    caplog: pytest.LogCaptureFixture,
+) -> None:
+    """Test load responses log a warning and are skipped when followed by content."""
+    load_then_content_response = [
+        {
+            "model": "test-model",
+            "created_at": "2025-01-01T00:00:00.000000000Z",
+            "done": True,
+            "done_reason": "load",
+            "message": {"role": "assistant", "content": ""},
+        },
+        {
+            "model": "test-model",
+            "created_at": "2025-01-01T00:00:01.000000000Z",
+            "done": True,
+            "done_reason": "stop",
+            "message": {
+                "role": "assistant",
+                "content": "Hello! How can I help you today?",
+            },
+        },
+    ]
+
+    with patch("langchain_ollama.chat_models.Client") as mock_client_class:
+        mock_client = MagicMock()
+        mock_client_class.return_value = mock_client
+        mock_client.chat.return_value = load_then_content_response
+
+        llm = ChatOllama(model="test-model")
+
+        with caplog.at_level(logging.WARNING):
+            result = llm.invoke([HumanMessage("Hello")])
+
+        assert "Ollama returned empty response with done_reason='load'" in caplog.text
+        assert result.content == "Hello! How can I help you today?"
+        assert result.response_metadata.get("done_reason") == "stop"
+
+
+def test_load_response_with_actual_content_is_not_skipped(
+    caplog: pytest.LogCaptureFixture,
+) -> None:
+    """Test load responses with actual content are NOT skipped and log no warning."""
+    load_with_content_response = [
+        {
+            "model": "test-model",
+            "created_at": "2025-01-01T00:00:00.000000000Z",
+            "done": True,
+            "done_reason": "load",
+            "message": {"role": "assistant", "content": "This is actual content"},
+        }
+    ]
+
+    with patch("langchain_ollama.chat_models.Client") as mock_client_class:
+        mock_client = MagicMock()
+        mock_client_class.return_value = mock_client
+        mock_client.chat.return_value = load_with_content_response
+
+        llm = ChatOllama(model="test-model")
+
+        with caplog.at_level(logging.WARNING):
+            result = llm.invoke([HumanMessage("Hello")])
+
+        assert result.content == "This is actual content"
+        assert result.response_metadata.get("done_reason") == "load"
+        assert not caplog.text
```
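
Because these tests patch `langchain_ollama.chat_models.Client`, they run without a live Ollama server. The same pattern can be reused interactively to poke at the behavior; a small sketch with an illustrative fake payload:

```python
# Minimal interactive sketch reusing the tests' mocking approach, so no Ollama
# server is needed. The payload below is illustrative fake data.
from unittest.mock import MagicMock, patch

from langchain_core.messages import HumanMessage
from langchain_ollama import ChatOllama

fake_stream = [
    {
        "model": "test-model",
        "created_at": "2025-01-01T00:00:00.000000000Z",
        "done": True,
        "done_reason": "stop",
        "message": {"role": "assistant", "content": "Hello! How can I help you today?"},
    }
]

with patch("langchain_ollama.chat_models.Client") as mock_client_class:
    mock_client = MagicMock()
    mock_client_class.return_value = mock_client
    mock_client.chat.return_value = fake_stream

    llm = ChatOllama(model="test-model")
    result = llm.invoke([HumanMessage("Hello")])
    print(result.content)  # -> "Hello! How can I help you today?"
```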

libs/partners/ollama/uv.lock

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default.
