
Commit 0002b1d

ollama[patch]: fix model validation, ensure per-call reasoning can be set, tests (#31927)
* Update model validation due to a change in the [Ollama client](https://github.com/ollama/ollama); ensure you are running the latest version (0.9.6) to use `validate_model_on_init`
* Add a code example and fix formatting for ChatOllama reasoning
* Ensure that setting `reasoning` in invocation kwargs overrides the class-level setting
* Tests
1 parent f33a257 commit 0002b1d
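
As a quick illustration of the `validate_model_on_init` opt-in mentioned in the description above, a minimal sketch (assuming a local Ollama server at its default address, Ollama 0.9.6 or newer, and that the named model has already been pulled; the model tag is illustrative):

    from langchain_ollama import ChatOllama

    # With validate_model_on_init=True, the constructor checks the local Ollama
    # server's model list up front and raises a ValueError if the model is not
    # present or the server is unreachable, instead of failing later at call time.
    llm = ChatOllama(model="llama3.2", validate_model_on_init=True)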

File tree

5 files changed: +180 additions, -182 deletions


libs/partners/ollama/langchain_ollama/_utils.py

Lines changed: 4 additions & 5 deletions
@@ -16,7 +16,9 @@ def validate_model(client: Client, model_name: str) -> None:
     """
     try:
         response = client.list()
-        model_names: list[str] = [model["name"] for model in response["models"]]
+
+        model_names: list[str] = [model["model"] for model in response["models"]]
+
         if not any(
             model_name == m or m.startswith(f"{model_name}:") for m in model_names
         ):
@@ -27,10 +29,7 @@ def validate_model(client: Client, model_name: str) -> None:
             )
             raise ValueError(msg)
     except ConnectError as e:
-        msg = (
-            "Connection to Ollama failed. Please make sure Ollama is running "
-            f"and accessible at {client._client.base_url}. "
-        )
+        msg = "Failed to connect to Ollama. Please check that Ollama is downloaded, running and accessible. https://ollama.com/download"  # noqa: E501
         raise ValueError(msg) from e
     except ResponseError as e:
         msg = (
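
For reference, a standalone sketch of the name-matching rule the validation above applies, with a hard-coded tag list standing in for the live `client.list()["models"]` response (the tags are illustrative):

    # A requested model name passes validation if it equals a local tag exactly
    # or matches everything before the ":" (so "llama3.2" accepts "llama3.2:latest").
    available = ["llama3.2:latest", "deepseek-r1:8b"]  # stand-in for [m["model"] for m in client.list()["models"]]

    def is_available(model_name: str) -> bool:
        return any(model_name == m or m.startswith(f"{model_name}:") for m in available)

    assert is_available("llama3.2")        # bare name matches "llama3.2:latest"
    assert is_available("deepseek-r1:8b")  # exact tag match
    assert not is_available("mistral")     # not in the local list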

libs/partners/ollama/langchain_ollama/chat_models.py

Lines changed: 54 additions & 11 deletions
@@ -217,17 +217,17 @@ class ChatOllama(BaseChatModel):
         `supported models <https://ollama.com/search?c=thinking>`__.
 
         - ``True``: Enables reasoning mode. The model's reasoning process will be
-        captured and returned separately in the ``additional_kwargs`` of the
-        response message, under ``reasoning_content``. The main response
-        content will not include the reasoning tags.
+          captured and returned separately in the ``additional_kwargs`` of the
+          response message, under ``reasoning_content``. The main response
+          content will not include the reasoning tags.
         - ``False``: Disables reasoning mode. The model will not perform any reasoning,
-        and the response will not include any reasoning content.
+          and the response will not include any reasoning content.
         - ``None`` (Default): The model will use its default reasoning behavior. Note
-        however, if the model's default behavior *is* to perform reasoning, think tags
-        ()``<think>`` and ``</think>``) will be present within the main response content
-        unless you set ``reasoning`` to ``True``.
+          however, if the model's default behavior *is* to perform reasoning, think tags
+          (``<think>`` and ``</think>``) will be present within the main response content
+          unless you set ``reasoning`` to ``True``.
     temperature: float
-        Sampling temperature. Ranges from 0.0 to 1.0.
+        Sampling temperature. Ranges from ``0.0`` to ``1.0``.
     num_predict: Optional[int]
         Max number of tokens to generate.
 
@@ -343,7 +343,6 @@ class ChatOllama(BaseChatModel):
             '{"location": "Pune, India", "time_of_day": "morning"}'
 
     Tool Calling:
-
         .. code-block:: python
 
             from langchain_ollama import ChatOllama
@@ -362,6 +361,48 @@ class Multiply(BaseModel):
                 'args': {'a': 45, 'b': 67},
                 'id': '420c3f3b-df10-4188-945f-eb3abdb40622',
                 'type': 'tool_call'}]
+
+    Thinking / Reasoning:
+        You can enable reasoning mode for models that support it by setting
+        the ``reasoning`` parameter to ``True`` in either the constructor or
+        the ``invoke``/``stream`` methods. This will enable the model to think
+        through the problem and return the reasoning process separately in the
+        ``additional_kwargs`` of the response message, under ``reasoning_content``.
+
+        If ``reasoning`` is set to ``None``, the model will use its default reasoning
+        behavior, and any reasoning content will *not* be captured under the
+        ``reasoning_content`` key, but will be present within the main response content
+        as think tags (``<think>`` and ``</think>``).
+
+        .. note::
+            This feature is only available for `models that support reasoning <https://ollama.com/search?c=thinking>`__.
+
+        .. code-block:: python
+
+            from langchain_ollama import ChatOllama
+
+            llm = ChatOllama(
+                model = "deepseek-r1:8b",
+                reasoning= True,
+            )
+
+            user_message = HumanMessage(content="how many r in the word strawberry?")
+            messages: List[Any] = [user_message]
+            llm.invoke(messages)
+
+            # or, on an invocation basis:
+
+            llm.invoke(messages, reasoning=True)
+            # or llm.stream(messages, reasoning=True)
+
+            # If not provided, the invocation will default to the ChatOllama reasoning
+            # param provided (None by default).
+
+        .. code-block:: python
+
+            AIMessage(content='The word "strawberry" contains **three \'r\' letters**. Here\'s a breakdown for clarity:\n\n- The spelling of "strawberry" has two parts ... be 3.\n\nTo be thorough, let\'s confirm with an online source or common knowledge.\n\nI can recall that "strawberry" has: s-t-r-a-w-b-e-r-r-y — yes, three r\'s.\n\nPerhaps it\'s misspelled by some, but standard is correct.\n\nSo I think the response should be 3.\n'}, response_metadata={'model': 'deepseek-r1:8b', 'created_at': '2025-07-08T19:33:55.891269Z', 'done': True, 'done_reason': 'stop', 'total_duration': 98232561292, 'load_duration': 28036792, 'prompt_eval_count': 10, 'prompt_eval_duration': 40171834, 'eval_count': 3615, 'eval_duration': 98163832416, 'model_name': 'deepseek-r1:8b'}, id='run--18f8269f-6a35-4a7c-826d-b89d52c753b3-0', usage_metadata={'input_tokens': 10, 'output_tokens': 3615, 'total_tokens': 3625})
+
+
     """  # noqa: E501, pylint: disable=line-too-long
 
     model: str
@@ -777,6 +818,7 @@ def _iterate_over_stream(
         stop: Optional[list[str]] = None,
         **kwargs: Any,
     ) -> Iterator[ChatGenerationChunk]:
+        reasoning = kwargs.get("reasoning", self.reasoning)
         for stream_resp in self._create_chat_stream(messages, stop, **kwargs):
             if not isinstance(stream_resp, str):
                 if stream_resp.get("done") is True:
@@ -795,7 +837,7 @@ def _iterate_over_stream(
 
                 additional_kwargs = {}
                 if (
-                    self.reasoning
+                    reasoning
                     and "message" in stream_resp
                     and (thinking_content := stream_resp["message"].get("thinking"))
                 ):
@@ -836,6 +878,7 @@ async def _aiterate_over_stream(
         stop: Optional[list[str]] = None,
         **kwargs: Any,
     ) -> AsyncIterator[ChatGenerationChunk]:
+        reasoning = kwargs.get("reasoning", self.reasoning)
         async for stream_resp in self._acreate_chat_stream(messages, stop, **kwargs):
             if not isinstance(stream_resp, str):
                 if stream_resp.get("done") is True:
@@ -854,7 +897,7 @@ async def _aiterate_over_stream(
 
                 additional_kwargs = {}
                 if (
-                    self.reasoning
+                    reasoning
                     and "message" in stream_resp
                     and (thinking_content := stream_resp["message"].get("thinking"))
                 ):
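
The docstring addition and the `kwargs.get("reasoning", self.reasoning)` change above combine into the following usage pattern; a minimal sketch assuming `deepseek-r1:8b` has been pulled locally (model and prompt are illustrative):

    from langchain_ollama import ChatOllama

    # Class-level setting: reasoning disabled by default for this instance.
    llm = ChatOllama(model="deepseek-r1:8b", reasoning=False)

    # The per-call kwarg now takes precedence over the class-level value, so this
    # call captures the thinking trace separately instead of dropping it.
    msg = llm.invoke("how many r in the word strawberry?", reasoning=True)
    print(msg.content)
    print(msg.additional_kwargs.get("reasoning_content"))

    # Streaming honors the same per-call override.
    for chunk in llm.stream("how many r in the word strawberry?", reasoning=True):
        print(chunk.additional_kwargs.get("reasoning_content", ""), end="")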

libs/partners/ollama/langchain_ollama/llms.py

Lines changed: 23 additions & 3 deletions
@@ -38,7 +38,7 @@ class OllamaLLM(BaseLLM):
     model: str
     """Model name to use."""
 
-    reasoning: Optional[bool] = True
+    reasoning: Optional[bool] = None
     """Controls the reasoning/thinking mode for
     `supported models <https://ollama.com/search?c=thinking>`__.
 
@@ -272,8 +272,11 @@ async def _astream_with_aggregation(
         **kwargs: Any,
     ) -> GenerationChunk:
         final_chunk = None
+        thinking_content = ""
         async for stream_resp in self._acreate_generate_stream(prompt, stop, **kwargs):
             if not isinstance(stream_resp, str):
+                if stream_resp.get("thinking"):
+                    thinking_content += stream_resp["thinking"]
                 chunk = GenerationChunk(
                     text=stream_resp.get("response", ""),
                     generation_info=(
@@ -294,6 +297,12 @@ async def _astream_with_aggregation(
             msg = "No data received from Ollama stream."
             raise ValueError(msg)
 
+        if thinking_content:
+            if final_chunk.generation_info:
+                final_chunk.generation_info["thinking"] = thinking_content
+            else:
+                final_chunk.generation_info = {"thinking": thinking_content}
+
         return final_chunk
 
     def _stream_with_aggregation(
@@ -305,8 +314,11 @@ def _stream_with_aggregation(
         **kwargs: Any,
     ) -> GenerationChunk:
         final_chunk = None
+        thinking_content = ""
         for stream_resp in self._create_generate_stream(prompt, stop, **kwargs):
             if not isinstance(stream_resp, str):
+                if stream_resp.get("thinking"):
+                    thinking_content += stream_resp["thinking"]
                 chunk = GenerationChunk(
                     text=stream_resp.get("response", ""),
                     generation_info=(
@@ -327,6 +339,12 @@ def _stream_with_aggregation(
             msg = "No data received from Ollama stream."
             raise ValueError(msg)
 
+        if thinking_content:
+            if final_chunk.generation_info:
+                final_chunk.generation_info["thinking"] = thinking_content
+            else:
+                final_chunk.generation_info = {"thinking": thinking_content}
+
         return final_chunk
 
     def _generate(
@@ -374,10 +392,11 @@ def _stream(
         run_manager: Optional[CallbackManagerForLLMRun] = None,
         **kwargs: Any,
     ) -> Iterator[GenerationChunk]:
+        reasoning = kwargs.get("reasoning", self.reasoning)
         for stream_resp in self._create_generate_stream(prompt, stop, **kwargs):
             if not isinstance(stream_resp, str):
                 additional_kwargs = {}
-                if thinking_content := stream_resp.get("thinking"):
+                if reasoning and (thinking_content := stream_resp.get("thinking")):
                     additional_kwargs["reasoning_content"] = thinking_content
 
                 chunk = GenerationChunk(
@@ -404,10 +423,11 @@ async def _astream(
         run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
         **kwargs: Any,
     ) -> AsyncIterator[GenerationChunk]:
+        reasoning = kwargs.get("reasoning", self.reasoning)
         async for stream_resp in self._acreate_generate_stream(prompt, stop, **kwargs):
             if not isinstance(stream_resp, str):
                 additional_kwargs = {}
-                if thinking_content := stream_resp.get("thinking"):
+                if reasoning and (thinking_content := stream_resp.get("thinking")):
                     additional_kwargs["reasoning_content"] = thinking_content
 
                 chunk = GenerationChunk(
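
A matching sketch for the completion-style `OllamaLLM`: the default for `reasoning` is now `None` rather than `True`, the per-call kwarg takes precedence as in `ChatOllama`, and the aggregation path above additionally stashes the accumulated thinking text under `generation_info["thinking"]` (model and prompt are illustrative):

    from langchain_ollama import OllamaLLM

    llm = OllamaLLM(model="deepseek-r1:8b")  # reasoning now defaults to None

    # Per-call override, mirroring ChatOllama: with reasoning enabled for this
    # call, the thinking trace is separated out rather than appearing inline
    # as <think> tags in the returned text.
    print(llm.invoke("how many r in the word strawberry?", reasoning=True))

    # Streaming honors the same per-call setting.
    for token in llm.stream("how many r in the word strawberry?", reasoning=True):
        print(token, end="")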

0 commit comments
