Commit 7404338

fix(core): fix string content when streaming output_version="v1" (#33261)

Authored by: ccurme and mdrxy
Co-authored-by: Mason Daugherty <[email protected]>

1 parent: f308139

File tree: 2 files changed, +201 −4 lines
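
What the change does: when output_version="v1", streamed content blocks now get an
"index" key assigned as they arrive, so that consecutive blocks of the same type
share an index and the index increments whenever the block type changes. A minimal
standalone sketch of that pass (illustrative only; assign_block_indices is a
hypothetical helper, while the real logic runs inline in stream, astream,
_generate_with_cache, and _agenerate_with_cache):

# Sketch of the indexing pass this commit adds, under the assumptions above.
def assign_block_indices(
    blocks: list[dict], index: int = -1, index_type: str = ""
) -> tuple[int, str]:
    """Mutate blocks in place; return the (index, index_type) state to carry forward."""
    for block in blocks:
        if block["type"] != index_type:
            # Block type changed: start a new index.
            index_type = block["type"]
            index += 1
        if "index" not in block:
            block["index"] = index
    return index, index_type

# The state persists across chunks, so a block split over several chunks keeps
# a single index:
state = (-1, "")
chunk1 = [{"type": "reasoning", "reasoning": "<rea"}]
chunk2 = [{"type": "reasoning", "reasoning": "soning>"}, {"type": "text", "text": "hi"}]
state = assign_block_indices(chunk1, *state)
state = assign_block_indices(chunk2, *state)
# chunk1[0]["index"] == 0; chunk2[0]["index"] == 0; chunk2[1]["index"] == 1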

libs/core/langchain_core/language_models/chat_models.py
Lines changed: 41 additions & 0 deletions

@@ -43,6 +43,7 @@
     is_data_content_block,
     message_chunk_to_message,
 )
+from langchain_core.messages import content as types
 from langchain_core.messages.block_translators.openai import (
     convert_to_openai_image_block,
 )
@@ -533,6 +534,8 @@ def stream(
             input_messages = _normalize_messages(messages)
             run_id = "-".join((LC_ID_PREFIX, str(run_manager.run_id)))
             yielded = False
+            index = -1
+            index_type = ""
             for chunk in self._stream(input_messages, stop=stop, **kwargs):
                 if chunk.message.id is None:
                     chunk.message.id = run_id
@@ -542,6 +545,14 @@ def stream(
                     chunk.message = _update_message_content_to_blocks(
                         chunk.message, "v1"
                     )
+                    for block in cast(
+                        "list[types.ContentBlock]", chunk.message.content
+                    ):
+                        if block["type"] != index_type:
+                            index_type = block["type"]
+                            index = index + 1
+                        if "index" not in block:
+                            block["index"] = index
                 run_manager.on_llm_new_token(
                     cast("str", chunk.message.content), chunk=chunk
                 )
@@ -651,6 +662,8 @@ async def astream(
             input_messages = _normalize_messages(messages)
             run_id = "-".join((LC_ID_PREFIX, str(run_manager.run_id)))
             yielded = False
+            index = -1
+            index_type = ""
             async for chunk in self._astream(
                 input_messages,
                 stop=stop,
@@ -664,6 +677,14 @@ async def astream(
                     chunk.message = _update_message_content_to_blocks(
                         chunk.message, "v1"
                     )
+                    for block in cast(
+                        "list[types.ContentBlock]", chunk.message.content
+                    ):
+                        if block["type"] != index_type:
+                            index_type = block["type"]
+                            index = index + 1
+                        if "index" not in block:
+                            block["index"] = index
                 await run_manager.on_llm_new_token(
                     cast("str", chunk.message.content), chunk=chunk
                 )
@@ -1145,13 +1166,23 @@ def _generate_with_cache(
             f"{LC_ID_PREFIX}-{run_manager.run_id}" if run_manager else None
         )
         yielded = False
+        index = -1
+        index_type = ""
         for chunk in self._stream(messages, stop=stop, **kwargs):
             chunk.message.response_metadata = _gen_info_and_msg_metadata(chunk)
             if self.output_version == "v1":
                 # Overwrite .content with .content_blocks
                 chunk.message = _update_message_content_to_blocks(
                     chunk.message, "v1"
                 )
+                for block in cast(
+                    "list[types.ContentBlock]", chunk.message.content
+                ):
+                    if block["type"] != index_type:
+                        index_type = block["type"]
+                        index = index + 1
+                    if "index" not in block:
+                        block["index"] = index
             if run_manager:
                 if chunk.message.id is None:
                     chunk.message.id = run_id
@@ -1253,13 +1284,23 @@ async def _agenerate_with_cache(
             f"{LC_ID_PREFIX}-{run_manager.run_id}" if run_manager else None
         )
         yielded = False
+        index = -1
+        index_type = ""
         async for chunk in self._astream(messages, stop=stop, **kwargs):
             chunk.message.response_metadata = _gen_info_and_msg_metadata(chunk)
             if self.output_version == "v1":
                 # Overwrite .content with .content_blocks
                 chunk.message = _update_message_content_to_blocks(
                     chunk.message, "v1"
                 )
+                for block in cast(
+                    "list[types.ContentBlock]", chunk.message.content
+                ):
+                    if block["type"] != index_type:
+                        index_type = block["type"]
+                        index = index + 1
+                    if "index" not in block:
+                        block["index"] = index
             if run_manager:
                 if chunk.message.id is None:
                     chunk.message.id = run_id
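
Taken together, the four instrumented code paths above mean every v1 streaming path
emits indexed blocks. A usage sketch based on the unit tests added below
(GenericFakeChatModel is the fake model the test suite uses; the expected value is
what the tests assert):

from langchain_core.language_models import GenericFakeChatModel
from langchain_core.messages import AIMessage

llm = GenericFakeChatModel(messages=iter([AIMessage("foo bar")]), output_version="v1")
full = None
for chunk in llm.stream("hello"):
    # Each chunk's content is a list of blocks carrying "index" keys.
    full = chunk if full is None else full + chunk
# Per the tests: full.content == [{"type": "text", "text": "foo bar", "index": 0}]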

libs/core/tests/unit_tests/language_models/chat_models/test_base.py
Lines changed: 160 additions & 4 deletions

@@ -8,7 +8,10 @@
 import pytest
 from typing_extensions import override
 
-from langchain_core.callbacks import CallbackManagerForLLMRun
+from langchain_core.callbacks import (
+    AsyncCallbackManagerForLLMRun,
+    CallbackManagerForLLMRun,
+)
 from langchain_core.language_models import (
     BaseChatModel,
     FakeListChatModel,
@@ -23,7 +26,6 @@
     AIMessage,
     AIMessageChunk,
     BaseMessage,
-    BaseMessageChunk,
     HumanMessage,
     SystemMessage,
 )
@@ -907,6 +909,56 @@ async def test_output_version_ainvoke(monkeypatch: Any) -> None:
     assert response.response_metadata["output_version"] == "v1"
 
 
+class _AnotherFakeChatModel(BaseChatModel):
+    responses: Iterator[AIMessage]
+    """Responses for _generate."""
+
+    chunks: Iterator[AIMessageChunk]
+    """Responses for _stream."""
+
+    @property
+    def _llm_type(self) -> str:
+        return "another-fake-chat-model"
+
+    def _generate(
+        self,
+        messages: list[BaseMessage],  # noqa: ARG002
+        stop: list[str] | None = None,  # noqa: ARG002
+        run_manager: CallbackManagerForLLMRun | None = None,  # noqa: ARG002
+        **kwargs: Any,  # noqa: ARG002
+    ) -> ChatResult:
+        return ChatResult(generations=[ChatGeneration(message=next(self.responses))])
+
+    async def _agenerate(
+        self,
+        messages: list[BaseMessage],  # noqa: ARG002
+        stop: list[str] | None = None,  # noqa: ARG002
+        run_manager: AsyncCallbackManagerForLLMRun | None = None,  # noqa: ARG002
+        **kwargs: Any,  # noqa: ARG002
+    ) -> ChatResult:
+        return ChatResult(generations=[ChatGeneration(message=next(self.responses))])
+
+    def _stream(
+        self,
+        messages: list[BaseMessage],  # noqa: ARG002
+        stop: list[str] | None = None,  # noqa: ARG002
+        run_manager: CallbackManagerForLLMRun | None = None,  # noqa: ARG002
+        **kwargs: Any,  # noqa: ARG002
+    ) -> Iterator[ChatGenerationChunk]:
+        for chunk in self.chunks:
+            yield ChatGenerationChunk(message=chunk)
+
+    async def _astream(
+        self,
+        messages: list[BaseMessage],  # noqa: ARG002
+        stop: list[str] | None = None,  # noqa: ARG002
+        run_manager: AsyncCallbackManagerForLLMRun | None = None,  # noqa: ARG002
+        **kwargs: Any,  # noqa: ARG002
+    ) -> AsyncIterator[ChatGenerationChunk]:
+        for chunk in self.chunks:
+            yield ChatGenerationChunk(message=chunk)
+
+
 def test_output_version_stream(monkeypatch: Any) -> None:
     messages = [AIMessage("foo bar")]
 
@@ -923,7 +975,7 @@ def test_output_version_stream(monkeypatch: Any) -> None:
 
     # v1
     llm = GenericFakeChatModel(messages=iter(messages), output_version="v1")
-    full_v1: BaseMessageChunk | None = None
+    full_v1: AIMessageChunk | None = None
     for chunk in llm.stream("hello"):
         assert isinstance(chunk, AIMessageChunk)
         assert isinstance(chunk.content, list)
@@ -936,6 +988,58 @@ def test_output_version_stream(monkeypatch: Any) -> None:
     assert isinstance(full_v1, AIMessageChunk)
     assert full_v1.response_metadata["output_version"] == "v1"
 
+    assert full_v1.content == [{"type": "text", "text": "foo bar", "index": 0}]
+
+    # Test text blocks
+    llm_with_rich_content = _AnotherFakeChatModel(
+        responses=iter([]),
+        chunks=iter(
+            [
+                AIMessageChunk(content="foo "),
+                AIMessageChunk(content="bar"),
+            ]
+        ),
+        output_version="v1",
+    )
+    full_v1 = None
+    for chunk in llm_with_rich_content.stream("hello"):
+        full_v1 = chunk if full_v1 is None else full_v1 + chunk
+    assert isinstance(full_v1, AIMessageChunk)
+    assert full_v1.content_blocks == [{"type": "text", "text": "foo bar", "index": 0}]
+
+    # Test content blocks of different types
+    chunks = [
+        AIMessageChunk(content="", additional_kwargs={"reasoning_content": "<rea"}),
+        AIMessageChunk(content="", additional_kwargs={"reasoning_content": "soning>"}),
+        AIMessageChunk(content="<some "),
+        AIMessageChunk(content="text>"),
+    ]
+    llm_with_rich_content = _AnotherFakeChatModel(
+        responses=iter([]),
+        chunks=iter(chunks),
+        output_version="v1",
+    )
+    full_v1 = None
+    for chunk in llm_with_rich_content.stream("hello"):
+        full_v1 = chunk if full_v1 is None else full_v1 + chunk
+    assert isinstance(full_v1, AIMessageChunk)
+    assert full_v1.content_blocks == [
+        {"type": "reasoning", "reasoning": "<reasoning>", "index": 0},
+        {"type": "text", "text": "<some text>", "index": 1},
+    ]
+
+    # Test invoke with stream=True
+    llm_with_rich_content = _AnotherFakeChatModel(
+        responses=iter([]),
+        chunks=iter(chunks),
+        output_version="v1",
+    )
+    response_v1 = llm_with_rich_content.invoke("hello", stream=True)
+    assert response_v1.content_blocks == [
+        {"type": "reasoning", "reasoning": "<reasoning>", "index": 0},
+        {"type": "text", "text": "<some text>", "index": 1},
+    ]
+
     # v1 from env var
     monkeypatch.setenv("LC_OUTPUT_VERSION", "v1")
     llm = GenericFakeChatModel(messages=iter(messages))
@@ -969,7 +1073,7 @@ async def test_output_version_astream(monkeypatch: Any) -> None:
 
     # v1
     llm = GenericFakeChatModel(messages=iter(messages), output_version="v1")
-    full_v1: BaseMessageChunk | None = None
+    full_v1: AIMessageChunk | None = None
     async for chunk in llm.astream("hello"):
         assert isinstance(chunk, AIMessageChunk)
         assert isinstance(chunk.content, list)
@@ -982,6 +1086,58 @@ async def test_output_version_astream(monkeypatch: Any) -> None:
     assert isinstance(full_v1, AIMessageChunk)
     assert full_v1.response_metadata["output_version"] == "v1"
 
+    assert full_v1.content == [{"type": "text", "text": "foo bar", "index": 0}]
+
+    # Test text blocks
+    llm_with_rich_content = _AnotherFakeChatModel(
+        responses=iter([]),
+        chunks=iter(
+            [
+                AIMessageChunk(content="foo "),
+                AIMessageChunk(content="bar"),
+            ]
+        ),
+        output_version="v1",
+    )
+    full_v1 = None
+    async for chunk in llm_with_rich_content.astream("hello"):
+        full_v1 = chunk if full_v1 is None else full_v1 + chunk
+    assert isinstance(full_v1, AIMessageChunk)
+    assert full_v1.content_blocks == [{"type": "text", "text": "foo bar", "index": 0}]
+
+    # Test content blocks of different types
+    chunks = [
+        AIMessageChunk(content="", additional_kwargs={"reasoning_content": "<rea"}),
+        AIMessageChunk(content="", additional_kwargs={"reasoning_content": "soning>"}),
+        AIMessageChunk(content="<some "),
+        AIMessageChunk(content="text>"),
+    ]
+    llm_with_rich_content = _AnotherFakeChatModel(
+        responses=iter([]),
+        chunks=iter(chunks),
+        output_version="v1",
+    )
+    full_v1 = None
+    async for chunk in llm_with_rich_content.astream("hello"):
+        full_v1 = chunk if full_v1 is None else full_v1 + chunk
+    assert isinstance(full_v1, AIMessageChunk)
+    assert full_v1.content_blocks == [
+        {"type": "reasoning", "reasoning": "<reasoning>", "index": 0},
+        {"type": "text", "text": "<some text>", "index": 1},
+    ]
+
+    # Test invoke with stream=True
+    llm_with_rich_content = _AnotherFakeChatModel(
+        responses=iter([]),
+        chunks=iter(chunks),
+        output_version="v1",
+    )
+    response_v1 = await llm_with_rich_content.ainvoke("hello", stream=True)
+    assert response_v1.content_blocks == [
+        {"type": "reasoning", "reasoning": "<reasoning>", "index": 0},
+        {"type": "text", "text": "<some text>", "index": 1},
+    ]
+
     # v1 from env var
     monkeypatch.setenv("LC_OUTPUT_VERSION", "v1")
     llm = GenericFakeChatModel(messages=iter(messages))
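
The "index" key is what lets chunk addition reassemble split blocks. A quick sketch
(assuming AIMessageChunk addition coalesces list-content blocks that share the same
"index", which is the behavior the tests above rely on):

from langchain_core.messages import AIMessageChunk

a = AIMessageChunk(content=[{"type": "text", "text": "foo ", "index": 0}])
b = AIMessageChunk(content=[{"type": "text", "text": "bar", "index": 0}])
merged = a + b
# Expected: merged.content == [{"type": "text", "text": "foo bar", "index": 0}]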
