Skip to content

Commit 7fb50d5

Browse files
authored
fix(aws): convert string tool_use input to dict in _lc_content_to_bedrock (#880)
Fixes #827. When streaming tool-use responses, Bedrock's Converse API sends tool input as incremental JSON string fragments across `contentBlockDelta` events. LangChain accumulates these into a JSON string on `content[].tool_use.input`. If this message is sent back to Bedrock, `_lc_content_to_bedrock` passes the string directly to the Bedrock API, which expects a dict, causing a `ValidationException`. PR #843 attempted to fix this by parsing the input inside `_bedrock_to_lc`, but this broke streaming: `tool_call_chunks[].args` must be a string, and parsing it to a dict at that level violated this requirement, resulting in Pydantic validation failures and chunk-merging type mismatches. This PR instead parses string input to a dict in `_lc_content_to_bedrock`, leaving the initial Bedrock->LC streaming accumulation and chunk merging untouched.
1 parent 1b25464 commit 7fb50d5

File tree

3 files changed

+254
-2
lines changed

3 files changed

+254
-2
lines changed

libs/aws/langchain_aws/chat_models/bedrock_converse.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@
5858
convert_to_openai_function,
5959
convert_to_openai_tool,
6060
)
61+
from langchain_core.utils.json import parse_partial_json
6162
from langchain_core.utils.pydantic import TypeBaseModel, is_basemodel_subclass
6263
from langchain_core.utils.utils import _build_model_kwargs
6364
from pydantic import BaseModel, ConfigDict, Field, SecretStr, model_validator
@@ -2020,22 +2021,28 @@ def _lc_content_to_bedrock(
20202021
# Assume block in bedrock document format
20212022
bedrock_content.append({"document": block["document"]})
20222023
elif block["type"] == "tool_use":
2024+
tool_input = block["input"]
2025+
if isinstance(tool_input, str):
2026+
tool_input = parse_partial_json(tool_input) if tool_input else {}
20232027
bedrock_content.append(
20242028
{
20252029
"toolUse": {
20262030
"toolUseId": block["id"],
2027-
"input": block["input"],
2031+
"input": tool_input,
20282032
"name": block["name"],
20292033
}
20302034
}
20312035
)
20322036
elif block["type"] == "server_tool_use":
20332037
# System tools use toolUse format (same as regular tools)
2038+
tool_input = block["input"]
2039+
if isinstance(tool_input, str):
2040+
tool_input = parse_partial_json(tool_input) if tool_input else {}
20342041
bedrock_content.append(
20352042
{
20362043
"toolUse": {
20372044
"toolUseId": block["id"],
2038-
"input": block["input"],
2045+
"input": tool_input,
20392046
"name": block["name"],
20402047
}
20412048
}

libs/aws/tests/integration_tests/chat_models/test_bedrock_converse.py

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -587,6 +587,63 @@ def get_weather(location: str) -> str:
587587
assert isinstance(response, AIMessage)
588588

589589

590+
def test_streaming_tool_use_round_trip() -> None:
591+
"""Test that streaming tool call messages can be sent back to Bedrock.
592+
593+
Regression test for https://github.com/langchain-ai/langchain-aws/issues/827.
594+
After streaming, content[].tool_use.input is a JSON string instead of a
595+
dict. When a message is reconstructed from content alone (e.g., loaded
596+
from a checkpoint without tool_calls), _lc_content_to_bedrock must parse
597+
string input to a dict to avoid Bedrock ValidationException.
598+
"""
599+
600+
@tool
601+
def get_weather(city: str) -> str:
602+
"""Get the current weather for a city."""
603+
return "It's sunny and 72F."
604+
605+
llm = ChatBedrockConverse(
606+
model="us.anthropic.claude-sonnet-4-5-20250929-v1:0",
607+
)
608+
llm_with_tools = llm.bind_tools([get_weather], tool_choice="any")
609+
610+
input_message = HumanMessage("What is the weather in Paris?")
611+
612+
full: Optional[BaseMessageChunk] = None
613+
for chunk in llm_with_tools.stream([input_message]):
614+
assert isinstance(chunk, AIMessageChunk)
615+
full = chunk if full is None else full + chunk
616+
assert isinstance(full, AIMessageChunk)
617+
618+
for tc_chunk in full.tool_call_chunks:
619+
assert tc_chunk["args"] is None or isinstance(tc_chunk["args"], str)
620+
621+
assert len(full.tool_calls) == 1
622+
tool_call = full.tool_calls[0]
623+
assert tool_call["name"] == "get_weather"
624+
assert isinstance(tool_call["args"], dict)
625+
assert isinstance(full.content, list)
626+
tool_block = next(
627+
b for b in full.content if isinstance(b, dict) and b.get("type") == "tool_use"
628+
)
629+
assert isinstance(tool_block["input"], str), (
630+
"After streaming accumulation, content[].tool_use.input should be a "
631+
"string. If this assertion fails, the streaming behavior has changed "
632+
"and this test may need updating."
633+
)
634+
635+
restored_msg = AIMessage(content=full.content)
636+
assert restored_msg.tool_calls == []
637+
638+
tool_result = ToolMessage(
639+
content=get_weather.invoke(tool_call).content,
640+
tool_call_id=tool_call["id"],
641+
)
642+
643+
response = llm_with_tools.invoke([input_message, restored_msg, tool_result])
644+
assert isinstance(response, AIMessage)
645+
646+
590647
@pytest.mark.default_cassette("test_thinking.yaml.gz")
591648
@pytest.mark.vcr
592649
@pytest.mark.parametrize("output_version", ["v0", "v1"])

libs/aws/tests/unit_tests/chat_models/test_bedrock_converse.py

Lines changed: 188 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
from langchain_core.language_models import BaseChatModel
1010
from langchain_core.messages import (
1111
AIMessage,
12+
AIMessageChunk,
1213
BaseMessage,
1314
HumanMessage,
1415
SystemMessage,
@@ -30,6 +31,7 @@
3031
_has_tool_use_or_result_blocks,
3132
_lc_content_to_bedrock,
3233
_messages_to_bedrock,
34+
_parse_stream_event,
3335
_snake_to_camel,
3436
_snake_to_camel_keys,
3537
)
@@ -3528,3 +3530,189 @@ def test_ls_invocation_params_prefers_explicit_region_over_inferred(
35283530
invocation_params = ls_params["ls_invocation_params"] # type: ignore[typeddict-item]
35293531
# Explicit region_name should be used, not the one from client
35303532
assert invocation_params["region_name"] == "us-west-2"
3533+
3534+
3535+
def test__lc_content_to_bedrock_tool_use_dict_input_unchanged() -> None:
3536+
"""Dict input should pass through unchanged."""
3537+
content: List[Union[str, Dict[str, Any]]] = [
3538+
{
3539+
"type": "tool_use",
3540+
"id": "tool_1",
3541+
"name": "get_weather",
3542+
"input": {"city": "Paris"},
3543+
}
3544+
]
3545+
result = _lc_content_to_bedrock(content)
3546+
assert result == [
3547+
{
3548+
"toolUse": {
3549+
"toolUseId": "tool_1",
3550+
"name": "get_weather",
3551+
"input": {"city": "Paris"},
3552+
}
3553+
}
3554+
]
3555+
3556+
3557+
def test__lc_content_to_bedrock_tool_use_string_input_parsed_to_dict() -> None:
3558+
"""String input (from streaming accumulation) should be parsed to dict."""
3559+
content: List[Union[str, Dict[str, Any]]] = [
3560+
{
3561+
"type": "tool_use",
3562+
"id": "tool_1",
3563+
"name": "get_weather",
3564+
"input": '{"city": "Paris"}',
3565+
}
3566+
]
3567+
result = _lc_content_to_bedrock(content)
3568+
assert result == [
3569+
{
3570+
"toolUse": {
3571+
"toolUseId": "tool_1",
3572+
"name": "get_weather",
3573+
"input": {"city": "Paris"},
3574+
}
3575+
}
3576+
]
3577+
3578+
3579+
def test__lc_content_to_bedrock_tool_use_empty_string_input() -> None:
3580+
"""Empty string input should become empty dict."""
3581+
content: List[Union[str, Dict[str, Any]]] = [
3582+
{
3583+
"type": "tool_use",
3584+
"id": "tool_1",
3585+
"name": "no_args_tool",
3586+
"input": "",
3587+
}
3588+
]
3589+
result = _lc_content_to_bedrock(content)
3590+
assert result == [
3591+
{
3592+
"toolUse": {
3593+
"toolUseId": "tool_1",
3594+
"name": "no_args_tool",
3595+
"input": {},
3596+
}
3597+
}
3598+
]
3599+
3600+
3601+
def test__lc_content_to_bedrock_server_tool_use_string_input_parsed() -> None:
3602+
"""String input on server_tool_use should also be parsed to dict."""
3603+
content: List[Union[str, Dict[str, Any]]] = [
3604+
{
3605+
"type": "server_tool_use",
3606+
"id": "tool_1",
3607+
"name": "grounding",
3608+
"input": '{"query": "latest news"}',
3609+
}
3610+
]
3611+
result = _lc_content_to_bedrock(content)
3612+
assert result == [
3613+
{
3614+
"toolUse": {
3615+
"toolUseId": "tool_1",
3616+
"name": "grounding",
3617+
"input": {"query": "latest news"},
3618+
}
3619+
}
3620+
]
3621+
3622+
3623+
def test_content_block_start_tool_call_chunk_args_type() -> None:
3624+
"""contentBlockStart should produce tool_call_chunk with string/None args."""
3625+
event = {
3626+
"contentBlockStart": {
3627+
"contentBlockIndex": 0,
3628+
"start": {
3629+
"toolUse": {
3630+
"toolUseId": "tool_1",
3631+
"name": "get_weather",
3632+
}
3633+
},
3634+
}
3635+
}
3636+
chunk = _parse_stream_event(event)
3637+
assert isinstance(chunk, AIMessageChunk)
3638+
assert len(chunk.tool_call_chunks) == 1
3639+
args = chunk.tool_call_chunks[0]["args"]
3640+
assert args is None or isinstance(args, str)
3641+
3642+
3643+
def test_content_block_delta_tool_call_chunk_args_type() -> None:
3644+
"""contentBlockDelta should produce tool_call_chunk with string args."""
3645+
event = {
3646+
"contentBlockDelta": {
3647+
"contentBlockIndex": 0,
3648+
"delta": {
3649+
"toolUse": {
3650+
"input": '{"city": "Paris"}',
3651+
}
3652+
},
3653+
}
3654+
}
3655+
chunk = _parse_stream_event(event)
3656+
assert isinstance(chunk, AIMessageChunk)
3657+
assert len(chunk.tool_call_chunks) == 1
3658+
args = chunk.tool_call_chunks[0]["args"]
3659+
assert isinstance(args, str)
3660+
assert args == '{"city": "Paris"}'
3661+
3662+
3663+
def test_streaming_tool_use_round_trip() -> None:
3664+
"""Simulate streaming tool call, accumulate chunks, convert back to Bedrock."""
3665+
events = [
3666+
{
3667+
"contentBlockStart": {
3668+
"contentBlockIndex": 0,
3669+
"start": {
3670+
"toolUse": {
3671+
"toolUseId": "tool_abc",
3672+
"name": "get_weather",
3673+
}
3674+
},
3675+
}
3676+
},
3677+
{
3678+
"contentBlockDelta": {
3679+
"contentBlockIndex": 0,
3680+
"delta": {
3681+
"toolUse": {
3682+
"input": '{"city":',
3683+
}
3684+
},
3685+
}
3686+
},
3687+
{
3688+
"contentBlockDelta": {
3689+
"contentBlockIndex": 0,
3690+
"delta": {
3691+
"toolUse": {
3692+
"input": ' "Paris"}',
3693+
}
3694+
},
3695+
}
3696+
},
3697+
]
3698+
3699+
full = None
3700+
for event in events:
3701+
chunk = _parse_stream_event(event)
3702+
if chunk is not None:
3703+
full = chunk if full is None else full + chunk
3704+
3705+
assert isinstance(full, AIMessageChunk)
3706+
assert isinstance(full.content, list)
3707+
tool_block = next(
3708+
b for b in full.content if isinstance(b, dict) and b.get("type") == "tool_use"
3709+
)
3710+
assert isinstance(tool_block["input"], str)
3711+
assert len(full.tool_call_chunks) > 0
3712+
3713+
bedrock_content = _lc_content_to_bedrock(
3714+
cast(List[Union[str, Dict[str, Any]]], full.content)
3715+
)
3716+
tool_use_block = bedrock_content[0]["toolUse"]
3717+
assert isinstance(tool_use_block["input"], dict)
3718+
assert tool_use_block["input"] == {"city": "Paris"}

0 commit comments

Comments
 (0)