fix: Bedrock OpenAI models response parsing (reasoning before text)

len-foss · len-foss · commit aea936567658 · 2025-10-24T15:33:11.000+02:00
diff --git a/instructor/processing/function_calls.py b/instructor/processing/function_calls.py
@@ -402,8 +402,12 @@ def parse_bedrock_json(
         strict: Optional[bool] = None,
     ) -> BaseModel:
         if isinstance(completion, dict):
-            text = completion.get("output").get("message").get("content")[0].get("text")
-
+            # OpenAI models may put a 'reasoningText' block before 'text'; use the first block that has 'text'
+            content = completion["output"]["message"]["content"]
+            text_content = next((c for c in content if "text" in c), None)
+            if not text_content:
+                raise ValueError("Unexpected format. No text content found.")
+            text = text_content["text"]
             match = re.search(r"```?json(.*?)```?", text, re.DOTALL)
             if match:
                 text = match.group(1).strip()
diff --git a/tests/test_json_extraction.py b/tests/test_json_extraction.py
@@ -9,6 +9,7 @@
 from instructor.processing.function_calls import (
     _extract_text_content,
     _validate_model_from_json,
+    OpenAISchema,
 )
 from pydantic import BaseModel
 
@@ -275,3 +276,109 @@ def test_validate_model_json_error(self):
 
         # Pydantic directly raises validation errors now, not our custom message
         assert "Invalid JSON" in str(excinfo.value)
+
+
+class PersonSchema(OpenAISchema):
+    """Test model that inherits from OpenAISchema."""
+
+    name: str
+    age: int
+    skills: list[str] = []
+
+
+class TestBedrockJSONParsing:
+    """Test the parse_bedrock_json functionality."""
+
+    def test_parse_bedrock_json_simple(self):
+        """Test parsing Bedrock JSON with simple text content."""
+        completion = {
+            "output": {
+                "message": {
+                    "content": [{"text": '{"name": "John", "age": 30, "skills": []}'}]
+                }
+            }
+        }
+
+        result = PersonSchema.parse_bedrock_json(completion)
+        assert result.name == "John"
+        assert result.age == 30
+        assert result.skills == []
+
+    def test_parse_bedrock_json_with_reasoning_content(self):
+        """Test parsing Bedrock JSON when reasoningText comes before text content.
+
+        This tests the fix for reasoning models where content array may have
+        reasoningText as first element instead of text.
+        """
+        completion = {
+            "output": {
+                "message": {
+                    "content": [
+                        {"reasoningText": "Thinking about the response..."},
+                        {"text": '{"name": "Alice", "age": 25, "skills": ["python"]}'},
+                    ]
+                }
+            }
+        }
+
+        result = PersonSchema.parse_bedrock_json(completion)
+        assert result.name == "Alice"
+        assert result.age == 25
+        assert result.skills == ["python"]
+
+    def test_parse_bedrock_json_with_codeblock(self):
+        """Test parsing Bedrock JSON when response is wrapped in markdown codeblock."""
+        completion = {
+            "output": {
+                "message": {
+                    "content": [
+                        {
+                            "text": '```json\n{"name": "Bob", "age": 40, "skills": ["go", "rust"]}\n```'
+                        }
+                    ]
+                }
+            }
+        }
+
+        result = PersonSchema.parse_bedrock_json(completion)
+        assert result.name == "Bob"
+        assert result.age == 40
+        assert result.skills == ["go", "rust"]
+
+    def test_parse_bedrock_json_no_text_content(self):
+        """Test parsing Bedrock JSON when no text content is found."""
+        completion = {
+            "output": {
+                "message": {
+                    "content": [
+                        {"reasoningText": "Only reasoning, no text response"},
+                        {"otherContent": "Some other type"},
+                    ]
+                }
+            }
+        }
+
+        with pytest.raises(ValueError) as excinfo:
+            PersonSchema.parse_bedrock_json(completion)
+
+        assert "No text content found" in str(excinfo.value)
+
+    def test_parse_bedrock_json_multiple_text_contents(self):
+        """Test parsing Bedrock JSON picks the first text content when multiple exist."""
+        completion = {
+            "output": {
+                "message": {
+                    "content": [
+                        {"reasoningText": "Thinking..."},
+                        {"text": '{"name": "First", "age": 30, "skills": ["python"]}'},
+                        {"text": '{"name": "Second", "age": 40, "skills": ["java"]}'},
+                    ]
+                }
+            }
+        }
+
+        result = PersonSchema.parse_bedrock_json(completion)
+        # Should pick the first text content
+        assert result.name == "First"
+        assert result.age == 30
+        assert result.skills == ["python"]