Skip to content

Commit aea9365

Browse files
committed
fix: Bedrock OpenAI models response parsing (reasoning before text)
1 parent 0921f9c commit aea9365

File tree

2 files changed

+113
-2
lines changed

2 files changed

+113
-2
lines changed

instructor/processing/function_calls.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -402,8 +402,12 @@ def parse_bedrock_json(
402402
strict: Optional[bool] = None,
403403
) -> BaseModel:
404404
if isinstance(completion, dict):
405-
text = completion.get("output").get("message").get("content")[0].get("text")
406-
405+
# OpenAI models on Bedrock may return a 'reasoningText' block before the 'text' block, so use the first block that contains 'text'
406+
content = completion["output"]["message"]["content"]
407+
text_content = next((c for c in content if "text" in c), None)
408+
if not text_content:
409+
raise ValueError("Unexpected format. No text content found.")
410+
text = text_content["text"]
407411
match = re.search(r"```?json(.*?)```?", text, re.DOTALL)
408412
if match:
409413
text = match.group(1).strip()

tests/test_json_extraction.py

Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
from instructor.processing.function_calls import (
1010
_extract_text_content,
1111
_validate_model_from_json,
12+
OpenAISchema,
1213
)
1314
from pydantic import BaseModel
1415

@@ -275,3 +276,109 @@ def test_validate_model_json_error(self):
275276

276277
# Pydantic directly raises validation errors now, not our custom message
277278
assert "Invalid JSON" in str(excinfo.value)
279+
280+
281+
class PersonSchema(OpenAISchema):
    """Test model that inherits from OpenAISchema."""

    # NOTE(review): the docstring above is left untouched on purpose; it is
    # presumably surfaced in the generated schema description - confirm
    # before rewording it.

    # Required fields: every JSON payload in the Bedrock parsing tests
    # supplies both of them.
    name: str
    age: int

    # Declared with an empty-list default, so it may be omitted.
    skills: list[str] = []
287+
288+
289+
class TestBedrockJSONParsing:
    """Exercise ``parse_bedrock_json`` with dict-shaped Bedrock completions."""

    @staticmethod
    def _envelope(*blocks):
        """Nest the given content blocks under ``output -> message -> content``."""
        return {"output": {"message": {"content": list(blocks)}}}

    def test_parse_bedrock_json_simple(self):
        """A single text block holding plain JSON is parsed into the model."""
        payload = self._envelope(
            {"text": '{"name": "John", "age": 30, "skills": []}'},
        )

        person = PersonSchema.parse_bedrock_json(payload)

        assert (person.name, person.age, person.skills) == ("John", 30, [])

    def test_parse_bedrock_json_with_reasoning_content(self):
        """A leading ``reasoningText`` block must not hide the text block.

        The content list here starts with a ``reasoningText`` entry and only
        then carries the ``text`` entry that holds the JSON payload.
        """
        payload = self._envelope(
            {"reasoningText": "Thinking about the response..."},
            {"text": '{"name": "Alice", "age": 25, "skills": ["python"]}'},
        )

        person = PersonSchema.parse_bedrock_json(payload)

        assert (person.name, person.age, person.skills) == (
            "Alice",
            25,
            ["python"],
        )

    def test_parse_bedrock_json_with_codeblock(self):
        """JSON wrapped in a markdown ``json`` code fence is still parsed."""
        fenced = '```json\n{"name": "Bob", "age": 40, "skills": ["go", "rust"]}\n```'
        payload = self._envelope({"text": fenced})

        person = PersonSchema.parse_bedrock_json(payload)

        assert (person.name, person.age, person.skills) == (
            "Bob",
            40,
            ["go", "rust"],
        )

    def test_parse_bedrock_json_no_text_content(self):
        """A content list without any ``text`` block raises ``ValueError``."""
        payload = self._envelope(
            {"reasoningText": "Only reasoning, no text response"},
            {"otherContent": "Some other type"},
        )

        with pytest.raises(ValueError) as raised:
            PersonSchema.parse_bedrock_json(payload)

        assert "No text content found" in str(raised.value)

    def test_parse_bedrock_json_multiple_text_contents(self):
        """With several ``text`` blocks present, the first one is used."""
        payload = self._envelope(
            {"reasoningText": "Thinking..."},
            {"text": '{"name": "First", "age": 30, "skills": ["python"]}'},
            {"text": '{"name": "Second", "age": 40, "skills": ["java"]}'},
        )

        person = PersonSchema.parse_bedrock_json(payload)

        # The second text block ("Second") must be ignored.
        assert (person.name, person.age, person.skills) == (
            "First",
            30,
            ["python"],
        )

0 commit comments

Comments
 (0)