Skip to content

Commit 05cdd46

Browse files
fix collecting final response with openai responses streaming (#20037)
1 parent 9c6c510 commit 05cdd46

File tree

3 files changed

+8
-6
lines changed

3 files changed

+8
-6
lines changed

llama-index-integrations/llms/llama-index-llms-openai/llama_index/llms/openai/responses.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -450,7 +450,8 @@ def stream_complete(
450450

451451
return stream_complete_fn(prompt, **kwargs)
452452

453-
def _parse_response_output(self, output: List[ResponseOutputItem]) -> ChatResponse:
453+
@staticmethod
454+
def _parse_response_output(output: List[ResponseOutputItem]) -> ChatResponse:
454455
message = ChatMessage(role=MessageRole.ASSISTANT, blocks=[])
455456
additional_kwargs = {"built_in_tool_calls": []}
456457
tool_calls = []
@@ -526,7 +527,7 @@ def _chat(self, messages: Sequence[ChatMessage], **kwargs: Any) -> ChatResponse:
526527
if self.track_previous_responses:
527528
self._previous_response_id = response.id
528529

529-
chat_response = self._parse_response_output(response.output)
530+
chat_response = OpenAIResponses._parse_response_output(response.output)
530531
chat_response.raw = response
531532
chat_response.additional_kwargs["usage"] = response.usage
532533
if hasattr(response.usage.output_tokens_details, "reasoning_tokens"):
@@ -590,7 +591,6 @@ def process_response_event(
590591
elif isinstance(event, ResponseTextDeltaEvent):
591592
# Text content is being added
592593
delta = event.delta
593-
blocks.append(TextBlock(text=delta))
594594
elif isinstance(event, ResponseImageGenCallPartialImageEvent):
595595
# Partial image
596596
if event.partial_image_b64:
@@ -653,6 +653,8 @@ def process_response_event(
653653
# Response is complete
654654
if hasattr(event, "response") and hasattr(event.response, "usage"):
655655
additional_kwargs["usage"] = event.response.usage
656+
resp = OpenAIResponses._parse_response_output(event.response.output)
657+
blocks = resp.message.blocks
656658

657659
return (
658660
blocks,
@@ -782,7 +784,7 @@ async def _achat(
782784
if self.track_previous_responses:
783785
self._previous_response_id = response.id
784786

785-
chat_response = self._parse_response_output(response.output)
787+
chat_response = OpenAIResponses._parse_response_output(response.output)
786788
chat_response.raw = response
787789
chat_response.additional_kwargs["usage"] = response.usage
788790

llama-index-integrations/llms/llama-index-llms-openai/pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ dev = [
2727

2828
[project]
2929
name = "llama-index-llms-openai"
30-
version = "0.6.2"
30+
version = "0.6.3"
3131
description = "llama-index llms openai integration"
3232
authors = [{name = "llama-index"}]
3333
requires-python = ">=3.9,<4.0"

llama-index-integrations/llms/llama-index-llms-openai/tests/test_openai_responses.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -153,7 +153,7 @@ def test_process_response_event():
153153
)
154154

155155
updated_blocks, updated_tool_calls, _, _, _, _, delta = result
156-
assert updated_blocks == [TextBlock(text="Hello")]
156+
assert updated_blocks == []
157157
assert delta == "Hello"
158158
assert updated_tool_calls == []
159159

0 commit comments

Comments (0)