
Commit 1e2cc8d

MatejKosec and claude committed
fix: address CodeRabbit v2 review findings
- Add stream_handle.arm() to streaming responses path for client disconnect detection (matches chat_completions handler pattern)
- Add check_for_backend_error() to non-streaming responses path to detect backend errors before committing to HTTP 200
- Fix error messages referencing "chat completions" → "responses"
- Populate output items in response.completed streaming event (was sending empty output array)
- Remove dead current_fc_index field from ResponseStreamConverter
- Remove duplicate ListInputItemsOrder enum (reuse ListOrder)
- Remove redundant with_model override in ResponsesStreamPayload
- Add tracing::debug for skipped unsupported input item types

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
Signed-off-by: Matej Kosec <mkosec@nvidia.com>
1 parent 916fd40 commit 1e2cc8d

5 files changed: +54 -32 lines changed

lib/async-openai/src/types/responses/api.rs

Lines changed: 1 addition & 11 deletions
@@ -30,16 +30,6 @@ pub struct ListConversationItemsQuery {
     pub include: Option<Vec<IncludeParam>>,
 }
 
-/// Sort order for listing input items.
-#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, ToSchema)]
-#[serde(rename_all = "lowercase")]
-pub enum ListInputItemsOrder {
-    /// Ascending order
-    Asc,
-    /// Descending order
-    Desc,
-}
-
 /// Query parameters for getting a response.
 #[derive(Debug, Serialize, Deserialize, Default, Clone, Builder, PartialEq, ToSchema)]
 #[builder(name = "GetResponseQueryArgs")]
@@ -75,7 +65,7 @@ pub struct ListInputItemsQuery {
     pub limit: Option<u32>,
     /// The order to return the input items in. Default is `desc`.
     #[serde(skip_serializing_if = "Option::is_none")]
-    pub order: Option<ListInputItemsOrder>,
+    pub order: Option<ListOrder>,
     /// An item ID to list items after, used in pagination.
     #[serde(skip_serializing_if = "Option::is_none")]
     pub after: Option<String>,
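The dedicated ListInputItemsOrder enum is gone; the query now reuses the crate's existing ListOrder. A minimal sketch of the resulting serialization behavior, using simplified stand-in types rather than the crate's actual definitions (which also carry Builder/ToSchema derives):

```rust
use serde::{Deserialize, Serialize};

// Simplified stand-in for the shared enum that now backs the query
// (assumed to mirror the crate's ListOrder, which the diff reuses in
// place of the deleted ListInputItemsOrder).
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "lowercase")]
enum ListOrder {
    Asc,
    Desc,
}

#[derive(Debug, Serialize)]
struct ListInputItemsQuery {
    #[serde(skip_serializing_if = "Option::is_none")]
    limit: Option<u32>,
    #[serde(skip_serializing_if = "Option::is_none")]
    order: Option<ListOrder>,
}

fn main() {
    let query = ListInputItemsQuery {
        limit: Some(20),
        order: Some(ListOrder::Desc),
    };
    // Serializes the same way the deleted enum did, so the wire format
    // is unchanged: {"limit":20,"order":"desc"}
    println!("{}", serde_json::to_string(&query).unwrap());
}
```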

lib/llm/src/http/service/openai.rs

Lines changed: 20 additions & 5 deletions
@@ -1164,7 +1164,7 @@ async fn handler_responses(
         .await
         .map_err(|e| {
             ErrorMessage::internal_server_error(&format!(
-                "Failed to await chat completions task: {:?}",
+                "Failed to await responses task: {:?}",
                 e,
             ))
         })?;
@@ -1181,7 +1181,7 @@ async fn responses(
     state: Arc<service_v2::State>,
     template: Option<RequestTemplate>,
     mut request: Context<NvCreateResponse>,
-    stream_handle: ConnectionHandle,
+    mut stream_handle: ConnectionHandle,
 ) -> Result<Response, ErrorResponse> {
     // return a 503 if the service is not ready
     check_ready(&state)?;
@@ -1267,6 +1267,11 @@
         .create_inflight_guard(&model, Endpoint::Responses, streaming);
 
     if streaming {
+        // For streaming responses, we return HTTP 200 immediately without checking for errors.
+        // Once HTTP 200 OK is sent, we cannot change the status code, so any backend errors
+        // must be delivered as SSE events in the stream. This is standard SSE behavior.
+        stream_handle.arm(); // allows the system to detect client disconnects and cancel the LLM generation
+
         // Streaming path: convert chat completion stream chunks to Responses API SSE events.
         // The engine yields Annotated<NvCreateChatCompletionStreamResponse>. We extract the
         // inner stream response data and convert it to Responses API events.
@@ -1328,8 +1333,18 @@
         Ok(sse_stream.into_response())
     } else {
         // Non-streaming path: aggregate stream into single response
+
+        // Check first event for backend errors before aggregating (non-streaming only)
+        let stream_with_check =
+            check_for_backend_error(engine_stream)
+                .await
+                .map_err(|error_response| {
+                    tracing::error!(request_id, "Backend error detected: {:?}", error_response);
+                    error_response
+                })?;
+
         let mut http_queue_guard = Some(http_queue_guard);
-        let stream = engine_stream.inspect(move |response| {
+        let stream = stream_with_check.inspect(move |response| {
             process_response_and_observe_metrics(
                 response,
                 &mut response_collector,
@@ -1343,11 +1358,11 @@
             .map_err(|e| {
                 tracing::error!(
                     request_id,
-                    "Failed to fold chat completions stream for: {:?}",
+                    "Failed to fold responses stream: {:?}",
                     e
                 );
                 ErrorMessage::internal_server_error(&format!(
-                    "Failed to fold chat completions stream: {}",
+                    "Failed to fold responses stream: {}",
                     e
                 ))
             })?;
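Two distinct fixes land here. The streaming path arms the connection handle before the 200 is committed, since SSE cannot change the status code once headers are sent; the non-streaming path can still fail fast, so it peeks at the first engine event before aggregating. The crate's check_for_backend_error is defined elsewhere; below is a minimal sketch of the peek-then-rechain pattern it presumably follows, with a hypothetical Event type standing in for Annotated<NvCreateChatCompletionStreamResponse>:

```rust
use futures::stream::{self, Stream, StreamExt};

// Hypothetical stand-in for the handler's annotated engine events.
#[derive(Debug)]
enum Event {
    Data(String),
    Error(String),
}

// Await the first event before committing to an HTTP status. An immediate
// backend error becomes an Err (mappable to a non-200 response); otherwise
// the peeked event is pushed back onto the front and the stream continues
// unchanged.
async fn check_for_backend_error(
    mut events: impl Stream<Item = Event> + Unpin,
) -> Result<impl Stream<Item = Event>, String> {
    match events.next().await {
        Some(Event::Error(msg)) => Err(msg),
        Some(first) => Ok(stream::iter(vec![first]).chain(events)),
        None => Ok(stream::iter(vec![]).chain(events)),
    }
}

fn main() {
    futures::executor::block_on(async {
        let ok = stream::iter(vec![Event::Data("a".into()), Event::Data("b".into())]);
        let checked = check_for_backend_error(ok).await.expect("no backend error");
        assert_eq!(checked.collect::<Vec<_>>().await.len(), 2); // peeked event not lost

        let bad = stream::iter(vec![Event::Error("boom".into())]);
        assert!(check_for_backend_error(bad).await.is_err());
    });
}
```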

lib/llm/src/protocols/openai/responses/mod.rs

Lines changed: 2 additions & 2 deletions
@@ -319,8 +319,8 @@ fn convert_input_items_to_messages(
                 },
             ));
         }
-        _ => {
-            // Skip other item types (file search, computer call, etc.)
+        other => {
+            tracing::debug!("Skipping unsupported input item type during conversion: {:?}", std::mem::discriminant(other));
         }
     },
     InputItem::EasyMessage(easy) => {
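Binding `other` instead of `_` keeps the skipped value available so the debug log can at least identify the variant. std::mem::discriminant is a good fit because it requires no Debug impl on the variant payloads; note that its own Debug output is opaque (it distinguishes variants by comparison, not by name). A small self-contained illustration of those semantics, using a hypothetical enum in place of the crate's input item types:

```rust
use std::mem;

// Hypothetical stand-in for the crate's input item variants.
enum Item {
    Text(String),
    FileSearch { query: String },
}

fn main() {
    let a = Item::Text("hi".into());
    let b = Item::Text("bye".into());
    let c = Item::FileSearch { query: "docs".into() };

    // Discriminants compare equal exactly when the variants match,
    // regardless of payload, and need no Debug impl on the payload.
    assert_eq!(mem::discriminant(&a), mem::discriminant(&b));
    assert_ne!(mem::discriminant(&a), mem::discriminant(&c));

    // Printable with {:?} even though Item itself derives nothing;
    // the output is opaque rather than the variant name.
    println!("{:?}", mem::discriminant(&c));
}
```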

lib/llm/src/protocols/openai/responses/stream_converter.rs

Lines changed: 31 additions & 8 deletions
@@ -41,7 +41,6 @@ pub struct ResponseStreamConverter {
     accumulated_text: String,
     // Function call tracking
     function_call_items: Vec<FunctionCallState>,
-    current_fc_index: Option<usize>,
     // Output index counter
     next_output_index: u32,
 }
@@ -72,7 +71,6 @@ impl ResponseStreamConverter {
             message_output_index: 0,
             accumulated_text: String::new(),
             function_call_items: Vec::new(),
-            current_fc_index: None,
             next_output_index: 0,
         }
     }
@@ -83,7 +81,7 @@
         seq
     }
 
-    fn make_response(&self, status: Status) -> Response {
+    fn make_response(&self, status: Status, output: Vec<OutputItem>) -> Response {
         let completed_at = if status == Status::Completed {
             Some(
                 SystemTime::now()
@@ -101,7 +99,7 @@
             completed_at,
             status,
             model: self.model.clone(),
-            output: vec![],
+            output,
             // Spec-required defaults
             background: Some(false),
             frequency_penalty: Some(0.0),
@@ -144,13 +142,13 @@
 
         let created = ResponseStreamEvent::ResponseCreated(ResponseCreatedEvent {
             sequence_number: self.next_seq(),
-            response: self.make_response(Status::InProgress),
+            response: self.make_response(Status::InProgress, vec![]),
         });
         events.push(make_sse_event(&created));
 
         let in_progress = ResponseStreamEvent::ResponseInProgress(ResponseInProgressEvent {
             sequence_number: self.next_seq(),
-            response: self.make_response(Status::InProgress),
+            response: self.make_response(Status::InProgress, vec![]),
         });
         events.push(make_sse_event(&in_progress));
 
@@ -301,7 +299,6 @@
                 }
             }
 
-            self.current_fc_index = Some(tc_index);
         }
     }
 }
@@ -399,10 +396,36 @@
             events.push(make_sse_event(&item_done));
         }
 
+        // Build the final output vector from accumulated state
+        let mut output = Vec::new();
+        if self.message_started {
+            output.push(OutputItem::Message(OutputMessage {
+                id: self.message_item_id.clone(),
+                content: vec![OutputMessageContent::OutputText(OutputTextContent {
+                    text: self.accumulated_text.clone(),
+                    annotations: vec![],
+                    logprobs: Some(vec![]),
+                })],
+                role: AssistantRole::Assistant,
+                status: OutputStatus::Completed,
+            }));
+        }
+        for fc in &self.function_call_items {
+            if fc.started {
+                output.push(OutputItem::FunctionCall(FunctionToolCall {
+                    id: Some(fc.item_id.clone()),
+                    call_id: fc.call_id.clone(),
+                    name: fc.name.clone(),
+                    arguments: fc.accumulated_args.clone(),
+                    status: Some(OutputStatus::Completed),
+                }));
+            }
+        }
+
         // Emit response.completed
         let completed = ResponseStreamEvent::ResponseCompleted(ResponseCompletedEvent {
             sequence_number: self.next_seq(),
-            response: self.make_response(Status::Completed),
+            response: self.make_response(Status::Completed, output),
        });
         events.push(make_sse_event(&completed));
 
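Previously response.completed carried output: [] even after streaming a full message; the converter now folds its accumulated state into the terminal event. A stripped-down sketch of that accumulation step, with hypothetical simplified types in place of the crate's OutputItem/OutputMessage structures:

```rust
// Hypothetical simplified output items; the real converter builds
// OutputItem::Message and OutputItem::FunctionCall values.
#[derive(Debug)]
enum OutputItem {
    Message { id: String, text: String },
    FunctionCall { id: String, name: String, arguments: String },
}

struct FunctionCallState {
    started: bool,
    item_id: String,
    name: String,
    accumulated_args: String,
}

struct Converter {
    message_started: bool,
    message_item_id: String,
    accumulated_text: String,
    function_call_items: Vec<FunctionCallState>,
}

impl Converter {
    // Mirror of the new finish-step logic: emit the message item only if
    // one was started, then every function call that actually began.
    fn final_output(&self) -> Vec<OutputItem> {
        let mut output = Vec::new();
        if self.message_started {
            output.push(OutputItem::Message {
                id: self.message_item_id.clone(),
                text: self.accumulated_text.clone(),
            });
        }
        for fc in &self.function_call_items {
            if fc.started {
                output.push(OutputItem::FunctionCall {
                    id: fc.item_id.clone(),
                    name: fc.name.clone(),
                    arguments: fc.accumulated_args.clone(),
                });
            }
        }
        output
    }
}

fn main() {
    let conv = Converter {
        message_started: true,
        message_item_id: "msg_1".into(),
        accumulated_text: "Hello!".into(),
        function_call_items: vec![FunctionCallState {
            started: true,
            item_id: "fc_1".into(),
            name: "get_weather".into(),
            accumulated_args: r#"{"city":"SF"}"#.into(),
        }],
    };
    // The terminal response.completed event now carries both accumulated
    // items instead of an empty array.
    assert_eq!(conv.final_output().len(), 2);
    println!("{:?}", conv.final_output());
}
```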

tests/utils/payloads.py

Lines changed: 0 additions & 6 deletions
@@ -547,12 +547,6 @@ class ResponsesStreamPayload(BasePayload):
     endpoint: str = "/v1/responses"
     http_stream: bool = True
 
-    def with_model(self, model):
-        p = deepcopy(self)
-        if "model" not in p.body:
-            p.body = {**p.body, "model": model}
-        return p
-
     @staticmethod
     def extract_content(response):
         """Parse SSE stream and validate event structure."""
