diff --git a/codex-rs/core/src/codex.rs b/codex-rs/core/src/codex.rs index 019e577401d..4c7392434cb 100644 --- a/codex-rs/core/src/codex.rs +++ b/codex-rs/core/src/codex.rs @@ -1579,24 +1579,6 @@ impl Session { } } - /// Returns the input if there was no task running to inject into - pub async fn inject_response_items( - &self, - input: Vec, - ) -> Result<(), Vec> { - let mut active = self.active_turn.lock().await; - match active.as_mut() { - Some(at) => { - let mut ts = at.turn_state.lock().await; - for item in input { - ts.push_pending_input(item); - } - Ok(()) - } - None => Err(input), - } - } - pub async fn get_pending_input(&self) -> Vec { let mut active = self.active_turn.lock().await; match active.as_mut() { @@ -2611,11 +2593,25 @@ pub(crate) async fn run_turn( break; } Err(CodexErr::InvalidImageRequest()) => { - let mut state = sess.state.lock().await; - error_or_panic( - "Invalid image detected, replacing it in the last turn to prevent poisoning", - ); - state.history.replace_last_turn_images("Invalid image"); + let tool_only = { + let mut state = sess.state.lock().await; + let tool_only = state.history.current_turn_images_tool_only(); + error_or_panic(if tool_only { + "Invalid image detected, replacing it in the last turn to prevent poisoning" + } else { + "Invalid user image detected; replacing it in the last turn to prevent poisoning" + }); + state.history.replace_last_turn_images("Invalid image"); + tool_only + }; + if tool_only { + continue; + } + let err = CodexErr::InvalidImageRequest(); + info!("Turn error: {err:#}"); + let event = EventMsg::Error(err.to_error_event(None)); + sess.send_event(&turn_context, event).await; + break; } Err(e) => { info!("Turn error: {e:#}"); diff --git a/codex-rs/core/src/context_manager/history.rs b/codex-rs/core/src/context_manager/history.rs index 3e0428c86d7..64ec587d305 100644 --- a/codex-rs/core/src/context_manager/history.rs +++ b/codex-rs/core/src/context_manager/history.rs @@ -155,6 +155,34 @@ impl ContextManager { } } + pub(crate) fn current_turn_images_tool_only(&self) -> bool { + let mut saw_tool = false; + for item in self.items.iter().rev() { + match item { + ResponseItem::Message { role, .. } if role == "assistant" => break, + ResponseItem::Message { role, content, .. } if role == "user" => { + if content + .iter() + .any(|item| matches!(item, ContentItem::InputImage { .. })) + { + return false; + } + } + ResponseItem::FunctionCallOutput { output, .. } => { + if output.content_items.as_ref().is_some_and(|items| { + items.iter().any(|item| { + matches!(item, FunctionCallOutputContentItem::InputImage { .. }) + }) + }) { + saw_tool = true; + } + } + _ => {} + } + } + saw_tool + } + /// Drop the last `num_turns` user turns from this history. /// /// "User turns" are identified as `ResponseItem::Message` entries whose role is `"user"`. diff --git a/codex-rs/core/src/tools/handlers/view_image.rs b/codex-rs/core/src/tools/handlers/view_image.rs index 87dd7207b1c..61954b9d2c6 100644 --- a/codex-rs/core/src/tools/handlers/view_image.rs +++ b/codex-rs/core/src/tools/handlers/view_image.rs @@ -12,8 +12,9 @@ use crate::tools::handlers::parse_arguments; use crate::tools::registry::ToolHandler; use crate::tools::registry::ToolKind; use codex_protocol::models::ContentItem; +use codex_protocol::models::FunctionCallOutputContentItem; use codex_protocol::models::ResponseInputItem; -use codex_protocol::models::local_image_content_items_with_label_number; +use codex_protocol::user_input::UserInput; pub struct ViewImageHandler; @@ -65,22 +66,6 @@ impl ToolHandler for ViewImageHandler { } let event_path = abs_path.clone(); - let content: Vec = - local_image_content_items_with_label_number(&abs_path, None); - let input = ResponseInputItem::Message { - role: "user".to_string(), - content, - }; - - session - .inject_response_items(vec![input]) - .await - .map_err(|_| { - FunctionCallError::RespondToModel( - "unable to attach image (no active task)".to_string(), - ) - })?; - session .send_event( turn.as_ref(), @@ -91,9 +76,48 @@ impl ToolHandler for ViewImageHandler { ) .await; + let response_input: ResponseInputItem = vec![UserInput::LocalImage { + path: abs_path.clone(), + }] + .into(); + let image_url = match response_input { + ResponseInputItem::Message { content, .. } => { + let mut image_url = None; + let mut text_fallback = None; + for item in content { + match item { + ContentItem::InputImage { image_url: url } => { + image_url = Some(url); + break; + } + ContentItem::InputText { text } => { + text_fallback.get_or_insert(text); + } + _ => {} + } + } + if let Some(url) = image_url { + url + } else if let Some(text) = text_fallback { + return Err(FunctionCallError::RespondToModel(text)); + } else { + return Err(FunctionCallError::RespondToModel( + "unexpected image input payload".to_string(), + )); + } + } + _ => { + return Err(FunctionCallError::RespondToModel( + "unexpected image input payload".to_string(), + )); + } + }; + Ok(ToolOutput::Function { content: "attached local image path".to_string(), - content_items: None, + content_items: Some(vec![FunctionCallOutputContentItem::InputImage { + image_url, + }]), success: Some(true), }) } diff --git a/codex-rs/core/tests/suite/view_image.rs b/codex-rs/core/tests/suite/view_image.rs index 034254f7c59..f8fa0c8196b 100644 --- a/codex-rs/core/tests/suite/view_image.rs +++ b/codex-rs/core/tests/suite/view_image.rs @@ -46,6 +46,27 @@ fn find_image_message(body: &Value) -> Option<&Value> { }) } +fn find_tool_output_image_url(body: &Value, call_id: &str) -> Option { + let items = body.get("input").and_then(Value::as_array)?; + let output = items + .iter() + .find(|item| { + item.get("type").and_then(Value::as_str) == Some("function_call_output") + && item.get("call_id").and_then(Value::as_str) == Some(call_id) + })? + .get("output")?; + let output_items = output.as_array()?; + output_items.iter().find_map(|span| { + if span.get("type").and_then(Value::as_str) == Some("input_image") { + span.get("image_url") + .and_then(Value::as_str) + .map(str::to_string) + } else { + None + } + }) +} + #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn user_turn_with_local_image_attaches_image() -> anyhow::Result<()> { skip_if_no_network!(Ok(())); @@ -208,41 +229,8 @@ async fn view_image_tool_attaches_local_image() -> anyhow::Result<()> { let req = mock.single_request(); let body = req.body_json(); - let output_text = req - .function_call_output_content_and_success(call_id) - .and_then(|(content, _)| content) - .expect("output text present"); - assert_eq!(output_text, "attached local image path"); - - let image_message = - find_image_message(&body).expect("pending input image message not included in request"); - let content_items = image_message - .get("content") - .and_then(Value::as_array) - .expect("image message has content array"); - assert_eq!( - content_items.len(), - 1, - "view_image should inject only the image content item (no tag/label text)" - ); - assert_eq!( - content_items[0].get("type").and_then(Value::as_str), - Some("input_image"), - "view_image should inject only an input_image content item" - ); - let image_url = image_message - .get("content") - .and_then(Value::as_array) - .and_then(|content| { - content.iter().find_map(|span| { - if span.get("type").and_then(Value::as_str) == Some("input_image") { - span.get("image_url").and_then(Value::as_str) - } else { - None - } - }) - }) - .expect("image_url present"); + let image_url = + find_tool_output_image_url(&body, call_id).expect("tool output image not included"); let (prefix, encoded) = image_url .split_once(',') @@ -332,7 +320,7 @@ async fn view_image_tool_errors_when_path_is_directory() -> anyhow::Result<()> { } #[tokio::test(flavor = "multi_thread", worker_threads = 2)] -async fn view_image_tool_placeholder_for_non_image_files() -> anyhow::Result<()> { +async fn view_image_tool_errors_for_non_image_files() -> anyhow::Result<()> { skip_if_no_network!(Ok(())); let server = start_mock_server().await; @@ -392,36 +380,19 @@ async fn view_image_tool_placeholder_for_non_image_files() -> anyhow::Result<()> "non-image file should not produce an input_image message" ); - let placeholder = request - .inputs_of_type("message") - .iter() - .find_map(|item| { - let content = item.get("content").and_then(Value::as_array)?; - content.iter().find_map(|span| { - if span.get("type").and_then(Value::as_str) == Some("input_text") { - let text = span.get("text").and_then(Value::as_str)?; - if text.contains("Codex could not read the local image at") - && text.contains("unsupported MIME type `application/json`") - { - return Some(text.to_string()); - } - } - None - }) - }) - .expect("placeholder text found"); - - assert!( - placeholder.contains(&abs_path.display().to_string()), - "placeholder should mention path: {placeholder}" - ); - let output_text = mock .single_request() .function_call_output_content_and_success(call_id) .and_then(|(content, _)| content) .expect("output text present"); - assert_eq!(output_text, "attached local image path"); + assert!( + output_text.contains("Codex could not read the local image at"), + "expected tool error for non-image file" + ); + assert!( + output_text.contains(&abs_path.display().to_string()), + "output should mention path: {output_text}" + ); Ok(()) } @@ -499,7 +470,7 @@ async fn view_image_tool_errors_when_file_missing() -> anyhow::Result<()> { #[cfg(not(debug_assertions))] #[tokio::test(flavor = "multi_thread", worker_threads = 2)] -async fn replaces_invalid_local_image_after_bad_request() -> anyhow::Result<()> { +async fn reports_invalid_local_image_after_bad_request() -> anyhow::Result<()> { skip_if_no_network!(Ok(())); let server = start_mock_server().await; @@ -556,6 +527,7 @@ async fn replaces_invalid_local_image_after_bad_request() -> anyhow::Result<()> }) .await?; + wait_for_event(&codex, |event| matches!(event, EventMsg::Error(_))).await; wait_for_event(&codex, |event| matches!(event, EventMsg::TurnComplete(_))).await; let first_body = invalid_image_mock.single_request().body_json(); @@ -564,14 +536,10 @@ async fn replaces_invalid_local_image_after_bad_request() -> anyhow::Result<()> "initial request should include the uploaded image" ); - let second_request = completion_mock.single_request(); - let second_body = second_request.body_json(); assert!( - find_image_message(&second_body).is_none(), - "second request should replace the invalid image" + completion_mock.requests().is_empty(), + "invalid user image should stop the turn before retrying" ); - let user_texts = second_request.message_input_texts("user"); - assert!(user_texts.iter().any(|text| text == "Invalid image")); Ok(()) }