Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 19 additions & 23 deletions codex-rs/core/src/codex.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1579,24 +1579,6 @@ impl Session {
}
}

/// Returns the input if there was no task running to inject into
pub async fn inject_response_items(
&self,
input: Vec<ResponseInputItem>,
) -> Result<(), Vec<ResponseInputItem>> {
let mut active = self.active_turn.lock().await;
match active.as_mut() {
Some(at) => {
let mut ts = at.turn_state.lock().await;
for item in input {
ts.push_pending_input(item);
}
Ok(())
}
None => Err(input),
}
}

pub async fn get_pending_input(&self) -> Vec<ResponseInputItem> {
let mut active = self.active_turn.lock().await;
match active.as_mut() {
Expand Down Expand Up @@ -2611,11 +2593,25 @@ pub(crate) async fn run_turn(
break;
}
Err(CodexErr::InvalidImageRequest()) => {
let mut state = sess.state.lock().await;
error_or_panic(
"Invalid image detected, replacing it in the last turn to prevent poisoning",
);
state.history.replace_last_turn_images("Invalid image");
let tool_only = {
let mut state = sess.state.lock().await;
let tool_only = state.history.current_turn_images_tool_only();
error_or_panic(if tool_only {
"Invalid image detected, replacing it in the last turn to prevent poisoning"
} else {
"Invalid user image detected; replacing it in the last turn to prevent poisoning"
});
state.history.replace_last_turn_images("Invalid image");
tool_only
};
if tool_only {
continue;
}
let err = CodexErr::InvalidImageRequest();
info!("Turn error: {err:#}");
let event = EventMsg::Error(err.to_error_event(None));
sess.send_event(&turn_context, event).await;
break;
}
Err(e) => {
info!("Turn error: {e:#}");
Expand Down
28 changes: 28 additions & 0 deletions codex-rs/core/src/context_manager/history.rs
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,34 @@ impl ContextManager {
}
}

pub(crate) fn current_turn_images_tool_only(&self) -> bool {
let mut saw_tool = false;
for item in self.items.iter().rev() {
match item {
ResponseItem::Message { role, .. } if role == "assistant" => break,
ResponseItem::Message { role, content, .. } if role == "user" => {
if content
.iter()
.any(|item| matches!(item, ContentItem::InputImage { .. }))
{
return false;
Comment on lines +164 to +168
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Treating any user image as definitive can abort tool errors

This logic returns false as soon as the current turn contains any user input_image, even if the invalid image that triggered InvalidImageRequest actually came from a tool output. Now that view_image emits an input_image as a function-call output, it’s possible to have both a valid user image and an invalid tool image in the same turn; in that case the error handler will misclassify the failure as “user‑injected,” emit an error event, and stop the turn instead of retrying with the tool image replaced. That’s a regression from the previous behavior where tool-originated invalid images would be recoverable.

Useful? React with 👍 / 👎.

}
}
ResponseItem::FunctionCallOutput { output, .. } => {
if output.content_items.as_ref().is_some_and(|items| {
items.iter().any(|item| {
matches!(item, FunctionCallOutputContentItem::InputImage { .. })
})
}) {
saw_tool = true;
}
}
_ => {}
}
}
saw_tool
}

/// Drop the last `num_turns` user turns from this history.
///
/// "User turns" are identified as `ResponseItem::Message` entries whose role is `"user"`.
Expand Down
60 changes: 42 additions & 18 deletions codex-rs/core/src/tools/handlers/view_image.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,9 @@ use crate::tools::handlers::parse_arguments;
use crate::tools::registry::ToolHandler;
use crate::tools::registry::ToolKind;
use codex_protocol::models::ContentItem;
use codex_protocol::models::FunctionCallOutputContentItem;
use codex_protocol::models::ResponseInputItem;
use codex_protocol::models::local_image_content_items_with_label_number;
use codex_protocol::user_input::UserInput;

pub struct ViewImageHandler;

Expand Down Expand Up @@ -65,22 +66,6 @@ impl ToolHandler for ViewImageHandler {
}
let event_path = abs_path.clone();

let content: Vec<ContentItem> =
local_image_content_items_with_label_number(&abs_path, None);
let input = ResponseInputItem::Message {
role: "user".to_string(),
content,
};

session
.inject_response_items(vec![input])
.await
.map_err(|_| {
FunctionCallError::RespondToModel(
"unable to attach image (no active task)".to_string(),
)
})?;

session
.send_event(
turn.as_ref(),
Expand All @@ -91,9 +76,48 @@ impl ToolHandler for ViewImageHandler {
)
.await;

let response_input: ResponseInputItem = vec![UserInput::LocalImage {
path: abs_path.clone(),
}]
.into();
let image_url = match response_input {
ResponseInputItem::Message { content, .. } => {
let mut image_url = None;
let mut text_fallback = None;
for item in content {
match item {
ContentItem::InputImage { image_url: url } => {
image_url = Some(url);
break;
}
ContentItem::InputText { text } => {
text_fallback.get_or_insert(text);
}
_ => {}
}
}
if let Some(url) = image_url {
url
} else if let Some(text) = text_fallback {
return Err(FunctionCallError::RespondToModel(text));
} else {
return Err(FunctionCallError::RespondToModel(
"unexpected image input payload".to_string(),
));
}
}
_ => {
return Err(FunctionCallError::RespondToModel(
"unexpected image input payload".to_string(),
));
}
};

Ok(ToolOutput::Function {
content: "attached local image path".to_string(),
content_items: None,
content_items: Some(vec![FunctionCallOutputContentItem::InputImage {
image_url,
}]),
success: Some(true),
})
}
Expand Down
104 changes: 36 additions & 68 deletions codex-rs/core/tests/suite/view_image.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,27 @@ fn find_image_message(body: &Value) -> Option<&Value> {
})
}

fn find_tool_output_image_url(body: &Value, call_id: &str) -> Option<String> {
let items = body.get("input").and_then(Value::as_array)?;
let output = items
.iter()
.find(|item| {
item.get("type").and_then(Value::as_str) == Some("function_call_output")
&& item.get("call_id").and_then(Value::as_str) == Some(call_id)
})?
.get("output")?;
let output_items = output.as_array()?;
output_items.iter().find_map(|span| {
if span.get("type").and_then(Value::as_str) == Some("input_image") {
span.get("image_url")
.and_then(Value::as_str)
.map(str::to_string)
} else {
None
}
})
}

#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn user_turn_with_local_image_attaches_image() -> anyhow::Result<()> {
skip_if_no_network!(Ok(()));
Expand Down Expand Up @@ -208,41 +229,8 @@ async fn view_image_tool_attaches_local_image() -> anyhow::Result<()> {

let req = mock.single_request();
let body = req.body_json();
let output_text = req
.function_call_output_content_and_success(call_id)
.and_then(|(content, _)| content)
.expect("output text present");
assert_eq!(output_text, "attached local image path");

let image_message =
find_image_message(&body).expect("pending input image message not included in request");
let content_items = image_message
.get("content")
.and_then(Value::as_array)
.expect("image message has content array");
assert_eq!(
content_items.len(),
1,
"view_image should inject only the image content item (no tag/label text)"
);
assert_eq!(
content_items[0].get("type").and_then(Value::as_str),
Some("input_image"),
"view_image should inject only an input_image content item"
);
let image_url = image_message
.get("content")
.and_then(Value::as_array)
.and_then(|content| {
content.iter().find_map(|span| {
if span.get("type").and_then(Value::as_str) == Some("input_image") {
span.get("image_url").and_then(Value::as_str)
} else {
None
}
})
})
.expect("image_url present");
let image_url =
find_tool_output_image_url(&body, call_id).expect("tool output image not included");

let (prefix, encoded) = image_url
.split_once(',')
Expand Down Expand Up @@ -332,7 +320,7 @@ async fn view_image_tool_errors_when_path_is_directory() -> anyhow::Result<()> {
}

#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn view_image_tool_placeholder_for_non_image_files() -> anyhow::Result<()> {
async fn view_image_tool_errors_for_non_image_files() -> anyhow::Result<()> {
skip_if_no_network!(Ok(()));

let server = start_mock_server().await;
Expand Down Expand Up @@ -392,36 +380,19 @@ async fn view_image_tool_placeholder_for_non_image_files() -> anyhow::Result<()>
"non-image file should not produce an input_image message"
);

let placeholder = request
.inputs_of_type("message")
.iter()
.find_map(|item| {
let content = item.get("content").and_then(Value::as_array)?;
content.iter().find_map(|span| {
if span.get("type").and_then(Value::as_str) == Some("input_text") {
let text = span.get("text").and_then(Value::as_str)?;
if text.contains("Codex could not read the local image at")
&& text.contains("unsupported MIME type `application/json`")
{
return Some(text.to_string());
}
}
None
})
})
.expect("placeholder text found");

assert!(
placeholder.contains(&abs_path.display().to_string()),
"placeholder should mention path: {placeholder}"
);

let output_text = mock
.single_request()
.function_call_output_content_and_success(call_id)
.and_then(|(content, _)| content)
.expect("output text present");
assert_eq!(output_text, "attached local image path");
assert!(
output_text.contains("Codex could not read the local image at"),
"expected tool error for non-image file"
);
assert!(
output_text.contains(&abs_path.display().to_string()),
"output should mention path: {output_text}"
);

Ok(())
}
Expand Down Expand Up @@ -499,7 +470,7 @@ async fn view_image_tool_errors_when_file_missing() -> anyhow::Result<()> {

#[cfg(not(debug_assertions))]
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn replaces_invalid_local_image_after_bad_request() -> anyhow::Result<()> {
async fn reports_invalid_local_image_after_bad_request() -> anyhow::Result<()> {
skip_if_no_network!(Ok(()));

let server = start_mock_server().await;
Expand Down Expand Up @@ -556,6 +527,7 @@ async fn replaces_invalid_local_image_after_bad_request() -> anyhow::Result<()>
})
.await?;

wait_for_event(&codex, |event| matches!(event, EventMsg::Error(_))).await;
wait_for_event(&codex, |event| matches!(event, EventMsg::TurnComplete(_))).await;

let first_body = invalid_image_mock.single_request().body_json();
Expand All @@ -564,14 +536,10 @@ async fn replaces_invalid_local_image_after_bad_request() -> anyhow::Result<()>
"initial request should include the uploaded image"
);

let second_request = completion_mock.single_request();
let second_body = second_request.body_json();
assert!(
find_image_message(&second_body).is_none(),
"second request should replace the invalid image"
completion_mock.requests().is_empty(),
"invalid user image should stop the turn before retrying"
);
let user_texts = second_request.message_input_texts("user");
assert!(user_texts.iter().any(|text| text == "Invalid image"));

Ok(())
}
Loading