diff --git a/codex-rs/protocol/src/models.rs b/codex-rs/protocol/src/models.rs index 69a682f2df4..6d3499b3741 100644 --- a/codex-rs/protocol/src/models.rs +++ b/codex-rs/protocol/src/models.rs @@ -1,5 +1,8 @@ use std::collections::HashMap; +use std::path::PathBuf; +use codex_utils_image::decode_data_url; +use codex_utils_image::load_and_resize_bytes; use codex_utils_image::load_and_resize_to_fit; use mcp_types::CallToolResult; use mcp_types::ContentBlock; @@ -203,6 +206,24 @@ fn unsupported_image_error_placeholder(path: &std::path::Path, mime: &str) -> Co } } +fn inline_image_error_placeholder(error: impl std::fmt::Display) -> ContentItem { + ContentItem::InputText { + text: format!("Codex could not read the provided image data: {error}"), + } +} + +fn invalid_inline_image_error_placeholder(error: impl std::fmt::Display) -> ContentItem { + ContentItem::InputText { + text: format!("Provided image data is invalid: {error}"), + } +} + +fn unsupported_inline_image_error_placeholder(mime: &str) -> ContentItem { + ContentItem::InputText { + text: format!("Codex cannot attach image data: unsupported image format `{mime}`."), + } +} + impl From for ResponseItem { fn from(item: ResponseInputItem) -> Self { match item { @@ -302,7 +323,37 @@ impl From> for ResponseInputItem { .into_iter() .filter_map(|c| match c { UserInput::Text { text } => Some(ContentItem::InputText { text }), - UserInput::Image { image_url } => Some(ContentItem::InputImage { image_url }), + UserInput::Image { image_url } => { + if image_url.starts_with("data:") { + let inline = match decode_data_url(&image_url) { + Ok(inline) => inline, + Err(err) => return Some(inline_image_error_placeholder(err)), + }; + if let Some(mime) = inline.mime.as_deref() + && !mime.starts_with("image/") + && mime != "application/octet-stream" + { + return Some(unsupported_inline_image_error_placeholder(mime)); + } + match load_and_resize_bytes( + inline.bytes, + PathBuf::from(""), + ) { + Ok(image) => Some(ContentItem::InputImage { + image_url: image.into_data_url(), + }), + Err(err) => { + if err.is_invalid_image() { + Some(invalid_inline_image_error_placeholder(err)) + } else { + Some(inline_image_error_placeholder(err)) + } + } + } + } else { + Some(ContentItem::InputImage { image_url }) + } + } UserInput::LocalImage { path } => match load_and_resize_to_fit(&path) { Ok(image) => Some(ContentItem::InputImage { image_url: image.into_data_url(), @@ -556,6 +607,89 @@ mod tests { use pretty_assertions::assert_eq; use tempfile::tempdir; + #[test] + fn data_url_images_are_processed_locally() { + let data_url = "".to_string(); + + let item = ResponseInputItem::from(vec![UserInput::Image { + image_url: data_url, + }]); + + let ResponseInputItem::Message { content, .. } = item else { + panic!("expected message response input"); + }; + match content.as_slice() { + [ContentItem::InputImage { image_url }] => { + assert!(image_url.starts_with("data:image/")); + } + [ContentItem::InputText { text }] => { + panic!("expected input image, got placeholder: {text}"); + } + _ => panic!("expected single input content item"), + } + } + + #[test] + fn data_url_with_generic_mime_is_processed_locally() { + let payload = "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mP8/x8AAwMB/ee9bQAAAABJRU5ErkJggg=="; + let item = ResponseInputItem::from(vec![UserInput::Image { + image_url: format!("data:application/octet-stream;base64,{}", payload), + }]); + + let ResponseInputItem::Message { content, .. } = item else { + panic!("expected message response input"); + }; + match content.as_slice() { + [ContentItem::InputImage { image_url }] => { + assert!(image_url.starts_with("data:image/")); + } + [ContentItem::InputText { text }] => { + panic!("expected input image, got placeholder: {text}"); + } + _ => panic!("expected single input content item"), + } + } + + #[test] + fn data_url_with_missing_mime_is_processed_locally() { + let payload = "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mP8/x8AAwMB/ee9bQAAAABJRU5ErkJggg=="; + let item = ResponseInputItem::from(vec![UserInput::Image { + image_url: format!("data:;base64,{}", payload), + }]); + + let ResponseInputItem::Message { content, .. } = item else { + panic!("expected message response input"); + }; + match content.as_slice() { + [ContentItem::InputImage { image_url }] => { + assert!(image_url.starts_with("data:image/")); + } + [ContentItem::InputText { text }] => { + panic!("expected input image, got placeholder: {text}"); + } + _ => panic!("expected single input content item"), + } + } + + #[test] + fn data_url_with_non_image_mime_renders_placeholder() { + let item = ResponseInputItem::from(vec![UserInput::Image { + image_url: "data:text/plain;base64,SGVsbG8=".to_string(), + }]); + + let ResponseInputItem::Message { content, .. } = item else { + panic!("expected message response input"); + }; + let [ContentItem::InputText { text }] = content.as_slice() else { + panic!("expected single input text content item"); + }; + + assert_eq!( + text, + "Codex cannot attach image data: unsupported image format `text/plain`." + ); + } + #[test] fn serializes_success_as_plain_string() -> Result<()> { let item = ResponseInputItem::FunctionCallOutput { diff --git a/codex-rs/utils/image/src/error.rs b/codex-rs/utils/image/src/error.rs index 6bd055115dd..6c125f64d77 100644 --- a/codex-rs/utils/image/src/error.rs +++ b/codex-rs/utils/image/src/error.rs @@ -23,6 +23,8 @@ pub enum ImageProcessingError { #[source] source: image::ImageError, }, + #[error("invalid data URL: {message}")] + DataUrl { message: String }, } impl ImageProcessingError { diff --git a/codex-rs/utils/image/src/lib.rs b/codex-rs/utils/image/src/lib.rs index 0950bf176ea..c7b1b4ec9d1 100644 --- a/codex-rs/utils/image/src/lib.rs +++ b/codex-rs/utils/image/src/lib.rs @@ -1,5 +1,6 @@ use std::num::NonZeroUsize; use std::path::Path; +use std::path::PathBuf; use std::sync::LazyLock; use crate::error::ImageProcessingError; @@ -30,6 +31,12 @@ pub struct EncodedImage { pub height: u32, } +#[derive(Debug, Clone)] +pub struct InlineImageData { + pub mime: Option, + pub bytes: Vec, +} + impl EncodedImage { pub fn into_data_url(self) -> String { let encoded = BASE64_STANDARD.encode(&self.bytes); @@ -45,6 +52,84 @@ pub fn load_and_resize_to_fit(path: &Path) -> Result, + path_for_error: PathBuf, +) -> Result { + process_image_bytes(bytes, path_for_error) +} + +pub fn decode_data_url(image_url: &str) -> Result { + let Some(rest) = image_url.strip_prefix("data:") else { + return Err(ImageProcessingError::DataUrl { + message: "missing data URL prefix".to_string(), + }); + }; + let Some((header, data)) = rest.split_once(',') else { + return Err(ImageProcessingError::DataUrl { + message: "missing data URL header separator".to_string(), + }); + }; + + let mut mime = None; + let mut is_base64 = false; + + let mut parts = header.split(';'); + if let Some(first) = parts.next() + && !first.is_empty() + { + mime = Some(first.to_string()); + } + for part in parts { + if part == "base64" { + is_base64 = true; + } + } + + if !is_base64 { + return Err(ImageProcessingError::DataUrl { + message: "data URL is not base64 encoded".to_string(), + }); + } + + let decoded = + BASE64_STANDARD + .decode(data.trim()) + .map_err(|err| ImageProcessingError::DataUrl { + message: format!("invalid base64 image data: {err}"), + })?; + + Ok(InlineImageData { + mime, + bytes: decoded, + }) +} + +fn read_file_bytes(path: &Path, path_for_error: &Path) -> Result, ImageProcessingError> { + match tokio::runtime::Handle::try_current() { + // If we're inside a Tokio runtime, avoid block_on (it panics on worker threads). + // Use block_in_place and do a standard blocking read safely. + Ok(_) => tokio::task::block_in_place(|| std::fs::read(path)).map_err(|source| { + ImageProcessingError::Read { + path: path_for_error.to_path_buf(), + source, + } + }), + // Outside a runtime, just read synchronously. + Err(_) => std::fs::read(path).map_err(|source| ImageProcessingError::Read { + path: path_for_error.to_path_buf(), + source, + }), + } +} + +fn process_image_bytes( + file_bytes: Vec, + path_for_error: PathBuf, +) -> Result { let key = sha1_digest(&file_bytes); IMAGE_CACHE.get_or_try_insert_with(key, move || { @@ -56,7 +141,7 @@ pub fn load_and_resize_to_fit(path: &Path) -> Result Result Result, ImageProcessingError> { - match tokio::runtime::Handle::try_current() { - // If we're inside a Tokio runtime, avoid block_on (it panics on worker threads). - // Use block_in_place and do a standard blocking read safely. - Ok(_) => tokio::task::block_in_place(|| std::fs::read(path)).map_err(|source| { - ImageProcessingError::Read { - path: path_for_error.to_path_buf(), - source, - } - }), - // Outside a runtime, just read synchronously. - Err(_) => std::fs::read(path).map_err(|source| ImageProcessingError::Read { - path: path_for_error.to_path_buf(), - source, - }), - } -} - fn encode_image( image: &DynamicImage, preferred_format: ImageFormat,