Skip to content

Commit f43aa9f

Browse files
milisp and claude committed
feat: add media attachment support to chat interface
Implement comprehensive media file support allowing users to attach images and audio files to chat messages. Add MediaSelector component for file selection, enhance codex client to handle media submissions, and extend stores to manage media attachments alongside file references.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
1 parent 409cce5 commit f43aa9f

File tree

14 files changed

+540
-76
lines changed

14 files changed

+540
-76
lines changed

src-tauri/capabilities/default.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
"dialog:default",
1313
"shell:default",
1414
"fs:default",
15-
"log:default"
15+
"log:default",
16+
"dialog:default"
1617
]
1718
}

src-tauri/src/codex_client.rs

Lines changed: 34 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ pub struct CodexClient {
3232

3333
impl CodexClient {
3434
pub async fn new(app: &AppHandle, session_id: String, config: CodexConfig) -> Result<Self> {
35-
log::debug!("Creating CodexClient for session: {}", session_id);
35+
log::debug!("Creating CodexClient for session and config: {} {:?}", session_id, config);
3636

3737
// Build codex command based on configuration
3838
let (command, args): (String, Vec<String>) =
@@ -119,18 +119,10 @@ impl CodexClient {
119119

120120
// API key will be provided via environment variable - no need to modify provider config
121121

122-
// Use model from profile if available, otherwise from config
123-
let profile = profiles.get(&config.provider)
124-
.or_else(|| profiles.get(&config.provider.to_lowercase()));
125-
126-
let model_to_use = if let Some(profile) = profile {
127-
&profile.model
128-
} else {
129-
&config.model
130-
};
131-
132-
if !model_to_use.is_empty() {
133-
cmd.arg("-c").arg(format!("model={}", model_to_use));
122+
// Always use model from config (user selection), not from profile
123+
// This ensures user's model choice in the GUI takes precedence
124+
if !config.model.is_empty() {
125+
cmd.arg("-c").arg(format!("model={}", config.model));
134126
}
135127
} else {
136128
// Fallback to original logic for custom providers
@@ -259,9 +251,9 @@ impl CodexClient {
259251
log::debug!("Starting stdout reader for session: {}", session_id_clone);
260252

261253
while let Ok(Some(line)) = lines.next_line().await {
262-
// log::debug!("Received line from codex: {}", line);
254+
log::debug!("📥 Received line from codex: {}", line);
263255
if let Ok(event) = serde_json::from_str::<Event>(&line) {
264-
// log::debug!("Parsed event: {:?}", event);
256+
log::debug!("📨 Parsed event: {:?}", event);
265257

266258
// Log the event for debugging
267259
if let Some(event_session_id) = get_session_id_from_event(&event) {
@@ -293,6 +285,7 @@ impl CodexClient {
293285
async fn send_submission(&self, submission: Submission) -> Result<()> {
294286
if let Some(stdin_tx) = &self.stdin_tx {
295287
let json = serde_json::to_string(&submission)?;
288+
log::debug!("📤 Sending JSON to codex: {}", json);
296289
stdin_tx.send(json)?;
297290
}
298291
Ok(())
@@ -309,6 +302,32 @@ impl CodexClient {
309302
self.send_submission(submission).await
310303
}
311304

305+
pub async fn send_user_input_with_media(&self, message: String, media_paths: Vec<String>) -> Result<()> {
306+
log::debug!("🎯 [CodexClient] send_user_input_with_media called:");
307+
log::debug!(" 💬 message: {}", message);
308+
log::debug!(" 📸 media_paths: {:?}", media_paths);
309+
log::debug!(" 📊 media_paths count: {}", media_paths.len());
310+
311+
let mut items = vec![InputItem::Text { text: message }];
312+
313+
// Add media files as LocalImage items - codex will convert to base64 automatically
314+
for path in media_paths {
315+
let path_buf = std::path::PathBuf::from(path.clone());
316+
log::debug!(" 🔗 Adding local image path: {}", path);
317+
items.push(InputItem::LocalImage { path: path_buf });
318+
}
319+
320+
log::debug!(" 📦 Total items in submission: {}", items.len());
321+
322+
let submission = Submission {
323+
id: Uuid::new_v4().to_string(),
324+
op: Op::UserInput { items },
325+
};
326+
327+
log::debug!(" 🚀 Sending submission to codex");
328+
self.send_submission(submission).await
329+
}
330+
312331
pub async fn send_exec_approval(&self, approval_id: String, approved: bool) -> Result<()> {
313332
let decision = if approved { "allow" } else { "deny" }.to_string();
314333

src-tauri/src/commands.rs

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,22 @@ pub async fn send_message(
3232
codex::send_message(state, session_id, message).await
3333
}
3434

35+
#[tauri::command]
36+
pub async fn send_message_with_media(
37+
state: State<'_, CodexState>,
38+
session_id: String,
39+
message: String,
40+
media_paths: Vec<String>,
41+
) -> Result<(), String> {
42+
log::debug!("🔄 [Tauri Command] send_message_with_media called:");
43+
log::debug!(" 📝 session_id: {}", session_id);
44+
log::debug!(" 💬 message: {}", message);
45+
log::debug!(" 📸 media_paths: {:?}", media_paths);
46+
log::debug!(" 📊 media_paths count: {}", media_paths.len());
47+
48+
codex::send_message_with_media(state, session_id, message, media_paths).await
49+
}
50+
3551
#[tauri::command]
3652
pub async fn approve_execution(
3753
state: State<'_, CodexState>,

src-tauri/src/lib.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ mod utils;
1010
use commands::{
1111
approve_execution, check_codex_version, close_session, delete_session_file,
1212
get_latest_session_id, get_running_sessions, get_session_files, read_session_file, read_history_file,
13-
load_sessions_from_disk, pause_session, send_message, start_codex_session, stop_session,
13+
load_sessions_from_disk, pause_session, send_message, send_message_with_media, start_codex_session, stop_session,
1414
};
1515
use config::{
1616
add_mcp_server, add_or_update_model_provider, add_or_update_profile, delete_mcp_server,
@@ -52,6 +52,7 @@ pub fn run() {
5252
.invoke_handler(tauri::generate_handler![
5353
start_codex_session,
5454
send_message,
55+
send_message_with_media,
5556
approve_execution,
5657
stop_session,
5758
pause_session,

src-tauri/src/protocol.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,11 @@ pub enum SandboxPolicy {
6262
#[serde(tag = "type", rename_all = "snake_case")]
6363
pub enum InputItem {
6464
Text { text: String },
65+
/// Pre‑encoded data: URI image.
6566
Image { image_url: String },
67+
/// Local image path provided by the user. This will be converted to an
68+
/// `Image` variant (base64 data URL) during request serialization.
69+
LocalImage { path: std::path::PathBuf },
6670
}
6771

6872
#[derive(Debug, Clone, Serialize, Deserialize)]

src-tauri/src/services/codex.rs

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,32 @@ pub async fn send_message(
5555
}
5656
}
5757

58+
pub async fn send_message_with_media(
59+
state: State<'_, CodexState>,
60+
session_id: String,
61+
message: String,
62+
media_paths: Vec<String>,
63+
) -> Result<(), String> {
64+
log::debug!("🚀 [Codex Service] send_message_with_media called:");
65+
log::debug!(" 📝 session_id: {}", session_id);
66+
log::debug!(" 💬 message: {}", message);
67+
log::debug!(" 📸 media_paths: {:?}", media_paths);
68+
log::debug!(" 📊 media_paths count: {}", media_paths.len());
69+
70+
let mut sessions = state.sessions.lock().await;
71+
if let Some(client) = sessions.get_mut(&session_id) {
72+
log::debug!("✅ Session found, sending to client");
73+
client
74+
.send_user_input_with_media(message, media_paths)
75+
.await
76+
.map_err(|e| format!("Failed to send message with media: {}", e))?;
77+
Ok(())
78+
} else {
79+
log::error!("❌ Session not found: {}", session_id);
80+
Err("Session not found".to_string())
81+
}
82+
}
83+
5884
pub async fn approve_execution(
5985
state: State<'_, CodexState>,
6086
session_id: String,

src/components/chat/ChatInput.tsx

Lines changed: 77 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ import React, { useState, useEffect } from 'react';
22
import { Button } from '../ui/button';
33
import { Textarea } from '../ui/textarea';
44
import { Badge } from '../ui/badge';
5-
import { Send, AtSign, X, ChevronUp, Cpu, Square } from 'lucide-react';
5+
import { Send, AtSign, X, ChevronUp, Cpu, Square, Image, Music, FileX } from 'lucide-react';
66
import {
77
Tooltip,
88
TooltipContent,
@@ -18,11 +18,12 @@ import { useChatInputStore } from '../../stores/chatInputStore';
1818
import { useSettingsStore } from '../../stores/SettingsStore';
1919
import { useModelStore } from '../../stores/ModelStore';
2020
import { ConfigService } from '../../services/configService';
21+
import { MediaSelector } from './MediaSelector';
2122

2223
interface ChatInputProps {
2324
inputValue: string;
2425
onInputChange: (value: string) => void;
25-
onSendMessage: (message: string) => void;
26+
onSendMessage: (messageData: string | { text: string; mediaAttachments?: any[] }) => void;
2627
onStopStreaming?: () => void;
2728
disabled?: boolean;
2829
isLoading?: boolean;
@@ -40,8 +41,11 @@ export const ChatInput: React.FC<ChatInputProps> = ({
4041
}) => {
4142
const {
4243
fileReferences,
44+
mediaAttachments,
4345
removeFileReference,
46+
removeMediaAttachment,
4447
clearFileReferences,
48+
clearMediaAttachments,
4549
} = useChatInputStore();
4650

4751
const { providers } = useSettingsStore();
@@ -141,9 +145,19 @@ export const ChatInput: React.FC<ChatInputProps> = ({
141145
messageContent = `${smartPrompt}\n\n${inputValue}`;
142146
}
143147

144-
onSendMessage(messageContent);
148+
// Pass media attachments along with the message
149+
const messageParts = {
150+
text: messageContent,
151+
mediaAttachments: mediaAttachments.length > 0 ? mediaAttachments : undefined
152+
};
153+
154+
console.log("📤 ChatInput: Sending message parts:", messageParts);
155+
console.log("📸 Media attachments count:", mediaAttachments.length);
156+
157+
onSendMessage(messageParts);
145158
onInputChange('');
146159
clearFileReferences();
160+
clearMediaAttachments();
147161
};
148162

149163
const handleStopStreaming = () => {
@@ -199,16 +213,68 @@ export const ChatInput: React.FC<ChatInputProps> = ({
199213
</Button>
200214
</div>
201215
)}
216+
217+
{/* Media attachments display */}
218+
{mediaAttachments.length > 0 && (
219+
<div className="mb-3 flex flex-wrap gap-2 items-center">
220+
{mediaAttachments.map((attachment) => (
221+
<TooltipProvider key={attachment.id}>
222+
<Tooltip>
223+
<TooltipTrigger>
224+
<Badge
225+
variant="outline"
226+
className="flex items-center gap-2 cursor-pointer hover:bg-gray-50 px-3 py-1"
227+
>
228+
{attachment.type === 'image' ? (
229+
<Image className="w-3 h-3 text-blue-500" />
230+
) : (
231+
<Music className="w-3 h-3 text-green-500" />
232+
)}
233+
<span className="text-xs font-medium">{attachment.name}</span>
234+
<X
235+
className="w-3 h-3 hover:bg-gray-300 rounded"
236+
onClick={(e) => {
237+
e.stopPropagation();
238+
removeMediaAttachment(attachment.id);
239+
}}
240+
/>
241+
</Badge>
242+
</TooltipTrigger>
243+
<TooltipContent>
244+
<div className="text-xs">
245+
<p>{attachment.path}</p>
246+
<p className="text-gray-500 mt-1">{attachment.type}{attachment.mimeType}</p>
247+
</div>
248+
</TooltipContent>
249+
</Tooltip>
250+
</TooltipProvider>
251+
))}
252+
<Button
253+
variant="ghost"
254+
size="sm"
255+
onClick={clearMediaAttachments}
256+
className="h-6 px-2 text-xs text-gray-500 hover:text-gray-700"
257+
>
258+
<FileX className="w-3 h-3 mr-1" />
259+
Clear media
260+
</Button>
261+
</div>
262+
)}
202263

203264
<div className="flex gap-2">
204-
<Textarea
205-
value={inputValue}
206-
onChange={(e) => onInputChange(e.target.value)}
207-
onKeyDown={handleKeyPress}
208-
placeholder={placeholderOverride || "Type your message..."}
209-
className="flex-1 min-h-[40px] max-h-[120px]"
210-
disabled={false}
211-
/>
265+
<div className="flex-1 relative">
266+
<Textarea
267+
value={inputValue}
268+
onChange={(e) => onInputChange(e.target.value)}
269+
onKeyDown={handleKeyPress}
270+
placeholder={placeholderOverride || "Type your message..."}
271+
className="min-h-[40px] max-h-[120px] pr-10"
272+
disabled={false}
273+
/>
274+
<div className="absolute right-2 top-2">
275+
<MediaSelector />
276+
</div>
277+
</div>
212278
{isLoading ? (
213279
<Button
214280
onClick={handleStopStreaming}

0 commit comments

Comments
 (0)