/**
 * Detects which JavaScript runtime is executing this process.
 *
 * Probes for the `Deno` global first, then the `Bun` global, and falls back
 * to Node.js when neither is defined.
 *
 * @returns The runtime identifier handed to the Claude SDK as `executable`.
 */
function getRuntimeType(): "bun" | "deno" | "node" {
  // Narrow structural view of the global object; avoids casting to `any`.
  const runtimeGlobals = globalThis as { Deno?: unknown; Bun?: unknown };

  if (typeof runtimeGlobals.Deno !== "undefined") {
    return "deno";
  }

  if (typeof runtimeGlobals.Bun !== "undefined") {
    return "bun";
  }

  return "node";
}

/**
 * Type guard that tells a multimodal payload apart from a plain string.
 *
 * The value is only accepted when `text` is a string and `images` is an
 * array. Checking the property types (not just key presence) matters because
 * the narrowed value is later used with `text.trim()` and iterated via
 * `images`; a malformed payload must not pass the guard.
 *
 * @param message - Either the raw prompt text or a text-plus-images payload.
 * @returns `true` when `message` has the multimodal shape.
 */
function isMultimodalMessage(message: string | MultimodalMessage): message is MultimodalMessage {
  if (typeof message !== "object" || message === null) {
    return false;
  }

  const candidate = message as { text?: unknown; images?: unknown };
  return typeof candidate.text === "string" && Array.isArray(candidate.images);
}

/**
 * Builds an `SDKUserMessage` from a multimodal payload.
 *
 * The content array holds an optional text block followed by one base64
 * image block per entry of `message.images`, in the original order.
 *
 * @param message - Payload carrying the prompt text and attached images.
 * @param sessionId - Session to attach the message to; an empty string is
 *   used when it is not provided.
 * @returns A user-role SDK message with `parent_tool_use_id` set to `null`.
 */
function createMultimodalSDKMessage(message: MultimodalMessage, sessionId?: string): SDKUserMessage {
  // Whitespace-only text is dropped entirely, but non-blank text is forwarded untrimmed.
  const textBlocks = message.text.trim()
    ? [{ type: "text" as const, text: message.text }]
    : [];

  // NOTE(review): assumes image.data is raw base64 (no data-URL prefix) and
  // image.type is a MIME type — confirm against the code that builds ImageData.
  const imageBlocks = message.images.map((image) => ({
    type: "image" as const,
    source: {
      type: "base64" as const,
      media_type: image.type,
      data: image.data,
    },
  }));

  return {
    type: "user" as const,
    message: {
      role: "user" as const,
      content: [...textBlocks, ...imageBlocks],
    },
    session_id: sessionId ?? "",
    parent_tool_use_id: null,
  };
}

/**
 * Wraps a single `SDKUserMessage` in an async iterable.
 *
 * The resulting iterable yields exactly one item and then completes; it is
 * what gets passed as `prompt` to `query` for multimodal requests.
 *
 * @param sdkMessage - The message to emit.
 * @returns An async iterable producing `sdkMessage` once.
 */
async function* createSDKMessageIterable(sdkMessage: SDKUserMessage): AsyncIterable<SDKUserMessage> {
  yield sdkMessage;
}
{ permissionMode } : {}), + }; - yield { - type: "claude_json", - data: sdkMessage, - }; + logger.chat.debug("Claude SDK query options: {options}", { options: queryOptions }); + + // Handle multimodal vs text-only messages + if (isMultimodalMessage(message)) { + // Multimodal message with images + logger.chat.debug("Processing multimodal message with {imageCount} images", { imageCount: message.images.length }); + + const sdkMessage = createMultimodalSDKMessage(message, sessionId); + const messageIterable = createSDKMessageIterable(sdkMessage); + + for await (const sdkMessage of query({ + prompt: messageIterable, + options: queryOptions, + })) { + logger.chat.debug("Claude SDK Message: {sdkMessage}", { sdkMessage }); + yield { + type: "claude_json", + data: sdkMessage, + }; + } + } else { + // Text-only message + let processedMessage = message; + if (message.startsWith("/")) { + processedMessage = message.substring(1); + } + + logger.chat.debug("Processing text-only message"); + + for await (const sdkMessage of query({ + prompt: processedMessage, + options: queryOptions, + })) { + logger.chat.debug("Claude SDK Message: {sdkMessage}", { sdkMessage }); + yield { + type: "claude_json", + data: sdkMessage, + }; + } } yield { type: "done" }; } catch (error) { - // Check if error is due to abort - // TODO: Re-enable when AbortError is properly exported from Claude SDK - // if (error instanceof AbortError) { - // yield { type: "aborted" }; - // } else { - { - logger.chat.error("Claude Code execution failed: {error}", { error }); - yield { - type: "error", - error: error instanceof Error ? error.message : String(error), - }; - } + logger.chat.error("Claude Code execution failed: {error}", { error }); + yield { + type: "error", + error: error instanceof Error ? 
error.message : String(error), + }; } finally { // Clean up AbortController from map if (requestAbortControllers.has(requestId)) { diff --git a/frontend/src/components/ChatPage.tsx b/frontend/src/components/ChatPage.tsx index a005e877..27990987 100644 --- a/frontend/src/components/ChatPage.tsx +++ b/frontend/src/components/ChatPage.tsx @@ -7,6 +7,7 @@ import type { ProjectInfo, PermissionMode, } from "../types"; +import type { ConversationSummary, ImageData, MultimodalMessage } from "../../../shared/types"; import { useClaudeStreaming } from "../hooks/useClaudeStreaming"; import { useChatState } from "../hooks/chat/useChatState"; import { usePermissions } from "../hooks/chat/usePermissions"; @@ -30,6 +31,13 @@ export function ChatPage() { const [searchParams] = useSearchParams(); const [projects, setProjects] = useState([]); const [isSettingsOpen, setIsSettingsOpen] = useState(false); + const [currentConversation, setCurrentConversation] = useState<{ + title: string; + fullTitle: string; + projectEncodedName: string; + } | null>(null); + // State for uploaded images + const [uploadedImages, setUploadedImages] = useState([]); // Extract and normalize working directory from URL const workingDirectory = (() => { @@ -148,10 +156,23 @@ export function ChatPage() { overridePermissionMode?: PermissionMode, ) => { const content = messageContent || input.trim(); - if (!content || isLoading) return; + if ((!content && uploadedImages.length === 0) || isLoading) return; const requestId = generateRequestId(); + // Prepare message payload - either string or multimodal + let messagePayload: string | MultimodalMessage; + if (uploadedImages.length > 0 && !messageContent) { + // Create multimodal message with images + messagePayload = { + text: content, + images: uploadedImages + }; + } else { + // Regular text-only message + messagePayload = content; + } + // Only add user message to chat if not hidden if (!hideUserMessage) { const userMessage: ChatMessage = { @@ -159,11 +180,16 @@ 
export function ChatPage() { role: "user", content: content, timestamp: Date.now(), + // Include images if this is a multimodal message + ...(uploadedImages.length > 0 && !messageContent ? { images: uploadedImages } : {}), }; addMessage(userMessage); } - if (!messageContent) clearInput(); + if (!messageContent) { + clearInput(); + setUploadedImages([]); // Clear images after sending + } startRequest(); try { @@ -171,7 +197,7 @@ export function ChatPage() { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ - message: content, + message: messagePayload, requestId, ...(currentSessionId ? { sessionId: currentSessionId } : {}), allowedTools: tools || allowedTools, @@ -259,6 +285,8 @@ export function ChatPage() { processStreamLine, handlePermissionError, createAbortHandler, + uploadedImages, + setUploadedImages, ], ); @@ -580,6 +608,8 @@ export function ChatPage() { showPermissions={isPermissionMode} permissionData={permissionData} planPermissionData={planPermissionData} + images={uploadedImages} + onImagesChange={setUploadedImages} /> )} diff --git a/frontend/src/components/MessageComponents.tsx b/frontend/src/components/MessageComponents.tsx index 71f3954a..ea6240de 100644 --- a/frontend/src/components/MessageComponents.tsx +++ b/frontend/src/components/MessageComponents.tsx @@ -66,6 +66,28 @@ export function ChatMessageComponent({ message }: ChatMessageComponentProps) { }`} /> + + {/* Display images if present (for user messages) */} + {message.images && message.images.length > 0 && ( +
+
+ {message.images.map((image) => ( +
+ {image.name} +
+ {image.name} +
+
+ ))} +
+
+ )} +
         {message.content}
       
// Image handling functions

/**
 * Validates the chosen files, converts the accepted ones to image data and
 * appends them to the current attachments.
 *
 * Validation errors replace the previous error list. Each accepted file is
 * converted independently, so one unreadable file no longer discards the
 * files that were read successfully; every failure is reported with its
 * file name.
 *
 * Does nothing when `files` is null or no `onImagesChange` callback exists.
 */
const handleFilesSelected = async (files: FileList | null) => {
  if (!files || !onImagesChange) return;

  // Snapshot the FileList synchronously: callers may reset the input right
  // after invoking this handler.
  const { validFiles, errors } = validateImageFiles(Array.from(files), images.length);
  setUploadErrors(errors);

  if (validFiles.length === 0) return;

  const outcomes = await Promise.all(
    validFiles.map(async (file) => {
      try {
        return { ok: true as const, image: await fileToImageData(file) };
      } catch (error) {
        console.error('Error processing images:', error);
        return { ok: false as const, fileName: file.name };
      }
    }),
  );

  const newImageData = [];
  const readErrors: ImageValidationError[] = [];
  for (const outcome of outcomes) {
    if (outcome.ok) {
      newImageData.push(outcome.image);
    } else {
      readErrors.push({
        type: 'read_error',
        fileName: outcome.fileName,
        message: 'Failed to process image',
      });
    }
  }

  if (newImageData.length > 0) {
    // NOTE(review): `images` is the value captured when this handler was
    // created; if attachments change while files are being read, this call
    // overwrites that change. Consider an updater-style callback.
    onImagesChange([...images, ...newImageData]);
  }

  if (readErrors.length > 0) {
    setUploadErrors((prev) => [...prev, ...readErrors]);
  }
};

/** Forwards files picked through the hidden file input. */
const handleFileInputChange = (e: React.ChangeEvent<HTMLInputElement>) => {
  // Fire-and-forget: the handler reports its own per-file failures.
  void handleFilesSelected(e.target.files);
  // Reset input so same file can be selected again
  e.target.value = '';
};

/** Marks the drop zone as active while something is dragged over it. */
const handleDragOver = (e: React.DragEvent) => {
  e.preventDefault();
  e.stopPropagation();
  setIsDragging(true);
};

/** Clears the active state once the pointer leaves the drop zone. */
const handleDragLeave = (e: React.DragEvent) => {
  e.preventDefault();
  e.stopPropagation();
  // Only set dragging to false if we're leaving the component entirely;
  // moving between child elements also fires dragleave.
  if (!e.currentTarget.contains(e.relatedTarget as Node | null)) {
    setIsDragging(false);
  }
};

/** Handles dropped files by routing them through the shared file handler. */
const handleDrop = (e: React.DragEvent) => {
  e.preventDefault();
  e.stopPropagation();
  setIsDragging(false);

  void handleFilesSelected(e.dataTransfer.files);
};

/** Removes the attachment with the given id, if a change callback exists. */
const removeImage = (imageId: string) => {
  if (onImagesChange) {
    onImagesChange(images.filter((img) => img.id !== imageId));
  }
};

/** Opens the native file picker by clicking the hidden file input. */
const openFileDialog = () => {
  fileInputRef.current?.click();
};
-
+ {/* Image previews */} + {images.length > 0 && ( +
+
+ + + {images.length} image{images.length > 1 ? 's' : ''} attached + +
+
+ {images.map((image) => ( +
+ {image.name} + +
+ {image.name} ({formatFileSize(image.size)}) +
+
+ ))} +
+
+ )} + + {/* Upload errors */} + {uploadErrors.length > 0 && ( +
+
+ Upload errors: +
+ {uploadErrors.map((error, index) => ( +
+ {error.fileName && `${error.fileName}: `}{error.message} +
+ ))} +
+ )} + + {/* Drag overlay */} + {isDragging && ( +
+
+ +
+ Drop images here +
+
+ Supports JPEG, PNG, GIF, WebP (max 5MB each) +
+
+
+ )} + +