diff --git a/core/agent_core/events/ingestors.py b/core/agent_core/events/ingestors.py
index 8828e68..94d55f5 100644
--- a/core/agent_core/events/ingestors.py
+++ b/core/agent_core/events/ingestors.py
@@ -337,6 +337,39 @@ def user_prompt_ingestor(payload: Any, params: Dict, context: Dict) -> str:
         return payload.get("prompt", "")
     return str(payload)
 
+@register_ingestor("multimodal_user_prompt_ingestor")
+def multimodal_user_prompt_ingestor(payload: Any, params: Dict, context: Dict) -> str:
+    """Processes user input containing images/files and returns a concise text description suitable for LLMs (actual data is handled during message construction)."""
+    if not isinstance(payload, dict):
+        return str(payload)
+
+    prompt = payload.get("prompt", "")
+    images = payload.get("images", [])
+    files = payload.get("files", [])
+
+    # If there are no images or files, return the text directly
+    if not images and not files:
+        return prompt
+
+    # Construct a brief attachment description
+    parts = []
+    if images:
+        parts.append(f"User uploaded {len(images)} image(s)")
+    if files:
+        # Optional: list up to the first 3 file names
+        names = []
+        for f in files[:3]:
+            name = f.get("name") or f.get("filename")
+            if name:
+                names.append(name)
+        if names:
+            parts.append(f"attached {len(files)} file(s) (e.g., {', '.join(names)}{'' if len(files) <= 3 else ' etc.'})")
+        else:
+            parts.append(f"attached {len(files)} file(s)")
+
+    note = "[" + ", ".join(parts) + "]"
+    return f"{prompt}\n\n{note}" if prompt else note
+
 def _recursive_markdown_formatter(data: Any, schema: Dict, level: int = 0) -> List[str]:
     """
     Intelligently formats data recursively into LLM-friendly Markdown.
diff --git a/core/agent_core/framework/inbox_processor.py b/core/agent_core/framework/inbox_processor.py
index e94eeeb..3d7bf56 100644
--- a/core/agent_core/framework/inbox_processor.py
+++ b/core/agent_core/framework/inbox_processor.py
@@ -1,7 +1,13 @@
 import logging
 import uuid
+import os
+import base64
 from typing import Any, Dict, List, Optional
 from datetime import datetime, timezone
+import asyncio
+import time
+import httpx
+from litellm import create_file
 
 from ..events.event_strategies import EVENT_STRATEGY_REGISTRY
 from ..events.ingestors import INGESTOR_REGISTRY, markdown_formatter_ingestor
@@ -34,7 +40,8 @@ def _create_user_turn_from_inbox_item(self, item: Dict) -> Optional[str]:
         team_state = self.team_state
 
         prompt_content = item.get("payload", {}).get("prompt")
-        if not prompt_content:
+        files_content = item.get("payload", {}).get("files", [])
+        if not prompt_content and not files_content:
             return None
 
         user_turn_id = f"turn_user_{uuid.uuid4().hex[:8]}"
@@ -62,7 +69,10 @@ def _create_user_turn_from_inbox_item(self, item: Dict) -> Optional[str]:
             "end_time": item.get("metadata", {}).get("created_at", datetime.now(timezone.utc).isoformat()),
             "source_turn_ids": [last_agent_turn_id] if last_agent_turn_id else [],
             "source_tool_call_id": None,
-            "inputs": {"prompt": prompt_content},
+            "inputs": (
+                {"prompt": prompt_content, "files": files_content}
+                if files_content else {"prompt": prompt_content}
+            ),
             "outputs": {},
             "llm_interaction": None,
             "tool_interactions": [],
@@ -207,7 +217,7 @@ async def process(self) -> Dict[str, Any]:
             try:
                 payload = item["payload"]
 
-                if item.get("source") == "USER_PROMPT":
+                if item.get("source") in ["USER_PROMPT", "USER_PROMPT_WITH_FILES"]:
                     new_user_turn_id = self._create_user_turn_from_inbox_item(item)
                     if new_user_turn_id:
                         # Pass the "baton" so the next agent_turn can correctly link to this user_turn.
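For reference, a quick sketch of what the new ingestor in `ingestors.py` above emits. The payload shape (`prompt` / `images` / `files` with `name` or `filename` keys) mirrors what the frontend sends; the values here are illustrative only:

```python
# Illustrative payload; the field names match what the ingestor reads above.
payload = {
    "prompt": "Summarize the attached documents.",
    "images": [],
    "files": [
        {"name": "report.pdf", "mimeType": "application/pdf"},
        {"name": "notes.txt", "mimeType": "text/plain"},
    ],
}

text = multimodal_user_prompt_ingestor(payload, params={}, context={})
# -> "Summarize the attached documents.\n\n[attached 2 file(s) (e.g., report.pdf, notes.txt)]"
```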
@@ -249,7 +259,196 @@ async def process(self) -> Dict[str, Any]:
                 role = params.get("role", "user")
                 is_persistent = params.get("is_persistent_in_memory", False)
 
-                new_message = {"role": role, "content": injected_content}
+                # Handle multimodal content (files only)
+                has_multimodal_content = False
+                content_parts = []
+
+                # Check if there is file content
+                if source in ["USER_PROMPT", "USER_PROMPT_WITH_FILES"] and isinstance(dehydrated_payload, dict):
+                    # Process file content: upload attachments to Gemini and construct them as file references
+                    files = dehydrated_payload.get("files", [])
+                    if files:
+                        has_multimodal_content = True
+                        # Add the text content (if not already added)
+                        if injected_content and not any(part.get("type") == "text" for part in content_parts):
+                            content_parts.append({
+                                "type": "text",
+                                "text": injected_content
+                            })
+
+                        for f in files:
+                            try:
+                                filename = f.get("name") or f.get("filename") or f"file_{uuid.uuid4().hex[:6]}"
+                                mime_type = f.get("mimeType") or f.get("mime_type") or "application/octet-stream"
+
+                                if f.get("file_id"):
+                                    # Already uploaded, use the file reference
+                                    file_id = f["file_id"]
+                                    logger.info("gemini_file_upload_skipped_existing", extra={
+                                        "agent_id": self.agent_id,
+                                        "file_name": filename,
+                                        "mime_type": mime_type,
+                                        "file_id": file_id,
+                                    })
+                                    # Use the file reference
+                                    content_parts.append({
+                                        "type": "file",
+                                        "file": {
+                                            "file_id": file_id,
+                                            "filename": filename,
+                                            "format": mime_type
+                                        }
+                                    })
+                                else:
+                                    # Check if we have direct base64 data from the frontend
+                                    if f.get("data"):
+                                        # The frontend sent base64 data - use it directly without a file upload
+                                        data_str = f["data"]
+
+                                        if isinstance(mime_type, str) and mime_type.startswith("image/"):
+                                            # Ensure a proper data URL format for images
+                                            if not data_str.startswith("data:"):
+                                                image_url = f"data:{mime_type};base64,{data_str}"
+                                            else:
+                                                image_url = data_str
+
+                                            content_parts.append({
+                                                "type": "image_url",
+                                                "image_url": {
+                                                    "url": image_url,
+                                                    "detail": "high"
+                                                }
+                                            })
+
+                                            logger.info("image_processed_as_base64", extra={
+                                                "agent_id": self.agent_id,
+                                                "file_name": filename,
+                                                "mime_type": mime_type,
+                                                "method": "direct_base64"
+                                            })
+                                        else:
+                                            # Non-image files with base64 data
+                                            logger.info("non_image_file_with_base64", extra={
+                                                "agent_id": self.agent_id,
+                                                "file_name": filename,
+                                                "mime_type": mime_type,
+                                                "note": "Non-image files may not be fully supported"
+                                            })
+
+                                    elif f.get("url"):
+                                        # URL-based file - needs to be fetched and potentially uploaded
+                                        file_bytes = None
+
+                                        # Async fetch
+                                        async with httpx.AsyncClient(timeout=20) as client:
+                                            resp = await client.get(f["url"])
+                                            resp.raise_for_status()
+                                            file_bytes = resp.content
+
+                                        size_bytes = len(file_bytes) if file_bytes is not None else None
+                                        max_base64_size = 20 * 1024 * 1024  # 20MB
+
+                                        if size_bytes and size_bytes < max_base64_size:
+                                            # Small file from URL - convert to base64
+                                            if isinstance(mime_type, str) and mime_type.startswith("image/"):
+                                                base64_data = base64.b64encode(file_bytes).decode()
+                                                image_url = f"data:{mime_type};base64,{base64_data}"
+
+                                                content_parts.append({
+                                                    "type": "image_url",
+                                                    "image_url": {
+                                                        "url": image_url,
+                                                        "detail": "high"
+                                                    }
+                                                })
+
+                                                logger.info("url_file_converted_to_base64", extra={
+                                                    "agent_id": self.agent_id,
+                                                    "file_name": filename,
+                                                    "mime_type": mime_type,
+                                                    "size_bytes": size_bytes
+                                                })
+                                            else:
+                                                logger.info("non_image_url_file_skipped", extra={
+                                                    "agent_id": self.agent_id,
+                                                    "file_name": filename,
+                                                    "mime_type": mime_type
+                                                })
+                                        else:
+                                            # Large file from URL - use the Gemini file upload
+                                            # Prefer the API key from the project LLM config; fall back to the env var
+                                            try:
+                                                resolver = LLMConfigResolver(shared_llm_configs=self.run_context.get("config", {}).get("shared_llm_configs_ref", {}))
+                                                llm_config = resolver.resolve(self.profile)
+                                            except Exception:
+                                                llm_config = {}
+                                            gemini_key = (
+                                                (llm_config.get("api_key") if isinstance(llm_config, dict) else None)
+                                                or os.getenv("GEMINI_API_KEY")
+                                            )
+                                            if not gemini_key:
+                                                logger.error(
+                                                    "gemini_api_key_missing",
+                                                    extra={
+                                                        "agent_id": self.agent_id,
+                                                        "hint": "Provide api_key in active LLM config or set GEMINI_API_KEY env var"
+                                                    }
+                                                )
+                                                continue
+
+                                            # Structured start log
+                                            logger.info("gemini_file_upload_start", extra={
+                                                "agent_id": self.agent_id,
+                                                "file_name": filename,
+                                                "mime_type": mime_type,
+                                                "size_bytes": size_bytes,
+                                                "reason": "file_too_large_for_base64"
+                                            })
+                                            t0 = time.perf_counter()
+
+                                            # Offload blocking create_file to a thread
+                                            created = await asyncio.to_thread(
+                                                create_file,
+                                                file=file_bytes,
+                                                purpose="user_data",
+                                                custom_llm_provider="gemini",
+                                                api_key=gemini_key,
+                                            )
+                                            file_id = getattr(created, "id", None) if created is not None else None
+                                            if not file_id:
+                                                logger.error("gemini_file_upload_failed", extra={
+                                                    "file_name": filename,
+                                                    "mime_type": mime_type,
+                                                    "size_bytes": size_bytes,
+                                                    "duration_ms": int((time.perf_counter() - t0) * 1000),
+                                                })
+                                                continue
+                                            else:
+                                                logger.info("gemini_file_upload_success", extra={
+                                                    "agent_id": self.agent_id,
+                                                    "file_name": filename,
+                                                    "mime_type": mime_type,
+                                                    "size_bytes": size_bytes,
+                                                    "file_id": file_id,
+                                                    "duration_ms": int((time.perf_counter() - t0) * 1000),
+                                                })
+
+                                            # Append a file reference content part for large files
+                                            content_parts.append({
+                                                "type": "file",
+                                                "file": {
+                                                    "file_id": file_id,
+                                                    "filename": filename,
+                                                    "format": mime_type
+                                                }
+                                            })
+                            except Exception as ex:
+                                logger.error("file_processing_failed", extra={"error": str(ex)}, exc_info=True)
+
+                if has_multimodal_content:
+                    new_message = {"role": role, "content": content_parts}
+                else:
+                    new_message = {"role": role, "content": injected_content}
 
                 # If this message comes from the startup briefing, add an internal flag
                 # to prevent it from being handed over again in the future.
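The parts assembled above follow the OpenAI-style multimodal content format that LiteLLM accepts. As a minimal sketch (all values are placeholders), a prompt with one pasted image and one large uploaded file ends up as:

```python
# Placeholder values throughout; the three part shapes match those built by the processor.
new_message = {
    "role": "user",
    "content": [
        {"type": "text", "text": "What does this chart show?"},
        {
            "type": "image_url",
            "image_url": {"url": "data:image/png;base64,iVBORw0KGgo...", "detail": "high"},
        },
        {
            "type": "file",
            "file": {"file_id": "files/abc123", "filename": "data.csv", "format": "text/csv"},
        },
    ],
}
```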
diff --git a/core/agent_core/nodes/base_agent_node.py b/core/agent_core/nodes/base_agent_node.py
index fab2101..138cf07 100644
--- a/core/agent_core/nodes/base_agent_node.py
+++ b/core/agent_core/nodes/base_agent_node.py
@@ -931,9 +931,13 @@ def _clean_messages_for_llm(self, messages: List[Dict]) -> List[Dict]:
             if key in msg:
                 value = msg[key]
 
-                # Ensure content is a string
+                # Handle content based on type - preserve multimodal structure
                 if key == "content":
-                    if isinstance(value, dict):
+                    if isinstance(value, list):
+                        # Multimodal content (list of parts) - preserve the structure for the LLM
+                        cleaned_msg[key] = value
+                        logger.debug("multimodal_content_preserved", extra={"message_role": msg.get('role'), "parts_count": len(value)})
+                    elif isinstance(value, dict):
                         # If content is a dictionary, convert it to a JSON string
                         import json
                         cleaned_msg[key] = json.dumps(value, ensure_ascii=False)
@@ -952,7 +956,53 @@ def _clean_messages_for_llm(self, messages: List[Dict]) -> List[Dict]:
 
             cleaned_messages.append(cleaned_msg)
 
+        # Validate the message sequence for OpenAI API compliance
+        cleaned_messages = self._validate_message_sequence(cleaned_messages)
+
         return cleaned_messages
+
+    def _validate_message_sequence(self, messages: List[Dict]) -> List[Dict]:
+        """Validate the message sequence against OpenAI API ordering requirements, logging warnings for suspect patterns; the messages themselves are returned unchanged."""
+        if not messages:
+            return messages
+
+        # Check for invalid sequences and log warnings
+        for i in range(1, len(messages)):
+            prev_msg = messages[i-1]
+            curr_msg = messages[i]
+
+            prev_role = prev_msg.get("role")
+            curr_role = curr_msg.get("role")
+            has_tool_calls = bool(curr_msg.get("tool_calls"))
+
+            # Check for invalid function call sequences
+            if has_tool_calls and curr_role == "assistant":
+                # Function calls should come after user messages or tool responses
+                if prev_role not in ["user", "tool"]:
+                    logger.warning("invalid_function_call_sequence", extra={
+                        "agent_id": self.agent_id,
+                        "prev_role": prev_role,
+                        "curr_role": curr_role,
+                        "has_tool_calls": has_tool_calls,
+                        "position": i,
+                        "note": "This may cause OpenAI API 400 errors"
+                    })
+
+            # Check for consecutive assistant messages without tool interaction
+            if prev_role == "assistant" and curr_role == "assistant":
+                prev_has_tools = bool(prev_msg.get("tool_calls"))
+                curr_is_tool_response = bool(curr_msg.get("tool_call_id"))
+
+                if not prev_has_tools and not curr_is_tool_response:
+                    logger.warning("consecutive_assistant_messages", extra={
+                        "agent_id": self.agent_id,
+                        "position": i,
+                        "prev_has_tools": prev_has_tools,
+                        "curr_is_tool_response": curr_is_tool_response,
+                        "note": "This may cause message sequence issues"
+                    })
+
+        return messages
 
     def _finalize_dangling_tool_in_turn(self, context: Dict):
         """
diff --git a/core/agent_core/nodes/mcp_proxy_node.py b/core/agent_core/nodes/mcp_proxy_node.py
index 8c1487a..bc64e7a 100644
--- a/core/agent_core/nodes/mcp_proxy_node.py
+++ b/core/agent_core/nodes/mcp_proxy_node.py
@@ -2,6 +2,7 @@
 import asyncio
 import anyio
 from typing import Dict, Any
+from pathlib import Path
 
 # Import the new base class
 from .base_tool_node import BaseToolNode
@@ -46,6 +47,38 @@ async def exec_async(self, prep_res: Dict) -> Dict[str, Any]:
             logger.error("mcp_proxy_session_group_not_found", extra={"unique_tool_name": self.unique_tool_name})
             return {"status": "error", "error_message": error_msg}
 
+        # 2.5 Normalize relative filesystem paths in common parameters to absolute ones, so tool calls do not depend on the server's working directory
+        def _normalize_paths_in_params(params: Dict[str, Any], tool_info: Dict[str, Any]) -> None:
+            if not isinstance(params, dict) or not isinstance(tool_info, dict):
+                return
+
+            properties = tool_info.get("parameters", {}).get("properties", {})
+            for param_name, schema in properties.items():
+                # Heuristic: find string params that look like paths
+                is_path_like = (
+                    schema.get("type") == "string" and
+                    ("path" in param_name.lower() or schema.get("format") == "uri-reference")
+                )
+
+                if is_path_like:
+                    value = params.get(param_name)
+                    if isinstance(value, str) and value.strip():
+                        p = Path(value)
+                        if not p.is_absolute():
+                            abs_p = (Path.cwd() / p).resolve()
+                            params[param_name] = str(abs_p)
+                            try:
+                                logger.debug("mcp_param_path_normalized", extra={
+                                    "unique_tool_name": self.unique_tool_name,
+                                    "param": param_name,
+                                    "original_path": value,
+                                    "abs_path": str(abs_p)
+                                })
+                            except Exception:
+                                pass
+
+        _normalize_paths_in_params(tool_params, self._tool_info)
+
         # 3. Execute the original business logic
         logger.info("mcp_proxy_tool_call_begin", extra={"unique_tool_name": self.unique_tool_name})
diff --git a/core/api/message_handlers.py b/core/api/message_handlers.py
index 1aed78a..81ce1c9 100644
--- a/core/api/message_handlers.py
+++ b/core/api/message_handlers.py
@@ -834,7 +834,7 @@ async def handle_manage_work_modules_request(ws_state: Dict, data: Dict):
 async def handle_send_to_run_message(ws_state: Dict, data: Dict):
     """
     Handles 'send_to_run' messages, routing client messages to the specified active business run.
-    This function is now also responsible for "activating" runs that are in the CREATED state.
+    Also handles multimodal payloads (files), and is responsible for activating runs in CREATED state.
     """
     event_manager = ws_state.event_manager
     session_id_for_log = event_manager.session_id
@@ -843,8 +843,17 @@ async def handle_send_to_run_message(ws_state: Dict, data: Dict):
     run_id_var.set(target_run_id)  # Set context variable
     message_payload = data.get("message_payload")
     extra_payload = data.get("extra_payload")
-
-    logger.info("send_to_run_received", extra={"session_id": session_id_for_log, "target_run_id": target_run_id, "message_preview": str(message_payload)[:100]})
+    files_content = (message_payload or {}).get("files", [])
+
+    logger.info(
+        "send_to_run_received",
+        extra={
+            "session_id": session_id_for_log,
+            "target_run_id": target_run_id,
+            "message_preview": str(message_payload)[:100],
+            "has_files": bool(files_content),
+        },
+    )
 
     if not target_run_id or message_payload is None:
         err_msg = "'send_to_run' requires 'run_id' and 'message_payload'."
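For context, a sketch of the client message this handler consumes. `run_id`, `message_payload`, `prompt`, and `files` are the keys the handler actually reads; the attachment fields follow the frontend's format, and the envelope `type` key is an assumption based on the `MESSAGE_HANDLERS` registry below:

```python
# Hypothetical 'send_to_run' message carrying one small attachment.
data = {
    "type": "send_to_run",  # assumed routing key (see MESSAGE_HANDLERS)
    "run_id": "run_1a2b3c4d",
    "message_payload": {
        "prompt": "Please review the attached spec.",
        "files": [
            {
                "name": "spec.pdf",
                "mimeType": "application/pdf",
                "data": "JVBERi0xLjcK...",  # base64 placeholder
            },
        ],
    },
}
```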
@@ -868,10 +877,12 @@ async def handle_send_to_run_message(ws_state: Dict, data: Dict):
 
     if run_status == 'CREATED':
         logger.debug("run_activation_started", extra={"run_id": target_run_id, "run_type": run_type})
 
-        if prompt_content is None:
-            raise ValueError("First message to a new run must contain a 'prompt'.")
+        # Allow activation if there is either text or multimodal content
+        if (prompt_content is None) and (not files_content):
+            raise ValueError("First message must contain either 'prompt' text or attachments (files).")
 
-        run_context['team_state']['question'] = prompt_content
+        # If there is no text but attachments exist, initialize the question with an empty string
+        run_context['team_state']['question'] = prompt_content or ""
 
         task = None
         if run_type == "partner_interaction":
@@ -879,10 +890,17 @@ async def handle_send_to_run_message(ws_state: Dict, data: Dict):
             team_state = run_context['team_state']
             partner_state = partner_context['state']
 
+            # Construct the payload; all attachments go through the unified 'files' field
+            payload = {"prompt": prompt_content}
+            if files_content:
+                payload["files"] = files_content
+
             inbox_item = {
                 "item_id": f"inbox_{uuid.uuid4().hex[:8]}",
-                "source": "USER_PROMPT",  # Use standardized event source
-                "payload": {"prompt": prompt_content},
+                "source": (
+                    "USER_PROMPT_WITH_FILES" if files_content else "USER_PROMPT"
+                ),  # Standardized source, with a multimodal marker when applicable
+                "payload": payload,
                 "consumption_policy": "consume_on_read",
                 "metadata": {"created_at": datetime.now(timezone.utc).isoformat()}
             }
@@ -912,19 +930,24 @@ async def handle_send_to_run_message(ws_state: Dict, data: Dict):
 
     # --- Branch 2: Send a message to a running session ---
     elif run_status in ['RUNNING', 'AWAITING_INPUT']:
-        if prompt_content is None:
-            raise ValueError("Message payload must contain a 'prompt'.")
+        # Allow sending if text or attachments exist
+        if (prompt_content is None) and (not files_content):
+            raise ValueError("Message must contain either 'prompt' text or attachments (files).")
 
         if run_type == "partner_interaction":
             partner_context = run_context['sub_context_refs']['_partner_context_ref']
             partner_state = partner_context['state']
             team_state = run_context['team_state']
 
-            # --- Core modification: Similarly, only create an InboxItem ---
+            # --- Core modification: Create an InboxItem carrying the multimodal info (text and/or files) ---
+            payload = {"prompt": prompt_content}
+            if files_content:
+                payload["files"] = files_content
+
             inbox_item = {
                 "item_id": f"inbox_{uuid.uuid4().hex[:8]}",
-                "source": "USER_PROMPT",
-                "payload": {"prompt": prompt_content},
+                "source": ("USER_PROMPT_WITH_FILES" if files_content else "USER_PROMPT"),
+                "payload": payload,
                 "consumption_policy": "consume_on_read",
                 "metadata": {"created_at": datetime.now(timezone.utc).isoformat()}
             }
@@ -949,12 +972,13 @@ async def handle_send_to_run_message(ws_state: Dict, data: Dict):
         logger.error("send_to_run_processing_error", extra={"session_id": session_id_for_log, "target_run_id": target_run_id, "run_type": run_type, "error_message": str(e)}, exc_info=True)
         await event_manager.emit_error(run_id=target_run_id, agent_id="System", error_message=f"Error processing message for run {target_run_id}: {str(e)}")
 
+
 # --- MESSAGE_HANDLERS registry (Dango's version, with adapted function names) ---
 MESSAGE_HANDLERS: Dict[str, callable] = {
     "start_run": handle_start_run_message,
     "stop_run": handle_stop_run_message,
     "request_available_toolsets": handle_request_available_toolsets,
-    "send_to_run": handle_send_to_run_message,  # Added by Dango, adapted
+    "send_to_run": handle_send_to_run_message,  # Unified handler
     "stop_managed_principal": handle_stop_managed_principal_message,  # Added by Dango, adapted
     "request_run_profiles": handle_request_run_profiles_message,  # Added by Dango, adapted
     "request_run_context": handle_request_run_context_message,  # Added by Dango, adapted
diff --git a/core/pyproject.toml b/core/pyproject.toml
index bce457b..fbdad9c 100644
--- a/core/pyproject.toml
+++ b/core/pyproject.toml
@@ -14,6 +14,7 @@ dependencies = [
     "python-dotenv>=1.0.0",
     "python-json-logger>=2.0.7",
    "requests>=2.28.0",
+    "httpx>=0.24.0",
     "markdown>=3.4.0",
     "coolname>=1.1.0",
     "numpy>=2",
diff --git a/frontend/app/chat/components/ChatInput.tsx b/frontend/app/chat/components/ChatInput.tsx
index b0747e3..7b308cd 100644
--- a/frontend/app/chat/components/ChatInput.tsx
+++ b/frontend/app/chat/components/ChatInput.tsx
@@ -1,12 +1,23 @@
-import React from 'react';
+import React, { useState, useRef, useCallback, useEffect } from 'react';
 import { Button } from '@/components/ui/button';
 import { Input } from '@/components/ui/input';
+import { X, Paperclip, Music, Video, FileText } from 'lucide-react';
+
+interface FileAttachment {
+  id: string;
+  file: File;
+  dataUrl: string;
+  name: string;
+  mimeType?: string;
+  kind?: 'image' | 'audio' | 'video' | 'document';
+}
 
 interface ChatInputProps {
   currentInput: string;
   onInputChange: (value: string) => void;
   onKeyPress: (e: React.KeyboardEvent) => void;
-  onSendMessage: () => void;
+  onSendMessage: (files?: FileAttachment[]) => void;
   isStreaming: boolean;
   isLoading: boolean;
   onStopExecution: () => void;
@@ -21,35 +32,279 @@ export function ChatInput({
   isLoading,
   onStopExecution,
 }: ChatInputProps) {
+  const [files, setFiles] = useState<FileAttachment[]>([]);
+  const fileInputRef = useRef<HTMLInputElement>(null);
+  const inputRef = useRef<HTMLInputElement>(null);
+  const [errorMsg, setErrorMsg] = useState<string | null>(null);
+  const errorTimerRef = useRef<number | null>(null);
+
+  const showError = useCallback((msg: string) => {
+    setErrorMsg(msg);
+    if (errorTimerRef.current) {
+      window.clearTimeout(errorTimerRef.current);
+    }
+    errorTimerRef.current = window.setTimeout(() => {
+      setErrorMsg(null);
+      errorTimerRef.current = null;
+    }, 3000);
+  }, []);
+
+  // Convert a File to a base64 data URL (for backward compatibility on send)
+  const fileToDataUrl = useCallback((file: File) => {
+    return new Promise<string>((resolve, reject) => {
+      const reader = new FileReader();
+      reader.onload = () => resolve(reader.result as string);
+      reader.onerror = reject;
+      reader.readAsDataURL(file);
+    });
+  }, []);
+
+  // Track the latest attachments in a ref so the unmount cleanup below always sees them
+  const filesRef = useRef<FileAttachment[]>([]);
+  useEffect(() => {
+    filesRef.current = files;
+  }, [files]);
+
+  useEffect(() => {
+    return () => {
+      if (errorTimerRef.current) {
+        window.clearTimeout(errorTimerRef.current);
+      }
+      // Revoke any remaining object URLs on unmount to prevent memory leaks
+      try {
+        filesRef.current.forEach(file => {
+          if (file.kind === 'image' && file.dataUrl) {
+            URL.revokeObjectURL(file.dataUrl);
+          }
+        });
+      } catch {}
+    };
+  }, []);
+
+  // The paste handler is defined below so that the addFile it depends on is already declared
+
+  // Add a file (image/audio/video/document)
+  const addFile = useCallback((file: File) => {
+    const isImage = file.type.startsWith('image/');
+    const isAudio = file.type.startsWith('audio/');
+    const isVideo = file.type.startsWith('video/');
+    // Document type: application/* or text/*, or fall back by extension
+    const ext = file.name.split('.').pop()?.toLowerCase() || '';
+    const docExts = new Set(['pdf','doc','docx','xls','xlsx','ppt','pptx','txt','rtf','md','csv']);
+    const isDocByMime = file.type.startsWith('application/') || file.type.startsWith('text/');
+    const isDocument = (!isImage && !isAudio && !isVideo) && (isDocByMime || docExts.has(ext));
+
+    if (!isImage && !isAudio && !isVideo && !isDocument) {
+      showError('Unsupported file type. Allowed: images, audio, video, documents.');
+      return;
+    }
+
+    // Allow files larger than 20MB to be added;
+    // all files smaller than 20MB (images/audio/video/text/documents) are converted to a dataUrl when sending,
+    // while files 20MB or larger are sent to the backend to be uploaded as files.
+
+    // Generate a Blob URL preview for images; for audio, video, and documents, only keep the file and its name
+    const objectUrl = isImage ? URL.createObjectURL(file) : '';
+    const newAttachment: FileAttachment = {
+      id: crypto.randomUUID(),
+      file,
+      dataUrl: objectUrl,
+      name: file.name,
+      mimeType: file.type,
+      kind: isImage ? 'image' : isAudio ? 'audio' : isVideo ? 'video' : 'document',
+    };
+    setFiles(prev => [...prev, newAttachment]);
+  }, [showError]);
+
+  // Handle the paste event (only images are supported)
+  const handlePaste = useCallback((e: React.ClipboardEvent) => {
+    const items = e.clipboardData?.items;
+    if (!items) return;
+
+    for (let i = 0; i < items.length; i++) {
+      const item = items[i];
+      if (item.type.startsWith('image/')) {
+        e.preventDefault();
+        const file = item.getAsFile();
+        if (file) {
+          addFile(file);
+        }
+        break;
+      }
+    }
+  }, [addFile]);
+
+  // Handle file selection
+  const handleFileSelect = useCallback((e: React.ChangeEvent<HTMLInputElement>) => {
+    const selected = e.target.files;
+    if (selected) {
+      Array.from(selected).forEach(addFile);
+    }
+    // Clear the input value so the same file can be selected again
+    if (fileInputRef.current) {
+      fileInputRef.current.value = '';
+    }
+  }, [addFile]);
+
+  // Remove an attachment
+  const removeFile = useCallback((id: string) => {
+    setFiles(prev => {
+      const target = prev.find(f => f.id === id);
+      if (target) {
+        try { if (target.kind === 'image' && target.dataUrl) { URL.revokeObjectURL(target.dataUrl); } } catch {}
+      }
+      return prev.filter(f => f.id !== id);
+    });
+  }, []);
+
+  // Handle sending the message
+  const handleSendMessage = useCallback(async () => {
+    if (files.length > 0) {
+      // Convert every attachment under 20MB (image/audio/video/text/document) to a base64 dataUrl before sending
+      const converted = await Promise.all(files.map(async (f) => {
+        const kind = f.kind ?? 'image';
+        const isImage = kind === 'image';
+        const isAudio = kind === 'audio';
+        const isVideo = kind === 'video';
+        const isDocument = kind === 'document';
+        const isTextMime = (f.mimeType ?? '').startsWith('text/');
+        const shouldConvert = (isImage || isAudio || isVideo || isTextMime || isDocument) && f.file.size < 20 * 1024 * 1024;
+        return {
+          ...f,
+          dataUrl: shouldConvert ? await fileToDataUrl(f.file) : '',
+        };
+      }));
+      onSendMessage(converted);
+    } else {
+      onSendMessage(undefined);
+    }
+    // Revoke all object URLs and clear the attachment list after sending
+    files.forEach(file => {
+      try { URL.revokeObjectURL(file.dataUrl); } catch {}
+    });
+    setFiles([]);
+  }, [onSendMessage, files, fileToDataUrl]);
+
+  // Handle keyboard events
+  const handleKeyPress = useCallback((e: React.KeyboardEvent) => {
+    if (e.key === 'Enter' && !e.shiftKey) {
+      e.preventDefault();
+      if ((currentInput.trim() || files.length > 0) && !isStreaming && !isLoading) {
+        handleSendMessage();
+      }
+    } else {
+      onKeyPress(e);
+    }
+  }, [currentInput, files, isStreaming, isLoading, handleSendMessage, onKeyPress]);
+
   return (
-    <div className="flex items-center gap-1 p-2">
+    <div className="flex flex-col">
+      {/* Attachment preview area */}
+      {files.length > 0 && (
+        <div className="px-2 pt-2">
+          <div className="flex flex-wrap gap-2">
+            {files.map((file) => (
+              <div key={file.id} className="relative flex flex-col items-center">
+                {file.kind === 'image' && file.dataUrl ? (
+                  <img src={file.dataUrl} alt={file.name} className="h-16 w-16 rounded object-cover" />
+                ) : (
+                  <div className="flex h-16 w-16 items-center justify-center rounded bg-muted">
+                    {file.kind === 'audio' ? <Music className="h-6 w-6" /> : file.kind === 'video' ? <Video className="h-6 w-6" /> : <FileText className="h-6 w-6" />}
+                  </div>
+                )}
+                <button type="button" onClick={() => removeFile(file.id)} className="absolute -right-1 -top-1 rounded-full bg-background p-0.5 shadow">
+                  <X className="h-3 w-3" />
+                </button>
+                <span className="mt-1 max-w-16 truncate text-xs">
+                  {file.name}
+                </span>
+              </div>
+            ))}
+          </div>
+        </div>
+      )}
+
+      {errorMsg && (
+        <div className="px-2 pt-1 text-xs text-destructive">
+          {errorMsg}
+        </div>
+      )}
+
+      <div
+        className="flex items-center gap-1 p-2"
+        onDragOver={(e) => { e.preventDefault(); e.stopPropagation(); }}
+        onDrop={(e) => {
+          e.preventDefault();
+          e.stopPropagation();
+          const dropped = Array.from(e.dataTransfer.files || []);
+          dropped.forEach((f) => addFile(f));
+        }}
+      >
+        {/* File upload button */}
+        <Button type="button" variant="ghost" size="icon" onClick={() => fileInputRef.current?.click()} disabled={isStreaming || isLoading}>
+          <Paperclip className="h-4 w-4" />
+        </Button>
+
         <Input
+          ref={inputRef}
           value={currentInput}
           onChange={(e) => onInputChange(e.target.value)}
-          onKeyPress={onKeyPress}
-          placeholder="Enter message..."
+          onKeyPress={handleKeyPress}
+          onPaste={handlePaste}
+          placeholder="Enter message, paste or drag-and-drop files (images/audio/video/docs)..."
           disabled={isStreaming || isLoading}
           className="flex-1 border-0 focus-visible:ring-0 focus-visible:ring-offset-0 shadow-none px-2"
         />
+
+        {isStreaming ? (
+          <Button variant="destructive" size="sm" onClick={onStopExecution}>Stop</Button>
+        ) : (
+          <Button size="sm" onClick={() => handleSendMessage()} disabled={isLoading || (!currentInput.trim() && files.length === 0)}>Send</Button>
+        )}
+      </div>
+
+      {/* Hidden file input */}
+      <input ref={fileInputRef} type="file" multiple className="hidden" onChange={handleFileSelect} />
+    </div>
  );
}
diff --git a/frontend/app/chat/components/ProjectPage.tsx b/frontend/app/chat/components/ProjectPage.tsx
index effa802..c12e7d8 100644
--- a/frontend/app/chat/components/ProjectPage.tsx
+++ b/frontend/app/chat/components/ProjectPage.tsx
@@ -1,7 +1,6 @@
 import React, { useState, useEffect } from 'react';
 import { observer } from 'mobx-react-lite';
 import { Button } from '@/components/ui/button';
-import { Textarea } from '@/components/ui/textarea';
 import { Input } from '@/components/ui/input';
 import { SidebarTrigger } from '@/components/ui/sidebar';
 import {
@@ -22,11 +21,12 @@
 import { projectStore } from '@/app/stores/projectStore';
 import { selectionStore } from '@/app/stores/selectionStore';
 import LoadingSpinner from '@/components/layout/LoadingSpinner';
 import { ProjectWithRuns } from '@/lib/types';
+import { ChatInput } from './ChatInput';
 
 interface ProjectPageProps {
   currentInput: string;
   onInputChange: (value: string) => void;
-  onSendMessage: () => void;
+  onSendMessage: (files?: any[]) => void;
   onKeyPress: (e: React.KeyboardEvent) => void;
   isLoading: boolean;
 }
@@ -285,22 +285,15 @@ export const ProjectPage = observer(function ProjectPage({
 
       {/* Chat Input Section */}
       <div className="w-full max-w-2xl mx-auto">
         <h1 className="text-center text-2xl font-semibold">What can I help you?</h1>
 
-        <Textarea value={currentInput} onChange={(e) => onInputChange(e.target.value)} onKeyPress={onKeyPress} placeholder="Enter message..." disabled={isLoading} />
-        <Button onClick={onSendMessage} disabled={isLoading}>Send</Button>
+        <ChatInput
+          currentInput={currentInput}
+          onInputChange={onInputChange}
+          onKeyPress={onKeyPress}
+          onSendMessage={onSendMessage}
+          isStreaming={false}
+          isLoading={isLoading}
+          onStopExecution={() => {}}
+        />
       </div>
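Taken together, the frontend and the inbox processor agree on a single size-based routing rule for attachments. A minimal sketch of that rule as a standalone function (the name and signature are illustrative; the 20 MB constant matches both `ChatInput.tsx` and `inbox_processor.py`):

```python
MAX_INLINE_BYTES = 20 * 1024 * 1024  # the shared 20MB threshold

def choose_transport(size_bytes: int, mime_type: str) -> str:
    """Illustrative only: how an attachment reaches the model."""
    if size_bytes < MAX_INLINE_BYTES:
        # Small files travel inline as base64 data URLs; on the backend path,
        # only images are inlined, other types are logged and skipped.
        return "inline_base64" if mime_type.startswith("image/") else "inline_or_skip"
    # Anything at or above the threshold is uploaded via litellm.create_file.
    return "gemini_file_upload"

assert choose_transport(3 * 1024 * 1024, "image/png") == "inline_base64"
assert choose_transport(64 * 1024 * 1024, "video/mp4") == "gemini_file_upload"
```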