diff --git a/core/agent_core/events/ingestors.py b/core/agent_core/events/ingestors.py
index 8828e68..94d55f5 100644
--- a/core/agent_core/events/ingestors.py
+++ b/core/agent_core/events/ingestors.py
@@ -337,6 +337,39 @@ def user_prompt_ingestor(payload: Any, params: Dict, context: Dict) -> str:
         return payload.get("prompt", "")
     return str(payload)
 
+@register_ingestor("multimodal_user_prompt_ingestor")
+def multimodal_user_prompt_ingestor(payload: Any, params: Dict, context: Dict) -> str:
+    """Processes user input containing images/files and returns a concise text description suitable for LLMs (actual data is handled during message construction)."""
+    if not isinstance(payload, dict):
+        return str(payload)
+
+    prompt = payload.get("prompt", "")
+    images = payload.get("images", [])
+    files = payload.get("files", [])
+
+    # If there are no images or files, return the text directly
+    if not images and not files:
+        return prompt
+
+    # Construct a brief attachment description
+    parts = []
+    if images:
+        parts.append(f"User uploaded {len(images)} image(s)")
+    if files:
+        # Optional: list up to the first 3 file names
+        names = []
+        for f in files[:3]:
+            name = f.get("name") or f.get("filename")
+            if name:
+                names.append(name)
+        if names:
+            parts.append(f"attached {len(files)} file(s) (e.g., {', '.join(names)}{'' if len(files) <= 3 else ' etc.'})")
+        else:
+            parts.append(f"attached {len(files)} file(s)")
+
+    note = "[" + ", ".join(parts) + "]"
+    return f"{prompt}\n\n{note}" if prompt else note
+
 def _recursive_markdown_formatter(data: Any, schema: Dict, level: int = 0) -> List[str]:
     """
     Intelligently formats data recursively into LLM-friendly Markdown.
diff --git a/core/agent_core/framework/inbox_processor.py b/core/agent_core/framework/inbox_processor.py
index e94eeeb..3d7bf56 100644
--- a/core/agent_core/framework/inbox_processor.py
+++ b/core/agent_core/framework/inbox_processor.py
@@ -1,7 +1,13 @@
 import logging
 import uuid
+import os
+import base64
 from typing import Any, Dict, List, Optional
 from datetime import datetime, timezone
+import asyncio
+import time
+import httpx
+from litellm import create_file
 
 from ..events.event_strategies import EVENT_STRATEGY_REGISTRY
 from ..events.ingestors import INGESTOR_REGISTRY, markdown_formatter_ingestor
@@ -34,7 +40,8 @@ def _create_user_turn_from_inbox_item(self, item: Dict) -> Optional[str]:
         team_state = self.team_state
 
         prompt_content = item.get("payload", {}).get("prompt")
-        if not prompt_content:
+        files_content = item.get("payload", {}).get("files", [])
+        if not prompt_content and not files_content:
             return None
 
         user_turn_id = f"turn_user_{uuid.uuid4().hex[:8]}"
@@ -62,7 +69,10 @@ def _create_user_turn_from_inbox_item(self, item: Dict) -> Optional[str]:
             "end_time": item.get("metadata", {}).get("created_at", datetime.now(timezone.utc).isoformat()),
             "source_turn_ids": [last_agent_turn_id] if last_agent_turn_id else [],
             "source_tool_call_id": None,
-            "inputs": {"prompt": prompt_content},
+            "inputs": (
+                {"prompt": prompt_content, "files": files_content}
+                if files_content else {"prompt": prompt_content}
+            ),
             "outputs": {},
             "llm_interaction": None,
             "tool_interactions": [],
@@ -207,7 +217,7 @@ async def process(self) -> Dict[str, Any]:
             try:
                 payload = item["payload"]
 
-                if item.get("source") == "USER_PROMPT":
+                if item.get("source") in ["USER_PROMPT", "USER_PROMPT_WITH_FILES"]:
                     new_user_turn_id = self._create_user_turn_from_inbox_item(item)
                     if new_user_turn_id:
                         # Pass the "baton" so the next agent_turn can correctly link to this user_turn.
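For reference, a quick sketch of what the new ingestor in `ingestors.py` above emits. The payload shape (`prompt` / `images` / `files` with `name` or `filename` keys) mirrors what the frontend sends; the values here are illustrative only:

```python
# Illustrative payload; the field names match what the ingestor reads above.
payload = {
    "prompt": "Summarize the attached documents.",
    "images": [],
    "files": [
        {"name": "report.pdf", "mimeType": "application/pdf"},
        {"name": "notes.txt", "mimeType": "text/plain"},
    ],
}

text = multimodal_user_prompt_ingestor(payload, params={}, context={})
# -> "Summarize the attached documents.\n\n[attached 2 file(s) (e.g., report.pdf, notes.txt)]"
```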
@@ -249,7 +259,196 @@ async def process(self) -> Dict[str, Any]:
                 role = params.get("role", "user")
                 is_persistent = params.get("is_persistent_in_memory", False)
 
-                new_message = {"role": role, "content": injected_content}
+                # Handle multimodal content (files only)
+                has_multimodal_content = False
+                content_parts = []
+
+                # Check if there is file content
+                if source in ["USER_PROMPT", "USER_PROMPT_WITH_FILES"] and isinstance(dehydrated_payload, dict):
+                    # Process file content: upload attachments to Gemini and construct them as file references
+                    files = dehydrated_payload.get("files", [])
+                    if files:
+                        has_multimodal_content = True
+                        # Add the text content (if not already added)
+                        if injected_content and not any(part.get("type") == "text" for part in content_parts):
+                            content_parts.append({
+                                "type": "text",
+                                "text": injected_content
+                            })
+
+                        for f in files:
+                            try:
+                                filename = f.get("name") or f.get("filename") or f"file_{uuid.uuid4().hex[:6]}"
+                                mime_type = f.get("mimeType") or f.get("mime_type") or "application/octet-stream"
+
+                                if f.get("file_id"):
+                                    # Already uploaded, use the file reference
+                                    file_id = f["file_id"]
+                                    logger.info("gemini_file_upload_skipped_existing", extra={
+                                        "agent_id": self.agent_id,
+                                        "file_name": filename,
+                                        "mime_type": mime_type,
+                                        "file_id": file_id,
+                                    })
+                                    # Use the file reference
+                                    content_parts.append({
+                                        "type": "file",
+                                        "file": {
+                                            "file_id": file_id,
+                                            "filename": filename,
+                                            "format": mime_type
+                                        }
+                                    })
+                                else:
+                                    # Check if we have direct base64 data from the frontend
+                                    if f.get("data"):
+                                        # The frontend sent base64 data - use it directly without a file upload
+                                        data_str = f["data"]
+
+                                        if isinstance(mime_type, str) and mime_type.startswith("image/"):
+                                            # Ensure a proper data URL format for images
+                                            if not data_str.startswith("data:"):
+                                                image_url = f"data:{mime_type};base64,{data_str}"
+                                            else:
+                                                image_url = data_str
+
+                                            content_parts.append({
+                                                "type": "image_url",
+                                                "image_url": {
+                                                    "url": image_url,
+                                                    "detail": "high"
+                                                }
+                                            })
+
+                                            logger.info("image_processed_as_base64", extra={
+                                                "agent_id": self.agent_id,
+                                                "file_name": filename,
+                                                "mime_type": mime_type,
+                                                "method": "direct_base64"
+                                            })
+                                        else:
+                                            # Non-image files with base64 data
+                                            logger.info("non_image_file_with_base64", extra={
+                                                "agent_id": self.agent_id,
+                                                "file_name": filename,
+                                                "mime_type": mime_type,
+                                                "note": "Non-image files may not be fully supported"
+                                            })
+
+                                    elif f.get("url"):
+                                        # URL-based file - needs to be fetched and potentially uploaded
+                                        file_bytes = None
+
+                                        # Async fetch
+                                        async with httpx.AsyncClient(timeout=20) as client:
+                                            resp = await client.get(f["url"])
+                                            resp.raise_for_status()
+                                            file_bytes = resp.content
+
+                                        size_bytes = len(file_bytes) if file_bytes is not None else None
+                                        max_base64_size = 20 * 1024 * 1024  # 20MB
+
+                                        if size_bytes and size_bytes < max_base64_size:
+                                            # Small file from URL - convert to base64
+                                            if isinstance(mime_type, str) and mime_type.startswith("image/"):
+                                                base64_data = base64.b64encode(file_bytes).decode()
+                                                image_url = f"data:{mime_type};base64,{base64_data}"
+
+                                                content_parts.append({
+                                                    "type": "image_url",
+                                                    "image_url": {
+                                                        "url": image_url,
+                                                        "detail": "high"
+                                                    }
+                                                })
+
+                                                logger.info("url_file_converted_to_base64", extra={
+                                                    "agent_id": self.agent_id,
+                                                    "file_name": filename,
+                                                    "mime_type": mime_type,
+                                                    "size_bytes": size_bytes
+                                                })
+                                            else:
+                                                logger.info("non_image_url_file_skipped", extra={
+                                                    "agent_id": self.agent_id,
+                                                    "file_name": filename,
+                                                    "mime_type": mime_type
+                                                })
+                                        else:
+                                            # Large file from URL - use the Gemini file upload
+                                            # Prefer the API key from the project LLM config; fall back to the env var
+                                            try:
+                                                resolver = LLMConfigResolver(shared_llm_configs=self.run_context.get("config", {}).get("shared_llm_configs_ref", {}))
+                                                llm_config = resolver.resolve(self.profile)
+                                            except Exception:
+                                                llm_config = {}
+                                            gemini_key = (
+                                                (llm_config.get("api_key") if isinstance(llm_config, dict) else None)
+                                                or os.getenv("GEMINI_API_KEY")
+                                            )
+                                            if not gemini_key:
+                                                logger.error(
+                                                    "gemini_api_key_missing",
+                                                    extra={
+                                                        "agent_id": self.agent_id,
+                                                        "hint": "Provide api_key in active LLM config or set GEMINI_API_KEY env var"
+                                                    }
+                                                )
+                                                continue
+
+                                            # Structured start log
+                                            logger.info("gemini_file_upload_start", extra={
+                                                "agent_id": self.agent_id,
+                                                "file_name": filename,
+                                                "mime_type": mime_type,
+                                                "size_bytes": size_bytes,
+                                                "reason": "file_too_large_for_base64"
+                                            })
+                                            t0 = time.perf_counter()
+
+                                            # Offload blocking create_file to a thread
+                                            created = await asyncio.to_thread(
+                                                create_file,
+                                                file=file_bytes,
+                                                purpose="user_data",
+                                                custom_llm_provider="gemini",
+                                                api_key=gemini_key,
+                                            )
+                                            file_id = getattr(created, "id", None) if created is not None else None
+                                            if not file_id:
+                                                logger.error("gemini_file_upload_failed", extra={
+                                                    "file_name": filename,
+                                                    "mime_type": mime_type,
+                                                    "size_bytes": size_bytes,
+                                                    "duration_ms": int((time.perf_counter() - t0) * 1000),
+                                                })
+                                                continue
+                                            else:
+                                                logger.info("gemini_file_upload_success", extra={
+                                                    "agent_id": self.agent_id,
+                                                    "file_name": filename,
+                                                    "mime_type": mime_type,
+                                                    "size_bytes": size_bytes,
+                                                    "file_id": file_id,
+                                                    "duration_ms": int((time.perf_counter() - t0) * 1000),
+                                                })
+
+                                            # Append a file reference content part for large files
+                                            content_parts.append({
+                                                "type": "file",
+                                                "file": {
+                                                    "file_id": file_id,
+                                                    "filename": filename,
+                                                    "format": mime_type
+                                                }
+                                            })
+                            except Exception as ex:
+                                logger.error("file_processing_failed", extra={"error": str(ex)}, exc_info=True)
+
+                if has_multimodal_content:
+                    new_message = {"role": role, "content": content_parts}
+                else:
+                    new_message = {"role": role, "content": injected_content}
 
                 # If this message comes from the startup briefing, add an internal flag
                 # to prevent it from being handed over again in the future.
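The parts assembled above follow the OpenAI-style multimodal content format that LiteLLM accepts. As a minimal sketch (all values are placeholders), a prompt with one pasted image and one large uploaded file ends up as:

```python
# Placeholder values throughout; the three part shapes match those built by the processor.
new_message = {
    "role": "user",
    "content": [
        {"type": "text", "text": "What does this chart show?"},
        {
            "type": "image_url",
            "image_url": {"url": "data:image/png;base64,iVBORw0KGgo...", "detail": "high"},
        },
        {
            "type": "file",
            "file": {"file_id": "files/abc123", "filename": "data.csv", "format": "text/csv"},
        },
    ],
}
```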
diff --git a/core/agent_core/nodes/base_agent_node.py b/core/agent_core/nodes/base_agent_node.py
index fab2101..138cf07 100644
--- a/core/agent_core/nodes/base_agent_node.py
+++ b/core/agent_core/nodes/base_agent_node.py
@@ -931,9 +931,13 @@ def _clean_messages_for_llm(self, messages: List[Dict]) -> List[Dict]:
             if key in msg:
                 value = msg[key]
 
-                # Ensure content is a string
+                # Handle content based on type - preserve multimodal structure
                 if key == "content":
-                    if isinstance(value, dict):
+                    if isinstance(value, list):
+                        # Multimodal content (list of parts) - preserve the structure for the LLM
+                        cleaned_msg[key] = value
+                        logger.debug("multimodal_content_preserved", extra={"message_role": msg.get('role'), "parts_count": len(value)})
+                    elif isinstance(value, dict):
                         # If content is a dictionary, convert it to a JSON string
                         import json
                         cleaned_msg[key] = json.dumps(value, ensure_ascii=False)
@@ -952,7 +956,53 @@ def _clean_messages_for_llm(self, messages: List[Dict]) -> List[Dict]:
 
             cleaned_messages.append(cleaned_msg)
 
+        # Validate the message sequence for OpenAI API compliance
+        cleaned_messages = self._validate_message_sequence(cleaned_messages)
+
         return cleaned_messages
+
+    def _validate_message_sequence(self, messages: List[Dict]) -> List[Dict]:
+        """Validate the message sequence against OpenAI API ordering requirements, logging warnings for suspect patterns; the messages themselves are returned unchanged."""
+        if not messages:
+            return messages
+
+        # Check for invalid sequences and log warnings
+        for i in range(1, len(messages)):
+            prev_msg = messages[i-1]
+            curr_msg = messages[i]
+
+            prev_role = prev_msg.get("role")
+            curr_role = curr_msg.get("role")
+            has_tool_calls = bool(curr_msg.get("tool_calls"))
+
+            # Check for invalid function call sequences
+            if has_tool_calls and curr_role == "assistant":
+                # Function calls should come after user messages or tool responses
+                if prev_role not in ["user", "tool"]:
+                    logger.warning("invalid_function_call_sequence", extra={
+                        "agent_id": self.agent_id,
+                        "prev_role": prev_role,
+                        "curr_role": curr_role,
+                        "has_tool_calls": has_tool_calls,
+                        "position": i,
+                        "note": "This may cause OpenAI API 400 errors"
+                    })
+
+            # Check for consecutive assistant messages without tool interaction
+            if prev_role == "assistant" and curr_role == "assistant":
+                prev_has_tools = bool(prev_msg.get("tool_calls"))
+                curr_is_tool_response = bool(curr_msg.get("tool_call_id"))
+
+                if not prev_has_tools and not curr_is_tool_response:
+                    logger.warning("consecutive_assistant_messages", extra={
+                        "agent_id": self.agent_id,
+                        "position": i,
+                        "prev_has_tools": prev_has_tools,
+                        "curr_is_tool_response": curr_is_tool_response,
+                        "note": "This may cause message sequence issues"
+                    })
+
+        return messages
 
     def _finalize_dangling_tool_in_turn(self, context: Dict):
         """
diff --git a/core/agent_core/nodes/mcp_proxy_node.py b/core/agent_core/nodes/mcp_proxy_node.py
index 8c1487a..bc64e7a 100644
--- a/core/agent_core/nodes/mcp_proxy_node.py
+++ b/core/agent_core/nodes/mcp_proxy_node.py
@@ -2,6 +2,7 @@
 import asyncio
 import anyio
 from typing import Dict, Any
+from pathlib import Path
 
 # Import the new base class
 from .base_tool_node import BaseToolNode
@@ -46,6 +47,38 @@ async def exec_async(self, prep_res: Dict) -> Dict[str, Any]:
             logger.error("mcp_proxy_session_group_not_found", extra={"unique_tool_name": self.unique_tool_name})
             return {"status": "error", "error_message": error_msg}
 
+        # 2.5 Normalize relative filesystem paths in common parameters to absolute ones, so tool calls do not depend on the server's working directory
+        def _normalize_paths_in_params(params: Dict[str, Any], tool_info: Dict[str, Any]) -> None:
+            if not isinstance(params, dict) or not isinstance(tool_info, dict):
+                return
+
+            properties = tool_info.get("parameters", {}).get("properties", {})
+            for param_name, schema in properties.items():
+                # Heuristic: find string params that look like paths
+                is_path_like = (
+                    schema.get("type") == "string" and
+                    ("path" in param_name.lower() or schema.get("format") == "uri-reference")
+                )
+
+                if is_path_like:
+                    value = params.get(param_name)
+                    if isinstance(value, str) and value.strip():
+                        p = Path(value)
+                        if not p.is_absolute():
+                            abs_p = (Path.cwd() / p).resolve()
+                            params[param_name] = str(abs_p)
+                            try:
+                                logger.debug("mcp_param_path_normalized", extra={
+                                    "unique_tool_name": self.unique_tool_name,
+                                    "param": param_name,
+                                    "original_path": value,
+                                    "abs_path": str(abs_p)
+                                })
+                            except Exception:
+                                pass
+
+        _normalize_paths_in_params(tool_params, self._tool_info)
+
         # 3. Execute the original business logic
         logger.info("mcp_proxy_tool_call_begin", extra={"unique_tool_name": self.unique_tool_name})
diff --git a/core/api/message_handlers.py b/core/api/message_handlers.py
index 1aed78a..81ce1c9 100644
--- a/core/api/message_handlers.py
+++ b/core/api/message_handlers.py
@@ -834,7 +834,7 @@ async def handle_manage_work_modules_request(ws_state: Dict, data: Dict):
 async def handle_send_to_run_message(ws_state: Dict, data: Dict):
     """
     Handles 'send_to_run' messages, routing client messages to the specified active business run.
-    This function is now also responsible for "activating" runs that are in the CREATED state.
+    Also handles multimodal payloads (files), and is responsible for activating runs in CREATED state.
     """
     event_manager = ws_state.event_manager
     session_id_for_log = event_manager.session_id
@@ -843,8 +843,17 @@ async def handle_send_to_run_message(ws_state: Dict, data: Dict):
     run_id_var.set(target_run_id)  # Set context variable
     message_payload = data.get("message_payload")
     extra_payload = data.get("extra_payload")
-
-    logger.info("send_to_run_received", extra={"session_id": session_id_for_log, "target_run_id": target_run_id, "message_preview": str(message_payload)[:100]})
+    files_content = (message_payload or {}).get("files", [])
+
+    logger.info(
+        "send_to_run_received",
+        extra={
+            "session_id": session_id_for_log,
+            "target_run_id": target_run_id,
+            "message_preview": str(message_payload)[:100],
+            "has_files": bool(files_content),
+        },
+    )
 
     if not target_run_id or message_payload is None:
         err_msg = "'send_to_run' requires 'run_id' and 'message_payload'."
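For context, a sketch of the client message this handler consumes. `run_id`, `message_payload`, `prompt`, and `files` are the keys the handler actually reads; the attachment fields follow the frontend's format, and the envelope `type` key is an assumption based on the `MESSAGE_HANDLERS` registry below:

```python
# Hypothetical 'send_to_run' message carrying one small attachment.
data = {
    "type": "send_to_run",  # assumed routing key (see MESSAGE_HANDLERS)
    "run_id": "run_1a2b3c4d",
    "message_payload": {
        "prompt": "Please review the attached spec.",
        "files": [
            {
                "name": "spec.pdf",
                "mimeType": "application/pdf",
                "data": "JVBERi0xLjcK...",  # base64 placeholder
            },
        ],
    },
}
```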
@@ -868,10 +877,12 @@ async def handle_send_to_run_message(ws_state: Dict, data: Dict):
 
     if run_status == 'CREATED':
         logger.debug("run_activation_started", extra={"run_id": target_run_id, "run_type": run_type})
 
-        if prompt_content is None:
-            raise ValueError("First message to a new run must contain a 'prompt'.")
+        # Allow activation if there is either text or multimodal content
+        if (prompt_content is None) and (not files_content):
+            raise ValueError("First message must contain either 'prompt' text or attachments (files).")
 
-        run_context['team_state']['question'] = prompt_content
+        # If there is no text but attachments exist, initialize the question with an empty string
+        run_context['team_state']['question'] = prompt_content or ""
 
         task = None
         if run_type == "partner_interaction":
@@ -879,10 +890,17 @@ async def handle_send_to_run_message(ws_state: Dict, data: Dict):
             team_state = run_context['team_state']
             partner_state = partner_context['state']
 
+            # Construct the payload; all attachments go through the unified 'files' field
+            payload = {"prompt": prompt_content}
+            if files_content:
+                payload["files"] = files_content
+
             inbox_item = {
                 "item_id": f"inbox_{uuid.uuid4().hex[:8]}",
-                "source": "USER_PROMPT",  # Use standardized event source
-                "payload": {"prompt": prompt_content},
+                "source": (
+                    "USER_PROMPT_WITH_FILES" if files_content else "USER_PROMPT"
+                ),  # Standardized source, with a multimodal marker when applicable
+                "payload": payload,
                 "consumption_policy": "consume_on_read",
                 "metadata": {"created_at": datetime.now(timezone.utc).isoformat()}
             }
@@ -912,19 +930,24 @@ async def handle_send_to_run_message(ws_state: Dict, data: Dict):
 
     # --- Branch 2: Send a message to a running session ---
     elif run_status in ['RUNNING', 'AWAITING_INPUT']:
-        if prompt_content is None:
-            raise ValueError("Message payload must contain a 'prompt'.")
+        # Allow sending if text or attachments exist
+        if (prompt_content is None) and (not files_content):
+            raise ValueError("Message must contain either 'prompt' text or attachments (files).")
 
         if run_type == "partner_interaction":
             partner_context = run_context['sub_context_refs']['_partner_context_ref']
             partner_state = partner_context['state']
             team_state = run_context['team_state']
 
-            # --- Core modification: Similarly, only create an InboxItem ---
+            # --- Core modification: Create an InboxItem carrying the multimodal info (text and/or files) ---
+            payload = {"prompt": prompt_content}
+            if files_content:
+                payload["files"] = files_content
+
             inbox_item = {
                 "item_id": f"inbox_{uuid.uuid4().hex[:8]}",
-                "source": "USER_PROMPT",
-                "payload": {"prompt": prompt_content},
+                "source": ("USER_PROMPT_WITH_FILES" if files_content else "USER_PROMPT"),
+                "payload": payload,
                 "consumption_policy": "consume_on_read",
                 "metadata": {"created_at": datetime.now(timezone.utc).isoformat()}
             }
@@ -949,12 +972,13 @@ async def handle_send_to_run_message(ws_state: Dict, data: Dict):
         logger.error("send_to_run_processing_error", extra={"session_id": session_id_for_log, "target_run_id": target_run_id, "run_type": run_type, "error_message": str(e)}, exc_info=True)
         await event_manager.emit_error(run_id=target_run_id, agent_id="System", error_message=f"Error processing message for run {target_run_id}: {str(e)}")
 
+
 # --- MESSAGE_HANDLERS registry (Dango's version, with adapted function names) ---
 MESSAGE_HANDLERS: Dict[str, callable] = {
     "start_run": handle_start_run_message,
     "stop_run": handle_stop_run_message,
     "request_available_toolsets": handle_request_available_toolsets,
-    "send_to_run": handle_send_to_run_message,  # Added by Dango, adapted
+    "send_to_run": handle_send_to_run_message,  # Unified handler
     "stop_managed_principal": handle_stop_managed_principal_message,  # Added by Dango, adapted
     "request_run_profiles": handle_request_run_profiles_message,  # Added by Dango, adapted
     "request_run_context": handle_request_run_context_message,  # Added by Dango, adapted
diff --git a/core/pyproject.toml b/core/pyproject.toml
index bce457b..fbdad9c 100644
--- a/core/pyproject.toml
+++ b/core/pyproject.toml
@@ -14,6 +14,7 @@ dependencies = [
     "python-dotenv>=1.0.0",
     "python-json-logger>=2.0.7",
    "requests>=2.28.0",
+    "httpx>=0.24.0",
     "markdown>=3.4.0",
     "coolname>=1.1.0",
     "numpy>=2",
diff --git a/frontend/app/chat/components/ChatInput.tsx b/frontend/app/chat/components/ChatInput.tsx
index b0747e3..7b308cd 100644
--- a/frontend/app/chat/components/ChatInput.tsx
+++ b/frontend/app/chat/components/ChatInput.tsx
@@ -1,12 +1,23 @@
-import React from 'react';
+import React, { useState, useRef, useCallback, useEffect } from 'react';
 import { Button } from '@/components/ui/button';
 import { Input } from '@/components/ui/input';
+import { X, Paperclip, Music, Video, FileText } from 'lucide-react';
+
+interface FileAttachment {
+  id: string;
+  file: File;
+  dataUrl: string;
+  name: string;
+  mimeType?: string;
+  kind?: 'image' | 'audio' | 'video' | 'document';
+}
 
 interface ChatInputProps {
   currentInput: string;
   onInputChange: (value: string) => void;
   onKeyPress: (e: React.KeyboardEvent) => void;
-  onSendMessage: () => void;
+  onSendMessage: (files?: FileAttachment[]) => void;
   isStreaming: boolean;
   isLoading: boolean;
   onStopExecution: () => void;
@@ -21,35 +32,279 @@ export function ChatInput({
   isLoading,
   onStopExecution,
 }: ChatInputProps) {
+  const [files, setFiles] = useState<FileAttachment[]>([]);
+  const fileInputRef = useRef<HTMLInputElement>(null);
+  const inputRef = useRef<HTMLInputElement>(null);
+  const [errorMsg, setErrorMsg] = useState<string | null>(null);
+  const errorTimerRef = useRef<number | null>(null);
+
+  const showError = useCallback((msg: string) => {
+    setErrorMsg(msg);
+    if (errorTimerRef.current) {
+      window.clearTimeout(errorTimerRef.current);
+    }
+    errorTimerRef.current = window.setTimeout(() => {
+      setErrorMsg(null);
+      errorTimerRef.current = null;
+    }, 3000);
+  }, []);
+
+  // Convert a File to a base64 data URL (for backward compatibility on send)
+  const fileToDataUrl = useCallback((file: File) => {
+    return new Promise<string>((resolve, reject) => {
+      const reader = new FileReader();
+      reader.onload = () => resolve(reader.result as string);
+      reader.onerror = reject;
+      reader.readAsDataURL(file);
+    });
+  }, []);
+
+  // Track the latest attachments in a ref so the unmount cleanup below always sees them
+  const filesRef = useRef<FileAttachment[]>([]);
+  useEffect(() => {
+    filesRef.current = files;
+  }, [files]);
+
+  useEffect(() => {
+    return () => {
+      if (errorTimerRef.current) {
+        window.clearTimeout(errorTimerRef.current);
+      }
+      // Revoke any remaining object URLs on unmount to prevent memory leaks
+      try {
+        filesRef.current.forEach(file => {
+          if (file.kind === 'image' && file.dataUrl) {
+            URL.revokeObjectURL(file.dataUrl);
+          }
+        });
+      } catch {}
+    };
+  }, []);
+
+  // The paste handler is defined below so that the addFile it depends on is already declared
+
+  // Add a file (image/audio/video/document)
+  const addFile = useCallback((file: File) => {
+    const isImage = file.type.startsWith('image/');
+    const isAudio = file.type.startsWith('audio/');
+    const isVideo = file.type.startsWith('video/');
+    // Document type: application/* or text/*, or fall back by extension
+    const ext = file.name.split('.').pop()?.toLowerCase() || '';
+    const docExts = new Set(['pdf','doc','docx','xls','xlsx','ppt','pptx','txt','rtf','md','csv']);
+    const isDocByMime = file.type.startsWith('application/') || file.type.startsWith('text/');
+    const isDocument = (!isImage && !isAudio && !isVideo) && (isDocByMime || docExts.has(ext));
+
+    if (!isImage && !isAudio && !isVideo && !isDocument) {
+      showError('Unsupported file type. Allowed: images, audio, video, documents.');
+      return;
+    }
+
+    // Allow files larger than 20MB to be added;
+    // all files smaller than 20MB (images/audio/video/text/documents) are converted to a dataUrl when sending,
+    // while files 20MB or larger are sent to the backend to be uploaded as files.
+
+    // Generate a Blob URL preview for images; for audio, video, and documents, only keep the file and its name
+    const objectUrl = isImage ? URL.createObjectURL(file) : '';
+    const newAttachment: FileAttachment = {
+      id: crypto.randomUUID(),
+      file,
+      dataUrl: objectUrl,
+      name: file.name,
+      mimeType: file.type,
+      kind: isImage ? 'image' : isAudio ? 'audio' : isVideo ? 'video' : 'document',
+    };
+    setFiles(prev => [...prev, newAttachment]);
+  }, [showError]);
+
+  // Handle the paste event (only images are supported)
+  const handlePaste = useCallback((e: React.ClipboardEvent) => {
+    const items = e.clipboardData?.items;
+    if (!items) return;
+
+    for (let i = 0; i < items.length; i++) {
+      const item = items[i];
+      if (item.type.startsWith('image/')) {
+        e.preventDefault();
+        const file = item.getAsFile();
+        if (file) {
+          addFile(file);
+        }
+        break;
+      }
+    }
+  }, [addFile]);
+
+  // Handle file selection
+  const handleFileSelect = useCallback((e: React.ChangeEvent<HTMLInputElement>) => {
+    const selected = e.target.files;
+    if (selected) {
+      Array.from(selected).forEach(addFile);
+    }
+    // Clear the input value so the same file can be selected again
+    if (fileInputRef.current) {
+      fileInputRef.current.value = '';
+    }
+  }, [addFile]);
+
+  // Remove an attachment
+  const removeFile = useCallback((id: string) => {
+    setFiles(prev => {
+      const target = prev.find(f => f.id === id);
+      if (target) {
+        try { if (target.kind === 'image' && target.dataUrl) { URL.revokeObjectURL(target.dataUrl); } } catch {}
+      }
+      return prev.filter(f => f.id !== id);
+    });
+  }, []);
+
+  // Handle sending the message
+  const handleSendMessage = useCallback(async () => {
+    if (files.length > 0) {
+      // Convert every attachment under 20MB (image/audio/video/text/document) to a base64 dataUrl before sending
+      const converted = await Promise.all(files.map(async (f) => {
+        const kind = f.kind ?? 'image';
+        const isImage = kind === 'image';
+        const isAudio = kind === 'audio';
+        const isVideo = kind === 'video';
+        const isDocument = kind === 'document';
+        const isTextMime = (f.mimeType ?? '').startsWith('text/');
+        const shouldConvert = (isImage || isAudio || isVideo || isTextMime || isDocument) && f.file.size < 20 * 1024 * 1024;
+        return {
+          ...f,
+          dataUrl: shouldConvert ? await fileToDataUrl(f.file) : '',
+        };
+      }));
+      onSendMessage(converted);
+    } else {
+      onSendMessage(undefined);
+    }
+    // Revoke all object URLs and clear the attachment list after sending
+    files.forEach(file => {
+      try { URL.revokeObjectURL(file.dataUrl); } catch {}
+    });
+    setFiles([]);
+  }, [onSendMessage, files, fileToDataUrl]);
+
+  // Handle keyboard events
+  const handleKeyPress = useCallback((e: React.KeyboardEvent) => {
+    if (e.key === 'Enter' && !e.shiftKey) {
+      e.preventDefault();
+      if ((currentInput.trim() || files.length > 0) && !isStreaming && !isLoading) {
+        handleSendMessage();
+      }
+    } else {
+      onKeyPress(e);
+    }
+  }, [currentInput, files, isStreaming, isLoading, handleSendMessage, onKeyPress]);
+
   return (
-    <div className="flex items-center gap-1 p-2">
+    <div className="flex flex-col">
+      {/* Attachment preview area */}
+      {files.length > 0 && (
+        <div className="px-2 pt-2">
+          <div className="flex flex-wrap gap-2">
+            {files.map((file) => (
+              <div key={file.id} className="relative flex flex-col items-center">
+                {file.kind === 'image' && file.dataUrl ? (
+                  <img src={file.dataUrl} alt={file.name} className="h-16 w-16 rounded object-cover" />
+                ) : (
+                  <div className="flex h-16 w-16 items-center justify-center rounded bg-muted">
+                    {file.kind === 'audio' ? <Music className="h-6 w-6" /> : file.kind === 'video' ? <Video className="h-6 w-6" /> : <FileText className="h-6 w-6" />}
+                  </div>
+                )}
+                <button type="button" onClick={() => removeFile(file.id)} className="absolute -right-1 -top-1 rounded-full bg-background p-0.5 shadow">
+                  <X className="h-3 w-3" />
+                </button>
+                <span className="mt-1 max-w-16 truncate text-xs">
+                  {file.name}
+                </span>
+              </div>
+            ))}
+          </div>
+        </div>
+      )}
+
+      {errorMsg && (
+        <div className="px-2 pt-1 text-xs text-destructive">
+          {errorMsg}
+        </div>
+      )}
+
+      <div
+        className="flex items-center gap-1 p-2"
+        onDragOver={(e) => { e.preventDefault(); e.stopPropagation(); }}
+        onDrop={(e) => {
+          e.preventDefault();
+          e.stopPropagation();
+          const dropped = Array.from(e.dataTransfer.files || []);
+          dropped.forEach((f) => addFile(f));
+        }}
+      >
+        {/* File upload button */}
+        <Button type="button" variant="ghost" size="icon" onClick={() => fileInputRef.current?.click()} disabled={isStreaming || isLoading}>
+          <Paperclip className="h-4 w-4" />
+        </Button>
+
         <Input
+          ref={inputRef}
           value={currentInput}
           onChange={(e) => onInputChange(e.target.value)}
-          onKeyPress={onKeyPress}
-          placeholder="Enter message..."
+          onKeyPress={handleKeyPress}
+          onPaste={handlePaste}
+          placeholder="Enter message, paste or drag-and-drop files (images/audio/video/docs)..."
           disabled={isStreaming || isLoading}
           className="flex-1 border-0 focus-visible:ring-0 focus-visible:ring-offset-0 shadow-none px-2"
         />
+
+        {isStreaming ? (
+          <Button variant="destructive" size="sm" onClick={onStopExecution}>Stop</Button>
+        ) : (
+          <Button size="sm" onClick={() => handleSendMessage()} disabled={isLoading || (!currentInput.trim() && files.length === 0)}>Send</Button>
+        )}
+      </div>
+
+      {/* Hidden file input */}
+      <input ref={fileInputRef} type="file" multiple className="hidden" onChange={handleFileSelect} />
+    </div>
  );
}
diff --git a/frontend/app/chat/components/ProjectPage.tsx b/frontend/app/chat/components/ProjectPage.tsx
index effa802..c12e7d8 100644
--- a/frontend/app/chat/components/ProjectPage.tsx
+++ b/frontend/app/chat/components/ProjectPage.tsx
@@ -1,7 +1,6 @@
 import React, { useState, useEffect } from 'react';
 import { observer } from 'mobx-react-lite';
 import { Button } from '@/components/ui/button';
-import { Textarea } from '@/components/ui/textarea';
 import { Input } from '@/components/ui/input';
 import { SidebarTrigger } from '@/components/ui/sidebar';
 import {
@@ -22,11 +21,12 @@
 import { projectStore } from '@/app/stores/projectStore';
 import { selectionStore } from '@/app/stores/selectionStore';
 import LoadingSpinner from '@/components/layout/LoadingSpinner';
 import { ProjectWithRuns } from '@/lib/types';
+import { ChatInput } from './ChatInput';
 
 interface ProjectPageProps {
   currentInput: string;
   onInputChange: (value: string) => void;
-  onSendMessage: () => void;
+  onSendMessage: (files?: any[]) => void;
   onKeyPress: (e: React.KeyboardEvent) => void;
   isLoading: boolean;
 }
@@ -285,22 +285,15 @@ export const ProjectPage = observer(function ProjectPage({
 
       {/* Chat Input Section */}
       <div className="w-full max-w-2xl mx-auto">
         <h1 className="text-center text-2xl font-semibold">What can I help you?</h1>
 
-        <Textarea value={currentInput} onChange={(e) => onInputChange(e.target.value)} onKeyPress={onKeyPress} placeholder="Enter message..." disabled={isLoading} />
-        <Button onClick={onSendMessage} disabled={isLoading}>Send</Button>
+        <ChatInput
+          currentInput={currentInput}
+          onInputChange={onInputChange}
+          onKeyPress={onKeyPress}
+          onSendMessage={onSendMessage}
+          isStreaming={false}
+          isLoading={isLoading}
+          onStopExecution={() => {}}
+        />
       </div>
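Taken together, the frontend and the inbox processor agree on a single size-based routing rule for attachments. A minimal sketch of that rule as a standalone function (the name and signature are illustrative; the 20 MB constant matches both `ChatInput.tsx` and `inbox_processor.py`):

```python
MAX_INLINE_BYTES = 20 * 1024 * 1024  # the shared 20MB threshold

def choose_transport(size_bytes: int, mime_type: str) -> str:
    """Illustrative only: how an attachment reaches the model."""
    if size_bytes < MAX_INLINE_BYTES:
        # Small files travel inline as base64 data URLs; on the backend path,
        # only images are inlined, other types are logged and skipped.
        return "inline_base64" if mime_type.startswith("image/") else "inline_or_skip"
    # Anything at or above the threshold is uploaded via litellm.create_file.
    return "gemini_file_upload"

assert choose_transport(3 * 1024 * 1024, "image/png") == "inline_base64"
assert choose_transport(64 * 1024 * 1024, "video/mp4") == "gemini_file_upload"
```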