
Commit ec3c9cb

Improve workflow stability
1 parent ff5b858 commit ec3c9cb

14 files changed: +883 -400 lines

config/mcp_tool_definitions.py

Lines changed: 3 additions & 3 deletions
@@ -25,13 +25,13 @@ def get_code_implementation_tools() -> List[Dict[str, Any]]:
         Get tool definitions for code implementation
         """
         return [
-            MCPToolDefinitions._get_read_file_tool(),
+            # MCPToolDefinitions._get_read_file_tool(),
             # MCPToolDefinitions._get_read_multiple_files_tool(),
-            MCPToolDefinitions._get_read_code_mem_tool(),
+            # MCPToolDefinitions._get_read_code_mem_tool(),
             MCPToolDefinitions._get_write_file_tool(),
             # MCPToolDefinitions._get_write_multiple_files_tool(),
             # MCPToolDefinitions._get_execute_python_tool(),
-            MCPToolDefinitions._get_execute_bash_tool(),
+            # MCPToolDefinitions._get_execute_bash_tool(),
         ]
 
     @staticmethod

config/mcp_tool_definitions_index.py

Lines changed: 10 additions & 10 deletions
@@ -25,18 +25,18 @@ def get_code_implementation_tools() -> List[Dict[str, Any]]:
         Get tool definitions for code implementation
         """
         return [
-            MCPToolDefinitions._get_read_file_tool(),
-            MCPToolDefinitions._get_read_multiple_files_tool(),
-            MCPToolDefinitions._get_read_code_mem_tool(),
+            # MCPToolDefinitions._get_read_file_tool(),
+            # MCPToolDefinitions._get_read_multiple_files_tool(),
+            # MCPToolDefinitions._get_read_code_mem_tool(),
             MCPToolDefinitions._get_write_file_tool(),
-            MCPToolDefinitions._get_write_multiple_files_tool(),
-            MCPToolDefinitions._get_execute_python_tool(),
-            MCPToolDefinitions._get_execute_bash_tool(),
+            # MCPToolDefinitions._get_write_multiple_files_tool(),
+            # MCPToolDefinitions._get_execute_python_tool(),
+            # MCPToolDefinitions._get_execute_bash_tool(),
             MCPToolDefinitions._get_search_code_references_tool(),
-            MCPToolDefinitions._get_search_code_tool(),
-            MCPToolDefinitions._get_file_structure_tool(),
-            MCPToolDefinitions._get_set_workspace_tool(),
-            MCPToolDefinitions._get_operation_history_tool(),
+            # MCPToolDefinitions._get_search_code_tool(),
+            # MCPToolDefinitions._get_file_structure_tool(),
+            # MCPToolDefinitions._get_set_workspace_tool(),
+            # MCPToolDefinitions._get_operation_history_tool(),
         ]
 
     @staticmethod

prompts/code_prompts.py

Lines changed: 34 additions & 55 deletions
@@ -61,19 +61,21 @@
 PAPER_DOWNLOADER_PROMPT = """You are a precise paper downloader that processes input from PaperInputAnalyzerAgent.
 
 Task: Handle paper according to input type and save to "./deepcode_lab/papers/id/id.md"
-Note: Generate id (id is a number) by counting files in "./deepcode_lab/papers/" directory and increment by 1.
+Note: The paper ID will be provided at the start of the message as "PAPER_ID=<number>". Use this EXACT number.
 
-CRITICAL RULE: NEVER use write_file tool to create paper content directly. Always use file-downloader tools for PDF/document conversion.
+CRITICAL RULES:
+- Use the EXACT paper ID provided in the message (PAPER_ID=X).
+- Save path MUST be: ./deepcode_lab/papers/{PAPER_ID}/{PAPER_ID}.md
 
 Processing Rules:
 1. URL Input (input_type = "url"):
-   - Use "file-downloader" tool to download paper
+   - Use download_file_to tool with: url=<url>, destination="./deepcode_lab/papers/{PAPER_ID}/", filename="{PAPER_ID}.md"
    - Extract metadata (title, authors, year)
    - Return saved file path and metadata
 
 2. File Input (input_type = "file"):
-   - Copy file to "./deepcode_lab/papers/id/" using move_file_to tool (preserves original)
-   - The move_file_to tool will automatically convert PDF/documents to .md format
+   - Use move_file_to tool with: source=<file_path>, destination="./deepcode_lab/papers/{PAPER_ID}/{PAPER_ID}.md"
+   - The tool will automatically convert PDF/documents to .md format
    - NEVER manually extract content or use write_file - let the conversion tools handle this
    - Note: Original file is preserved, only a copy is placed in target directory
    - Return new saved file path and metadata
@@ -100,16 +102,26 @@
     "requirements": ["requirement1", "requirement2"]
 }
 
-Output Format (DO NOT MODIFY):
+CRITICAL OUTPUT RESTRICTIONS:
+- RETURN ONLY RAW JSON - NO TEXT BEFORE OR AFTER
+- NO markdown code blocks (```json)
+- NO explanatory text or descriptions
+- NO tool call information
+- NO analysis summaries
+- JUST THE JSON OBJECT BELOW
+
+Output Format (MANDATORY - EXACT FORMAT):
 {
     "status": "success|failure",
-    "paper_path": "path to paper file or null for text input",
+    "paper_path": "./deepcode_lab/papers/{PAPER_ID}/{PAPER_ID}.md (or null for text input)",
     "metadata": {
         "title": "extracted or provided title",
         "authors": ["extracted or provided authors"],
         "year": "extracted or provided year"
    }
 }
+
+Example: If PAPER_ID=14, then paper_path should be "./deepcode_lab/papers/14/14.md"
 """
 
 PAPER_REFERENCE_ANALYZER_PROMPT = """You are an expert academic paper reference analyzer specializing in computer science and machine learning.
@@ -1045,11 +1057,10 @@
 **IMPLEMENTATION APPROACH**:
 Build incrementally using multiple tool calls. For each step:
 1. **Identify** what needs to be implemented from the paper
-2. **Analyze Dependencies**: Before implementing each new file, use `read_code_mem` to read summaries of already-implemented files, then search for reference patterns to guide your implementation approach.
-3. **Implement** one component at a time
-4. **Test** immediately to catch issues early
-5. **Integrate** with existing components
-6. **Verify** against paper specifications
+2. **Implement** one component at a time
+3. **Test** immediately to catch issues early
+4. **Integrate** with existing components
+5. **Verify** against paper specifications
 
 **TOOL CALLING STRATEGY**:
 1. ⚠️ **SINGLE FUNCTION CALL PER MESSAGE**: Each message may perform only one function call. You will see the result of the function right after sending the message. If you need to perform multiple actions, you can always send more messages with subsequent function calls. Do some reasoning before your actions, describing what function calls you are going to use and how they fit into your plan.
@@ -1059,8 +1070,7 @@
    - **Reference only**: Use `search_code_references(indexes_path="indexes", target_file=the_file_you_want_to_implement, keywords=the_keywords_you_want_to_search)` for reference, NOT as implementation standard
    - **Core principle**: Original paper requirements take absolute priority over any reference code found
 3. **TOOL EXECUTION STRATEGY**:
-   - ⚠️**Development Cycle (for each new file implementation)**: `read_code_mem` (check existing implementations in Working Directory, use `read_file` as fallback if memory unavailable) → `search_code_references` (OPTIONAL reference check from indexes library in working directory) → `write_file` (implement based on original paper) → `execute_python` (if should test)
-   - **Environment Setup**: `write_file` (requirements.txt) → `execute_bash` (pip install) → `execute_python` (verify)
+   - ⚠️**Development Cycle (for each new file implementation)**: `search_code_references` (OPTIONAL reference check from indexes library in working directory) → `write_file` (implement based on original paper)
 
 4. **CRITICAL**: Use bash and python tools to ACTUALLY REPLICATE the paper yourself - do not provide instructions.
@@ -1104,11 +1114,10 @@
 **IMPLEMENTATION APPROACH**:
 Build incrementally using multiple tool calls. For each step:
 1. **Identify** what needs to be implemented from the paper
-2. **Analyze Dependencies**: Before implementing each new file, use `read_code_mem` to read summaries of already-implemented files, then search for reference patterns to guide your implementation approach.
-3. **Implement** one component at a time
-4. **Test** immediately to catch issues early
-5. **Integrate** with existing components
-6. **Verify** against paper specifications
+2. **Implement** one component at a time
+3. **Test** immediately to catch issues early
+4. **Integrate** with existing components
+5. **Verify** against paper specifications
 
 **TOOL CALLING STRATEGY**:
 1. ⚠️ **SINGLE FUNCTION CALL PER MESSAGE**: Each message may perform only one function call. You will see the result of the function right after sending the message. If you need to perform multiple actions, you can always send more messages with subsequent function calls. Do some reasoning before your actions, describing what function calls you are going to use and how they fit into your plan.
@@ -1118,10 +1127,7 @@
    - **Reference only**: Use `search_code_references(indexes_path="indexes", target_file=the_file_you_want_to_implement, keywords=the_keywords_you_want_to_search)` for reference, NOT as implementation standard
    - **Core principle**: Original paper requirements take absolute priority over any reference code found
 3. **TOOL EXECUTION STRATEGY**:
-   - ⚠️**Development Cycle (for each new file implementation)**: `read_code_mem` (check existing implementations in Working Directory, use `read_file` as fallback if memory unavailable`) → `search_code_references` (OPTIONAL reference check from `/home/agent/indexes`) → `write_file` (implement based on original paper) → `execute_python` (if needed to verify implementation)
-   - **File Verification**: Use `execute_bash` and `execute_python` when needed to check implementation completeness
-
-4. **CRITICAL**: Use bash and python tools when needed to CHECK and VERIFY implementation completeness - do not provide instructions. These tools help validate that your implementation files are syntactically correct and properly structured.
+   - ⚠️**Development Cycle (for each new file implementation)**: `search_code_references` (OPTIONAL reference check from `/home/agent/indexes`) → `write_file` (implement based on original paper)
 
 **Execution Guidelines**:
 - **Plan First**: Before each action, explain your reasoning and which function you'll use
@@ -1213,24 +1219,16 @@
 **IMPLEMENTATION APPROACH**:
 Build incrementally using multiple tool calls. For each step:
 1. **Identify** what needs to be implemented from the requirements
-2. **Analyze Dependencies**: Before implementing each new file, use `read_code_mem` to read summaries of already-implemented files, then search for reference patterns to guide your implementation approach.
-3. **Implement** one component at a time
-4. **Verify** optionally using `execute_python` or `execute_bash` to check implementation completeness if needed
-5. **Integrate** with existing components
-6. **Validate** against requirement specifications
+2. **Implement** one component at a time
+3. **Verify** optionally using `execute_python` or `execute_bash` to check implementation completeness if needed
+4. **Integrate** with existing components
+5. **Validate** against requirement specifications
 
 **TOOL CALLING STRATEGY**:
 1. ⚠️ **SINGLE FUNCTION CALL PER MESSAGE**: Each message may perform only one function call. You will see the result of the function right after sending the message. If you need to perform multiple actions, you can always send more messages with subsequent function calls. Do some reasoning before your actions, describing what function calls you are going to use and how they fit into your plan.
 
 2. **TOOL EXECUTION STRATEGY**:
-   - **Development Cycle (for each new file implementation)**: `read_code_mem` (check existing implementations in Working Directory, use `read_file` as fallback if memory unavailable) → `write_file` (implement) → **Optional Verification**: `execute_python` or `execute_bash` (if needed to check implementation)
-   - **File Verification**: Use `execute_bash` and `execute_python` when needed to verify implementation completeness.
-
-3. **CRITICAL**: Use `execute_bash` and `execute_python` tools when needed to CHECK and VERIFY file implementation completeness - do not provide instructions. These tools are essential for:
-   - Checking file syntax and import correctness (`execute_python`)
-   - Verifying file structure and dependencies (`execute_bash` for listing, `execute_python` for imports)
-   - Validating that implemented files are syntactically correct and can be imported
-   - Ensuring code implementation meets basic functionality requirements
+   - **Development Cycle (for each new file implementation)**: `write_file` (implement)
 
 **Execution Guidelines**:
 - **Plan First**: Before each action, explain your reasoning and which function you'll use
@@ -1348,10 +1346,6 @@
 ## TRADITIONAL APPROACH: Full Document Reading
 Read the complete document to ensure comprehensive coverage of all algorithmic details:
 
-1. **Locate and read the markdown (.md) file** in the paper directory
-2. **Analyze the entire document** to capture all algorithms, methods, and formulas
-3. **Extract complete implementation details** without missing any components
-
 # DETAILED EXTRACTION PROTOCOL
 
 ## 1. COMPREHENSIVE ALGORITHM SCAN
@@ -1511,10 +1505,6 @@
 ## TRADITIONAL APPROACH: Complete Document Analysis
 Read the entire document systematically to ensure comprehensive understanding:
 
-1. **Locate and read the markdown (.md) file** in the paper directory
-2. **Analyze the complete document structure** from introduction to conclusion
-3. **Extract all conceptual frameworks** and implementation requirements
-
 # COMPREHENSIVE ANALYSIS PROTOCOL
 
 ## 1. COMPLETE PAPER STRUCTURAL ANALYSIS
@@ -1678,17 +1668,6 @@
 1. **Comprehensive Paper Analysis**: Complete paper structure, components, and requirements
 2. **Complete Algorithm Extraction**: All algorithms, formulas, pseudocode, and technical details
 
-Plus you can access the complete paper document by reading the markdown file directly.
-
-# TRADITIONAL DOCUMENT ACCESS
-
-## Direct Paper Reading
-For any additional details needed beyond the provided analyses:
-
-1. **Read the complete markdown (.md) file** in the paper directory
-2. **Access any section directly** without token limitations for smaller documents
-3. **Cross-reference information** across the entire document as needed
-
 # OBJECTIVE
 Create an implementation plan so detailed that a developer can reproduce the ENTIRE paper without reading it.

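With this change, PAPER_DOWNLOADER_PROMPT no longer asks the agent to derive the paper ID by counting files; the caller is now expected to compute the ID and prepend it to the message as PAPER_ID=<number>. A minimal caller-side sketch, assuming the same count-and-increment scheme the old prompt described (next_paper_id and the sample analyzer payload are illustrative, not part of this commit):

import os

def next_paper_id(papers_dir: str = "./deepcode_lab/papers") -> int:
    # The old prompt had the agent count entries under papers/ and add 1;
    # this sketch assumes the caller now does that up front.
    os.makedirs(papers_dir, exist_ok=True)
    return len(os.listdir(papers_dir)) + 1

# Stand-in for the PaperInputAnalyzerAgent output.
analyzer_result = '{"input_type": "url", "url": "https://example.org/paper.pdf"}'
paper_id = next_paper_id()
downloader_message = f"PAPER_ID={paper_id}\n{analyzer_result}"
# The downloader agent is then expected to save to ./deepcode_lab/papers/{paper_id}/{paper_id}.md
print(downloader_message)
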
tools/code_indexer.py

Lines changed: 1 addition & 31 deletions
@@ -25,37 +25,7 @@
 
 # MCP Agent imports for LLM
 import yaml
-from utils.llm_utils import get_preferred_llm_class
-
-
-def get_default_models(config_path: str = "mcp_agent.config.yaml"):
-    """
-    Get default models from configuration file.
-
-    Args:
-        config_path: Path to the configuration file
-
-    Returns:
-        dict: Dictionary with 'anthropic' and 'openai' default models
-    """
-    try:
-        if os.path.exists(config_path):
-            with open(config_path, "r", encoding="utf-8") as f:
-                config = yaml.safe_load(f)
-
-            anthropic_model = config.get("anthropic", {}).get(
-                "default_model", "claude-sonnet-4-20250514"
-            )
-            openai_model = config.get("openai", {}).get("default_model", "o3-mini")
-
-            return {"anthropic": anthropic_model, "openai": openai_model}
-        else:
-            print(f"Config file {config_path} not found, using default models")
-            return {"anthropic": "claude-sonnet-4-20250514", "openai": "o3-mini"}
-
-    except Exception as e:
-        print(f"Error reading config file {config_path}: {e}")
-        return {"anthropic": "claude-sonnet-4-20250514", "openai": "o3-mini"}
+from utils.llm_utils import get_preferred_llm_class, get_default_models
 
 
 @dataclass

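The duplicate helper is gone; code_indexer.py now relies on the shared copy in utils/llm_utils.py. A small usage sketch (the call site itself is not part of this diff):

from utils.llm_utils import get_preferred_llm_class, get_default_models

# get_default_models() still returns {"anthropic": ..., "openai": ...};
# only its definition moved to utils/llm_utils.py.
defaults = get_default_models("mcp_agent.config.yaml")
llm_class = get_preferred_llm_class()
print(defaults["anthropic"], defaults["openai"], llm_class.__name__)
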
tools/pdf_downloader.py

Lines changed: 22 additions & 0 deletions
@@ -1098,8 +1098,21 @@ async def download_file_to(
         Status message about the download operation
     """
     # Determine the filename
+
+    url = URLExtractor.extract_urls(url)[0]
+
+    if not filename:
+        filename = URLExtractor.infer_filename_from_url(url)
+
     if not filename:
         filename = URLExtractor.infer_filename_from_url(url)
+    else:
+        name_source, extension_source = os.path.splitext(os.path.basename(URLExtractor.infer_filename_from_url(url)))
+        name_destination, extension_destination = os.path.splitext(os.path.basename(filename))
+        if extension_source:
+            filename = name_destination + extension_source
+        else:
+            filename = name_destination + extension_destination
 
     # Determine the full path
     if destination:
@@ -1203,6 +1216,14 @@ async def move_file_to(
     # Determine the filename
     if not filename:
         filename = os.path.basename(source)
+    else:
+        name_source, extension_source = os.path.splitext(os.path.basename(source))
+        name_destination, extension_destination = os.path.splitext(os.path.basename(filename))
+        if extension_source:
+            filename = name_destination + extension_source
+        else:
+            filename = name_destination + extension_destination
+
 
     # Determine the full path
     if destination:
@@ -1215,6 +1236,7 @@
             target_path = destination
         else:  # it is a directory
             target_path = os.path.join(destination, filename)
+
     else:
         target_path = filename
 

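Both new branches apply the same filename rule: keep the caller-supplied stem, but let the source's extension win whenever the source has one. A standalone sketch of that rule (resolve_filename is an illustrative helper, not a function in this diff):

import os

def resolve_filename(source_name: str, requested_name: str) -> str:
    # Keep the requested stem; prefer the source's extension when present.
    _, ext_source = os.path.splitext(os.path.basename(source_name))
    stem_requested, ext_requested = os.path.splitext(os.path.basename(requested_name))
    return stem_requested + (ext_source if ext_source else ext_requested)

print(resolve_filename("paper.pdf", "14.md"))  # -> 14.pdf (source extension wins)
print(resolve_filename("paper", "14.md"))      # -> 14.md  (falls back to requested extension)
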
utils/file_processor.py

Lines changed: 15 additions & 0 deletions
@@ -282,10 +282,25 @@ async def process_file_input(
         if isinstance(file_input, str):
             import re
 
+            # Try to extract path from backticks first
             file_path_match = re.search(r"`([^`]+\.md)`", file_input)
             if file_path_match:
                 paper_path = file_path_match.group(1)
                 file_input = {"paper_path": paper_path}
+            else:
+                # Try to extract from "Saved Path:" or similar patterns
+                path_patterns = [
+                    r"[Ss]aved [Pp]ath[:\s]+([^\s\n]+\.md)",
+                    r"[Pp]aper [Pp]ath[:\s]+([^\s\n]+\.md)",
+                    r"[Ff]ile[:\s]+([^\s\n]+\.md)",
+                    r"[Oo]utput[:\s]+([^\s\n]+\.md)",
+                ]
+                for pattern in path_patterns:
+                    match = re.search(pattern, file_input)
+                    if match:
+                        paper_path = match.group(1)
+                        file_input = {"paper_path": paper_path}
+                        break
 
         # Extract paper directory path
         paper_dir = cls.extract_file_path(file_input)

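The added fallback patterns make path extraction tolerant of agent replies that mention the saved file without backticks. A quick check of what they match, with a made-up reply string:

import re

path_patterns = [
    r"[Ss]aved [Pp]ath[:\s]+([^\s\n]+\.md)",
    r"[Pp]aper [Pp]ath[:\s]+([^\s\n]+\.md)",
    r"[Ff]ile[:\s]+([^\s\n]+\.md)",
    r"[Oo]utput[:\s]+([^\s\n]+\.md)",
]

reply = "Download complete. Saved Path: ./deepcode_lab/papers/14/14.md"
for pattern in path_patterns:
    match = re.search(pattern, reply)
    if match:
        print(match.group(1))  # ./deepcode_lab/papers/14/14.md
        break
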
utils/llm_utils.py

Lines changed: 34 additions & 0 deletions
@@ -53,6 +53,40 @@ def get_preferred_llm_class(config_path: str = "mcp_agent.secrets.yaml") -> Type
     return OpenAIAugmentedLLM
 
 
+def get_token_limits(config_path: str = "mcp_agent.config.yaml") -> Tuple[int, int]:
+    """
+    Get token limits from configuration.
+
+    Args:
+        config_path: Path to the main configuration file
+
+    Returns:
+        tuple: (base_max_tokens, retry_max_tokens)
+    """
+    # Default values that work with qwen/qwen-max (32768 total context)
+    default_base = 20000
+    default_retry = 15000
+
+    try:
+        if os.path.exists(config_path):
+            with open(config_path, "r", encoding="utf-8") as f:
+                config = yaml.safe_load(f)
+
+            openai_config = config.get("openai", {})
+            base_tokens = openai_config.get("base_max_tokens", default_base)
+            retry_tokens = openai_config.get("retry_max_tokens", default_retry)
+
+            print(f"⚙️ Token limits from config: base={base_tokens}, retry={retry_tokens}")
+            return base_tokens, retry_tokens
+        else:
+            print(f"⚠️ Config file {config_path} not found, using defaults: base={default_base}, retry={default_retry}")
+            return default_base, default_retry
+    except Exception as e:
+        print(f"⚠️ Error reading token config from {config_path}: {e}")
+        print(f"🔧 Falling back to default token limits: base={default_base}, retry={default_retry}")
+        return default_base, default_retry
+
+
 def get_default_models(config_path: str = "mcp_agent.config.yaml"):
     """
     Get default models from configuration file.

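get_token_limits() looks for two optional keys under the openai section of mcp_agent.config.yaml and otherwise falls back to defaults sized for qwen/qwen-max. A usage sketch (the keys in the comment are the ones the function reads; the values shown are just its documented defaults):

from utils.llm_utils import get_token_limits

# Expected (optional) entries in mcp_agent.config.yaml:
#   openai:
#     base_max_tokens: 20000
#     retry_max_tokens: 15000
# A missing file or missing keys fall back to these defaults.
base_max_tokens, retry_max_tokens = get_token_limits("mcp_agent.config.yaml")
print(base_max_tokens, retry_max_tokens)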