
Commit d707514

✨ file to text tool
1 parent 9f5585f commit d707514

File tree

10 files changed: +346 -7 lines

backend/agents/create_agent_info.py

Lines changed: 9 additions & 1 deletion
@@ -9,6 +9,7 @@
 from nexent.core.agents.agent_model import AgentRunInfo, ModelConfig, AgentConfig, ToolConfig
 from nexent.memory.memory_service import search_memory_in_levels
 
+from services.file_management_service import get_llm_model
 from services.vectordatabase_service import (
     ElasticSearchService,
     get_vector_db_core,
@@ -20,10 +21,11 @@
 from database.agent_db import search_agent_info_by_agent_id, query_sub_agents_id_list
 from database.tool_db import search_tools_for_sub_agent
 from database.model_management_db import get_model_records, get_model_by_model_id
+from database.client import minio_client
 from utils.model_name_utils import add_repo_to_name
 from utils.prompt_template_utils import get_agent_prompt_template
 from utils.config_utils import tenant_config_manager, get_model_name_from_config
-from consts.const import LOCAL_MCP_SERVER, MODEL_CONFIG_MAPPING, LANGUAGE
+from consts.const import LOCAL_MCP_SERVER, MODEL_CONFIG_MAPPING, LANGUAGE, DATA_PROCESS_SERVICE
 
 logger = logging.getLogger("create_agent_info")
 logger.setLevel(logging.DEBUG)
@@ -236,6 +238,12 @@ async def create_tool_config_list(agent_id, tenant_id, user_id):
                 "vdb_core": get_vector_db_core(),
                 "embedding_model": get_embedding_model(tenant_id=tenant_id),
             }
+        elif tool_config.class_name == "AnalyzeTextFileTool":
+            tool_config.metadata = {
+                "llm_model": get_llm_model(tenant_id=tenant_id),
+                "storage_client": minio_client,
+                "data_process_service_url": DATA_PROCESS_SERVICE
+            }
         tool_config_list.append(tool_config)
 
     return tool_config_list
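
The three metadata keys attached here line up one-to-one with the constructor parameters of the new AnalyzeTextFileTool added later in this commit. A minimal sketch of that hand-off, using only names visible in this diff (the surrounding loop and the rest of ToolConfig are omitted; the observer is supplied by the agent runtime):

    # Sketch only: the metadata built above becomes constructor keywords when the
    # SDK instantiates the tool (see the nexent_agent.py hunk below).
    tool = AnalyzeTextFileTool(
        observer=observer,
        llm_model=tool_config.metadata["llm_model"],
        storage_client=tool_config.metadata["storage_client"],
        data_process_service_url=tool_config.metadata["data_process_service_url"],
    )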

backend/services/file_management_service.py

Lines changed: 19 additions & 2 deletions
@@ -10,7 +10,7 @@
 from fastapi import UploadFile
 
 from agents.preprocess_manager import preprocess_manager
-from consts.const import UPLOAD_FOLDER, MAX_CONCURRENT_UPLOADS, DATA_PROCESS_SERVICE, LANGUAGE
+from consts.const import UPLOAD_FOLDER, MAX_CONCURRENT_UPLOADS, DATA_PROCESS_SERVICE, LANGUAGE, MODEL_CONFIG_MAPPING
 from database.attachment_db import (
     upload_fileobj,
     get_file_url,
@@ -19,11 +19,15 @@
     delete_file,
     list_files
 )
-from utils.attachment_utils import convert_image_to_text, convert_long_text_to_text
 from services.vectordatabase_service import ElasticSearchService, get_vector_db_core
+from utils.attachment_utils import convert_image_to_text, convert_long_text_to_text
+from utils.config_utils import tenant_config_manager, get_model_name_from_config
 from utils.prompt_template_utils import get_file_processing_messages_template
 from utils.file_management_utils import save_upload_file
 
+from nexent import MessageObserver
+from nexent.core.models import OpenAILongContextModel
+
 # Create upload directory
 upload_dir = Path(UPLOAD_FOLDER)
 upload_dir.mkdir(exist_ok=True)
@@ -405,3 +409,16 @@ def get_file_description(files: List[UploadFile]) -> str:
         else:
             description += f"- File {file.filename or ''}\n"
     return description
+
+def get_llm_model(tenant_id: str):
+    # Get the tenant config
+    main_model_config = tenant_config_manager.get_model_config(
+        key=MODEL_CONFIG_MAPPING["llm"], tenant_id=tenant_id)
+    long_text_to_text_model = OpenAILongContextModel(
+        observer=MessageObserver(),
+        model_id=get_model_name_from_config(main_model_config),
+        api_base=main_model_config.get("base_url"),
+        api_key=main_model_config.get("api_key"),
+        max_context_tokens=main_model_config.get("max_tokens")
+    )
+    return long_text_to_text_model
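
get_llm_model is what the new tool relies on for analysis: it wraps the tenant's configured "llm" model in an OpenAILongContextModel. A minimal usage sketch, assuming a tenant with an "llm" model configured; the analyze_long_text call and its (result, truncation_percentage) return shape are taken from the tool code later in this commit, and the tenant id is a placeholder:

    # Sketch: resolve the tenant's long-context model and run one analysis pass.
    llm = get_llm_model(tenant_id="tenant-123")  # hypothetical tenant id

    result, truncation_percentage = llm.analyze_long_text(
        text_content="...text extracted from a file...",
        system_prompt="Describe this text from the perspective of the user's question.",
        user_prompt="Please carefully read and analyze this text:",
    )
    print(result.content, truncation_percentage)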

backend/services/tool_configuration_service.py

Lines changed: 14 additions & 1 deletion
@@ -11,7 +11,7 @@
 import jsonref
 from mcpadapt.smolagents_adapter import _sanitize_function_name
 
-from consts.const import DEFAULT_USER_ID, LOCAL_MCP_SERVER
+from consts.const import DEFAULT_USER_ID, LOCAL_MCP_SERVER, DATA_PROCESS_SERVICE
 from consts.exceptions import MCPConnectionError, ToolExecutionException, NotFoundException
 from consts.model import ToolInstanceInfoRequest, ToolInfo, ToolSourceEnum, ToolValidateRequest
 from database.remote_mcp_db import get_mcp_records_by_tenant, get_mcp_server_by_name_and_tenant
@@ -23,6 +23,8 @@
     search_last_tool_instance_by_tool_id,
 )
 from database.user_tenant_db import get_all_tenant_ids
+from database.client import minio_client
+from services.file_management_service import get_llm_model
 from services.vectordatabase_service import get_embedding_model, get_vector_db_core
 from services.tenant_config_service import get_selected_knowledge_list
 
@@ -613,6 +615,17 @@ def _validate_local_tool(
             'embedding_model': embedding_model,
         }
         tool_instance = tool_class(**params)
+    elif tool_name == "analyze_text_file":
+        if not tenant_id or not user_id:
+            raise ToolExecutionException(f"Tenant ID and User ID are required for {tool_name} validation")
+        long_text_to_text_model = get_llm_model(tenant_id=tenant_id)
+        params = {
+            **instantiation_params,
+            'llm_model': long_text_to_text_model,
+            'storage_client': minio_client,
+            "data_process_service_url": DATA_PROCESS_SERVICE
+        }
+        tool_instance = tool_class(**params)
     else:
         tool_instance = tool_class(**instantiation_params)

sdk/nexent/core/agents/nexent_agent.py

Lines changed: 6 additions & 0 deletions
@@ -83,6 +83,12 @@ def create_local_tool(self, tool_config: ToolConfig):
                 "vdb_core", None) if tool_config.metadata else None
             tools_obj.embedding_model = tool_config.metadata.get(
                 "embedding_model", None) if tool_config.metadata else None
+        elif class_name == "AnalyzeTextFileTool":
+            tools_obj = tool_class(observer=self.observer,
+                                   llm_model=tool_config.metadata.get("llm_model", []),
+                                   storage_client=tool_config.metadata.get("storage_client", []),
+                                   data_process_service_url=tool_config.metadata.get("data_process_service_url", []),
+                                   **params)
         else:
             tools_obj = tool_class(**params)
         if hasattr(tools_obj, 'observer'):

Lines changed: 15 additions & 0 deletions
@@ -0,0 +1,15 @@
+# File analysis prompt template
+# For long text content analysis
+system_prompt: |-
+  用户提出了一个问题:{{ query }},请从回答这个问题的角度精简、仔细描述一下这段文本,200字以内。
+
+  **文本分析要求:**
+  1. 重点提取与用户问题相关的文本内容
+  2. 归纳总结要准确简洁,突出核心信息
+  3. 保持原文的关键观点和数据
+  4. 避免冗余信息,专注于问题相关内容
+
+user_prompt: |
+  请仔细阅读并分析这段文本:
+
+
Lines changed: 14 additions & 0 deletions
@@ -0,0 +1,14 @@
+# File analysis prompt template
+# For long text content analysis
+system_prompt: |-
+  The user has asked a question: {{ query }}. Please provide a concise and careful description of this text from the perspective of answering this question, within 200 words.
+
+  **Text Analysis Requirements:**
+  1. Focus on extracting text content relevant to the user's question
+  2. Summary should be accurate and concise, highlighting core information
+  3. Maintain key viewpoints and data from the original text
+  4. Avoid redundant information, focus on question-related content
+
+user_prompt: |
+  Please carefully read and analyze this text:
+

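The two templates above (Chinese and English) expose the same system_prompt/user_prompt keys; only the system prompt takes a variable ({{ query }}). A small sketch of the rendering step as the tool performs it, assuming get_prompt_template(template_type='analyze_file', ...) resolves to these files:

    from jinja2 import StrictUndefined, Template

    from nexent.core.utils.prompt_template_utils import get_prompt_template

    # Sketch: render the analyze_file prompts the same way AnalyzeTextFileTool does.
    prompts = get_prompt_template(template_type='analyze_file', language='en')
    system_prompt = Template(prompts['system_prompt'], undefined=StrictUndefined).render(
        {'query': 'What are the key findings of this report?'})
    user_prompt = Template(prompts['user_prompt'], undefined=StrictUndefined).render({})
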
sdk/nexent/core/tools/__init__.py

Lines changed: 3 additions & 1 deletion
@@ -12,6 +12,7 @@
 from .move_item_tool import MoveItemTool
 from .list_directory_tool import ListDirectoryTool
 from .terminal_tool import TerminalTool
+from .analyze_text_file_tool import AnalyzeTextFileTool
 
 __all__ = [
     "ExaSearchTool",
@@ -27,5 +28,6 @@
     "DeleteDirectoryTool",
     "MoveItemTool",
     "ListDirectoryTool",
-    "TerminalTool"
+    "TerminalTool",
+    "AnalyzeTextFileTool"
 ]
Lines changed: 212 additions & 0 deletions
@@ -0,0 +1,212 @@
+"""
+Analyze Text File Tool
+
+Extracts content from text files (excluding images) and analyzes it using a large language model.
+Supports files from S3, HTTP, and HTTPS URLs.
+"""
+import json
+import logging
+from typing import List, Optional, Union
+
+import httpx
+from jinja2 import Template, StrictUndefined
+from pydantic import Field
+from smolagents.tools import Tool
+
+from nexent.core import MessageObserver
+from nexent.core.utils.observer import ProcessType
+from nexent.core.utils.prompt_template_utils import get_prompt_template
+from nexent.core.utils.tools_common_message import ToolCategory, ToolSign
+from nexent.storage import MinIOStorageClient
+from nexent.multi_modal.load_save_object import LoadSaveObjectManager
+
+
+logger = logging.getLogger("analyze_text_file_tool")
+
+
+class AnalyzeTextFileTool(Tool):
+    """Tool for analyzing text file content using a large language model"""
+
+    name = "analyze_text_file"
+    description = (
+        "Extract content from text files and analyze them using a large language model based on your query. "
+        "Supports multiple files from S3 URLs (s3://bucket/key or /bucket/key), HTTP, and HTTPS URLs. "
+        "The tool will extract the text content from each file and return an analysis based on your question."
+    )
+
+    inputs = {
+        "file_url_list": {
+            "type": "array",
+            "description": "List of file URLs (S3, HTTP, or HTTPS). Supports s3://bucket/key, /bucket/key, http://, and https:// URLs. Can also accept a single file URL which will be treated as a list with one element."
+        },
+        "query": {
+            "type": "string",
+            "description": "User's question to guide the analysis"
+        }
+    }
+    output_type = "string"
+    category = ToolCategory.FILE.value
+    tool_sign = ToolSign.FILE_OPERATION.value
+
+    def __init__(
+        self,
+        storage_client: Optional[MinIOStorageClient] = Field(
+            description="Storage client for downloading files from S3, HTTP, and HTTPS URLs.",
+            default=None,
+            exclude=True
+        ),
+        observer: MessageObserver = Field(
+            description="Message observer",
+            default=None,
+            exclude=True
+        ),
+        data_process_service_url: str = Field(
+            description="URL of data process service",
+            default=None,
+            exclude=True),
+        llm_model: str = Field(
+            description="The LLM model to use",
+            default=None,
+            exclude=True)
+    ):
+        super().__init__()
+        self.storage_client = storage_client
+        self.observer = observer
+        self.llm_model = llm_model
+        self.data_process_service_url = data_process_service_url
+        self.mm = LoadSaveObjectManager(storage_client=self.storage_client)
+
+        self.running_prompt_zh = "正在分析文本文件..."
+        self.running_prompt_en = "Analyzing text file..."
+        # Dynamically apply the load_object decorator to the forward method
+        self.forward = self.mm.load_object(input_names=["file_url_list"])(self._forward_impl)
+
+    def _forward_impl(
+        self,
+        file_url_list: Union[bytes, List[bytes]],
+        query: str,
+    ) -> Union[str, List[str]]:
+        """
+        Analyze text file content using a large language model.
+
+        Note: This method is wrapped by the load_object decorator, which downloads
+        the file from an S3, HTTP, or HTTPS URL and passes bytes to this method.
+
+        Args:
+            file_url_list: File bytes or a sequence of file bytes (converted from URLs by the decorator).
+                The load_object decorator converts URLs to bytes before calling this method.
+            query: User's question to guide the analysis
+
+        Returns:
+            Union[str, List[str]]: Single analysis string for one file or a list
+                of analysis strings that align with the order of the provided files.
+        """
# Send tool run message
105+
if self.observer:
106+
running_prompt = self.running_prompt_zh if self.observer.lang == "zh" else self.running_prompt_en
107+
self.observer.add_message("", ProcessType.TOOL, running_prompt)
108+
card_content = [{"icon": "file", "text": f"Analyzing file..."}]
109+
self.observer.add_message("", ProcessType.CARD, json.dumps(card_content, ensure_ascii=False))
110+
111+
if file_url_list is None:
112+
raise ValueError("file_url_list must contain at least one file")
113+
114+
if isinstance(file_url_list, (list, tuple)):
115+
file_inputs: List[bytes] = list(file_url_list)
116+
elif isinstance(file_url_list, bytes):
117+
file_inputs = [file_url_list]
118+
else:
119+
raise ValueError("file_url_list must be bytes or a list/tuple of bytes")
120+
121+
try:
122+
analysis_results: List[str] = []
123+
124+
for index, single_file in enumerate(file_inputs, start=1):
125+
logger.info(f"Extracting text content from file #{index}, query: {query}")
126+
filename = f"file_{index}.txt"
127+
128+
# Step 1: Get file content
129+
raw_text = self.process_text_file(filename, single_file)
130+
131+
if not raw_text:
132+
error_msg = f"No text content extracted from file #{index}"
133+
logger.error(error_msg)
134+
raise Exception(error_msg)
135+
136+
logger.info(f"Analyzing text content with LLM for file #{index}, query: {query}")
137+
138+
# Step 2: Analyze file content
139+
try:
140+
text, _ = self.analyze_file(query, raw_text)
141+
analysis_results.append(text)
142+
except Exception as analysis_error:
143+
logger.error(f"Failed to analyze file #{index}: {analysis_error}")
144+
analysis_results.append(str(analysis_error))
145+
146+
if len(analysis_results) == 1:
147+
return analysis_results[0]
148+
return analysis_results
149+
150+
except Exception as e:
151+
logger.error(f"Error analyzing text file: {str(e)}", exc_info=True)
152+
error_msg = f"Error analyzing text file: {str(e)}"
153+
raise Exception(error_msg)
154+
155+
156+
def process_text_file(self, filename: str, file_content: bytes,) -> str:
157+
"""
158+
Process text file, convert to text using external API
159+
"""
160+
# file_content is byte data, need to send to API through file upload
161+
api_url = f"{self.data_process_service_url}/tasks/process_text_file"
162+
logger.info(f"Processing text file {filename} with API: {api_url}")
163+
164+
raw_text = ""
165+
try:
166+
# Upload byte data as a file
167+
files = {
168+
'file': (filename, file_content, 'application/octet-stream')
169+
}
170+
data = {
171+
'chunking_strategy': 'basic',
172+
'timeout': 60
173+
}
174+
with httpx.Client(timeout=60) as client:
175+
response = client.post(api_url, files=files, data=data)
176+
177+
if response.status_code == 200:
178+
result = response.json()
179+
raw_text = result.get("text", "")
180+
logger.info(
181+
f"File processed successfully: {raw_text[:200]}...{raw_text[-200:]}..., length: {len(raw_text)}")
182+
else:
183+
error_detail = response.json().get('detail', 'unknown error') if response.headers.get(
184+
'content-type', '').startswith('application/json') else response.text
185+
logger.error(
186+
f"File processing failed (status code: {response.status_code}): {error_detail}")
187+
raise Exception(error_detail)
188+
189+
except Exception as e:
190+
logger.error(f"Failed to process text file {filename}: {str(e)}", exc_info=True)
191+
raise
192+
193+
return raw_text
194+
+    def analyze_file(self, query: str, raw_text: str,):
+        """
+        Analyze the extracted text with the LLM, guided by the user's query.
+        """
+        language = getattr(self.observer, "lang", "en") if self.observer else "en"
+        prompts = get_prompt_template(template_type='analyze_file', language=language)
+        system_prompt_template = Template(prompts['system_prompt'], undefined=StrictUndefined)
+        user_prompt_template = Template(prompts['user_prompt'], undefined=StrictUndefined)
+
+        system_prompt = system_prompt_template.render({'query': query})
+        user_prompt = user_prompt_template.render({})
+
+        result, truncation_percentage = self.llm_model.analyze_long_text(
+            text_content=raw_text,
+            system_prompt=system_prompt,
+            user_prompt=user_prompt
+        )
+        return result.content, truncation_percentage
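
Putting the pieces together, a hedged end-to-end sketch of driving the tool directly, outside the agent runtime. The storage client construction, service URL, tenant id, and file URL are placeholders, and get_llm_model is the backend helper added earlier in this commit; forward is the load_object-wrapped _forward_impl, so it accepts URLs and receives bytes internally:

    from nexent.core import MessageObserver
    from nexent.core.tools import AnalyzeTextFileTool
    from nexent.storage import MinIOStorageClient

    # Sketch: manual wiring with placeholder values.
    tool = AnalyzeTextFileTool(
        storage_client=MinIOStorageClient(),               # placeholder construction
        observer=MessageObserver(),
        data_process_service_url="http://localhost:5012",  # placeholder data-process service
        llm_model=get_llm_model(tenant_id="tenant-123"),    # backend helper, hypothetical tenant
    )

    # The wrapper downloads each URL and hands bytes to _forward_impl.
    answer = tool.forward(
        file_url_list=["s3://bucket/report.txt"],
        query="Summarize the main risks discussed in this file.",
    )
    print(answer)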
