✨ Comments fix: Truncation information of files that are too large is displayed to the user #1119

WMC001 · WMC001 · commit fa83bed4fc8b · 2025-09-09T11:09:12.000+08:00
diff --git a/backend/apps/file_management_app.py b/backend/apps/file_management_app.py
@@ -326,5 +326,3 @@ async def agent_preprocess_api(
     except Exception as e:
         raise HTTPException(
             status_code=500, detail=f"File preprocessing error: {str(e)}")
-
-
diff --git a/backend/prompts/utils/file_processing_messages.yaml b/backend/prompts/utils/file_processing_messages.yaml
@@ -0,0 +1,5 @@
+FILE_CONTENT_SUCCESS: "文件 {filename} 内容: {content}"
+FILE_CONTENT_ERROR: "文件 {filename} 内容: 处理文本文件 {filename} 时出错: {error}"
+FILE_PROCESSING_ERROR: "文件处理失败 (状态码: {status_code}): {error_detail}"
+IMAGE_CONTENT_SUCCESS: "图片文件 {filename} 内容: {content}"
+IMAGE_CONTENT_ERROR: "图片文件 {filename} 内容: 处理图片文件 {filename} 时出错: {error}"
diff --git a/backend/prompts/utils/file_processing_messages_en.yaml b/backend/prompts/utils/file_processing_messages_en.yaml
@@ -0,0 +1,5 @@
+FILE_CONTENT_SUCCESS: "File {filename} content: {content}"
+FILE_CONTENT_ERROR: "File {filename} content: Error processing text file {filename}: {error}"
+FILE_PROCESSING_ERROR: "File processing failed (status code: {status_code}): {error_detail}"
+IMAGE_CONTENT_SUCCESS: "Image file {filename} content: {content}"
+IMAGE_CONTENT_ERROR: "Image file {filename} content: Error processing image file {filename}: {error}"
diff --git a/backend/services/file_management_service.py b/backend/services/file_management_service.py
@@ -17,6 +17,7 @@
 from database.attachment_db import upload_fileobj, get_file_url, get_content_type, get_file_stream, delete_file, \
     list_files
 from utils.attachment_utils import convert_image_to_text, convert_long_text_to_text
+from utils.prompt_template_utils import get_file_processing_messages_template
 from utils.file_management_utils import save_upload_file
 
 # Create upload directory
@@ -140,41 +141,44 @@ async def list_files_impl(prefix: str, limit: Optional[int] = None):
     return files
 
 
-def get_parsing_file_message(language: str, index: int, total_files: int, filename: str) -> str:
+def get_parsing_file_data(index: int, total_files: int, filename: str) -> dict:
     """
-    Get internationalized parsing file message
-    
+    Build the i18n parameters for a "parsing file" progress message
+
     Args:
-        language: Language code ('zh' or 'en')
         index: Current file index (0-based)
         total_files: Total number of files
         filename: Name of the file being parsed
-        
+
     Returns:
-        str: Internationalized message
+        dict: {"params": {"index": index + 1, "total": total_files, "filename": filename}}
     """
-    if language == 'zh':
-        return f"正在解析文件 {index + 1}/{total_files}: {filename}"
-    else:
-        return f"Parsing file {index + 1}/{total_files}: {filename}"
+    return {
+        "params": {
+            "index": index + 1,
+            "total": total_files,
+            "filename": filename
+        }
+    }
 
 
-def get_truncation_message(language: str, filename: str, truncation_percentage: int) -> str:
+def get_truncation_data(filename: str, truncation_percentage: "int | str") -> dict:
     """
-    Get internationalized truncation message
-    
+    Build the i18n parameters for a file-truncation notice
+
     Args:
-        language: Language code ('zh' or 'en')
         filename: Name of the file being truncated
         truncation_percentage: Percentage of content that was read
-        
+
     Returns:
-        str: Internationalized truncation message
+        dict: {"params": {"filename": filename, "percentage": truncation_percentage}}
     """
-    if language == 'zh':
-        return f"{filename} 超出字数限制，只阅读了前 {truncation_percentage}%"
-    else:
-        return f"{filename} exceeds word limit, only read the first {truncation_percentage}%"
+    return {
+        "params": {
+            "filename": filename,
+            "percentage": truncation_percentage
+        }
+    }
 
 
 async def preprocess_files_generator(
@@ -187,15 +191,15 @@ async def preprocess_files_generator(
 ) -> AsyncGenerator[str, None]:
     """
     Generate streaming response for file preprocessing
-    
+
     Args:
         query: User query string
         file_cache: List of cached file data
         tenant_id: Tenant ID
         language: Language preference
         task_id: Unique task ID
         conversation_id: Conversation ID
-    
+
     Yields:
         str: JSON formatted streaming messages
     """
@@ -205,7 +209,8 @@ async def preprocess_files_generator(
     # Create and register the preprocess task
     task = asyncio.current_task()
     if task:
-        preprocess_manager.register_preprocess_task(task_id, conversation_id, task)
+        preprocess_manager.register_preprocess_task(
+            task_id, conversation_id, task)
 
     try:
         for index, file_data in enumerate(file_cache):
@@ -217,7 +222,7 @@ async def preprocess_files_generator(
             progress_message = json.dumps({
                 "type": "progress",
                 "progress": progress,
-                "message": get_parsing_file_message(language, index, total_files, file_data['filename'])
+                "message_data": get_parsing_file_data(index, total_files, file_data['filename'])
             }, ensure_ascii=False)
             yield f"data: {progress_message}\n\n"
             await asyncio.sleep(0.1)
@@ -240,20 +245,19 @@ async def preprocess_files_generator(
                     "filename": file_data["filename"],
                     "description": description
                 }
-                file_message = json.dumps(file_message_data, ensure_ascii=False)
+                file_message = json.dumps(
+                    file_message_data, ensure_ascii=False)
                 yield f"data: {file_message}\n\n"
                 await asyncio.sleep(0.1)
-                
+
                 # Send truncation notice immediately if file was truncated
                 if truncation_percentage is not None and int(truncation_percentage) < 100:
                     if int(truncation_percentage) == 0:
                         truncation_percentage = "< 1"
 
-                    truncation_msg = get_truncation_message(language, file_data['filename'], truncation_percentage)
-                    
                     truncation_message = json.dumps({
                         "type": "truncation",
-                        "message": truncation_msg
+                        "message_data": get_truncation_data(file_data['filename'], truncation_percentage)
                     }, ensure_ascii=False)
                     yield f"data: {truncation_message}\n\n"
                     await asyncio.sleep(0.1)
@@ -284,18 +288,24 @@ async def process_image_file(query: str, filename: str, file_content: bytes, ten
     """
     Process image file, convert to text using external API
     """
+    # Load messages based on language
+    messages = get_file_processing_messages_template(language)
+    
     try:
         image_stream = BytesIO(file_content)
         text = convert_image_to_text(query, image_stream, tenant_id, language)
-        return f"Image file {filename} content: {text}"
+        return messages["IMAGE_CONTENT_SUCCESS"].format(filename=filename, content=text)
     except Exception as e:
-        return f"Image file {filename} content: Error processing image file {filename}: {str(e)}"
+        return messages["IMAGE_CONTENT_ERROR"].format(filename=filename, error=str(e))
 
 
 async def process_text_file(query: str, filename: str, file_content: bytes, tenant_id: str, language: str = 'zh') -> tuple[str, Optional[str]]:
     """
     Process text file, convert to text using external API
     """
+    # Load messages based on language
+    messages = get_file_processing_messages_template(language)
+    
     # file_content is byte data, need to send to API through file upload
     data_process_service_url = DATA_PROCESS_SERVICE
     api_url = f"{data_process_service_url}/tasks/process_text_file"
@@ -319,21 +329,22 @@ async def process_text_file(query: str, filename: str, file_content: bytes, tena
             logger.info(
                 f"File processed successfully: {raw_text[:200]}...{raw_text[-200:]}...， length: {len(raw_text)}")
         else:
-            error_detail = response.json().get('detail', '未知错误') if response.headers.get(
+            error_detail = response.json().get('detail', 'unknown error') if response.headers.get(
                 'content-type', '').startswith('application/json') else response.text
             logger.error(
                 f"File processing failed (status code: {response.status_code}): {error_detail}")
             raise Exception(
-                f"File processing failed (status code: {response.status_code}): {error_detail}")
+                messages["FILE_PROCESSING_ERROR"].format(status_code=response.status_code, error_detail=error_detail))
 
     except Exception as e:
-        return f"File {filename} content: Error processing text file {filename}: {str(e)}", None
+        return messages["FILE_CONTENT_ERROR"].format(filename=filename, error=str(e)), None
 
     try:
-        text, truncation_percentage = convert_long_text_to_text(query, raw_text, tenant_id, language)
-        return f"File {filename} content: {text}", truncation_percentage
+        text, truncation_percentage = convert_long_text_to_text(
+            query, raw_text, tenant_id, language)
+        return messages["FILE_CONTENT_SUCCESS"].format(filename=filename, content=text), truncation_percentage
     except Exception as e:
-        return f"File {filename} content: Error processing text file {filename}: {str(e)}", None
+        return messages["FILE_CONTENT_ERROR"].format(filename=filename, error=str(e)), None
 
 
 def get_file_description(files: List[UploadFile]) -> str:
@@ -342,7 +353,7 @@ def get_file_description(files: List[UploadFile]) -> str:
     """
     if not files:
         return "User provided some reference files:\nNo files provided"
-    
+
     description = "User provided some reference files:\n"
     for file in files:
         ext = os.path.splitext(file.filename or "")[1].lower()
diff --git a/backend/utils/prompt_template_utils.py b/backend/utils/prompt_template_utils.py
@@ -1,6 +1,7 @@
 import yaml
 from typing import Dict, Any
 import logging
+import os
 logger = logging.getLogger("prompt_template_utils")
 
 
@@ -15,6 +16,7 @@ def get_prompt_template(template_type: str, language: str = 'zh', **kwargs) -> D
             - 'knowledge_summary': Knowledge summary template
             - 'analyze_file': File analysis template
             - 'generate_title': Title generation template
+            - 'file_processing_messages': File processing messages template
         language: Language code ('zh' or 'en')
         **kwargs: Additional parameters, for agent type need to pass is_manager parameter
 
@@ -51,6 +53,10 @@ def get_prompt_template(template_type: str, language: str = 'zh', **kwargs) -> D
         'generate_title': {
             'zh': 'backend/prompts/utils/generate_title.yaml',
             'en': 'backend/prompts/utils/generate_title_en.yaml'
+        },
+        'file_processing_messages': {
+            'zh': 'backend/prompts/utils/file_processing_messages.yaml',
+            'en': 'backend/prompts/utils/file_processing_messages_en.yaml'
         }
     }
 
@@ -65,8 +71,14 @@ def get_prompt_template(template_type: str, language: str = 'zh', **kwargs) -> D
     else:
         template_path = template_paths[template_type][language]
 
+    # Get the directory of this file and construct absolute path
+    current_dir = os.path.dirname(os.path.abspath(__file__))
+    # Go up one level from utils to backend, then use the template path
+    backend_dir = os.path.dirname(current_dir)
+    absolute_template_path = os.path.join(backend_dir, template_path.replace('backend/', ''))
+    
     # Read and return template content
-    with open(template_path, 'r', encoding='utf-8') as f:
+    with open(absolute_template_path, 'r', encoding='utf-8') as f:
         return yaml.safe_load(f)
 
 
@@ -135,3 +147,16 @@ def get_generate_title_prompt_template(language: str = 'zh') -> Dict[str, Any]:
         dict: Loaded prompt template configuration
     """
     return get_prompt_template('generate_title', language)
+
+
+def get_file_processing_messages_template(language: str = 'zh') -> Dict[str, Any]:
+    """
+    Load the localized message templates used when reporting file processing results
+
+    Args:
+        language: Language code ('zh' or 'en')
+
+    Returns:
+        dict: Message names mapped to str.format-style template strings
+    """
+    return get_prompt_template('file_processing_messages', language)
diff --git a/frontend/app/[locale]/chat/internal/chatInterface.tsx b/frontend/app/[locale]/chat/internal/chatInterface.tsx
@@ -46,6 +46,15 @@ import { X } from "lucide-react";
 
 const stepIdCounter = { current: 0 };
 
+// Map a streamed preprocessing message type to its i18n key ("" if the type has none)
+const getI18nKeyByType = (type: string): string => {
+  const typeToKeyMap: Record<string, string> = {
+    "progress": "chatInterface.parsingFileWithProgress",
+    "truncation": "chatInterface.fileTruncated",
+  };
+  return typeToKeyMap[type] || "";
+};
+
 export function ChatInterface() {
   const router = useRouter();
   const { user } = useAuth(); // Get user information
@@ -457,7 +466,7 @@ export function ChatInterface() {
         // Buffer for truncation messages with deduplication
         const truncationBuffer: any[] = [];
         const processedTruncationIds = new Set<string>(); // Track processed truncation messages to avoid duplicates
-        
+
         // Use extracted preprocessing function to process attachments
         const result = await preprocessAttachments(
           userMessage.content,
@@ -498,24 +507,33 @@ export function ChatInterface() {
                   };
                   lastMsg.steps.push(step);
                 }
-                
+
                 // Handle truncation messages - buffer them instead of updating immediately
                 if (jsonData.type === "truncation") {
                   // Create a unique ID for this truncation message to avoid duplicates
-                  const truncationId = `${jsonData.filename || 'unknown'}_${jsonData.message || ''}`;
-                  
+                  const truncationParams = jsonData.message_data?.params; // new payload shape; top-level fields below are the legacy fallback
+                  const truncationFile = truncationParams?.filename || jsonData.filename || "unknown";
+                  const truncationId = `${truncationFile}_${truncationParams?.percentage ?? jsonData.message ?? ""}`;
+
                   // Only add if not already processed
                   if (!processedTruncationIds.has(truncationId)) {
                     truncationBuffer.push(jsonData);
                     processedTruncationIds.add(truncationId);
                   }
                   return newMessages; // Don't update stepContent for truncation
                 }
-                
+
                 let stepContent = "";
                 switch (jsonData.type) {
                   case "progress":
-                    stepContent = jsonData.message;
+                    if (jsonData.message_data) {
+                      const i18nKey = getI18nKeyByType(jsonData.type);
+                      stepContent = String(
+                        t(i18nKey, jsonData.message_data.params)
+                      );
+                    } else {
+                      stepContent = jsonData.message || "";
+                    }
                     break;
                   case "error":
                     stepContent = t("chatInterface.parseFileFailed", {
@@ -531,14 +549,26 @@ export function ChatInterface() {
                   case "complete":
                     // When complete, process all buffered truncation messages
                     if (truncationBuffer.length > 0) {
-                      // Directly concatenate all truncation messages with internationalized separator
+                      // Process truncation messages using internationalization
                       const truncationInfo = truncationBuffer
-                        .map((truncation) => truncation.message)
-                        .join(t("chatInterface.truncationSeparator")); // Use internationalized separator
-                      
-                      stepContent = t("chatInterface.fileParsingCompleteWithTruncation", {
-                        truncationInfo: truncationInfo
-                      });
+                        .map((truncation) => {
+                          if (truncation.message_data) {
+                            const i18nKey = getI18nKeyByType(truncation.type);
+                            return String(
+                              t(i18nKey, truncation.message_data.params)
+                            );
+                          } else {
+                            return truncation.message;
+                          }
+                        })
+                        .join(String(t("chatInterface.truncationSeparator")));
+
+                      stepContent = t(
+                        "chatInterface.fileParsingCompleteWithTruncation",
+                        {
+                          truncationInfo: truncationInfo,
+                        }
+                      );
                     } else {
                       stepContent = t("chatInterface.fileParsingComplete");
                     }
@@ -1659,5 +1689,3 @@ export function ChatInterface() {
     </>
   );
 }
-
-
diff --git a/frontend/public/locales/en/common.json b/frontend/public/locales/en/common.json
@@ -21,6 +21,8 @@
     "chatInterface.createDialogFailedButContinue": "Failed to create new conversation, but will still attempt to send message:",
     "chatInterface.filePreprocessing": "File Preprocessing",
     "chatInterface.parsingFile": "Parsing file...",
+    "chatInterface.parsingFileWithProgress": "Parsing file {{index}}/{{total}}: {{filename}}",
+    "chatInterface.fileTruncated": "{{filename}} exceeds word limit, only read the first {{percentage}}%",
     "chatInterface.parseFileFailed": "Failed to parse file {{filename}}: {{message}}",
     "chatInterface.fileParsed": "File {{filename}} has been parsed successfully",
     "chatInterface.fileParsingComplete": "File parsing complete",
diff --git a/frontend/public/locales/zh/common.json b/frontend/public/locales/zh/common.json
@@ -21,6 +21,8 @@
     "chatInterface.createDialogFailedButContinue": "创建新对话失败，但仍会尝试发送消息:",
     "chatInterface.filePreprocessing": "文件预处理",
     "chatInterface.parsingFile": "正在解析文件…",
+    "chatInterface.parsingFileWithProgress": "正在解析文件 {{index}}/{{total}}: {{filename}}",
+    "chatInterface.fileTruncated": "{{filename}} 超出字数限制，只阅读了前 {{percentage}}%",
     "chatInterface.parseFileFailed": "解析文件 {{filename}} 失败: {{message}}",
     "chatInterface.fileParsed": "文件 {{filename}} 已解析完成",
     "chatInterface.fileParsingComplete": "文件解析完成",
diff --git a/test/backend/services/test_file_management_service.py b/test/backend/services/test_file_management_service.py
diff --git a/test/backend/utils/test_prompt_template_utils.py b/test/backend/utils/test_prompt_template_utils.py