Skip to content

Commit fa83bed

Browse files
committed
✨ Comments fix: Truncation information of files that are too large is displayed to the user #1119
1 parent 6730650 commit fa83bed

File tree

10 files changed

+215
-111
lines changed

10 files changed

+215
-111
lines changed

backend/apps/file_management_app.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -326,5 +326,3 @@ async def agent_preprocess_api(
326326
except Exception as e:
327327
raise HTTPException(
328328
status_code=500, detail=f"File preprocessing error: {str(e)}")
329-
330-

backend/prompts/utils/file_processing_messages.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
FILE_CONTENT_SUCCESS: "文件 {filename} 内容: {content}"
2+
FILE_CONTENT_ERROR: "文件 {filename} 内容: 处理文本文件 {filename} 时出错: {error}"
3+
FILE_PROCESSING_ERROR: "文件处理失败 (状态码: {status_code}): {error_detail}"
4+
IMAGE_CONTENT_SUCCESS: "图片文件 {filename} 内容: {content}"
5+
IMAGE_CONTENT_ERROR: "图片文件 {filename} 内容: 处理图片文件 {filename} 时出错: {error}"

backend/prompts/utils/file_processing_messages_en.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
FILE_CONTENT_SUCCESS: "File {filename} content: {content}"
2+
FILE_CONTENT_ERROR: "File {filename} content: Error processing text file {filename}: {error}"
3+
FILE_PROCESSING_ERROR: "File processing failed (status code: {status_code}): {error_detail}"
4+
IMAGE_CONTENT_SUCCESS: "Image file {filename} content: {content}"
5+
IMAGE_CONTENT_ERROR: "Image file {filename} content: Error processing image file {filename}: {error}"

backend/services/file_management_service.py

Lines changed: 49 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
from database.attachment_db import upload_fileobj, get_file_url, get_content_type, get_file_stream, delete_file, \
1818
list_files
1919
from utils.attachment_utils import convert_image_to_text, convert_long_text_to_text
20+
from utils.prompt_template_utils import get_file_processing_messages_template
2021
from utils.file_management_utils import save_upload_file
2122

2223
# Create upload directory
@@ -140,41 +141,44 @@ async def list_files_impl(prefix: str, limit: Optional[int] = None):
140141
return files
141142

142143

143-
def get_parsing_file_message(language: str, index: int, total_files: int, filename: str) -> str:
144+
def get_parsing_file_data(index: int, total_files: int, filename: str) -> dict:
144145
"""
145-
Get internationalized parsing file message
146-
146+
Get structured data for parsing file message
147+
147148
Args:
148-
language: Language code ('zh' or 'en')
149149
index: Current file index (0-based)
150150
total_files: Total number of files
151151
filename: Name of the file being parsed
152-
152+
153153
Returns:
154-
str: Internationalized message
154+
dict: Structured data with parameters for internationalization
155155
"""
156-
if language == 'zh':
157-
return f"正在解析文件 {index + 1}/{total_files}: {filename}"
158-
else:
159-
return f"Parsing file {index + 1}/{total_files}: {filename}"
156+
return {
157+
"params": {
158+
"index": index + 1,
159+
"total": total_files,
160+
"filename": filename
161+
}
162+
}
160163

161164

162-
def get_truncation_message(language: str, filename: str, truncation_percentage: int) -> str:
165+
def get_truncation_data(filename: str, truncation_percentage: int) -> dict:
163166
"""
164-
Get internationalized truncation message
165-
167+
Get structured data for truncation message
168+
166169
Args:
167-
language: Language code ('zh' or 'en')
168170
filename: Name of the file being truncated
169171
truncation_percentage: Percentage of content that was read
170-
172+
171173
Returns:
172-
str: Internationalized truncation message
174+
dict: Structured data with parameters for internationalization
173175
"""
174-
if language == 'zh':
175-
return f"{filename} 超出字数限制,只阅读了前 {truncation_percentage}%"
176-
else:
177-
return f"{filename} exceeds word limit, only read the first {truncation_percentage}%"
176+
return {
177+
"params": {
178+
"filename": filename,
179+
"percentage": truncation_percentage
180+
}
181+
}
178182

179183

180184
async def preprocess_files_generator(
@@ -187,15 +191,15 @@ async def preprocess_files_generator(
187191
) -> AsyncGenerator[str, None]:
188192
"""
189193
Generate streaming response for file preprocessing
190-
194+
191195
Args:
192196
query: User query string
193197
file_cache: List of cached file data
194198
tenant_id: Tenant ID
195199
language: Language preference
196200
task_id: Unique task ID
197201
conversation_id: Conversation ID
198-
202+
199203
Yields:
200204
str: JSON formatted streaming messages
201205
"""
@@ -205,7 +209,8 @@ async def preprocess_files_generator(
205209
# Create and register the preprocess task
206210
task = asyncio.current_task()
207211
if task:
208-
preprocess_manager.register_preprocess_task(task_id, conversation_id, task)
212+
preprocess_manager.register_preprocess_task(
213+
task_id, conversation_id, task)
209214

210215
try:
211216
for index, file_data in enumerate(file_cache):
@@ -217,7 +222,7 @@ async def preprocess_files_generator(
217222
progress_message = json.dumps({
218223
"type": "progress",
219224
"progress": progress,
220-
"message": get_parsing_file_message(language, index, total_files, file_data['filename'])
225+
"message_data": get_parsing_file_data(index, total_files, file_data['filename'])
221226
}, ensure_ascii=False)
222227
yield f"data: {progress_message}\n\n"
223228
await asyncio.sleep(0.1)
@@ -240,20 +245,19 @@ async def preprocess_files_generator(
240245
"filename": file_data["filename"],
241246
"description": description
242247
}
243-
file_message = json.dumps(file_message_data, ensure_ascii=False)
248+
file_message = json.dumps(
249+
file_message_data, ensure_ascii=False)
244250
yield f"data: {file_message}\n\n"
245251
await asyncio.sleep(0.1)
246-
252+
247253
# Send truncation notice immediately if file was truncated
248254
if truncation_percentage is not None and int(truncation_percentage) < 100:
249255
if int(truncation_percentage) == 0:
250256
truncation_percentage = "< 1"
251257

252-
truncation_msg = get_truncation_message(language, file_data['filename'], truncation_percentage)
253-
254258
truncation_message = json.dumps({
255259
"type": "truncation",
256-
"message": truncation_msg
260+
"message_data": get_truncation_data(file_data['filename'], truncation_percentage)
257261
}, ensure_ascii=False)
258262
yield f"data: {truncation_message}\n\n"
259263
await asyncio.sleep(0.1)
@@ -284,18 +288,24 @@ async def process_image_file(query: str, filename: str, file_content: bytes, ten
284288
"""
285289
Process image file, convert to text using external API
286290
"""
291+
# Load messages based on language
292+
messages = get_file_processing_messages_template(language)
293+
287294
try:
288295
image_stream = BytesIO(file_content)
289296
text = convert_image_to_text(query, image_stream, tenant_id, language)
290-
return f"Image file {filename} content: {text}"
297+
return messages["IMAGE_CONTENT_SUCCESS"].format(filename=filename, content=text)
291298
except Exception as e:
292-
return f"Image file {filename} content: Error processing image file {filename}: {str(e)}"
299+
return messages["IMAGE_CONTENT_ERROR"].format(filename=filename, error=str(e))
293300

294301

295302
async def process_text_file(query: str, filename: str, file_content: bytes, tenant_id: str, language: str = 'zh') -> tuple[str, Optional[str]]:
296303
"""
297304
Process text file, convert to text using external API
298305
"""
306+
# Load messages based on language
307+
messages = get_file_processing_messages_template(language)
308+
299309
# file_content is byte data, need to send to API through file upload
300310
data_process_service_url = DATA_PROCESS_SERVICE
301311
api_url = f"{data_process_service_url}/tasks/process_text_file"
@@ -319,21 +329,22 @@ async def process_text_file(query: str, filename: str, file_content: bytes, tena
319329
logger.info(
320330
f"File processed successfully: {raw_text[:200]}...{raw_text[-200:]}..., length: {len(raw_text)}")
321331
else:
322-
error_detail = response.json().get('detail', '未知错误') if response.headers.get(
332+
error_detail = response.json().get('detail', 'unknown error') if response.headers.get(
323333
'content-type', '').startswith('application/json') else response.text
324334
logger.error(
325335
f"File processing failed (status code: {response.status_code}): {error_detail}")
326336
raise Exception(
327-
f"File processing failed (status code: {response.status_code}): {error_detail}")
337+
messages["FILE_PROCESSING_ERROR"].format(status_code=response.status_code, error_detail=error_detail))
328338

329339
except Exception as e:
330-
return f"File {filename} content: Error processing text file {filename}: {str(e)}", None
340+
return messages["FILE_CONTENT_ERROR"].format(filename=filename, error=str(e)), None
331341

332342
try:
333-
text, truncation_percentage = convert_long_text_to_text(query, raw_text, tenant_id, language)
334-
return f"File {filename} content: {text}", truncation_percentage
343+
text, truncation_percentage = convert_long_text_to_text(
344+
query, raw_text, tenant_id, language)
345+
return messages["FILE_CONTENT_SUCCESS"].format(filename=filename, content=text), truncation_percentage
335346
except Exception as e:
336-
return f"File {filename} content: Error processing text file {filename}: {str(e)}", None
347+
return messages["FILE_CONTENT_ERROR"].format(filename=filename, error=str(e)), None
337348

338349

339350
def get_file_description(files: List[UploadFile]) -> str:
@@ -342,7 +353,7 @@ def get_file_description(files: List[UploadFile]) -> str:
342353
"""
343354
if not files:
344355
return "User provided some reference files:\nNo files provided"
345-
356+
346357
description = "User provided some reference files:\n"
347358
for file in files:
348359
ext = os.path.splitext(file.filename or "")[1].lower()

backend/utils/prompt_template_utils.py

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import yaml
22
from typing import Dict, Any
33
import logging
4+
import os
45
logger = logging.getLogger("prompt_template_utils")
56

67

@@ -15,6 +16,7 @@ def get_prompt_template(template_type: str, language: str = 'zh', **kwargs) -> D
1516
- 'knowledge_summary': Knowledge summary template
1617
- 'analyze_file': File analysis template
1718
- 'generate_title': Title generation template
19+
- 'file_processing_messages': File processing messages template
1820
language: Language code ('zh' or 'en')
1921
**kwargs: Additional parameters, for agent type need to pass is_manager parameter
2022
@@ -51,6 +53,10 @@ def get_prompt_template(template_type: str, language: str = 'zh', **kwargs) -> D
5153
'generate_title': {
5254
'zh': 'backend/prompts/utils/generate_title.yaml',
5355
'en': 'backend/prompts/utils/generate_title_en.yaml'
56+
},
57+
'file_processing_messages': {
58+
'zh': 'backend/prompts/utils/file_processing_messages.yaml',
59+
'en': 'backend/prompts/utils/file_processing_messages_en.yaml'
5460
}
5561
}
5662

@@ -65,8 +71,14 @@ def get_prompt_template(template_type: str, language: str = 'zh', **kwargs) -> D
6571
else:
6672
template_path = template_paths[template_type][language]
6773

74+
# Get the directory of this file and construct absolute path
75+
current_dir = os.path.dirname(os.path.abspath(__file__))
76+
# Go up one level from utils to backend, then use the template path
77+
backend_dir = os.path.dirname(current_dir)
78+
absolute_template_path = os.path.join(backend_dir, template_path.replace('backend/', ''))
79+
6880
# Read and return template content
69-
with open(template_path, 'r', encoding='utf-8') as f:
81+
with open(absolute_template_path, 'r', encoding='utf-8') as f:
7082
return yaml.safe_load(f)
7183

7284

@@ -135,3 +147,16 @@ def get_generate_title_prompt_template(language: str = 'zh') -> Dict[str, Any]:
135147
dict: Loaded prompt template configuration
136148
"""
137149
return get_prompt_template('generate_title', language)
150+
151+
152+
def get_file_processing_messages_template(language: str = 'zh') -> Dict[str, Any]:
153+
"""
154+
Get file processing messages template
155+
156+
Args:
157+
language: Language code ('zh' or 'en')
158+
159+
Returns:
160+
dict: Loaded file processing messages configuration
161+
"""
162+
return get_prompt_template('file_processing_messages', language)

frontend/app/[locale]/chat/internal/chatInterface.tsx

Lines changed: 43 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,15 @@ import { X } from "lucide-react";
4646

4747
const stepIdCounter = { current: 0 };
4848

49+
// Get internationalization key based on message type
50+
const getI18nKeyByType = (type: string): string => {
51+
const typeToKeyMap: Record<string, string> = {
52+
"progress": "chatInterface.parsingFileWithProgress",
53+
"truncation": "chatInterface.fileTruncated",
54+
};
55+
return typeToKeyMap[type] || "";
56+
};
57+
4958
export function ChatInterface() {
5059
const router = useRouter();
5160
const { user } = useAuth(); // Get user information
@@ -457,7 +466,7 @@ export function ChatInterface() {
457466
// Buffer for truncation messages with deduplication
458467
const truncationBuffer: any[] = [];
459468
const processedTruncationIds = new Set<string>(); // Track processed truncation messages to avoid duplicates
460-
469+
461470
// Use extracted preprocessing function to process attachments
462471
const result = await preprocessAttachments(
463472
userMessage.content,
@@ -498,24 +507,33 @@ export function ChatInterface() {
498507
};
499508
lastMsg.steps.push(step);
500509
}
501-
510+
502511
// Handle truncation messages - buffer them instead of updating immediately
503512
if (jsonData.type === "truncation") {
504513
// Create a unique ID for this truncation message to avoid duplicates
505-
const truncationId = `${jsonData.filename || 'unknown'}_${jsonData.message || ''}`;
506-
514+
const truncationId = `${jsonData.filename || "unknown"}_${
515+
jsonData.message || ""
516+
}`;
517+
507518
// Only add if not already processed
508519
if (!processedTruncationIds.has(truncationId)) {
509520
truncationBuffer.push(jsonData);
510521
processedTruncationIds.add(truncationId);
511522
}
512523
return newMessages; // Don't update stepContent for truncation
513524
}
514-
525+
515526
let stepContent = "";
516527
switch (jsonData.type) {
517528
case "progress":
518-
stepContent = jsonData.message;
529+
if (jsonData.message_data) {
530+
const i18nKey = getI18nKeyByType(jsonData.type);
531+
stepContent = String(
532+
t(i18nKey, jsonData.message_data.params)
533+
);
534+
} else {
535+
stepContent = jsonData.message || "";
536+
}
519537
break;
520538
case "error":
521539
stepContent = t("chatInterface.parseFileFailed", {
@@ -531,14 +549,26 @@ export function ChatInterface() {
531549
case "complete":
532550
// When complete, process all buffered truncation messages
533551
if (truncationBuffer.length > 0) {
534-
// Directly concatenate all truncation messages with internationalized separator
552+
// Process truncation messages using internationalization
535553
const truncationInfo = truncationBuffer
536-
.map((truncation) => truncation.message)
537-
.join(t("chatInterface.truncationSeparator")); // Use internationalized separator
538-
539-
stepContent = t("chatInterface.fileParsingCompleteWithTruncation", {
540-
truncationInfo: truncationInfo
541-
});
554+
.map((truncation) => {
555+
if (truncation.message_data) {
556+
const i18nKey = getI18nKeyByType(truncation.type);
557+
return String(
558+
t(i18nKey, truncation.message_data.params)
559+
);
560+
} else {
561+
return truncation.message;
562+
}
563+
})
564+
.join(String(t("chatInterface.truncationSeparator")));
565+
566+
stepContent = t(
567+
"chatInterface.fileParsingCompleteWithTruncation",
568+
{
569+
truncationInfo: truncationInfo,
570+
}
571+
);
542572
} else {
543573
stepContent = t("chatInterface.fileParsingComplete");
544574
}
@@ -1659,5 +1689,3 @@ export function ChatInterface() {
16591689
</>
16601690
);
16611691
}
1662-
1663-

frontend/public/locales/en/common.json

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@
2121
"chatInterface.createDialogFailedButContinue": "Failed to create new conversation, but will still attempt to send message:",
2222
"chatInterface.filePreprocessing": "File Preprocessing",
2323
"chatInterface.parsingFile": "Parsing file...",
24+
"chatInterface.parsingFileWithProgress": "Parsing file {{index}}/{{total}}: {{filename}}",
25+
"chatInterface.fileTruncated": "{{filename}} exceeds word limit, only read the first {{percentage}}%",
2426
"chatInterface.parseFileFailed": "Failed to parse file {{filename}}: {{message}}",
2527
"chatInterface.fileParsed": "File {{filename}} has been parsed successfully",
2628
"chatInterface.fileParsingComplete": "File parsing complete",

frontend/public/locales/zh/common.json

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@
2121
"chatInterface.createDialogFailedButContinue": "创建新对话失败,但仍会尝试发送消息:",
2222
"chatInterface.filePreprocessing": "文件预处理",
2323
"chatInterface.parsingFile": "正在解析文件…",
24+
"chatInterface.parsingFileWithProgress": "正在解析文件 {{index}}/{{total}}: {{filename}}",
25+
"chatInterface.fileTruncated": "{{filename}} 超出字数限制,只阅读了前 {{percentage}}%",
2426
"chatInterface.parseFileFailed": "解析文件 {{filename}} 失败: {{message}}",
2527
"chatInterface.fileParsed": "文件 {{filename}} 已解析完成",
2628
"chatInterface.fileParsingComplete": "文件解析完成",

0 commit comments

Comments
 (0)