Skip to content

Commit 800798f

Browse files
committed
✨ Comments fix: Truncation information of files that are too large is displayed to the user #1119
1 parent 6730650 commit 800798f

File tree

6 files changed (+1906 / -1812 lines changed)

backend/apps/file_management_app.py

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -326,5 +326,3 @@ async def agent_preprocess_api(
326326
except Exception as e:
327327
raise HTTPException(
328328
status_code=500, detail=f"File preprocessing error: {str(e)}")
329-
330-

backend/services/file_management_service.py

Lines changed: 36 additions & 32 deletions
Original file line number | Diff line number | Diff line change
@@ -140,41 +140,44 @@ async def list_files_impl(prefix: str, limit: Optional[int] = None):
140140
return files
141141

142142

143-
def get_parsing_file_message(language: str, index: int, total_files: int, filename: str) -> str:
143+
def get_parsing_file_data(index: int, total_files: int, filename: str) -> dict:
144144
"""
145-
Get internationalized parsing file message
146-
145+
Get structured data for parsing file message
146+
147147
Args:
148-
language: Language code ('zh' or 'en')
149148
index: Current file index (0-based)
150149
total_files: Total number of files
151150
filename: Name of the file being parsed
152-
151+
153152
Returns:
154-
str: Internationalized message
153+
dict: Structured data with parameters for internationalization
155154
"""
156-
if language == 'zh':
157-
return f"正在解析文件 {index + 1}/{total_files}: {filename}"
158-
else:
159-
return f"Parsing file {index + 1}/{total_files}: {filename}"
155+
return {
156+
"params": {
157+
"index": index + 1,
158+
"total": total_files,
159+
"filename": filename
160+
}
161+
}
160162

161163

162-
def get_truncation_message(language: str, filename: str, truncation_percentage: int) -> str:
164+
def get_truncation_data(filename: str, truncation_percentage: int) -> dict:
163165
"""
164-
Get internationalized truncation message
165-
166+
Get structured data for truncation message
167+
166168
Args:
167-
language: Language code ('zh' or 'en')
168169
filename: Name of the file being truncated
169170
truncation_percentage: Percentage of content that was read
170-
171+
171172
Returns:
172-
str: Internationalized truncation message
173+
dict: Structured data with parameters for internationalization
173174
"""
174-
if language == 'zh':
175-
return f"{filename} 超出字数限制,只阅读了前 {truncation_percentage}%"
176-
else:
177-
return f"{filename} exceeds word limit, only read the first {truncation_percentage}%"
175+
return {
176+
"params": {
177+
"filename": filename,
178+
"percentage": truncation_percentage
179+
}
180+
}
178181

179182

180183
async def preprocess_files_generator(
@@ -187,15 +190,15 @@ async def preprocess_files_generator(
187190
) -> AsyncGenerator[str, None]:
188191
"""
189192
Generate streaming response for file preprocessing
190-
193+
191194
Args:
192195
query: User query string
193196
file_cache: List of cached file data
194197
tenant_id: Tenant ID
195198
language: Language preference
196199
task_id: Unique task ID
197200
conversation_id: Conversation ID
198-
201+
199202
Yields:
200203
str: JSON formatted streaming messages
201204
"""
@@ -205,7 +208,8 @@ async def preprocess_files_generator(
205208
# Create and register the preprocess task
206209
task = asyncio.current_task()
207210
if task:
208-
preprocess_manager.register_preprocess_task(task_id, conversation_id, task)
211+
preprocess_manager.register_preprocess_task(
212+
task_id, conversation_id, task)
209213

210214
try:
211215
for index, file_data in enumerate(file_cache):
@@ -217,7 +221,7 @@ async def preprocess_files_generator(
217221
progress_message = json.dumps({
218222
"type": "progress",
219223
"progress": progress,
220-
"message": get_parsing_file_message(language, index, total_files, file_data['filename'])
224+
"message_data": get_parsing_file_data(index, total_files, file_data['filename'])
221225
}, ensure_ascii=False)
222226
yield f"data: {progress_message}\n\n"
223227
await asyncio.sleep(0.1)
@@ -240,20 +244,19 @@ async def preprocess_files_generator(
240244
"filename": file_data["filename"],
241245
"description": description
242246
}
243-
file_message = json.dumps(file_message_data, ensure_ascii=False)
247+
file_message = json.dumps(
248+
file_message_data, ensure_ascii=False)
244249
yield f"data: {file_message}\n\n"
245250
await asyncio.sleep(0.1)
246-
251+
247252
# Send truncation notice immediately if file was truncated
248253
if truncation_percentage is not None and int(truncation_percentage) < 100:
249254
if int(truncation_percentage) == 0:
250255
truncation_percentage = "< 1"
251256

252-
truncation_msg = get_truncation_message(language, file_data['filename'], truncation_percentage)
253-
254257
truncation_message = json.dumps({
255258
"type": "truncation",
256-
"message": truncation_msg
259+
"message_data": get_truncation_data(file_data['filename'], truncation_percentage)
257260
}, ensure_ascii=False)
258261
yield f"data: {truncation_message}\n\n"
259262
await asyncio.sleep(0.1)
@@ -319,7 +322,7 @@ async def process_text_file(query: str, filename: str, file_content: bytes, tena
319322
logger.info(
320323
f"File processed successfully: {raw_text[:200]}...{raw_text[-200:]}..., length: {len(raw_text)}")
321324
else:
322-
error_detail = response.json().get('detail', '未知错误') if response.headers.get(
325+
error_detail = response.json().get('detail', 'unknown error') if response.headers.get(
323326
'content-type', '').startswith('application/json') else response.text
324327
logger.error(
325328
f"File processing failed (status code: {response.status_code}): {error_detail}")
@@ -330,7 +333,8 @@ async def process_text_file(query: str, filename: str, file_content: bytes, tena
330333
return f"File {filename} content: Error processing text file {filename}: {str(e)}", None
331334

332335
try:
333-
text, truncation_percentage = convert_long_text_to_text(query, raw_text, tenant_id, language)
336+
text, truncation_percentage = convert_long_text_to_text(
337+
query, raw_text, tenant_id, language)
334338
return f"File {filename} content: {text}", truncation_percentage
335339
except Exception as e:
336340
return f"File {filename} content: Error processing text file {filename}: {str(e)}", None
@@ -342,7 +346,7 @@ def get_file_description(files: List[UploadFile]) -> str:
342346
"""
343347
if not files:
344348
return "User provided some reference files:\nNo files provided"
345-
349+
346350
description = "User provided some reference files:\n"
347351
for file in files:
348352
ext = os.path.splitext(file.filename or "")[1].lower()

frontend/app/[locale]/chat/internal/chatInterface.tsx

Lines changed: 43 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -46,6 +46,15 @@ import { X } from "lucide-react";
4646

4747
const stepIdCounter = { current: 0 };
4848

49+
// Get internationalization key based on message type
50+
const getI18nKeyByType = (type: string): string => {
51+
const typeToKeyMap: Record<string, string> = {
52+
"progress": "chatInterface.parsingFileWithProgress",
53+
"truncation": "chatInterface.fileTruncated",
54+
};
55+
return typeToKeyMap[type] || "";
56+
};
57+
4958
export function ChatInterface() {
5059
const router = useRouter();
5160
const { user } = useAuth(); // Get user information
@@ -457,7 +466,7 @@ export function ChatInterface() {
457466
// Buffer for truncation messages with deduplication
458467
const truncationBuffer: any[] = [];
459468
const processedTruncationIds = new Set<string>(); // Track processed truncation messages to avoid duplicates
460-
469+
461470
// Use extracted preprocessing function to process attachments
462471
const result = await preprocessAttachments(
463472
userMessage.content,
@@ -498,24 +507,33 @@ export function ChatInterface() {
498507
};
499508
lastMsg.steps.push(step);
500509
}
501-
510+
502511
// Handle truncation messages - buffer them instead of updating immediately
503512
if (jsonData.type === "truncation") {
504513
// Create a unique ID for this truncation message to avoid duplicates
505-
const truncationId = `${jsonData.filename || 'unknown'}_${jsonData.message || ''}`;
506-
514+
const truncationId = `${jsonData.filename || "unknown"}_${
515+
jsonData.message || ""
516+
}`;
517+
507518
// Only add if not already processed
508519
if (!processedTruncationIds.has(truncationId)) {
509520
truncationBuffer.push(jsonData);
510521
processedTruncationIds.add(truncationId);
511522
}
512523
return newMessages; // Don't update stepContent for truncation
513524
}
514-
525+
515526
let stepContent = "";
516527
switch (jsonData.type) {
517528
case "progress":
518-
stepContent = jsonData.message;
529+
if (jsonData.message_data) {
530+
const i18nKey = getI18nKeyByType(jsonData.type);
531+
stepContent = String(
532+
t(i18nKey, jsonData.message_data.params)
533+
);
534+
} else {
535+
stepContent = jsonData.message || "";
536+
}
519537
break;
520538
case "error":
521539
stepContent = t("chatInterface.parseFileFailed", {
@@ -531,14 +549,26 @@ export function ChatInterface() {
531549
case "complete":
532550
// When complete, process all buffered truncation messages
533551
if (truncationBuffer.length > 0) {
534-
// Directly concatenate all truncation messages with internationalized separator
552+
// Process truncation messages using internationalization
535553
const truncationInfo = truncationBuffer
536-
.map((truncation) => truncation.message)
537-
.join(t("chatInterface.truncationSeparator")); // Use internationalized separator
538-
539-
stepContent = t("chatInterface.fileParsingCompleteWithTruncation", {
540-
truncationInfo: truncationInfo
541-
});
554+
.map((truncation) => {
555+
if (truncation.message_data) {
556+
const i18nKey = getI18nKeyByType(truncation.type);
557+
return String(
558+
t(i18nKey, truncation.message_data.params)
559+
);
560+
} else {
561+
return truncation.message;
562+
}
563+
})
564+
.join(String(t("chatInterface.truncationSeparator")));
565+
566+
stepContent = t(
567+
"chatInterface.fileParsingCompleteWithTruncation",
568+
{
569+
truncationInfo: truncationInfo,
570+
}
571+
);
542572
} else {
543573
stepContent = t("chatInterface.fileParsingComplete");
544574
}
@@ -1659,5 +1689,3 @@ export function ChatInterface() {
16591689
</>
16601690
);
16611691
}
1662-
1663-

0 commit comments

Comments
 (0)