1717from database .attachment_db import upload_fileobj , get_file_url , get_content_type , get_file_stream , delete_file , \
1818 list_files
1919from utils .attachment_utils import convert_image_to_text , convert_long_text_to_text
20+ from utils .prompt_template_utils import get_file_processing_messages_template
2021from utils .file_management_utils import save_upload_file
2122
2223# Create upload directory
@@ -140,41 +141,44 @@ async def list_files_impl(prefix: str, limit: Optional[int] = None):
140141 return files
141142
142143
def get_parsing_file_data(index: int, total_files: int, filename: str) -> dict:
    """
    Get structured data for the "parsing file" progress message.

    No user-facing text is produced here; only the parameters are returned,
    so the message can be internationalized by whoever consumes the payload.

    Args:
        index: Current file index (0-based)
        total_files: Total number of files
        filename: Name of the file being parsed

    Returns:
        dict: ``{"params": {"index": ..., "total": ..., "filename": ...}}``
            where ``index`` is the 1-based position of the current file.
    """
    return {
        "params": {
            # Callers pass the 0-based enumerate() index; expose it 1-based
            # so it can be shown directly as "file N of total".
            "index": index + 1,
            "total": total_files,
            "filename": filename,
        }
    }
160163
161164
def get_truncation_data(filename: str, truncation_percentage: "int | str") -> dict:
    """
    Get structured data for the truncation notice.

    No user-facing text is produced here; only the parameters are returned,
    so the message can be internationalized by whoever consumes the payload.

    Args:
        filename: Name of the file being truncated
        truncation_percentage: Percentage of content that was read. The value
            is passed through untouched, so it may be a number or a display
            string: preprocess_files_generator substitutes "< 1" when the
            percentage converts to 0.

    Returns:
        dict: ``{"params": {"filename": ..., "percentage": ...}}``
    """
    return {
        "params": {
            "filename": filename,
            # Deliberately not coerced to int: "< 1" must survive as-is.
            "percentage": truncation_percentage,
        }
    }
178182
179183
180184async def preprocess_files_generator (
@@ -187,15 +191,15 @@ async def preprocess_files_generator(
187191) -> AsyncGenerator [str , None ]:
188192 """
189193 Generate streaming response for file preprocessing
190-
194+
191195 Args:
192196 query: User query string
193197 file_cache: List of cached file data
194198 tenant_id: Tenant ID
195199 language: Language preference
196200 task_id: Unique task ID
197201 conversation_id: Conversation ID
198-
202+
199203 Yields:
200204 str: JSON formatted streaming messages
201205 """
@@ -205,7 +209,8 @@ async def preprocess_files_generator(
205209 # Create and register the preprocess task
206210 task = asyncio .current_task ()
207211 if task :
208- preprocess_manager .register_preprocess_task (task_id , conversation_id , task )
212+ preprocess_manager .register_preprocess_task (
213+ task_id , conversation_id , task )
209214
210215 try :
211216 for index , file_data in enumerate (file_cache ):
@@ -217,7 +222,7 @@ async def preprocess_files_generator(
217222 progress_message = json .dumps ({
218223 "type" : "progress" ,
219224 "progress" : progress ,
220- "message " : get_parsing_file_message ( language , index , total_files , file_data ['filename' ])
225+ "message_data " : get_parsing_file_data ( index , total_files , file_data ['filename' ])
221226 }, ensure_ascii = False )
222227 yield f"data: { progress_message } \n \n "
223228 await asyncio .sleep (0.1 )
@@ -240,20 +245,19 @@ async def preprocess_files_generator(
240245 "filename" : file_data ["filename" ],
241246 "description" : description
242247 }
243- file_message = json .dumps (file_message_data , ensure_ascii = False )
248+ file_message = json .dumps (
249+ file_message_data , ensure_ascii = False )
244250 yield f"data: { file_message } \n \n "
245251 await asyncio .sleep (0.1 )
246-
252+
247253 # Send truncation notice immediately if file was truncated
248254 if truncation_percentage is not None and int (truncation_percentage ) < 100 :
249255 if int (truncation_percentage ) == 0 :
250256 truncation_percentage = "< 1"
251257
252- truncation_msg = get_truncation_message (language , file_data ['filename' ], truncation_percentage )
253-
254258 truncation_message = json .dumps ({
255259 "type" : "truncation" ,
256- "message " : truncation_msg
260+ "message_data " : get_truncation_data ( file_data [ 'filename' ], truncation_percentage )
257261 }, ensure_ascii = False )
258262 yield f"data: { truncation_message } \n \n "
259263 await asyncio .sleep (0.1 )
@@ -284,18 +288,24 @@ async def process_image_file(query: str, filename: str, file_content: bytes, ten
284288 """
285289 Process image file, convert to text using external API
286290 """
291+ # Load messages based on language
292+ messages = get_file_processing_messages_template (language )
293+
287294 try :
288295 image_stream = BytesIO (file_content )
289296 text = convert_image_to_text (query , image_stream , tenant_id , language )
290- return f"Image file { filename } content: { text } "
297+ return messages [ "IMAGE_CONTENT_SUCCESS" ]. format ( filename = filename , content = text )
291298 except Exception as e :
292- return f"Image file { filename } content: Error processing image file { filename } : { str (e )} "
299+ return messages [ "IMAGE_CONTENT_ERROR" ]. format ( filename = filename , error = str (e ))
293300
294301
295302async def process_text_file (query : str , filename : str , file_content : bytes , tenant_id : str , language : str = 'zh' ) -> tuple [str , Optional [str ]]:
296303 """
297304 Process text file, convert to text using external API
298305 """
306+ # Load messages based on language
307+ messages = get_file_processing_messages_template (language )
308+
299309 # file_content is byte data, need to send to API through file upload
300310 data_process_service_url = DATA_PROCESS_SERVICE
301311 api_url = f"{ data_process_service_url } /tasks/process_text_file"
@@ -319,21 +329,22 @@ async def process_text_file(query: str, filename: str, file_content: bytes, tena
319329 logger .info (
320330 f"File processed successfully: { raw_text [:200 ]} ...{ raw_text [- 200 :]} ..., length: { len (raw_text )} " )
321331 else :
322- error_detail = response .json ().get ('detail' , '未知错误 ' ) if response .headers .get (
332+ error_detail = response .json ().get ('detail' , 'unknown error ' ) if response .headers .get (
323333 'content-type' , '' ).startswith ('application/json' ) else response .text
324334 logger .error (
325335 f"File processing failed (status code: { response .status_code } ): { error_detail } " )
326336 raise Exception (
327- f"File processing failed (status code: { response .status_code } ): { error_detail } " )
337+ messages [ "FILE_PROCESSING_ERROR" ]. format ( status_code = response .status_code , error_detail = error_detail ) )
328338
329339 except Exception as e :
330- return f"File { filename } content: Error processing text file { filename } : { str (e )} " , None
340+ return messages [ "FILE_CONTENT_ERROR" ]. format ( filename = filename , error = str (e )) , None
331341
332342 try :
333- text , truncation_percentage = convert_long_text_to_text (query , raw_text , tenant_id , language )
334- return f"File { filename } content: { text } " , truncation_percentage
343+ text , truncation_percentage = convert_long_text_to_text (
344+ query , raw_text , tenant_id , language )
345+ return messages ["FILE_CONTENT_SUCCESS" ].format (filename = filename , content = text ), truncation_percentage
335346 except Exception as e :
336- return f"File { filename } content: Error processing text file { filename } : { str (e )} " , None
347+ return messages [ "FILE_CONTENT_ERROR" ]. format ( filename = filename , error = str (e )) , None
337348
338349
339350def get_file_description (files : List [UploadFile ]) -> str :
@@ -342,7 +353,7 @@ def get_file_description(files: List[UploadFile]) -> str:
342353 """
343354 if not files :
344355 return "User provided some reference files:\n No files provided"
345-
356+
346357 description = "User provided some reference files:\n "
347358 for file in files :
348359 ext = os .path .splitext (file .filename or "" )[1 ].lower ()
0 commit comments