@@ -140,41 +140,44 @@ async def list_files_impl(prefix: str, limit: Optional[int] = None):
140140 return files
141141
142142
143- def get_parsing_file_message ( language : str , index : int , total_files : int , filename : str ) -> str :
143+ def get_parsing_file_data ( index : int , total_files : int , filename : str ) -> dict :
144144 """
145- Get internationalized parsing file message
146-
145+ Get structured data for parsing file message
146+
147147 Args:
148- language: Language code ('zh' or 'en')
149148 index: Current file index (0-based)
150149 total_files: Total number of files
151150 filename: Name of the file being parsed
152-
151+
153152 Returns:
154- str: Internationalized message
153+ dict: Structured data with parameters for internationalization
155154 """
156- if language == 'zh' :
157- return f"正在解析文件 { index + 1 } /{ total_files } : { filename } "
158- else :
159- return f"Parsing file { index + 1 } /{ total_files } : { filename } "
155+ return {
156+ "params" : {
157+ "index" : index + 1 ,
158+ "total" : total_files ,
159+ "filename" : filename
160+ }
161+ }
160162
161163
162- def get_truncation_message ( language : str , filename : str , truncation_percentage : int ) -> str :
164+ def get_truncation_data ( filename : str , truncation_percentage : int ) -> dict :
163165 """
164- Get internationalized truncation message
165-
166+ Get structured data for truncation message
167+
166168 Args:
167- language: Language code ('zh' or 'en')
168169 filename: Name of the file being truncated
169170 truncation_percentage: Percentage of content that was read
170-
171+
171172 Returns:
172- str: Internationalized truncation message
173+ dict: Structured data with parameters for internationalization
173174 """
174- if language == 'zh' :
175- return f"{ filename } 超出字数限制,只阅读了前 { truncation_percentage } %"
176- else :
177- return f"{ filename } exceeds word limit, only read the first { truncation_percentage } %"
175+ return {
176+ "params" : {
177+ "filename" : filename ,
178+ "percentage" : truncation_percentage
179+ }
180+ }
178181
179182
180183async def preprocess_files_generator (
@@ -187,15 +190,15 @@ async def preprocess_files_generator(
187190) -> AsyncGenerator [str , None ]:
188191 """
189192 Generate streaming response for file preprocessing
190-
193+
191194 Args:
192195 query: User query string
193196 file_cache: List of cached file data
194197 tenant_id: Tenant ID
195198 language: Language preference
196199 task_id: Unique task ID
197200 conversation_id: Conversation ID
198-
201+
199202 Yields:
200203 str: JSON formatted streaming messages
201204 """
@@ -205,7 +208,8 @@ async def preprocess_files_generator(
205208 # Create and register the preprocess task
206209 task = asyncio .current_task ()
207210 if task :
208- preprocess_manager .register_preprocess_task (task_id , conversation_id , task )
211+ preprocess_manager .register_preprocess_task (
212+ task_id , conversation_id , task )
209213
210214 try :
211215 for index , file_data in enumerate (file_cache ):
@@ -217,7 +221,7 @@ async def preprocess_files_generator(
217221 progress_message = json .dumps ({
218222 "type" : "progress" ,
219223 "progress" : progress ,
220- "message " : get_parsing_file_message ( language , index , total_files , file_data ['filename' ])
224+ "message_data " : get_parsing_file_data ( index , total_files , file_data ['filename' ])
221225 }, ensure_ascii = False )
222226 yield f"data: { progress_message } \n \n "
223227 await asyncio .sleep (0.1 )
@@ -240,20 +244,19 @@ async def preprocess_files_generator(
240244 "filename" : file_data ["filename" ],
241245 "description" : description
242246 }
243- file_message = json .dumps (file_message_data , ensure_ascii = False )
247+ file_message = json .dumps (
248+ file_message_data , ensure_ascii = False )
244249 yield f"data: { file_message } \n \n "
245250 await asyncio .sleep (0.1 )
246-
251+
247252 # Send truncation notice immediately if file was truncated
248253 if truncation_percentage is not None and int (truncation_percentage ) < 100 :
249254 if int (truncation_percentage ) == 0 :
250255 truncation_percentage = "< 1"
251256
252- truncation_msg = get_truncation_message (language , file_data ['filename' ], truncation_percentage )
253-
254257 truncation_message = json .dumps ({
255258 "type" : "truncation" ,
256- "message " : truncation_msg
259+ "message_data " : get_truncation_data ( file_data [ 'filename' ], truncation_percentage )
257260 }, ensure_ascii = False )
258261 yield f"data: { truncation_message } \n \n "
259262 await asyncio .sleep (0.1 )
@@ -319,7 +322,7 @@ async def process_text_file(query: str, filename: str, file_content: bytes, tena
319322 logger .info (
320323 f"File processed successfully: { raw_text [:200 ]} ...{ raw_text [- 200 :]} ..., length: { len (raw_text )} " )
321324 else :
322- error_detail = response .json ().get ('detail' , '未知错误 ' ) if response .headers .get (
325+ error_detail = response .json ().get ('detail' , 'unknown error ' ) if response .headers .get (
323326 'content-type' , '' ).startswith ('application/json' ) else response .text
324327 logger .error (
325328 f"File processing failed (status code: { response .status_code } ): { error_detail } " )
@@ -330,7 +333,8 @@ async def process_text_file(query: str, filename: str, file_content: bytes, tena
330333 return f"File { filename } content: Error processing text file { filename } : { str (e )} " , None
331334
332335 try :
333- text , truncation_percentage = convert_long_text_to_text (query , raw_text , tenant_id , language )
336+ text , truncation_percentage = convert_long_text_to_text (
337+ query , raw_text , tenant_id , language )
334338 return f"File { filename } content: { text } " , truncation_percentage
335339 except Exception as e :
336340 return f"File { filename } content: Error processing text file { filename } : { str (e )} " , None
@@ -342,7 +346,7 @@ def get_file_description(files: List[UploadFile]) -> str:
342346 """
343347 if not files :
344348 return "User provided some reference files:\n No files provided"
345-
349+
346350 description = "User provided some reference files:\n "
347351 for file in files :
348352 ext = os .path .splitext (file .filename or "" )[1 ].lower ()
0 commit comments