 from dataset.models import Paragraph, Status, Document, ProblemParagraphMapping, TaskType, State
 from embedding.models import SourceType, SearchMode
 from smartdoc.conf import PROJECT_DIR
+from django.utils.translation import gettext_lazy as _
 
 max_kb_error = logging.getLogger(__file__)
 max_kb = logging.getLogger(__file__)
@@ -90,11 +91,12 @@ def embedding_by_paragraph_list(paragraph_id_list, embedding_model: Embeddings):
             ListenerManagement.embedding_by_paragraph_data_list(data_list, paragraph_id_list=paragraph_id_list,
                                                                  embedding_model=embedding_model)
         except Exception as e:
-            max_kb_error.error(f'查询向量数据:{paragraph_id_list}出现错误{str(e)}{traceback.format_exc()}')
+            max_kb_error.error(_('Query vector data: {paragraph_id_list} error {error} {traceback}').format(
+                paragraph_id_list=paragraph_id_list, error=str(e), traceback=traceback.format_exc()))
 
     @staticmethod
     def embedding_by_paragraph_data_list(data_list, paragraph_id_list, embedding_model: Embeddings):
-        max_kb.info(f'开始 --->向量化段落: {paragraph_id_list}')
+        max_kb.info(_('Start --->Embedding paragraph: {paragraph_id_list}').format(paragraph_id_list=paragraph_id_list))
         status = Status.success
         try:
             # Delete paragraphs
@@ -106,11 +108,13 @@ def is_save_function():
             # Batch vectorization
             VectorStore.get_embedding_vector().batch_save(data_list, embedding_model, is_save_function)
         except Exception as e:
-            max_kb_error.error(f'向量化段落:{paragraph_id_list}出现错误{str(e)}{traceback.format_exc()}')
+            max_kb_error.error(_('Vectorized paragraph: {paragraph_id_list} error {error} {traceback}').format(
+                paragraph_id_list=paragraph_id_list, error=str(e), traceback=traceback.format_exc()))
             status = Status.error
         finally:
             QuerySet(Paragraph).filter(id__in=paragraph_id_list).update(**{'status': status})
-            max_kb.info(f'结束--->向量化段落:{paragraph_id_list}')
+            max_kb.info(
+                _('End--->Embedding paragraph: {paragraph_id_list}').format(paragraph_id_list=paragraph_id_list))
 
     @staticmethod
     def embedding_by_paragraph(paragraph_id, embedding_model: Embeddings):
@@ -119,7 +123,7 @@ def embedding_by_paragraph(paragraph_id, embedding_model: Embeddings):
         @param paragraph_id: paragraph id
         @param embedding_model: embedding model
         """
-        max_kb.info(f"开始 --->向量化段落: {paragraph_id}")
+        max_kb.info(_('Start --->Embedding paragraph: {paragraph_id}').format(paragraph_id=paragraph_id))
         # Update to the started status
         ListenerManagement.update_status(QuerySet(Paragraph).filter(id=paragraph_id), TaskType.EMBEDDING, State.STARTED)
         try:
@@ -144,11 +148,12 @@ def is_the_task_interrupted():
             ListenerManagement.update_status(QuerySet(Paragraph).filter(id=paragraph_id), TaskType.EMBEDDING,
                                              State.SUCCESS)
         except Exception as e:
-            max_kb_error.error(f'向量化段落:{paragraph_id}出现错误{str(e)}{traceback.format_exc()}')
+            max_kb_error.error(_('Vectorized paragraph: {paragraph_id} error {error} {traceback}').format(
+                paragraph_id=paragraph_id, error=str(e), traceback=traceback.format_exc()))
             ListenerManagement.update_status(QuerySet(Paragraph).filter(id=paragraph_id), TaskType.EMBEDDING,
                                              State.FAILURE)
         finally:
-            max_kb.info(f'结束 --->向量化段落: {paragraph_id}')
+            max_kb.info(_('End --->Embedding paragraph: {paragraph_id}').format(paragraph_id=paragraph_id))
 
     @staticmethod
     def embedding_by_data_list(data_list: List, embedding_model: Embeddings):
@@ -259,7 +264,8 @@ def is_the_task_interrupted():
 
         if is_the_task_interrupted():
             return
-        max_kb.info(f"开始--->向量化文档:{document_id}")
+        max_kb.info(_('Start--->Embedding document: {document_id}').format(document_id=document_id)
+                    )
         # Batch update status to PADDING
         ListenerManagement.update_status(QuerySet(Document).filter(id=document_id), TaskType.EMBEDDING,
                                          State.STARTED)
@@ -274,11 +280,12 @@ def is_the_task_interrupted():
                                                                                document_id)),
                       is_the_task_interrupted)
         except Exception as e:
-            max_kb_error.error(f'向量化文档:{document_id}出现错误{str(e)}{traceback.format_exc()}')
+            max_kb_error.error(_('Vectorized document: {document_id} error {error} {traceback}').format(
+                document_id=document_id, error=str(e), traceback=traceback.format_exc()))
         finally:
             ListenerManagement.post_update_document_status(document_id, TaskType.EMBEDDING)
             ListenerManagement.get_aggregation_document_status(document_id)()
-            max_kb.info(f"结束 --->向量化文档: {document_id}")
+            max_kb.info(_('End --->Embedding document: {document_id}').format(document_id=document_id))
             un_lock('embedding' + str(document_id))
 
     @staticmethod
@@ -289,17 +296,18 @@ def embedding_by_dataset(dataset_id, embedding_model: Embeddings):
         @param embedding_model embedding model
         :return: None
         """
-        max_kb.info(f"开始 --->向量化数据集: {dataset_id}")
+        max_kb.info(_('Start --->Embedding dataset: {dataset_id}').format(dataset_id=dataset_id))
         try:
             ListenerManagement.delete_embedding_by_dataset(dataset_id)
             document_list = QuerySet(Document).filter(dataset_id=dataset_id)
-            max_kb.info(f"数据集文档: {[d.name for d in document_list]}")
+            max_kb.info(_('Start--->Embedding document: {document_list}').format(document_list=document_list))
             for document in document_list:
                 ListenerManagement.embedding_by_document(document.id, embedding_model=embedding_model)
         except Exception as e:
-            max_kb_error.error(f'向量化数据集:{dataset_id}出现错误{str(e)}{traceback.format_exc()}')
+            max_kb_error.error(_('Vectorized dataset: {dataset_id} error {error} {traceback}').format(
+                dataset_id=dataset_id, error=str(e), traceback=traceback.format_exc()))
         finally:
-            max_kb.info(f"结束 --->向量化数据集: {dataset_id}")
+            max_kb.info(_('End --->Embedding dataset: {dataset_id}').format(dataset_id=dataset_id))
 
     @staticmethod
     def delete_embedding_by_document(document_id):
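
Below is a minimal, standalone sketch (not part of the diff above) of the logging pattern this change introduces: wrapping log text in gettext_lazy and interpolating it with str.format() instead of f-strings, so the messages can be extracted for translation while runtime values are still filled in. The names demo_logger, embed_one, and paragraph_id are illustrative only, and the settings.configure() guard exists solely so the snippet can run outside a configured Django project such as MaxKB.

import logging
import traceback

import django
from django.conf import settings

if not settings.configured:
    # Only needed to run this sketch outside a full Django project;
    # MaxKB's own settings already configure i18n.
    settings.configure(USE_I18N=True)
    django.setup()

from django.utils.translation import gettext_lazy as _

demo_logger = logging.getLogger(__name__)


def embed_one(paragraph_id):
    # gettext_lazy returns a lazy proxy; calling .format() on it resolves the
    # translation for the currently active language and then substitutes the
    # runtime values, which is why the diff replaces f-strings with
    # _('...').format(...) calls.
    demo_logger.info(_('Start --->Embedding paragraph: {paragraph_id}').format(paragraph_id=paragraph_id))
    try:
        pass  # the real code vectorizes the paragraph here
    except Exception as e:
        demo_logger.error(_('Vectorized paragraph: {paragraph_id} error {error} {traceback}').format(
            paragraph_id=paragraph_id, error=str(e), traceback=traceback.format_exc()))
    finally:
        demo_logger.info(_('End --->Embedding paragraph: {paragraph_id}').format(paragraph_id=paragraph_id))


if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO)
    embed_one('42')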