2525from dataset .models import Paragraph , Status , Document , ProblemParagraphMapping , TaskType , State
2626from embedding .models import SourceType , SearchMode
2727from smartdoc .conf import PROJECT_DIR
28+ from django .utils .translation import gettext_lazy as _
2829
2930max_kb_error = logging .getLogger (__file__ )
3031max_kb = logging .getLogger (__file__ )
@@ -86,11 +87,12 @@ def embedding_by_paragraph_list(paragraph_id_list, embedding_model: Embeddings):
8687 ListenerManagement .embedding_by_paragraph_data_list (data_list , paragraph_id_list = paragraph_id_list ,
8788 embedding_model = embedding_model )
8889 except Exception as e :
89- max_kb_error .error (f'查询向量数据:{ paragraph_id_list } 出现错误{ str (e )} { traceback .format_exc ()} ' )
90+ max_kb_error .error (_ ('Query vector data: {paragraph_id_list} error {error} {traceback}' ).format (
91+ paragraph_id_list = paragraph_id_list , error = str (e ), traceback = traceback .format_exc ()))
9092
9193 @staticmethod
9294 def embedding_by_paragraph_data_list (data_list , paragraph_id_list , embedding_model : Embeddings ):
93- max_kb .info (f'开始 --->向量化段落: { paragraph_id_list } ' )
95+ max_kb .info (_ ( 'Start --->Embedding paragraph: {paragraph_id_list}'). format ( paragraph_id_list = paragraph_id_list ) )
9496 status = Status .success
9597 try :
9698 # 删除段落
@@ -102,11 +104,13 @@ def is_save_function():
102104 # 批量向量化
103105 VectorStore .get_embedding_vector ().batch_save (data_list , embedding_model , is_save_function )
104106 except Exception as e :
105- max_kb_error .error (f'向量化段落:{ paragraph_id_list } 出现错误{ str (e )} { traceback .format_exc ()} ' )
107+ max_kb_error .error (_ ('Vectorized paragraph: {paragraph_id_list} error {error} {traceback}' ).format (
108+ paragraph_id_list = paragraph_id_list , error = str (e ), traceback = traceback .format_exc ()))
106109 status = Status .error
107110 finally :
108111 QuerySet (Paragraph ).filter (id__in = paragraph_id_list ).update (** {'status' : status })
109- max_kb .info (f'结束--->向量化段落:{ paragraph_id_list } ' )
112+ max_kb .info (
113+ _ ('End--->Embedding paragraph: {paragraph_id_list}' ).format (paragraph_id_list = paragraph_id_list ))
110114
111115 @staticmethod
112116 def embedding_by_paragraph (paragraph_id , embedding_model : Embeddings ):
@@ -115,7 +119,7 @@ def embedding_by_paragraph(paragraph_id, embedding_model: Embeddings):
115119 @param paragraph_id: 段落id
116120 @param embedding_model: 向量模型
117121 """
118- max_kb .info (f"开始 --->向量化段落: { paragraph_id } " )
122+ max_kb .info (_ ( 'Start --->Embedding paragraph: {paragraph_id}' ). format ( paragraph_id = paragraph_id ) )
119123 # 更新到开始状态
120124 ListenerManagement .update_status (QuerySet (Paragraph ).filter (id = paragraph_id ), TaskType .EMBEDDING , State .STARTED )
121125 try :
@@ -140,11 +144,12 @@ def is_the_task_interrupted():
140144 ListenerManagement .update_status (QuerySet (Paragraph ).filter (id = paragraph_id ), TaskType .EMBEDDING ,
141145 State .SUCCESS )
142146 except Exception as e :
143- max_kb_error .error (f'向量化段落:{ paragraph_id } 出现错误{ str (e )} { traceback .format_exc ()} ' )
147+ max_kb_error .error (_ ('Vectorized paragraph: {paragraph_id} error {error} {traceback}' ).format (
148+ paragraph_id = paragraph_id , error = str (e ), traceback = traceback .format_exc ()))
144149 ListenerManagement .update_status (QuerySet (Paragraph ).filter (id = paragraph_id ), TaskType .EMBEDDING ,
145150 State .FAILURE )
146151 finally :
147- max_kb .info (f'结束 --->向量化段落: { paragraph_id } ' )
152+ max_kb .info (_ ( 'End --->Embedding paragraph: {paragraph_id}'). format ( paragraph_id = paragraph_id ) )
148153
149154 @staticmethod
150155 def embedding_by_data_list (data_list : List , embedding_model : Embeddings ):
@@ -258,7 +263,8 @@ def is_the_task_interrupted():
258263
259264 if is_the_task_interrupted ():
260265 return
261- max_kb .info (f"开始--->向量化文档:{ document_id } " )
266+ max_kb .info (_ ('Start--->Embedding document: {document_id}' ).format (document_id = document_id )
267+ )
262268 # 批量修改状态为PADDING
263269 ListenerManagement .update_status (QuerySet (Document ).filter (id = document_id ), TaskType .EMBEDDING ,
264270 State .STARTED )
@@ -279,11 +285,12 @@ def is_the_task_interrupted():
279285 document_id )),
280286 is_the_task_interrupted )
281287 except Exception as e :
282- max_kb_error .error (f'向量化文档:{ document_id } 出现错误{ str (e )} { traceback .format_exc ()} ' )
288+ max_kb_error .error (_ ('Vectorized document: {document_id} error {error} {traceback}' ).format (
289+ document_id = document_id , error = str (e ), traceback = traceback .format_exc ()))
283290 finally :
284291 ListenerManagement .post_update_document_status (document_id , TaskType .EMBEDDING )
285292 ListenerManagement .get_aggregation_document_status (document_id )()
286- max_kb .info (f"结束 --->向量化文档: { document_id } " )
293+ max_kb .info (_ ( 'End --->Embedding document: {document_id}' ). format ( document_id = document_id ) )
287294 un_lock ('embedding' + str (document_id ))
288295
289296 @staticmethod
@@ -294,17 +301,18 @@ def embedding_by_dataset(dataset_id, embedding_model: Embeddings):
294301 @param embedding_model 向量模型
295302 :return: None
296303 """
297- max_kb .info (f"开始 --->向量化数据集: { dataset_id } " )
304+ max_kb .info (_ ( 'Start --->Embedding dataset: {dataset_id}' ). format ( dataset_id = dataset_id ) )
298305 try :
299306 ListenerManagement .delete_embedding_by_dataset (dataset_id )
300307 document_list = QuerySet (Document ).filter (dataset_id = dataset_id )
301- max_kb .info (f"数据集文档: { [ d . name for d in document_list ] } " )
308+ max_kb .info (_ ( 'Start--->Embedding document: {document_list}' ). format ( document_list = document_list ) )  # placeholder must match the format() keyword exactly; '{ document_list}' raised KeyError and aborted the embedding loop
302309 for document in document_list :
303310 ListenerManagement .embedding_by_document (document .id , embedding_model = embedding_model )
304311 except Exception as e :
305- max_kb_error .error (f'向量化数据集:{ dataset_id } 出现错误{ str (e )} { traceback .format_exc ()} ' )
312+ max_kb_error .error (_ ('Vectorized dataset: {dataset_id} error {error} {traceback}' ).format (
313+ dataset_id = dataset_id , error = str (e ), traceback = traceback .format_exc ()))
306314 finally :
307- max_kb .info (f"结束 --->向量化数据集: { dataset_id } " )
315+ max_kb .info (_ ( 'End --->Embedding dataset: {dataset_id}' ). format ( dataset_id = dataset_id ) )
308316
309317 @staticmethod
310318 def delete_embedding_by_document (document_id ):
0 commit comments