1717from application .flow .i_step_node import NodeResult
1818from application .flow .step_node .knowledge_write_node .i_knowledge_write_node import IKnowledgeWriteNode
1919from common .chunk import text_to_chunk
20- from common .utils .common import bulk_create_in_batches
20+ from common .utils .common import bulk_create_in_batches , filter_special_character
2121from knowledge .models import Document , KnowledgeType , Paragraph , File , FileSourceType , Problem , ProblemParagraphMapping , \
2222 Tag , DocumentTag
2323from knowledge .serializers .common import ProblemParagraphObject , ProblemParagraphManage
@@ -83,10 +83,11 @@ def get_paragraph_problem_model(knowledge_id: str, document_id: str, instance: D
8383 paragraph = Paragraph (
8484 id = uuid .uuid7 (),
8585 document_id = document_id ,
86- content = instance .get ("content" ),
86+ content = filter_special_character ( instance .get ("content" ) ),
8787 knowledge_id = knowledge_id ,
8888 title = instance .get ("title" ) if 'title' in instance else '' ,
89- chunks = instance .get ('chunks' ) if 'chunks' in instance else text_to_chunk (instance .get ("content" )),
89+ chunks = [filter_special_character (c ) for c in (instance .get ('chunks' ) if 'chunks' in instance else text_to_chunk (
90+ instance .get ("content" )))],
9091 )
9192
9293 problem_paragraph_object_list = [ProblemParagraphObject (
@@ -145,11 +146,11 @@ def get_document_paragraph_model(knowledge_id: str, instance: Dict):
145146 instance .get ('paragraphs' ) if 'paragraphs' in instance else []
146147 )
147148
148- def save_knowledge_tags (knowledge_id : str , tags : List [Dict [str ,Any ]]):
149149
150+ def save_knowledge_tags (knowledge_id : str , tags : List [Dict [str , Any ]]):
150151 existed_tags_dict = {
151152 (key , value ): str (tag_id )
152- for key ,value ,tag_id in QuerySet (Tag ).filter (knowledge_id = knowledge_id ).values_list ("key" , "value" , "id" )
153+ for key , value , tag_id in QuerySet (Tag ).filter (knowledge_id = knowledge_id ).values_list ("key" , "value" , "id" )
153154 }
154155
155156 tag_model_list = []
@@ -158,23 +159,24 @@ def save_knowledge_tags(knowledge_id: str, tags: List[Dict[str,Any]]):
158159 key = tag .get ("key" )
159160 value = tag .get ("value" )
160161
161- if (key ,value ) not in existed_tags_dict :
162+ if (key , value ) not in existed_tags_dict :
162163 tag_model = Tag (
163164 id = uuid .uuid7 (),
164165 knowledge_id = knowledge_id ,
165166 key = key ,
166167 value = value
167168 )
168169 tag_model_list .append (tag_model )
169- new_tag_dict [(key ,value )] = str (tag_model .id )
170+ new_tag_dict [(key , value )] = str (tag_model .id )
170171
171172 if tag_model_list :
172173 Tag .objects .bulk_create (tag_model_list )
173174
174- all_tag_dict = {** existed_tags_dict ,** new_tag_dict }
175+ all_tag_dict = {** existed_tags_dict , ** new_tag_dict }
175176
176177 return all_tag_dict , new_tag_dict
177178
179+
178180def batch_add_document_tag (document_tag_map : Dict [str , List [str ]]):
179181 """
180182 Batch-add document-tag associations
@@ -199,12 +201,13 @@ def batch_add_document_tag(document_tag_map: Dict[str, List[str]]):
199201 )
200202 for doc_id , tag_ids in document_tag_map .items ()
201203 for tag_id in tag_ids
202- if (doc_id ,tag_id ) not in existed_relations
204+ if (doc_id , tag_id ) not in existed_relations
203205 ]
204206
205207 if new_relations :
206208 QuerySet (DocumentTag ).bulk_create (new_relations )
207209
210+
208211class BaseKnowledgeWriteNode (IKnowledgeWriteNode ):
209212
210213 def save_context (self , details , workflow_manage ):
@@ -241,7 +244,7 @@ def save(self, document_list):
241244 for tag in single_document_tag_list :
242245 tag_key = (tag ['key' ], tag ['value' ])
243246 if tag_key not in knowledge_tag_dict :
244- knowledge_tag_dict [tag_key ]= tag
247+ knowledge_tag_dict [tag_key ] = tag
245248
246249 if single_document_tag_list :
247250 document_tags_map [str (document_instance .id )] = single_document_tag_list
@@ -259,9 +262,9 @@ def save(self, document_list):
259262 # Attach to each document its corresponding tags
260263 for doc_id , doc_tags in document_tags_map .items ():
261264 doc_tag_ids = [
262- all_tag_dict [(tag .get ("key" ),tag .get ("value" ))]
265+ all_tag_dict [(tag .get ("key" ), tag .get ("value" ))]
263266 for tag in doc_tags
264- if (tag .get ("key" ),tag .get ("value" )) in all_tag_dict
267+ if (tag .get ("key" ), tag .get ("value" )) in all_tag_dict
265268 ]
266269 if doc_tag_ids :
267270 document_tag_id_map [doc_id ] = doc_tag_ids
0 commit comments