diff --git a/apps/application/flow/step_node/knowledge_write_node/impl/base_knowledge_write_node.py b/apps/application/flow/step_node/knowledge_write_node/impl/base_knowledge_write_node.py index 41c9d5c07fb..601f10ceb88 100644 --- a/apps/application/flow/step_node/knowledge_write_node/impl/base_knowledge_write_node.py +++ b/apps/application/flow/step_node/knowledge_write_node/impl/base_knowledge_write_node.py @@ -17,7 +17,7 @@ from application.flow.i_step_node import NodeResult from application.flow.step_node.knowledge_write_node.i_knowledge_write_node import IKnowledgeWriteNode from common.chunk import text_to_chunk -from common.utils.common import bulk_create_in_batches +from common.utils.common import bulk_create_in_batches, filter_special_character from knowledge.models import Document, KnowledgeType, Paragraph, File, FileSourceType, Problem, ProblemParagraphMapping, \ Tag, DocumentTag from knowledge.serializers.common import ProblemParagraphObject, ProblemParagraphManage @@ -83,10 +83,11 @@ def get_paragraph_problem_model(knowledge_id: str, document_id: str, instance: D paragraph = Paragraph( id=uuid.uuid7(), document_id=document_id, - content=instance.get("content"), + content=filter_special_character(instance.get("content")), knowledge_id=knowledge_id, title=instance.get("title") if 'title' in instance else '', - chunks=instance.get('chunks') if 'chunks' in instance else text_to_chunk(instance.get("content")), + chunks=[filter_special_character(c) for c in (instance.get('chunks') if 'chunks' in instance else text_to_chunk( + instance.get("content")))], ) problem_paragraph_object_list = [ProblemParagraphObject( @@ -145,11 +146,11 @@ def get_document_paragraph_model(knowledge_id: str, instance: Dict): instance.get('paragraphs') if 'paragraphs' in instance else [] ) -def save_knowledge_tags(knowledge_id: str, tags: List[Dict[str,Any]]): +def save_knowledge_tags(knowledge_id: str, tags: List[Dict[str, Any]]): existed_tags_dict = { (key, value): str(tag_id) - for 
key,value,tag_id in QuerySet(Tag).filter(knowledge_id=knowledge_id).values_list("key", "value", "id") + for key, value, tag_id in QuerySet(Tag).filter(knowledge_id=knowledge_id).values_list("key", "value", "id") } tag_model_list = [] @@ -158,7 +159,7 @@ def save_knowledge_tags(knowledge_id: str, tags: List[Dict[str,Any]]): key = tag.get("key") value = tag.get("value") - if (key,value) not in existed_tags_dict: + if (key, value) not in existed_tags_dict: tag_model = Tag( id=uuid.uuid7(), knowledge_id=knowledge_id, @@ -166,15 +167,16 @@ def save_knowledge_tags(knowledge_id: str, tags: List[Dict[str,Any]]): value=value ) tag_model_list.append(tag_model) - new_tag_dict[(key,value)] = str(tag_model.id) + new_tag_dict[(key, value)] = str(tag_model.id) if tag_model_list: Tag.objects.bulk_create(tag_model_list) - all_tag_dict={**existed_tags_dict,**new_tag_dict} + all_tag_dict = {**existed_tags_dict, **new_tag_dict} return all_tag_dict, new_tag_dict + def batch_add_document_tag(document_tag_map: Dict[str, List[str]]): """ 批量添加文档-标签关联 @@ -199,12 +201,13 @@ def batch_add_document_tag(document_tag_map: Dict[str, List[str]]): ) for doc_id, tag_ids in document_tag_map.items() for tag_id in tag_ids - if (doc_id,tag_id) not in existed_relations + if (doc_id, tag_id) not in existed_relations ] if new_relations: QuerySet(DocumentTag).bulk_create(new_relations) + class BaseKnowledgeWriteNode(IKnowledgeWriteNode): def save_context(self, details, workflow_manage): @@ -241,7 +244,7 @@ def save(self, document_list): for tag in single_document_tag_list: tag_key = (tag['key'], tag['value']) if tag_key not in knowledge_tag_dict: - knowledge_tag_dict[tag_key]= tag + knowledge_tag_dict[tag_key] = tag if single_document_tag_list: document_tags_map[str(document_instance.id)] = single_document_tag_list @@ -259,9 +262,9 @@ def save(self, document_list): # 为每个文档添加其对应的标签 for doc_id, doc_tags in document_tags_map.items(): doc_tag_ids = [ - all_tag_dict[(tag.get("key"),tag.get("value"))] + 
def filter_special_character(_str):
    """
    Remove special characters from a piece of text.

    Two spellings of the NUL character are stripped:

    * the real NUL character (U+0000, ``"\\x00"``), and
    * the six-character escaped text ``\\u0000`` (backslash, ``u``, four
      zeros), as it appears in text that was serialized with escapes.

    NOTE(review): presumably this exists because the storage backend rejects
    NUL in text/JSON values (PostgreSQL does) - confirm against the models
    that receive the result.

    :param _str: text to clean; ``None`` and ``""`` are returned unchanged
    :return: the text with every NUL spelling removed
    """
    # ``instance.get("content")`` at the call site may yield None; there is
    # nothing to strip from None or an empty string, so hand it back as-is
    # instead of failing on ``None.replace``.
    if not _str:
        return _str
    # Order matters: drop real NULs first so that a NUL sitting inside an
    # escaped sequence cannot leave a fresh ``\u0000`` behind after the pass.
    s_list = ["\x00", "\\u0000"]
    for t in s_list:
        _str = _str.replace(t, '')
    return _str