Skip to content

Commit e919666

Browse files
committed
fix: Filter special character
1 parent 41c0503 commit e919666

File tree

2 files changed

+25
-12
lines changed

2 files changed

+25
-12
lines changed

apps/application/flow/step_node/knowledge_write_node/impl/base_knowledge_write_node.py

Lines changed: 15 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
from application.flow.i_step_node import NodeResult
1818
from application.flow.step_node.knowledge_write_node.i_knowledge_write_node import IKnowledgeWriteNode
1919
from common.chunk import text_to_chunk
20-
from common.utils.common import bulk_create_in_batches
20+
from common.utils.common import bulk_create_in_batches, filter_special_character
2121
from knowledge.models import Document, KnowledgeType, Paragraph, File, FileSourceType, Problem, ProblemParagraphMapping, \
2222
Tag, DocumentTag
2323
from knowledge.serializers.common import ProblemParagraphObject, ProblemParagraphManage
@@ -83,10 +83,11 @@ def get_paragraph_problem_model(knowledge_id: str, document_id: str, instance: D
8383
paragraph = Paragraph(
8484
id=uuid.uuid7(),
8585
document_id=document_id,
86-
content=instance.get("content"),
86+
content=filter_special_character(instance.get("content")),
8787
knowledge_id=knowledge_id,
8888
title=instance.get("title") if 'title' in instance else '',
89-
chunks=instance.get('chunks') if 'chunks' in instance else text_to_chunk(instance.get("content")),
89+
chunks=[filter_special_character(c) for c in (instance.get('chunks') if 'chunks' in instance else text_to_chunk(
90+
instance.get("content")))],
9091
)
9192

9293
problem_paragraph_object_list = [ProblemParagraphObject(
@@ -145,11 +146,11 @@ def get_document_paragraph_model(knowledge_id: str, instance: Dict):
145146
instance.get('paragraphs') if 'paragraphs' in instance else []
146147
)
147148

148-
def save_knowledge_tags(knowledge_id: str, tags: List[Dict[str,Any]]):
149149

150+
def save_knowledge_tags(knowledge_id: str, tags: List[Dict[str, Any]]):
150151
existed_tags_dict = {
151152
(key, value): str(tag_id)
152-
for key,value,tag_id in QuerySet(Tag).filter(knowledge_id=knowledge_id).values_list("key", "value", "id")
153+
for key, value, tag_id in QuerySet(Tag).filter(knowledge_id=knowledge_id).values_list("key", "value", "id")
153154
}
154155

155156
tag_model_list = []
@@ -158,23 +159,24 @@ def save_knowledge_tags(knowledge_id: str, tags: List[Dict[str,Any]]):
158159
key = tag.get("key")
159160
value = tag.get("value")
160161

161-
if (key,value) not in existed_tags_dict:
162+
if (key, value) not in existed_tags_dict:
162163
tag_model = Tag(
163164
id=uuid.uuid7(),
164165
knowledge_id=knowledge_id,
165166
key=key,
166167
value=value
167168
)
168169
tag_model_list.append(tag_model)
169-
new_tag_dict[(key,value)] = str(tag_model.id)
170+
new_tag_dict[(key, value)] = str(tag_model.id)
170171

171172
if tag_model_list:
172173
Tag.objects.bulk_create(tag_model_list)
173174

174-
all_tag_dict={**existed_tags_dict,**new_tag_dict}
175+
all_tag_dict = {**existed_tags_dict, **new_tag_dict}
175176

176177
return all_tag_dict, new_tag_dict
177178

179+
178180
def batch_add_document_tag(document_tag_map: Dict[str, List[str]]):
179181
"""
180182
批量添加文档-标签关联
@@ -199,12 +201,13 @@ def batch_add_document_tag(document_tag_map: Dict[str, List[str]]):
199201
)
200202
for doc_id, tag_ids in document_tag_map.items()
201203
for tag_id in tag_ids
202-
if (doc_id,tag_id) not in existed_relations
204+
if (doc_id, tag_id) not in existed_relations
203205
]
204206

205207
if new_relations:
206208
QuerySet(DocumentTag).bulk_create(new_relations)
207209

210+
208211
class BaseKnowledgeWriteNode(IKnowledgeWriteNode):
209212

210213
def save_context(self, details, workflow_manage):
@@ -241,7 +244,7 @@ def save(self, document_list):
241244
for tag in single_document_tag_list:
242245
tag_key = (tag['key'], tag['value'])
243246
if tag_key not in knowledge_tag_dict:
244-
knowledge_tag_dict[tag_key]= tag
247+
knowledge_tag_dict[tag_key] = tag
245248

246249
if single_document_tag_list:
247250
document_tags_map[str(document_instance.id)] = single_document_tag_list
@@ -259,9 +262,9 @@ def save(self, document_list):
259262
# 为每个文档添加其对应的标签
260263
for doc_id, doc_tags in document_tags_map.items():
261264
doc_tag_ids = [
262-
all_tag_dict[(tag.get("key"),tag.get("value"))]
265+
all_tag_dict[(tag.get("key"), tag.get("value"))]
263266
for tag in doc_tags
264-
if (tag.get("key"),tag.get("value")) in all_tag_dict
267+
if (tag.get("key"), tag.get("value")) in all_tag_dict
265268
]
266269
if doc_tag_ids:
267270
document_tag_id_map[doc_id] = doc_tag_ids

apps/common/utils/common.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -340,3 +340,13 @@ def generate_uuid(tag: str):
340340

341341
def filter_workspace(query_list):
    """Return the entries of ``query_list`` whose ``name`` is not ``"workspace_id"``.

    The input is not modified; a new list is built in the original order.
    """
    kept = []
    for entry in query_list:
        if entry.name != "workspace_id":
            kept.append(entry)
    return kept
343+
344+
345+
def filter_special_character(_str):
    """
    Filter special characters out of a string.

    Two forms of the NUL character are removed:

    * the real NUL character (code point 0), and
    * its escaped textual form: a backslash followed by ``u0000``.

    NOTE(review): presumably these are stripped because the storage layer
    rejects NUL in text values -- confirm against the database in use.

    :param _str: the text to clean; ``None`` is passed through unchanged so
                 callers may forward an optional field directly.
    :return: ``_str`` with every listed sequence removed, or ``None``.
    """
    if _str is None:
        # Nothing to clean; avoid AttributeError on a missing optional field.
        return _str
    # The real NUL is removed first so that an escaped sequence that only
    # becomes contiguous after that removal is still caught by the second pass.
    s_list = ["\x00", "\\u0000"]
    for t in s_list:
        _str = _str.replace(t, '')
    return _str

0 commit comments

Comments
 (0)