Skip to content

Commit debee49

Browse files
committed
fix: 修复删除文档不会删除文档中分段关联的问题的缺陷
--bug=1048687 --user=王孝刚 【知识库】删除文档不会删除文档中分段关联的问题 https://www.tapd.cn/57709429/s/1623302
1 parent d4f9ac9 commit debee49

File tree

1 file changed

+15
-2
lines changed

1 file changed

+15
-2
lines changed

apps/dataset/serializers/document_serializers.py

Lines changed: 15 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -56,6 +56,7 @@
5656
delete_embedding_by_document, update_embedding_dataset_id, delete_embedding_by_paragraph_ids, \
5757
embedding_by_document_list
5858
from smartdoc.conf import PROJECT_DIR
59+
from django.db import models
5960

6061
parse_qa_handle_list = [XlsParseQAHandle(), CsvParseQAHandle(), XlsxParseQAHandle()]
6162
parse_table_handle_list = [CsvSplitHandle(), XlsSplitHandle(), XlsxSplitHandle()]
@@ -442,6 +443,7 @@ def sync(self, with_valid=True, with_embedding=True):
442443
QuerySet(model=Paragraph).filter(document_id=document_id).delete()
443444
# 删除问题
444445
QuerySet(model=ProblemParagraphMapping).filter(document_id=document_id).delete()
446+
delete_problems_and_mappings([document_id])
445447
# 删除向量库
446448
delete_embedding_by_document(document_id)
447449
paragraphs = get_split_model('web.md').parse(result.content)
@@ -660,7 +662,7 @@ def delete(self):
660662
# 删除段落
661663
QuerySet(model=Paragraph).filter(document_id=document_id).delete()
662664
# 删除问题
663-
QuerySet(model=ProblemParagraphMapping).filter(document_id=document_id).delete()
665+
delete_problems_and_mappings([document_id])
664666
# 删除向量库
665667
delete_embedding_by_document(document_id)
666668
return True
@@ -987,7 +989,7 @@ def batch_delete(self, instance: Dict, with_valid=True):
987989
document_id_list = instance.get("id_list")
988990
QuerySet(Document).filter(id__in=document_id_list).delete()
989991
QuerySet(Paragraph).filter(document_id__in=document_id_list).delete()
990-
QuerySet(ProblemParagraphMapping).filter(document_id__in=document_id_list).delete()
992+
delete_problems_and_mappings(document_id_list)
991993
# 删除向量库
992994
delete_embedding_by_document_list(document_id_list)
993995
return True
@@ -1086,3 +1088,14 @@ def file_to_paragraph(file, pattern_list: List, with_filter: bool, limit: int):
10861088
if split_handle.support(file, get_buffer):
10871089
return split_handle.handle(file, pattern_list, with_filter, limit, get_buffer, save_image)
10881090
return default_split_handle.handle(file, pattern_list, with_filter, limit, get_buffer, save_image)
1091+
1092+
1093+
def delete_problems_and_mappings(document_ids):
    """Delete the problem/paragraph mappings of the given documents and any
    problems that are left without a mapping as a result.

    A problem is removed only when, after the mappings belonging to
    ``document_ids`` are gone, no other ``ProblemParagraphMapping`` row still
    points at it. Problems that are still referenced by mappings of other
    documents are kept.

    :param document_ids: iterable of document ids whose mappings are removed.
    :return: None
    """
    mappings = ProblemParagraphMapping.objects.filter(document_id__in=document_ids)

    # Snapshot the affected problem ids *before* deleting: the queryset is
    # lazy and would yield nothing once the mapping rows are gone.
    problem_ids = set(mappings.values_list('problem_id', flat=True))

    mappings.delete()

    if not problem_ids:
        return

    # Anything still referenced here is referenced by a mapping that does not
    # belong to the deleted documents, so it must be kept.
    # The previous "total mapping count == 1" test missed problems linked to
    # several paragraphs of the deleted documents (count >= 2), leaving them
    # orphaned once all of their mappings were removed.
    still_referenced = set(
        ProblemParagraphMapping.objects.filter(problem_id__in=problem_ids).values_list('problem_id', flat=True)
    )
    orphaned_problem_ids = problem_ids - still_referenced

    if orphaned_problem_ids:
        Problem.objects.filter(id__in=orphaned_problem_ids).delete()

0 commit comments

Comments
 (0)