Skip to content

Commit 8acd069

Browse files
committed
fix: 修复批量删除文档没有删除问题的缺陷
--bug=1048687 --user=王孝刚 【知识库】删除文档不会删除文档中分段关联的问题 https://www.tapd.cn/57709429/s/1624544
1 parent 6508404 commit 8acd069

File tree

2 files changed

+32
-17
lines changed

2 files changed

+32
-17
lines changed

apps/dataset/serializers/document_serializers.py

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
from celery_once import AlreadyQueued
1919
from django.core import validators
2020
from django.db import transaction
21-
from django.db.models import QuerySet
21+
from django.db.models import QuerySet, Count
2222
from django.db.models.functions import Substr, Reverse
2323
from django.http import HttpResponse
2424
from drf_yasg import openapi
@@ -1091,11 +1091,17 @@ def file_to_paragraph(file, pattern_list: List, with_filter: bool, limit: int):
10911091

10921092

10931093
def delete_problems_and_mappings(document_ids):
1094-
problem_ids = ProblemParagraphMapping.objects.filter(document_id__in=document_ids).values_list('problem_id',
1095-
flat=True)
1094+
# 获取所有需要删除的问题ID
1095+
problem_ids = list(
1096+
ProblemParagraphMapping.objects.filter(document_id__in=document_ids).values_list('problem_id', flat=True))
1097+
10961098
if problem_ids:
1097-
problem_counts = ProblemParagraphMapping.objects.filter(problem_id__in=problem_ids).values(
1098-
'problem_id').annotate(count=models.Count('id'))
1099-
problem_ids_to_delete = [item['problem_id'] for item in problem_counts if item['count'] == 1]
1099+
ProblemParagraphMapping.objects.filter(document_id__in=document_ids).delete()
1100+
remaining_problem_counts = ProblemParagraphMapping.objects.filter(problem_id__in=problem_ids).values(
1101+
'problem_id').annotate(count=Count('problem_id'))
1102+
1103+
problem_ids_to_delete = [pid for pid in problem_ids if
1104+
not any(pc['problem_id'] == pid for pc in remaining_problem_counts)]
11001105
Problem.objects.filter(id__in=problem_ids_to_delete).delete()
1101-
ProblemParagraphMapping.objects.filter(document_id__in=document_ids).delete()
1106+
else:
1107+
ProblemParagraphMapping.objects.filter(document_id__in=document_ids).delete()

apps/dataset/serializers/paragraph_serializers.py

Lines changed: 19 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111

1212
from celery_once import AlreadyQueued
1313
from django.db import transaction
14-
from django.db.models import QuerySet
14+
from django.db.models import QuerySet, Count
1515
from drf_yasg import openapi
1616
from rest_framework import serializers
1717

@@ -291,7 +291,7 @@ def batch_delete(self, instance: Dict, with_valid=True):
291291
self.is_valid(raise_exception=True)
292292
paragraph_id_list = instance.get("id_list")
293293
QuerySet(Paragraph).filter(id__in=paragraph_id_list).delete()
294-
QuerySet(ProblemParagraphMapping).filter(paragraph_id__in=paragraph_id_list).delete()
294+
delete_problems_and_mappings(paragraph_id_list)
295295
update_document_char_length(self.data.get('document_id'))
296296
# 删除向量库
297297
delete_embedding_by_paragraph_ids(paragraph_id_list)
@@ -541,14 +541,7 @@ def delete(self, with_valid=False):
541541
self.is_valid(raise_exception=True)
542542
paragraph_id = self.data.get('paragraph_id')
543543
Paragraph.objects.filter(id=paragraph_id).delete()
544-
545-
problem_id = ProblemParagraphMapping.objects.filter(paragraph_id=paragraph_id).values_list('problem_id',
546-
flat=True).first()
547-
548-
if problem_id is not None:
549-
if ProblemParagraphMapping.objects.filter(problem_id=problem_id).count() == 1:
550-
Problem.objects.filter(id=problem_id).delete()
551-
ProblemParagraphMapping.objects.filter(paragraph_id=paragraph_id).delete()
544+
delete_problems_and_mappings([paragraph_id])
552545

553546
update_document_char_length(self.data.get('document_id'))
554547
delete_embedding_by_paragraph(paragraph_id)
@@ -755,3 +748,19 @@ def batch_generate_related(self, instance: Dict, with_valid=True):
755748
prompt)
756749
except AlreadyQueued as e:
757750
raise AppApiException(500, "任务正在执行中,请勿重复下发")
751+
752+
753+
def delete_problems_and_mappings(paragraph_ids):
754+
problem_ids = list(
755+
ProblemParagraphMapping.objects.filter(paragraph_id__in=paragraph_ids).values_list('problem_id', flat=True))
756+
757+
if problem_ids:
758+
ProblemParagraphMapping.objects.filter(paragraph_id__in=paragraph_ids).delete()
759+
remaining_problem_counts = ProblemParagraphMapping.objects.filter(problem_id__in=problem_ids).values(
760+
'problem_id').annotate(count=Count('problem_id'))
761+
762+
problem_ids_to_delete = [pid for pid in problem_ids if
763+
not any(pc['problem_id'] == pid for pc in remaining_problem_counts)]
764+
Problem.objects.filter(id__in=problem_ids_to_delete).delete()
765+
else:
766+
ProblemParagraphMapping.objects.filter(paragraph_id__in=paragraph_ids).delete()

0 commit comments

Comments
 (0)