|
56 | 56 | delete_embedding_by_document, update_embedding_dataset_id, delete_embedding_by_paragraph_ids, \ |
57 | 57 | embedding_by_document_list |
58 | 58 | from smartdoc.conf import PROJECT_DIR |
| 59 | +from django.db import models |
59 | 60 |
|
60 | 61 | parse_qa_handle_list = [XlsParseQAHandle(), CsvParseQAHandle(), XlsxParseQAHandle()] |
61 | 62 | parse_table_handle_list = [CsvSplitHandle(), XlsSplitHandle(), XlsxSplitHandle()] |
@@ -442,6 +443,7 @@ def sync(self, with_valid=True, with_embedding=True): |
442 | 443 | QuerySet(model=Paragraph).filter(document_id=document_id).delete() |
443 | 444 | # 删除问题 |
444 | 445 | QuerySet(model=ProblemParagraphMapping).filter(document_id=document_id).delete() |
| 446 | + delete_problems_and_mappings([document_id]) |
445 | 447 | # 删除向量库 |
446 | 448 | delete_embedding_by_document(document_id) |
447 | 449 | paragraphs = get_split_model('web.md').parse(result.content) |
@@ -660,7 +662,7 @@ def delete(self): |
660 | 662 | # 删除段落 |
661 | 663 | QuerySet(model=Paragraph).filter(document_id=document_id).delete() |
662 | 664 | # 删除问题 |
663 | | - QuerySet(model=ProblemParagraphMapping).filter(document_id=document_id).delete() |
| 665 | + delete_problems_and_mappings([document_id]) |
664 | 666 | # 删除向量库 |
665 | 667 | delete_embedding_by_document(document_id) |
666 | 668 | return True |
@@ -987,7 +989,7 @@ def batch_delete(self, instance: Dict, with_valid=True): |
987 | 989 | document_id_list = instance.get("id_list") |
988 | 990 | QuerySet(Document).filter(id__in=document_id_list).delete() |
989 | 991 | QuerySet(Paragraph).filter(document_id__in=document_id_list).delete() |
990 | | - QuerySet(ProblemParagraphMapping).filter(document_id__in=document_id_list).delete() |
| 992 | + delete_problems_and_mappings(document_id_list) |
991 | 993 | # 删除向量库 |
992 | 994 | delete_embedding_by_document_list(document_id_list) |
993 | 995 | return True |
@@ -1086,3 +1088,14 @@ def file_to_paragraph(file, pattern_list: List, with_filter: bool, limit: int): |
1086 | 1088 | if split_handle.support(file, get_buffer): |
1087 | 1089 | return split_handle.handle(file, pattern_list, with_filter, limit, get_buffer, save_image) |
1088 | 1090 | return default_split_handle.handle(file, pattern_list, with_filter, limit, get_buffer, save_image) |
| 1091 | + |
| 1092 | + |
def delete_problems_and_mappings(document_ids):
    """Delete the problem-paragraph mappings of the given documents and any
    problems that are left without a mapping as a result.

    A problem is removed only when *every* mapping that references it belongs
    to one of ``document_ids``; problems that are still mapped to paragraphs
    of other documents are kept.

    NOTE: the affected problems are discovered through the mapping rows, so
    callers must not delete the ``ProblemParagraphMapping`` rows of these
    documents before calling this function - otherwise nothing is found and
    no problem is cleaned up.

    :param document_ids: iterable of document ids whose mappings are removed
    :return: None
    """
    # Materialise once so a one-shot iterable can safely be reused below.
    document_ids = list(document_ids)
    mappings_of_documents = ProblemParagraphMapping.objects.filter(document_id__in=document_ids)

    # Evaluate the query a single time; a set also collapses duplicate ids.
    problem_ids = set(mappings_of_documents.values_list('problem_id', flat=True))
    if not problem_ids:
        # No mappings for these documents, hence nothing to delete.
        return

    # Problems that are still referenced by a mapping outside the documents
    # being deleted must survive. Comparing against these (instead of testing
    # "total mapping count == 1") also catches problems that have several
    # mappings which all belong to the deleted documents.
    still_referenced_ids = set(
        ProblemParagraphMapping.objects.filter(problem_id__in=problem_ids)
        .exclude(document_id__in=document_ids)
        .values_list('problem_id', flat=True))
    orphaned_problem_ids = problem_ids - still_referenced_ids

    # Remove the mappings first so no mapping row ever points at a problem
    # that has already been deleted.
    mappings_of_documents.delete()
    if orphaned_problem_ids:
        Problem.objects.filter(id__in=orphaned_problem_ids).delete()
0 commit comments