44import datetime
55
66from django .db import transaction
7- from django .db .models .fields .json import KeyTextTransform
87from django .utils import timezone
98from apscheduler .schedulers .background import BackgroundScheduler
109from django_apscheduler .jobstores import DjangoJobStore
11- from application .models import Application , Chat
12- from django .db .models import Q
10+ from application .models import Application , Chat , ChatRecord
11+ from django .db .models import Q , Max
1312from common .lock .impl .file_lock import FileLock
1413from dataset .models import File
15- from django .db .models .functions import Cast
16- from django .db import models
14+ from django .db import connection
1715
1816scheduler = BackgroundScheduler ()
1917scheduler .add_jobstore (DjangoJobStore (), "default" )
@@ -32,19 +30,38 @@ def clean_chat_log_job():
3230
3331 query_conditions = Q ()
3432 for app_id , cutoff_date in cutoff_dates .items ():
35- query_conditions |= Q (application_id = app_id , create_time__lt = cutoff_date )
36-
33+ query_conditions |= Q (chat__application_id = app_id , create_time__lt = cutoff_date )
3734 batch_size = 500
3835 while True :
3936 with transaction .atomic ():
40- logs_to_delete = Chat .objects .filter (query_conditions ).values_list ( ' id' , flat = True )[: batch_size ]
41- count = logs_to_delete . count ()
42- logs_to_delete_str = [ str ( uuid ) for uuid in logs_to_delete ]
43- if count == 0 :
37+ chat_records = ChatRecord .objects .filter (query_conditions ).select_related ( 'chat' ). only ( ' id' , 'chat_id' ,
38+ 'create_time' )[
39+ : batch_size ]
40+ if not chat_records :
4441 break
45- deleted_count , _ = Chat .objects .filter (id__in = logs_to_delete ).delete ()
46- # 删除对应的文件
47- File .objects .filter (meta__chat_id__in = logs_to_delete_str ).delete ()
42+ chat_record_ids = [record .id for record in chat_records ]
43+ chat_ids = {record .chat_id for record in chat_records }
44+
45+ # Compute the maximum create_time for each chat_id
46+ max_create_times = ChatRecord .objects .filter (id__in = chat_record_ids ).values ('chat_id' ).annotate (
47+ max_create_time = Max ('create_time' ))
48+
49+ # Collect the files that need to be deleted
50+ files_to_delete = []
51+ for record in chat_records :
52+ max_create_time = next (
53+ (item ['max_create_time' ] for item in max_create_times if item ['chat_id' ] == record .chat_id ), None )
54+ if max_create_time :
55+ files_to_delete .extend (
56+ File .objects .filter (meta__chat_id = str (record .chat_id ), create_time__lt = max_create_time )
57+ )
58+ # Delete the ChatRecord rows
59+ deleted_count = ChatRecord .objects .filter (id__in = chat_record_ids ).delete ()[0 ]
60+
61+ # Delete Chats that no longer have any associated ChatRecord
62+ Chat .objects .filter (chatrecord__isnull = True , id__in = chat_ids ).delete ()
63+ File .objects .filter (loid__in = [file .loid for file in files_to_delete ]).delete ()
64+
4865 if deleted_count < batch_size :
4966 break
5067
0 commit comments