77from django .utils import timezone
88from apscheduler .schedulers .background import BackgroundScheduler
99from django_apscheduler .jobstores import DjangoJobStore
10- from application .models import Application , Chat
11- from django .db .models import Q
10+ from application .models import Application , Chat , ChatRecord
11+ from django .db .models import Q , Max
1212from common .lock .impl .file_lock import FileLock
1313from dataset .models import File
14-
15-
14+ from django .db import connection
1615
# Module-level scheduler setup: create a BackgroundScheduler and add a
# DjangoJobStore to it under the name "default".
1716scheduler = BackgroundScheduler ()
1817scheduler .add_jobstore (DjangoJobStore (), "default" )
@@ -32,23 +31,42 @@ def clean_chat_log_job():
3231
3332 query_conditions = Q ()
3433 for app_id , cutoff_date in cutoff_dates .items ():
35- query_conditions |= Q (application_id = app_id , create_time__lt = cutoff_date )
36-
34+ query_conditions |= Q (chat__application_id = app_id , create_time__lt = cutoff_date )
3735 batch_size = 500
3836 while True :
3937 with transaction .atomic ():
40- logs_to_delete = Chat .objects .filter (query_conditions ).values_list ( ' id' , flat = True )[: batch_size ]
41- count = logs_to_delete . count ()
42- logs_to_delete_str = [ str ( uuid ) for uuid in logs_to_delete ]
43- if count == 0 :
38+ chat_records = ChatRecord .objects .filter (query_conditions ).select_related ( 'chat' ). only ( ' id' , 'chat_id' ,
39+ 'create_time' )[
40+ : batch_size ]
41+ if not chat_records :
4442 break
45- deleted_count , _ = Chat .objects .filter (id__in = logs_to_delete ).delete ()
46- # 删除对应的文件
47- File .objects .filter (meta__chat_id__in = logs_to_delete_str ).delete ()
43+ chat_record_ids = [record .id for record in chat_records ]
44+ chat_ids = {record .chat_id for record in chat_records }
45+
46+ # 计算每个 chat_id 的最大 create_time
47+ max_create_times = ChatRecord .objects .filter (id__in = chat_record_ids ).values ('chat_id' ).annotate (
48+ max_create_time = Max ('create_time' ))
49+
50+ # 收集需要删除的文件
51+ files_to_delete = []
52+ for record in chat_records :
53+ max_create_time = next (
54+ (item ['max_create_time' ] for item in max_create_times if item ['chat_id' ] == record .chat_id ), None )
55+ if max_create_time :
56+ files_to_delete .extend (
57+ File .objects .filter (meta__chat_id = str (record .chat_id ), create_time__lt = max_create_time )
58+ )
59+ # 删除 ChatRecord
60+ deleted_count = ChatRecord .objects .filter (id__in = chat_record_ids ).delete ()[0 ]
61+
62+ # 删除没有关联 ChatRecord 的 Chat
63+ Chat .objects .filter (chatrecord__isnull = True , id__in = chat_ids ).delete ()
64+ File .objects .filter (loid__in = [file .loid for file in files_to_delete ]).delete ()
65+
4866 if deleted_count < batch_size :
4967 break
5068
51- logging .getLogger ("max_kb" ).info (_ ( 'end clean chat log' ) )
69+ logging .getLogger ("max_kb" ).info (f'结束清理对话记录' )
5270
5371
5472def run ():
0 commit comments