Skip to content

Commit aba37e9

Browse files
committed
Add celery task to delete old anonymous exports
1 parent 3288199 commit aba37e9

File tree

3 files changed

+188
-1
lines changed

3 files changed

+188
-1
lines changed

kobo/settings/base.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -475,6 +475,10 @@
475475
'Enable automatic deletion of attachments for users who have exceeded '
476476
'their storage limits.'
477477
),
478+
'ANON_EXPORTS_CLEANUP_AGE': (
479+
30,
480+
'Number of minutes after which anonymous export tasks are cleaned up.'
481+
),
478482
'LIMIT_ATTACHMENT_REMOVAL_GRACE_PERIOD': (
479483
90,
480484
'Number of days to keep attachments after the user has exceeded their '
@@ -729,6 +733,7 @@
729733
'MASS_EMAIL_ENQUEUED_RECORD_EXPIRY',
730734
'MASS_EMAIL_TEST_EMAILS',
731735
'USAGE_LIMIT_ENFORCEMENT',
736+
'ANON_EXPORTS_CLEANUP_AGE',
732737
),
733738
'Rest Services': (
734739
'ALLOW_UNSECURED_HOOK_ENDPOINTS',
@@ -1445,6 +1450,12 @@ def dj_stripe_request_callback_method():
14451450
'options': {'queue': 'kpi_low_priority_queue'}
14461451
},
14471452
# Schedule every 15 minutes
1453+
'cleanup-anonymous-exports': {
1454+
'task': 'kpi.tasks.cleanup_anonymous_exports',
1455+
'schedule': crontab(minute='*/1'),
1456+
'options': {'queue': 'kpi_low_priority_queue'}
1457+
},
1458+
# Schedule every 15 minutes
14481459
'refresh-user-report-snapshot': {
14491460
'task': 'kobo.apps.user_reports.tasks.refresh_user_report_snapshots',
14501461
'schedule': crontab(minute='*/15'),

kpi/tasks.py

Lines changed: 61 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,29 @@
11
import time
2+
from datetime import timedelta
23

34
import requests
5+
from constance import config
46
from django.apps import apps
57
from django.conf import settings
68
from django.core import mail
9+
from django.core.cache import cache
710
from django.core.exceptions import ObjectDoesNotExist
811
from django.core.management import call_command
12+
from django.utils import timezone
913

1014
from kobo.apps.kobo_auth.shortcuts import User
1115
from kobo.apps.markdownx_uploader.tasks import remove_unused_markdown_files
1216
from kobo.celery import celery_app
1317
from kpi.constants import LIMIT_HOURS_23
1418
from kpi.maintenance_tasks import remove_old_asset_snapshots, remove_old_import_tasks
1519
from kpi.models.asset import Asset
16-
from kpi.models.import_export_task import ImportTask, SubmissionExportTask
20+
from kpi.models.import_export_task import (
21+
ImportExportStatusChoices,
22+
ImportTask,
23+
SubmissionExportTask,
24+
)
25+
from kpi.utils.log import logging
26+
from kpi.utils.object_permission import get_anonymous_user
1727

1828

1929
@celery_app.task(
@@ -68,6 +78,56 @@ def export_task_in_background(
6878
)
6979

7080

81+
@celery_app.task
82+
def cleanup_anonymous_exports(**kwargs):
83+
"""
84+
Task to clean up export tasks created by the AnonymousUser that are older
85+
than `ANON_EXPORTS_CLEANUP_AGE`, excluding those that are still processing
86+
"""
87+
BATCH_SIZE = 50
88+
lock_timeout = 15*60
89+
cache_key = 'cleanup_anonymous_exports:lock'
90+
lock = cache.lock(cache_key, timeout=lock_timeout + 60)
91+
if not lock.acquire(blocking=False, blocking_timeout=0):
92+
logging.info('Nothing to do, task is already running!')
93+
return
94+
95+
try:
96+
cutoff_time = timezone.now() - timedelta(
97+
minutes=config.ANON_EXPORTS_CLEANUP_AGE
98+
)
99+
100+
old_exports = SubmissionExportTask.objects.filter(
101+
user=get_anonymous_user(),
102+
date_created__lt=cutoff_time,
103+
).exclude(
104+
status=ImportExportStatusChoices.PROCESSING
105+
).order_by('date_created')[:BATCH_SIZE]
106+
107+
if not old_exports.exists():
108+
logging.info('No old anonymous exports to clean up.')
109+
return
110+
111+
deleted_count = 0
112+
for export in old_exports:
113+
try:
114+
if export.result:
115+
try:
116+
export.result.delete(save=False)
117+
except Exception as e:
118+
logging.error(
119+
f'Error deleting file for export {export.uid}: {e}'
120+
)
121+
export.delete()
122+
deleted_count += 1
123+
except Exception as e:
124+
logging.error(f'Error deleting export {export.uid}: {e}')
125+
126+
logging.info(f'Cleaned up {deleted_count} old anonymous exports.')
127+
finally:
128+
lock.release()
129+
130+
71131
@celery_app.task
72132
def sync_kobocat_xforms(
73133
username=None,
Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
import os
2+
from datetime import timedelta
3+
4+
from django.core.cache import cache
5+
from django.core.files.base import ContentFile
6+
from django.utils import timezone
7+
from django.test import TestCase, override_settings
8+
9+
from kpi.models.import_export_task import (
10+
ImportExportStatusChoices,
11+
SubmissionExportTask
12+
)
13+
from kpi.tasks import cleanup_anonymous_exports
14+
from kpi.utils.object_permission import get_anonymous_user
15+
16+
17+
class AnonymousExportCleanupTestCase(TestCase):
18+
def _create_export_task(
19+
self, status=ImportExportStatusChoices.COMPLETE, minutes_old=60
20+
):
21+
export = SubmissionExportTask()
22+
export.user = get_anonymous_user()
23+
export.status = status
24+
export.data = {'type': 'xls', 'source': 'test'}
25+
export.save()
26+
27+
if minutes_old > 0:
28+
past_time = timezone.now() - timedelta(minutes=minutes_old)
29+
SubmissionExportTask.objects.filter(uid=export.uid).update(
30+
date_created=past_time
31+
)
32+
export.refresh_from_db()
33+
return export
34+
35+
def test_exports_older_than_30_minutes_are_deleted(self):
36+
# Export older than 30 min - should be deleted
37+
old_export = self._create_export_task(minutes_old=31)
38+
39+
# Export newer than 30 min - should be kept
40+
recent_export = self._create_export_task(minutes_old=29)
41+
42+
cleanup_anonymous_exports()
43+
self.assertFalse(
44+
SubmissionExportTask.objects.filter(uid=old_export.uid).exists()
45+
)
46+
self.assertTrue(
47+
SubmissionExportTask.objects.filter(uid=recent_export.uid).exists()
48+
)
49+
50+
def test_export_result_file_is_deleted_from_storage(self):
51+
"""
52+
Test that export files are deleted from storage
53+
"""
54+
export = self._create_export_task(minutes_old=60)
55+
56+
# Create actual file in storage
57+
file_content = ContentFile(
58+
b'PK\x03\x04' +
59+
b'{"data": "export"}' * 100,
60+
name='test_export.xlsx'
61+
)
62+
export.result.save(f'test_export_{export.uid}.xlsx', file_content, save=True)
63+
export.refresh_from_db()
64+
65+
storage = export.result.storage
66+
file_path = storage.path(export.result.name)
67+
self.assertTrue(os.path.exists(file_path))
68+
self.assertTrue(SubmissionExportTask.objects.filter(uid=export.uid).exists())
69+
70+
cleanup_anonymous_exports()
71+
72+
self.assertFalse(os.path.exists(file_path))
73+
self.assertFalse(SubmissionExportTask.objects.filter(uid=export.uid).exists())
74+
75+
def test_processing_exports_are_not_deleted(self):
76+
"""
77+
Test that exports with PROCESSING status are never deleted
78+
"""
79+
processing_export = self._create_export_task(
80+
status=ImportExportStatusChoices.PROCESSING,
81+
minutes_old=100
82+
)
83+
84+
cleanup_anonymous_exports()
85+
self.assertTrue(
86+
SubmissionExportTask.objects.filter(
87+
uid=processing_export.uid
88+
).exists()
89+
)
90+
91+
def test_cache_lock_prevents_concurrent_execution(self):
92+
"""
93+
Test that cache lock prevents concurrent task execution
94+
"""
95+
for i in range(5):
96+
self._create_export_task(minutes_old=60)
97+
98+
cache_key = 'cleanup_anonymous_exports:lock'
99+
lock_timeout = 15 * 60
100+
101+
# Acquire lock manually (simulate first task running)
102+
lock = cache.lock(cache_key, timeout=lock_timeout + 60)
103+
lock.acquire(blocking=False)
104+
105+
try:
106+
# Task should return early without deleting
107+
cleanup_anonymous_exports()
108+
109+
# Verify no exports were deleted
110+
remaining = SubmissionExportTask.objects.filter(
111+
user__username='AnonymousUser'
112+
).count()
113+
self.assertEqual(remaining, 5)
114+
115+
finally:
116+
lock.release()

0 commit comments

Comments
 (0)