Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions kobo/settings/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -475,6 +475,10 @@
'Enable automatic deletion of attachments for users who have exceeded '
'their storage limits.'
),
'ANON_EXPORTS_CLEANUP_AGE': (
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's be consistent with other similar variables. e.g.: *_GRACE_PERIOD.
Also I would write ANONYMOUS_EXPORTS_... .

30,
'Number of minutes after which anonymous export tasks are cleaned up.'
),
'LIMIT_ATTACHMENT_REMOVAL_GRACE_PERIOD': (
90,
'Number of days to keep attachments after the user has exceeded their '
Expand Down Expand Up @@ -729,6 +733,7 @@
'MASS_EMAIL_ENQUEUED_RECORD_EXPIRY',
'MASS_EMAIL_TEST_EMAILS',
'USAGE_LIMIT_ENFORCEMENT',
'ANON_EXPORTS_CLEANUP_AGE',
),
'Rest Services': (
'ALLOW_UNSECURED_HOOK_ENDPOINTS',
Expand Down Expand Up @@ -1445,6 +1450,12 @@ def dj_stripe_request_callback_method():
'options': {'queue': 'kpi_low_priority_queue'}
},
# Schedule every 15 minutes
'cleanup-anonymous-exports': {
'task': 'kpi.tasks.cleanup_anonymous_exports',
'schedule': crontab(minute='*/15'),
'options': {'queue': 'kpi_low_priority_queue'}
},
# Schedule every 15 minutes
'refresh-user-report-snapshot': {
'task': 'kobo.apps.user_reports.tasks.refresh_user_report_snapshots',
'schedule': crontab(minute='*/15'),
Expand Down
62 changes: 61 additions & 1 deletion kpi/tasks.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,29 @@
import time
from datetime import timedelta

import requests
from constance import config
from django.apps import apps
from django.conf import settings
from django.core import mail
from django.core.cache import cache
from django.core.exceptions import ObjectDoesNotExist
from django.core.management import call_command
from django.utils import timezone

from kobo.apps.kobo_auth.shortcuts import User
from kobo.apps.markdownx_uploader.tasks import remove_unused_markdown_files
from kobo.celery import celery_app
from kpi.constants import LIMIT_HOURS_23
from kpi.maintenance_tasks import remove_old_asset_snapshots, remove_old_import_tasks
from kpi.models.asset import Asset
from kpi.models.import_export_task import ImportTask, SubmissionExportTask
from kpi.models.import_export_task import (
ImportExportStatusChoices,
ImportTask,
SubmissionExportTask,
)
from kpi.utils.log import logging
from kpi.utils.object_permission import get_anonymous_user


@celery_app.task(
Expand Down Expand Up @@ -68,6 +78,56 @@ def export_task_in_background(
)


@celery_app.task
def cleanup_anonymous_exports(**kwargs):
    """
    Clean up export tasks created by the AnonymousUser that are older than
    `ANON_EXPORTS_CLEANUP_AGE` minutes, excluding those still processing.

    At most `BATCH_SIZE` tasks are deleted per run (oldest first); the
    periodic schedule drains any backlog over successive runs. A cache lock
    prevents overlapping executions when a run takes longer than the
    schedule interval.
    """
    BATCH_SIZE = 50
    lock_timeout = 15 * 60  # seconds; matches the 15-minute beat schedule
    cache_key = 'cleanup_anonymous_exports:lock'
    # Give the lock a margin past the expected run time so it expires on its
    # own even if the worker dies without releasing it.
    lock = cache.lock(cache_key, timeout=lock_timeout + 60)
    if not lock.acquire(blocking=False, blocking_timeout=0):
        logging.info('Nothing to do, task is already running!')
        return

    try:
        cutoff_time = timezone.now() - timedelta(
            minutes=config.ANON_EXPORTS_CLEANUP_AGE
        )

        # Materialize the sliced queryset once instead of issuing a separate
        # `exists()` query followed by a second query for iteration (which
        # also leaves a race window between the two).
        old_exports = list(
            SubmissionExportTask.objects.filter(
                user=get_anonymous_user(),
                date_created__lt=cutoff_time,
            ).exclude(
                status=ImportExportStatusChoices.PROCESSING
            ).order_by('date_created')[:BATCH_SIZE]
        )

        if not old_exports:
            logging.info('No old anonymous exports to clean up.')
            return

        deleted_count = 0
        for export in old_exports:
            try:
                # Remove the exported file from storage first; a failure here
                # is logged but does not prevent deleting the DB record.
                if export.result:
                    try:
                        export.result.delete(save=False)
                    except Exception as e:
                        logging.error(
                            f'Error deleting file for export {export.uid}: {e}'
                        )
                export.delete()
                deleted_count += 1
            except Exception as e:
                # Best effort: keep going so one bad row cannot block the batch
                logging.error(f'Error deleting export {export.uid}: {e}')

        logging.info(f'Cleaned up {deleted_count} old anonymous exports.')
    finally:
        lock.release()


@celery_app.task
def sync_kobocat_xforms(
username=None,
Expand Down
116 changes: 116 additions & 0 deletions kpi/tests/test_cleanup_anonymous_exports.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
import os
from datetime import timedelta

from django.core.cache import cache
from django.core.files.base import ContentFile
from django.utils import timezone
from django.test import TestCase

from kpi.models.import_export_task import (
ImportExportStatusChoices,
SubmissionExportTask
)
from kpi.tasks import cleanup_anonymous_exports
from kpi.utils.object_permission import get_anonymous_user


class AnonymousExportCleanupTestCase(TestCase):
    """
    Tests for the `cleanup_anonymous_exports` Celery task.

    NOTE(review): the age-based tests assume the default
    `ANON_EXPORTS_CLEANUP_AGE` of 30 minutes — confirm, or override the
    constance config explicitly if the default ever changes.
    """

    def _create_export_task(
        self, status=ImportExportStatusChoices.COMPLETE, minutes_old=60
    ):
        """
        Create an export task owned by the anonymous user, optionally
        back-dated by `minutes_old` minutes.

        `date_created` is set via a queryset `update()` because the model
        assigns it automatically on save.
        """
        export = SubmissionExportTask()
        export.user = get_anonymous_user()
        export.status = status
        export.data = {'type': 'xls', 'source': 'test'}
        export.save()

        if minutes_old > 0:
            past_time = timezone.now() - timedelta(minutes=minutes_old)
            SubmissionExportTask.objects.filter(uid=export.uid).update(
                date_created=past_time
            )
            export.refresh_from_db()
        return export

    def test_exports_older_than_30_minutes_are_deleted(self):
        """
        Test that only exports older than the cleanup age are deleted
        """
        # Export older than 30 min - should be deleted
        old_export = self._create_export_task(minutes_old=31)

        # Export newer than 30 min - should be kept
        recent_export = self._create_export_task(minutes_old=29)

        cleanup_anonymous_exports()
        self.assertFalse(
            SubmissionExportTask.objects.filter(uid=old_export.uid).exists()
        )
        self.assertTrue(
            SubmissionExportTask.objects.filter(uid=recent_export.uid).exists()
        )

    def test_export_result_file_is_deleted_from_storage(self):
        """
        Test that export files are deleted from storage
        """
        export = self._create_export_task(minutes_old=60)

        # Create actual file in storage
        file_content = ContentFile(
            b'PK\x03\x04' +
            b'{"data": "export"}' * 100,
            name='test_export.xlsx'
        )
        export.result.save(f'test_export_{export.uid}.xlsx', file_content, save=True)
        export.refresh_from_db()

        storage = export.result.storage
        file_path = storage.path(export.result.name)
        self.assertTrue(os.path.exists(file_path))
        self.assertTrue(SubmissionExportTask.objects.filter(uid=export.uid).exists())

        cleanup_anonymous_exports()

        self.assertFalse(os.path.exists(file_path))
        self.assertFalse(SubmissionExportTask.objects.filter(uid=export.uid).exists())

    def test_processing_exports_are_not_deleted(self):
        """
        Test that exports with PROCESSING status are never deleted
        """
        processing_export = self._create_export_task(
            status=ImportExportStatusChoices.PROCESSING,
            minutes_old=100
        )

        cleanup_anonymous_exports()
        self.assertTrue(
            SubmissionExportTask.objects.filter(
                uid=processing_export.uid
            ).exists()
        )

    def test_cache_lock_prevents_concurrent_execution(self):
        """
        Test that cache lock prevents concurrent task execution
        """
        for _ in range(5):
            self._create_export_task(minutes_old=60)

        cache_key = 'cleanup_anonymous_exports:lock'
        lock_timeout = 15 * 60

        # Acquire lock manually (simulate first task running). Assert the
        # acquisition succeeded — otherwise the test would silently pass
        # without actually holding the lock.
        lock = cache.lock(cache_key, timeout=lock_timeout + 60)
        self.assertTrue(lock.acquire(blocking=False))

        try:
            # Task should return early without deleting
            cleanup_anonymous_exports()

            # Verify no exports were deleted; filter by the same user object
            # the task targets instead of a hard-coded username
            remaining = SubmissionExportTask.objects.filter(
                user=get_anonymous_user()
            ).count()
            self.assertEqual(remaining, 5)

        finally:
            lock.release()