Skip to content
This repository was archived by the owner on May 5, 2025. It is now read-only.

Commit 5ac629e

Browse files
committed
make flare cleanup task safer, convert to manual task for testing
1 parent b6fd340 commit 5ac629e

File tree

5 files changed

+67
-35
lines changed

5 files changed

+67
-35
lines changed

celery_config.py

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
from shared.celery_config import (
1111
BaseCeleryConfig,
1212
brolly_stats_rollup_task_name,
13-
flare_cleanup_task_name,
13+
# flare_cleanup_task_name,
1414
gh_app_webhook_check_task_name,
1515
health_check_task_name,
1616
profiling_finding_task_name,
@@ -89,19 +89,18 @@ def _beat_schedule():
8989
},
9090
"trial_expiration_cron": {
9191
"task": trial_expiration_cron_task_name,
92-
# 4 UTC is 12am EDT
93-
"schedule": crontab(minute="0", hour="4"),
94-
"kwargs": {
95-
"cron_task_generation_time_iso": BeatLazyFunc(get_utc_now_as_iso_format)
96-
},
97-
},
98-
"flare_cleanup": {
99-
"task": flare_cleanup_task_name,
100-
"schedule": crontab(minute="0", hour="4"), # every day, 4am UTC (8pm PT)
92+
"schedule": crontab(minute="0", hour="4"), # 4 UTC is 12am EDT
10193
"kwargs": {
10294
"cron_task_generation_time_iso": BeatLazyFunc(get_utc_now_as_iso_format)
10395
},
10496
},
97+
# "flare_cleanup": {
98+
# "task": flare_cleanup_task_name,
99+
# "schedule": crontab(minute="0", hour="5"), # every day, 5am UTC (10pm PDT)
100+
# "kwargs": {
101+
# "cron_task_generation_time_iso": BeatLazyFunc(get_utc_now_as_iso_format)
102+
# },
103+
# },
105104
}
106105

107106
if get_config("setup", "find_uncollected_profilings", "enabled", default=True):

requirements.in

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
https://github.com/codecov/test-results-parser/archive/c840502d1b4dd7d05b2efc2c1328affaf2acd27c.tar.gz#egg=test-results-parser
2-
https://github.com/codecov/shared/archive/2674ae99811767e63151590906691aed4c5ce1f9.tar.gz#egg=shared
2+
https://github.com/codecov/shared/archive/96d0b0ce0ac9ef14b74ac97e6ca2c7659032887d.tar.gz#egg=shared
33
https://github.com/codecov/timestring/archive/d37ceacc5954dff3b5bd2f887936a98a668dda42.tar.gz#egg=timestring
44
asgiref>=3.7.2
55
analytics-python==1.3.0b1

requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -336,7 +336,7 @@ sentry-sdk==2.13.0
336336
# shared
337337
setuptools==75.6.0
338338
# via nodeenv
339-
shared @ https://github.com/codecov/shared/archive/2674ae99811767e63151590906691aed4c5ce1f9.tar.gz#egg=shared
339+
shared @ https://github.com/codecov/shared/archive/96d0b0ce0ac9ef14b74ac97e6ca2c7659032887d.tar.gz#egg=shared
340340
# via -r requirements.in
341341
six==1.16.0
342342
# via

tasks/flare_cleanup.py

Lines changed: 54 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -25,38 +25,73 @@ class FlareCleanupTask(CodecovCronTask, name=flare_cleanup_task_name):
2525
def get_min_seconds_interval_between_executions(cls):
2626
return 72000 # 20h
2727

28-
def run_cron_task(self, db_session, *args, **kwargs):
29-
# for any Pull that is not OPEN, clear the flare field(s)
30-
non_open_pulls = Pull.objects.exclude(state=PullStates.OPEN.value)
28+
def run_cron_task(self, db_session, batch_size=1000, limit=10000, *args, **kwargs):
29+
# for any Pull that is not OPEN, clear the flare field(s), targeting older data
30+
non_open_pulls = Pull.objects.exclude(state=PullStates.OPEN.value).order_by(
31+
"updatestamp"
32+
)
3133

3234
log.info("Starting FlareCleanupTask")
3335

3436
# clear in db
3537
non_open_pulls_with_flare_in_db = non_open_pulls.filter(
3638
_flare__isnull=False
3739
).exclude(_flare={})
38-
# single query, objs are not loaded into memory, does not call .save(), does not refresh updatestamp
39-
n_updated = non_open_pulls_with_flare_in_db.update(_flare=None)
40-
log.info(f"FlareCleanupTask cleared {n_updated} _flares")
40+
41+
# Process in batches using an offset
42+
total_updated = 0
43+
offset = 0
44+
while offset < limit:
45+
batch = non_open_pulls_with_flare_in_db.values_list("id", flat=True)[
46+
offset : offset + batch_size
47+
]
48+
if not batch:
49+
break
50+
n_updated = non_open_pulls_with_flare_in_db.filter(id__in=batch).update(
51+
_flare=None
52+
)
53+
total_updated += n_updated
54+
offset += batch_size
55+
56+
log.info(f"FlareCleanupTask cleared {total_updated} database flares")
4157

4258
# clear in Archive
4359
non_open_pulls_with_flare_in_archive = non_open_pulls.filter(
4460
_flare_storage_path__isnull=False
45-
).select_related("repository")
46-
log.info(
47-
f"FlareCleanupTask will clear {non_open_pulls_with_flare_in_archive.count()} Archive flares"
48-
)
49-
# single query, loads all pulls and repos in qset into memory, deletes file in Archive 1 by 1
50-
for pull in non_open_pulls_with_flare_in_archive:
51-
archive_service = ArchiveService(repository=pull.repository)
52-
archive_service.delete_file(pull._flare_storage_path)
53-
54-
# single query, objs are not loaded into memory, does not call .save(), does not refresh updatestamp
55-
n_updated = non_open_pulls_with_flare_in_archive.update(
56-
_flare_storage_path=None
5761
)
5862

59-
log.info(f"FlareCleanupTask cleared {n_updated} Archive flares")
63+
# Process archive deletions in batches using an offset
64+
total_updated = 0
65+
offset = 0
66+
while offset < limit:
67+
batch = non_open_pulls_with_flare_in_archive.values_list("id", flat=True)[
68+
offset : offset + batch_size
69+
]
70+
if not batch:
71+
break
72+
flare_paths_from_batch = Pull.objects.filter(id__in=batch).values_list(
73+
"_flare_storage_path", flat=True
74+
)
75+
try:
76+
archive_service = ArchiveService()
77+
archive_service.delete_files(flare_paths_from_batch)
78+
except Exception as e:
79+
# if something fails with deleting from archive, leave the _flare_storage_path on the pull object.
80+
# only delete _flare_storage_path if the deletion from archive was successful.
81+
log.error(f"FlareCleanupTask failed to delete archive files: {e}")
82+
continue
83+
84+
# Update the _flare_storage_path field for successfully processed pulls
85+
n_updated = Pull.objects.filter(id__in=batch).update(
86+
_flare_storage_path=None
87+
)
88+
total_updated += n_updated
89+
offset += batch_size
90+
91+
log.info(f"FlareCleanupTask cleared {total_updated} Archive flares")
92+
93+
def manual_run(self, db_session=None, limit=1000, *args, **kwargs):
94+
self.run_cron_task(db_session, limit=limit, *args, **kwargs)
6095

6196

6297
RegisteredFlareCleanupTask = celery_app.register_task(FlareCleanupTask())

tasks/tests/unit/test_flare_cleanup.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -78,8 +78,7 @@ def test_successful_run(self, transactional_db, mocker):
7878
mock_logs.assert_has_calls(
7979
[
8080
call("Starting FlareCleanupTask"),
81-
call("FlareCleanupTask cleared 1 _flares"),
82-
call("FlareCleanupTask will clear 1 Archive flares"),
81+
call("FlareCleanupTask cleared 1 database flares"),
8382
call("FlareCleanupTask cleared 1 Archive flares"),
8483
]
8584
)
@@ -120,8 +119,7 @@ def test_successful_run(self, transactional_db, mocker):
120119
mock_logs.assert_has_calls(
121120
[
122121
call("Starting FlareCleanupTask"),
123-
call("FlareCleanupTask cleared 0 _flares"),
124-
call("FlareCleanupTask will clear 0 Archive flares"),
122+
call("FlareCleanupTask cleared 0 database flares"),
125123
call("FlareCleanupTask cleared 0 Archive flares"),
126124
]
127125
)

0 commit comments

Comments
 (0)