Skip to content
This repository was archived by the owner on May 5, 2025. It is now read-only.

Commit e1d597d

Browse files
authored
add FlareCleanupTask (#947)
1 parent 1c06547 commit e1d597d

File tree

7 files changed

+344
-22
lines changed

7 files changed

+344
-22
lines changed

celery_config.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
from shared.celery_config import (
1111
BaseCeleryConfig,
1212
brolly_stats_rollup_task_name,
13+
# flare_cleanup_task_name,
1314
gh_app_webhook_check_task_name,
1415
health_check_task_name,
1516
profiling_finding_task_name,
@@ -88,12 +89,18 @@ def _beat_schedule():
8889
},
8990
"trial_expiration_cron": {
9091
"task": trial_expiration_cron_task_name,
91-
# 4 UTC is 12am EDT
92-
"schedule": crontab(minute="0", hour="4"),
92+
"schedule": crontab(minute="0", hour="4"), # 4 UTC is 12am EDT
9393
"kwargs": {
9494
"cron_task_generation_time_iso": BeatLazyFunc(get_utc_now_as_iso_format)
9595
},
9696
},
97+
# "flare_cleanup": {
98+
# "task": flare_cleanup_task_name,
99+
# "schedule": crontab(minute="0", hour="5"), # every day, 5am UTC (10pm PDT)
100+
# "kwargs": {
101+
# "cron_task_generation_time_iso": BeatLazyFunc(get_utc_now_as_iso_format)
102+
# },
103+
# },
97104
}
98105

99106
if get_config("setup", "find_uncollected_profilings", "enabled", default=True):

conftest.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -276,6 +276,18 @@ def mock_storage(mocker):
276276
return storage_server
277277

278278

279+
@pytest.fixture
def mock_archive_storage(mocker):
    """Swap archive storage for an in-memory backend and force archive writes on."""
    # Make the "should we write this data to storage?" config check always pass.
    mocker.patch(
        "shared.django_apps.core.models.should_write_data_to_storage_config_check",
        return_value=True,
    )
    in_memory_storage = MemoryStorageService({})
    storage_service_patch = mocker.patch("shared.api_archive.archive.StorageService")
    storage_service_patch.return_value = in_memory_storage
    return in_memory_storage
289+
290+
279291
@pytest.fixture
280292
def mock_smtp(mocker):
281293
m = mocker.patch("services.smtp.SMTPService")

database/models/core.py

Lines changed: 17 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -380,7 +380,7 @@ def get_repository(self):
380380
def get_commitid(self):
381381
return self.commitid
382382

383-
def should_write_to_storage(self) -> bool:
383+
def should_write_to_storage(self: object) -> bool:
384384
if self.repository is None or self.repository.owner is None:
385385
return False
386386
is_codecov_repo = self.repository.owner.username == "codecov"
@@ -447,7 +447,6 @@ class Pull(CodecovBaseModel):
447447
commentid = Column(types.Text)
448448
bundle_analysis_commentid = Column(types.Text)
449449
diff = Column(postgresql.JSON)
450-
flare = Column(postgresql.JSON)
451450
author_id = Column("author", types.Integer, ForeignKey("owners.ownerid"))
452451
behind_by = Column(types.Integer)
453452
behind_by_commit = Column(types.Text)
@@ -457,6 +456,22 @@ class Pull(CodecovBaseModel):
457456
Repository, backref=backref("pulls", cascade="delete", lazy="dynamic")
458457
)
459458

459+
def should_write_to_storage(self: object) -> bool:
    """Decide whether this pull's flare should be written to Archive storage
    instead of the db, based on the `pull_flare` master switch and repo config.

    NOTE(review): annotated `self: object` presumably because this is passed as a
    plain function to ArchiveField below — confirm before changing.
    """
    # Without a repository/owner we cannot evaluate per-repo config; keep in db.
    if self.repository is None or self.repository.owner is None:
        return False
    is_codecov_repo = self.repository.owner.username == "codecov"
    return should_write_data_to_storage_config_check(
        master_switch_key="pull_flare",
        is_codecov_repo=is_codecov_repo,
        repoid=self.repository.repoid,
    )

# Raw db column holding inline flare JSON (None/{} when not stored inline).
_flare = Column("flare", postgresql.JSON)
# Path of the flare blob in Archive storage, when it was written there instead.
_flare_storage_path = Column("flare_storage_path", types.Text, nullable=True)
# Facade over the two columns above: reads from whichever backend holds the
# data and routes writes via should_write_to_storage; defaults to {}.
flare = ArchiveField(
    should_write_to_storage_fn=should_write_to_storage, default_value_class=dict
)
474+
460475
__table_args__ = (Index("pulls_repoid_pullid", "repoid", "pullid", unique=True),)
461476

462477
def __repr__(self):
@@ -503,16 +518,6 @@ def external_id(self):
503518
def id(self):
504519
return self.id_
505520

506-
def should_write_to_storage(self) -> bool:
507-
if self.repository is None or self.repository.owner is None:
508-
return False
509-
is_codecov_repo = self.repository.owner.username == "codecov"
510-
return should_write_data_to_storage_config_check(
511-
master_switch_key="pull_flare",
512-
is_codecov_repo=is_codecov_repo,
513-
repoid=self.repository.repoid,
514-
)
515-
516521
@cached_property
517522
def is_first_coverage_pull(self):
518523
"""
@@ -536,12 +541,6 @@ def is_first_coverage_pull(self):
536541
return first_pull_with_coverage.id_ == self.id_
537542
return True
538543

539-
_flare = Column("flare", postgresql.JSON)
540-
_flare_storage_path = Column("flare_storage_path", types.Text, nullable=True)
541-
flare = ArchiveField(
542-
should_write_to_storage_fn=should_write_to_storage, default_value_class=dict
543-
)
544-
545544

546545
class CommitNotification(CodecovBaseModel):
547546
__tablename__ = "commit_notifications"

requirements.in

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
https://github.com/codecov/test-results-parser/archive/c840502d1b4dd7d05b2efc2c1328affaf2acd27c.tar.gz#egg=test-results-parser
2-
https://github.com/codecov/shared/archive/2674ae99811767e63151590906691aed4c5ce1f9.tar.gz#egg=shared
2+
https://github.com/codecov/shared/archive/efe48352e172f658c21465371453dcefc98f6793.tar.gz#egg=shared
33
https://github.com/codecov/timestring/archive/d37ceacc5954dff3b5bd2f887936a98a668dda42.tar.gz#egg=timestring
44
asgiref>=3.7.2
55
analytics-python==1.3.0b1

requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -336,7 +336,7 @@ sentry-sdk==2.13.0
336336
# shared
337337
setuptools==75.6.0
338338
# via nodeenv
339-
shared @ https://github.com/codecov/shared/archive/2674ae99811767e63151590906691aed4c5ce1f9.tar.gz#egg=shared
339+
shared @ https://github.com/codecov/shared/archive/efe48352e172f658c21465371453dcefc98f6793.tar.gz#egg=shared
340340
# via -r requirements.in
341341
six==1.16.0
342342
# via

tasks/flare_cleanup.py

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
import logging
2+
3+
from shared.api_archive.archive import ArchiveService
4+
from shared.celery_config import flare_cleanup_task_name
5+
from shared.django_apps.core.models import Pull, PullStates
6+
7+
from app import celery_app
8+
from tasks.crontasks import CodecovCronTask
9+
10+
log = logging.getLogger(__name__)
11+
12+
13+
class FlareCleanupTask(CodecovCronTask, name=flare_cleanup_task_name):
    """
    Flare is a field on a Pull object.
    Flare is used to draw static graphs (see GraphHandler view in api) and can be large.
    The majority of flare graphs are used in pr comments, so we keep the (maybe large) flare "available"
    in either the db or Archive storage while the pull is OPEN.
    If the pull is not OPEN, we dump the flare to save space.
    If we need to generate a flare graph for a non-OPEN pull, we build_report_from_commit
    and generate fresh flare from that report (see GraphHandler view in api).
    """

    @classmethod
    def get_min_seconds_interval_between_executions(cls):
        return 72000  # 20h — just under the daily beat cadence

    def run_cron_task(self, db_session, batch_size=1000, limit=10000, *args, **kwargs):
        """Clear flare for non-OPEN pulls, oldest first.

        Processes up to `limit` db rows and up to `limit` Archive rows, in
        batches of `batch_size`. `db_session` is unused here (this task goes
        through the Django ORM) but is part of the cron-task interface.
        """
        # For any Pull that is not OPEN, clear the flare field(s), targeting older data
        non_open_pulls = Pull.objects.exclude(state=PullStates.OPEN.value).order_by(
            "updatestamp"
        )

        log.info("Starting FlareCleanupTask")

        # clear in db
        non_open_pulls_with_flare_in_db = non_open_pulls.filter(
            _flare__isnull=False
        ).exclude(_flare={})

        # Process in batches. Updated rows drop out of the filtered queryset,
        # so always take the head of the queryset: advancing an offset here
        # would skip a batch-sized window of rows after every successful
        # update (the queryset shrinks underneath the offset).
        total_updated = 0
        while total_updated < limit:
            take = min(batch_size, limit - total_updated)
            batch = list(
                non_open_pulls_with_flare_in_db.values_list("id", flat=True)[:take]
            )
            if not batch:
                break
            n_updated = non_open_pulls_with_flare_in_db.filter(id__in=batch).update(
                _flare=None
            )
            if n_updated == 0:
                # No progress (e.g. concurrent change): stop instead of
                # re-selecting the same ids forever.
                break
            total_updated += n_updated

        log.info("FlareCleanupTask cleared %s database flares", total_updated)

        # clear in Archive
        non_open_pulls_with_flare_in_archive = non_open_pulls.filter(
            _flare_storage_path__isnull=False
        )

        # Process archive deletions in batches. Successfully processed rows
        # drop out of the queryset; rows whose archive deletion failed keep
        # their _flare_storage_path and stay in it, so we slide `skipped`
        # past failed batches to guarantee forward progress.
        total_updated = 0
        skipped = 0
        while total_updated < limit:
            take = min(batch_size, limit - total_updated)
            batch = list(
                non_open_pulls_with_flare_in_archive.values_list("id", flat=True)[
                    skipped : skipped + take
                ]
            )
            if not batch:
                break
            flare_paths_from_batch = list(
                Pull.objects.filter(id__in=batch).values_list(
                    "_flare_storage_path", flat=True
                )
            )
            try:
                archive_service = ArchiveService(repository=None)
                archive_service.delete_files(flare_paths_from_batch)
            except Exception as e:
                # if something fails with deleting from archive, leave the _flare_storage_path on the pull object.
                # only delete _flare_storage_path if the deletion from archive was successful.
                log.error("FlareCleanupTask failed to delete archive files: %s", e)
                skipped += len(batch)
                continue

            # Update the _flare_storage_path field for successfully processed pulls
            n_updated = Pull.objects.filter(id__in=batch).update(
                _flare_storage_path=None
            )
            if n_updated < len(batch):
                # Rows that didn't update remain in the queryset; step past
                # them so the next slice doesn't pick them up again.
                skipped += len(batch) - n_updated
            total_updated += n_updated

        log.info("FlareCleanupTask cleared %s Archive flares", total_updated)

    def manual_run(self, db_session=None, limit=1000, *args, **kwargs):
        """Hand-triggered entry point with a smaller default limit."""
        self.run_cron_task(db_session, limit=limit, *args, **kwargs)
97+
98+
99+
# Register the task with the Celery app and expose the registered instance so
# callers can enqueue it by name.
RegisteredFlareCleanupTask = celery_app.register_task(FlareCleanupTask())
flare_cleanup_task = celery_app.tasks[RegisteredFlareCleanupTask.name]

0 commit comments

Comments
 (0)