Skip to content

Commit fc64741

Browse files
authored
refactor(db-scheduler): optimize all_as_schedule query (#835)
* refactor(db-scheduler): improve all_as_schedule logic to optimize querying the database * flake8 * exempt 4 in the morning from exclusion because of cleanup task, also refactor all_as_schedule test * flake8 tests * add tests for optimization
1 parent 5a61ead commit fc64741

File tree

2 files changed

+109
-13
lines changed

2 files changed

+109
-13
lines changed

django_celery_beat/schedulers.py

Lines changed: 43 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,14 +11,16 @@
1111
from django.conf import settings
1212
from django.core.exceptions import ObjectDoesNotExist
1313
from django.db import close_old_connections, transaction
14+
from django.db.models import Q
1415
from django.db.utils import DatabaseError, InterfaceError
16+
from django.utils import timezone
1517
from kombu.utils.encoding import safe_repr, safe_str
1618
from kombu.utils.json import dumps, loads
1719

1820
from .clockedschedule import clocked
1921
from .models import (ClockedSchedule, CrontabSchedule, IntervalSchedule,
2022
PeriodicTask, PeriodicTasks, SolarSchedule)
21-
from .utils import NEVER_CHECK_TIMEOUT
23+
from .utils import NEVER_CHECK_TIMEOUT, now
2224

2325
# This scheduler must wake up more frequently than the
2426
# regular of 5 minutes because it needs to take external
@@ -252,7 +254,17 @@ def setup_schedule(self):
252254
def all_as_schedule(self):
253255
debug('DatabaseScheduler: Fetching database schedule')
254256
s = {}
255-
for model in self.Model.objects.enabled():
257+
next_five_minutes = now() + datetime.timedelta(minutes=5)
258+
exclude_clock_tasks_query = Q(
259+
clocked__isnull=False, clocked__clocked_time__gt=next_five_minutes
260+
)
261+
exclude_hours = self.get_excluded_hours_for_crontab_tasks()
262+
exclude_cron_tasks_query = Q(
263+
crontab__isnull=False, crontab__hour__in=exclude_hours
264+
)
265+
for model in self.Model.objects.enabled().exclude(
266+
exclude_clock_tasks_query | exclude_cron_tasks_query
267+
):
256268
try:
257269
s[model.name] = self.Entry(model, app=self.app)
258270
except ValueError:
@@ -378,3 +390,32 @@ def schedule(self):
378390
repr(entry) for entry in self._schedule.values()),
379391
)
380392
return self._schedule
393+
394+
@staticmethod
395+
def get_excluded_hours_for_crontab_tasks():
396+
# Generate the full list of allowed hours for crontabs
397+
allowed_crontab_hours = [
398+
str(hour).zfill(2) for hour in range(24)
399+
] + [
400+
str(hour) for hour in range(10)
401+
]
402+
403+
# Get current, next, and previous hours
404+
current_time = timezone.localtime(now())
405+
current_hour = current_time.hour
406+
next_hour = (current_hour + 1) % 24
407+
previous_hour = (current_hour - 1) % 24
408+
409+
# Create a set of hours to remove (both padded and non-padded versions)
410+
hours_to_remove = {
411+
str(current_hour).zfill(2), str(current_hour),
412+
str(next_hour).zfill(2), str(next_hour),
413+
str(previous_hour).zfill(2), str(previous_hour),
414+
str(4), "04", # celery's default cleanup task
415+
}
416+
417+
# Filter out 'should be considered' hours
418+
return [
419+
hour for hour in allowed_crontab_hours
420+
if hour not in hours_to_remove
421+
]

t/unit/test_schedulers.py

Lines changed: 66 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -456,22 +456,67 @@ def setup_scheduler(self, app):
456456
self.m4.save()
457457
self.m4.refresh_from_db()
458458

459-
dt_aware = make_aware(datetime(day=26,
460-
month=7,
461-
year=3000,
462-
hour=1,
463-
minute=0)) # future time
459+
# disabled, should not be in schedule
460+
self.m5 = self.create_model_interval(
461+
schedule(timedelta(seconds=1)))
462+
self.m5.enabled = False
463+
self.m5.save()
464+
465+
# near future time (should be in schedule)
466+
now = datetime.now()
467+
two_minutes_later = now + timedelta(minutes=2)
468+
dt_aware = make_aware(
469+
datetime(
470+
day=two_minutes_later.day,
471+
month=two_minutes_later.month,
472+
year=two_minutes_later.year,
473+
hour=two_minutes_later.hour,
474+
minute=two_minutes_later.minute
475+
)
476+
)
464477
self.m6 = self.create_model_clocked(
465478
clocked(dt_aware)
466479
)
467480
self.m6.save()
468481
self.m6.refresh_from_db()
469482

470-
# disabled, should not be in schedule
471-
m5 = self.create_model_interval(
472-
schedule(timedelta(seconds=1)))
473-
m5.enabled = False
474-
m5.save()
483+
# distant future time (should not be in schedule)
484+
ten_minutes_later = now + timedelta(minutes=10)
485+
distant_dt_aware = make_aware(
486+
datetime(
487+
day=ten_minutes_later.day,
488+
month=ten_minutes_later.month,
489+
year=ten_minutes_later.year,
490+
hour=ten_minutes_later.hour,
491+
minute=ten_minutes_later.minute
492+
)
493+
)
494+
self.m7 = self.create_model_clocked(
495+
clocked(distant_dt_aware)
496+
)
497+
self.m7.save()
498+
self.m7.refresh_from_db()
499+
500+
now_hour = timezone.localtime(timezone.now()).hour
501+
# near future time (should be in schedule)
502+
self.m8 = self.create_model_crontab(
503+
crontab(hour=str(now_hour)))
504+
self.m8.save()
505+
self.m8.refresh_from_db()
506+
self.m9 = self.create_model_crontab(
507+
crontab(hour=str((now_hour + 1) % 24)))
508+
self.m9.save()
509+
self.m9.refresh_from_db()
510+
self.m10 = self.create_model_crontab(
511+
crontab(hour=str((now_hour - 1) % 24)))
512+
self.m10.save()
513+
self.m10.refresh_from_db()
514+
515+
# distant future time (should not be in schedule)
516+
self.m11 = self.create_model_crontab(
517+
crontab(hour=str((now_hour + 2) % 24)))
518+
self.m11.save()
519+
self.m11.refresh_from_db()
475520

476521
self.s = self.Scheduler(app=self.app)
477522

@@ -483,11 +528,21 @@ def test_constructor(self):
483528
def test_all_as_schedule(self):
484529
sched = self.s.schedule
485530
assert sched
486-
assert len(sched) == 6
531+
assert len(sched) == 9
487532
assert 'celery.backend_cleanup' in sched
488533
for n, e in sched.items():
489534
assert isinstance(e, self.s.Entry)
490535

536+
def test_get_excluded_hours_for_crontab_tasks(self):
537+
now_hour = timezone.localtime(timezone.now()).hour
538+
excluded_hours = self.s.get_excluded_hours_for_crontab_tasks()
539+
540+
assert str(now_hour) not in excluded_hours
541+
assert str((now_hour + 1) % 24) not in excluded_hours
542+
assert str((now_hour - 1) % 24) not in excluded_hours
543+
assert str((now_hour + 2) % 24) in excluded_hours
544+
assert str((now_hour - 2) % 24) in excluded_hours
545+
491546
def test_schedule_changed(self):
492547
self.m2.args = '[16, 16]'
493548
self.m2.save()

0 commit comments

Comments
 (0)