Skip to content

Commit 9ffd0c7

Browse files
authored
OAuth: re-enable sync active user repos task (#12443)
- Take into consideration active users from the last 30 days only. - Ignore GitHub App, so we don't re-sync installations over and over again, as users can share the same installation, and repos from the GH app are kept in sync via a webhook. This task now completes in 42 minutes. Closes #12406
1 parent f959b2a commit 9ffd0c7

File tree

2 files changed

+55
-37
lines changed

2 files changed

+55
-37
lines changed

readthedocs/oauth/tasks.py

Lines changed: 50 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
from django.utils import timezone
1111

1212
from readthedocs.api.v2.views.integrations import ExternalVersionData
13+
from readthedocs.builds.utils import memcache_lock
1314
from readthedocs.core.utils.tasks import PublicTask
1415
from readthedocs.core.utils.tasks import user_id_matches_or_superuser
1516
from readthedocs.core.views.hooks import VersionInfo
@@ -26,15 +27,15 @@
2627
from readthedocs.oauth.notifications import MESSAGE_OAUTH_WEBHOOK_INVALID
2728
from readthedocs.oauth.notifications import MESSAGE_OAUTH_WEBHOOK_NO_ACCOUNT
2829
from readthedocs.oauth.notifications import MESSAGE_OAUTH_WEBHOOK_NO_PERMISSIONS
30+
from readthedocs.oauth.services import GitHubAppService
31+
from readthedocs.oauth.services import registry
2932
from readthedocs.oauth.services.base import SyncServiceError
3033
from readthedocs.oauth.utils import SERVICE_MAP
3134
from readthedocs.projects.models import Project
3235
from readthedocs.sso.models import SSOIntegration
3336
from readthedocs.vcs_support.backends.git import parse_version_from_ref
3437
from readthedocs.worker import app
3538

36-
from .services import registry
37-
3839

3940
log = structlog.get_logger(__name__)
4041

@@ -49,13 +50,16 @@
4950
time_limit=900,
5051
soft_time_limit=600,
5152
)
52-
def sync_remote_repositories(user_id):
53+
def sync_remote_repositories(user_id, skip_githubapp=False):
5354
user = User.objects.filter(pk=user_id).first()
5455
if not user:
5556
return
5657

5758
failed_services = set()
5859
for service_cls in registry:
60+
if skip_githubapp and service_cls == GitHubAppService:
61+
continue
62+
5963
try:
6064
service_cls.sync_user_access(user)
6165
except SyncServiceError:
@@ -110,12 +114,13 @@ def sync_remote_repositories_from_sso_organizations():
110114
queue="web",
111115
time_limit=60 * 60 * 3, # 3h
112116
soft_time_limit=(60 * 60 * 3) - 5 * 60, # 2h 55m
117+
bind=True,
113118
)
114-
def sync_active_users_remote_repositories():
119+
def sync_active_users_remote_repositories(self):
115120
"""
116121
Sync ``RemoteRepository`` for active users.
117122
118-
We consider active users those that logged in at least once in the last 90 days.
123+
We consider active users those that logged in at least once in the last 45 days.
119124
120125
This task is thought to be executed daily. It checks the weekday of the
121126
last login of the user with today's weekday. If they match, the re-sync is
@@ -124,34 +129,50 @@ def sync_active_users_remote_repositories():
124129
Note this is a long running task syncronizhing all the users in the same Celery process,
125130
and it will require a pretty high ``time_limit`` and ``soft_time_limit``.
126131
"""
127-
today_weekday = timezone.now().isoweekday()
128-
three_months_ago = timezone.now() - datetime.timedelta(days=90)
129-
users = User.objects.annotate(weekday=ExtractIsoWeekDay("last_login")).filter(
130-
last_login__gt=three_months_ago,
131-
socialaccount__isnull=False,
132-
weekday=today_weekday,
133-
)
134-
135-
users_count = users.count()
136-
structlog.contextvars.bind_contextvars(total_users=users_count)
137-
log.info("Triggering re-sync of RemoteRepository for active users.")
132+
# This task is expensive, and we run it daily.
133+
# We have had issues with it being triggered multiple times per day,
134+
# so we use a lock (12 hours) to avoid that.
135+
lock_id = "{0}-lock".format(self.name)
136+
with memcache_lock(lock_id, 60 * 60 * 12, self.app.oid) as acquired:
137+
if acquired:
138+
log.exception("Task has already been run recently, exiting.")
139+
return
138140

139-
for i, user in enumerate(users):
140-
structlog.contextvars.bind_contextvars(
141-
user_username=user.username,
142-
progress=f"{i}/{users_count}",
141+
today_weekday = timezone.now().isoweekday()
142+
days_ago = timezone.now() - datetime.timedelta(days=45)
143+
users = User.objects.annotate(weekday=ExtractIsoWeekDay("last_login")).filter(
144+
last_login__gt=days_ago,
145+
socialaccount__isnull=False,
146+
weekday=today_weekday,
143147
)
144148

145-
# Log an update every 50 users
146-
if i % 50 == 0:
147-
log.info("Progress on re-syncing RemoteRepository for active users.")
149+
users_count = users.count()
150+
structlog.contextvars.bind_contextvars(total_users=users_count)
151+
log.info("Triggering re-sync of RemoteRepository for active users.")
148152

149-
try:
150-
# NOTE: sync all the users/repositories in the same Celery process.
151-
# Do not trigger a new task per user.
152-
sync_remote_repositories(user.pk)
153-
except Exception:
154-
log.exception("There was a problem re-syncing RemoteRepository.")
153+
for i, user in enumerate(users.iterator()):
154+
structlog.contextvars.bind_contextvars(
155+
user_username=user.username,
156+
progress=f"{i}/{users_count}",
157+
)
158+
159+
# Log an update every 50 users
160+
if i % 50 == 0:
161+
log.info("Progress on re-syncing RemoteRepository for active users.")
162+
163+
try:
164+
# NOTE: sync all the users/repositories in the same Celery process.
165+
# Do not trigger a new task per user.
166+
# NOTE: We skip the GitHub App, since all the repositories
167+
# and permissions are kept up to date via webhooks.
168+
# Triggering a sync per-user, will re-sync the same installation
169+
# multiple times.
170+
sync_remote_repositories(
171+
user.pk,
172+
skip_githubapp=True,
173+
)
174+
except Exception:
175+
log.exception("There was a problem re-syncing RemoteRepository.")
155176

156177

157178
# TODO: remove user_pk from the signature on the next release.

readthedocs/settings/base.py

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -693,14 +693,11 @@ def BUILD_MEMORY_LIMIT(self):
693693
"schedule": crontab(minute="*/30", hour="*"),
694694
"options": {"queue": "web"},
695695
},
696-
# Stop running this task,
697-
# it was rerunning multiple times per day,
698-
# and causing celery instances to freeze up.
699-
# "every-day-resync-remote-repositories": {
700-
# "task": "readthedocs.oauth.tasks.sync_active_users_remote_repositories",
701-
# "schedule": crontab(minute=30, hour=2),
702-
# "options": {"queue": "web"},
703-
# },
696+
"every-day-resync-remote-repositories": {
697+
"task": "readthedocs.oauth.tasks.sync_active_users_remote_repositories",
698+
"schedule": crontab(minute=30, hour=2),
699+
"options": {"queue": "web"},
700+
},
704701
"every-day-email-pending-custom-domains": {
705702
"task": "readthedocs.domains.tasks.email_pending_custom_domains",
706703
"schedule": crontab(minute=0, hour=3),

0 commit comments

Comments
 (0)