1010from django .utils import timezone
1111
1212from readthedocs .api .v2 .views .integrations import ExternalVersionData
13+ from readthedocs .builds .utils import memcache_lock
1314from readthedocs .core .utils .tasks import PublicTask
1415from readthedocs .core .utils .tasks import user_id_matches_or_superuser
1516from readthedocs .core .views .hooks import VersionInfo
2627from readthedocs .oauth .notifications import MESSAGE_OAUTH_WEBHOOK_INVALID
2728from readthedocs .oauth .notifications import MESSAGE_OAUTH_WEBHOOK_NO_ACCOUNT
2829from readthedocs .oauth .notifications import MESSAGE_OAUTH_WEBHOOK_NO_PERMISSIONS
30+ from readthedocs .oauth .services import GitHubAppService
31+ from readthedocs .oauth .services import registry
2932from readthedocs .oauth .services .base import SyncServiceError
3033from readthedocs .oauth .utils import SERVICE_MAP
3134from readthedocs .projects .models import Project
3235from readthedocs .sso .models import SSOIntegration
3336from readthedocs .vcs_support .backends .git import parse_version_from_ref
3437from readthedocs .worker import app
3538
36- from .services import registry
37-
3839
3940log = structlog .get_logger (__name__ )
4041
4950 time_limit = 900 ,
5051 soft_time_limit = 600 ,
5152)
52- def sync_remote_repositories (user_id ):
53+ def sync_remote_repositories (user_id , skip_githubapp = False ):
5354 user = User .objects .filter (pk = user_id ).first ()
5455 if not user :
5556 return
5657
5758 failed_services = set ()
5859 for service_cls in registry :
60+ if skip_githubapp and service_cls == GitHubAppService :
61+ continue
62+
5963 try :
6064 service_cls .sync_user_access (user )
6165 except SyncServiceError :
@@ -110,12 +114,13 @@ def sync_remote_repositories_from_sso_organizations():
110114 queue = "web" ,
111115 time_limit = 60 * 60 * 3 , # 3h
112116 soft_time_limit = (60 * 60 * 3 ) - 5 * 60 , # 2h 55m
117+ bind = True ,
113118)
114- def sync_active_users_remote_repositories ():
119+ def sync_active_users_remote_repositories (self ):
115120 """
116121 Sync ``RemoteRepository`` for active users.
117122
118- We consider active users those that logged in at least once in the last 90 days.
123+ We consider active users those that logged in at least once in the last 45 days.
119124
120125 This task is thought to be executed daily. It checks the weekday of the
121126 last login of the user with today's weekday. If they match, the re-sync is
@@ -124,34 +129,50 @@ def sync_active_users_remote_repositories():
124129 Note this is a long running task syncronizhing all the users in the same Celery process,
125130 and it will require a pretty high ``time_limit`` and ``soft_time_limit``.
126131 """
127- today_weekday = timezone .now ().isoweekday ()
128- three_months_ago = timezone .now () - datetime .timedelta (days = 90 )
129- users = User .objects .annotate (weekday = ExtractIsoWeekDay ("last_login" )).filter (
130- last_login__gt = three_months_ago ,
131- socialaccount__isnull = False ,
132- weekday = today_weekday ,
133- )
134-
135- users_count = users .count ()
136- structlog .contextvars .bind_contextvars (total_users = users_count )
137- log .info ("Triggering re-sync of RemoteRepository for active users." )
132+ # This task is expensive, and we run it daily.
133+ # We have had issues with it being triggered multiple times per day,
134+ # so we use a lock (12 hours) to avoid that.
135+ lock_id = "{0}-lock" .format (self .name )
136+ with memcache_lock (lock_id , 60 * 60 * 12 , self .app .oid ) as acquired :
137+ if acquired :
138+ log .exception ("Task has already been run recently, exiting." )
139+ return
138140
139- for i , user in enumerate (users ):
140- structlog .contextvars .bind_contextvars (
141- user_username = user .username ,
142- progress = f"{ i } /{ users_count } " ,
141+ today_weekday = timezone .now ().isoweekday ()
142+ days_ago = timezone .now () - datetime .timedelta (days = 45 )
143+ users = User .objects .annotate (weekday = ExtractIsoWeekDay ("last_login" )).filter (
144+ last_login__gt = days_ago ,
145+ socialaccount__isnull = False ,
146+ weekday = today_weekday ,
143147 )
144148
145- # Log an update every 50 users
146- if i % 50 == 0 :
147- log .info ("Progress on re-syncing RemoteRepository for active users." )
149+ users_count = users . count ()
150+ structlog . contextvars . bind_contextvars ( total_users = users_count )
151+ log .info ("Triggering re-sync of RemoteRepository for active users." )
148152
149- try :
150- # NOTE: sync all the users/repositories in the same Celery process.
151- # Do not trigger a new task per user.
152- sync_remote_repositories (user .pk )
153- except Exception :
154- log .exception ("There was a problem re-syncing RemoteRepository." )
153+ for i , user in enumerate (users .iterator ()):
154+ structlog .contextvars .bind_contextvars (
155+ user_username = user .username ,
156+ progress = f"{ i } /{ users_count } " ,
157+ )
158+
159+ # Log an update every 50 users
160+ if i % 50 == 0 :
161+ log .info ("Progress on re-syncing RemoteRepository for active users." )
162+
163+ try :
164+ # NOTE: sync all the users/repositories in the same Celery process.
165+ # Do not trigger a new task per user.
166+ # NOTE: We skip the GitHub App, since all the repositories
167+ # and permissions are kept up to date via webhooks.
168+ # Triggering a sync per-user, will re-sync the same installation
169+ # multiple times.
170+ sync_remote_repositories (
171+ user .pk ,
172+ skip_githubapp = True ,
173+ )
174+ except Exception :
175+ log .exception ("There was a problem re-syncing RemoteRepository." )
155176
156177
157178# TODO: remove user_pk from the signature on the next release.
0 commit comments