Skip to content

Commit a8a29fe

Browse files
authored
Git services: allow updating a single repository (#12429)
When syncing permissions for SSO, we don't need to sync every repository the user has access to; we only need to update the permissions of the repositories connected to projects that belong to SSO organizations. This adds a way to update a single repository at a time, for all providers. We don't need to do this for GitHub App projects, since they are kept up to date via a webhook. Since this task should be faster now, we could run it more frequently, so permission changes are reflected faster on RTD.
1 parent 077abcf commit a8a29fe

File tree

9 files changed

+527
-85
lines changed

9 files changed

+527
-85
lines changed

readthedocs/oauth/services/base.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515

1616
from readthedocs.core.permissions import AdminPermission
1717
from readthedocs.oauth.clients import get_oauth2_client
18+
from readthedocs.oauth.models import RemoteRepository
1819

1920

2021
log = structlog.get_logger(__name__)
@@ -68,6 +69,19 @@ def sync(self):
6869
"""
6970
raise NotImplementedError
7071

72+
def update_repository(self, remote_repository: RemoteRepository):
    """
    Refresh a single repository through the service API.

    Implementations are also expected to refresh the relationship between
    the user and the repository: whether the user is an admin of it, and,
    when the user can no longer access the repository, removal of that
    relationship.

    Services that aren't linked to a user (GitHub Apps) update the
    permissions of all users that have access to the repository instead.

    :param remote_repository: The repository to update.
    :raises NotImplementedError: Always, subclasses must override this method.
    """
    raise NotImplementedError()
84+
7185
def setup_webhook(self, project, integration=None) -> bool:
7286
"""
7387
Setup webhook for project.

readthedocs/oauth/services/bitbucket.py

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,43 @@ def create_repository(self, fields, privacy=None):
145145
repository=fields["name"],
146146
)
147147

148+
def update_repository(self, remote_repository: RemoteRepository):
    """
    Update a single repository, and the user's relation to it, from Bitbucket.

    Bitbucket doesn't return the admin status of the user, so it is inferred
    by looking the repository up among the ones the user has ``admin`` access
    to, and only then among the ones the user has ``member`` access to.

    If the repository isn't found with either role, the relation between the
    user and the repository is deleted.

    :param remote_repository: The repository to update.
    """
    repo_from_admin_access = self._get_repository(remote_repository, role="admin")
    # The member lookup is only needed as a fallback: when the admin lookup
    # succeeds its result is the one used, so skip the extra API request.
    repo = repo_from_admin_access or self._get_repository(remote_repository, role="member")
    relation = remote_repository.get_remote_repository_relation(self.user, self.account)
    if not repo:
        log.info(
            "User no longer has access to the repository, removing remote relationship.",
            remote_repository_id=remote_repository.remote_id,
        )
        relation.delete()
        return

    self._update_repository_from_fields(remote_repository, repo)
    # The user is an admin only if the repository showed up in the admin lookup.
    relation.admin = bool(repo_from_admin_access)
    relation.save()
166+
167+
def _get_repository(self, remote_repository, role):
    """
    Return the repository matching the remote ID, if the user has ``role`` on it.

    Bitbucket doesn't provide an endpoint to get a single repository by its ID
    (it requires the group ID as well), nor does it return the user's role in
    the repository. So the repositories are listed filtered by role and UUID,
    and the first one whose UUID matches is returned.

    :param remote_repository: The repository to look for.
    :param role: Role passed to the API as a filter (the caller uses ``admin`` and ``member``).
    :returns: The repository data from the API, or ``None`` if there is no match.
    """
    wanted_uuid = remote_repository.remote_id
    candidates = self.paginate(
        f"{self.base_api_url}/2.0/repositories/",
        role=role,
        q=f'uuid="{wanted_uuid}"',
    )
    return next(
        (candidate for candidate in candidates if candidate["uuid"] == wanted_uuid),
        None,
    )
184+
148185
def _update_repository_from_fields(self, repo, fields):
149186
# All repositories are created under a workspace,
150187
# which we consider an organization.

readthedocs/oauth/services/github.py

Lines changed: 55 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,59 @@ def sync_organizations(self):
8282

8383
return organization_remote_ids, []
8484

85+
def _has_access_to_repository(self, fields):
    """
    Check if the user has access to the repository, and if they are an admin.

    :param fields: Repository data from the GitHub API. A missing or ``null``
        ``permissions`` entry is treated as no permissions at all.
    :returns: A ``(has_access, is_admin)`` tuple of booleans.
    """
    # ``or {}`` also covers an explicit ``"permissions": null`` in the response.
    permissions = fields.get("permissions") or {}
    # If the repo is public, the user can still access it,
    # so we need to check if the user has any access
    # to the repository, even if they are not an admin.
    # NOTE(review): ``pull`` is not among the checked keys, presumably because
    # it is also granted on public repositories -- confirm.
    has_access = any(
        permissions.get(key, False) for key in ("admin", "maintain", "push", "triage")
    )
    # Coerce to a real boolean, since the value ends up in ``relation.admin``.
    is_admin = bool(permissions.get("admin", False))
    return has_access, is_admin
96+
97+
def update_repository(self, remote_repository: RemoteRepository):
    """
    Update a single repository, and the user's relation to it, from GitHub.

    The relation between the user and the repository is deleted when the
    API answers 403/404, or when the returned permissions show the user
    no longer has access. A 403 that is identified as a rate limit response
    is treated as a transient error instead: nothing is updated or deleted.

    Any other non-200 response is logged and ignored.

    :param remote_repository: The repository to update.
    """
    resp = self.session.get(f"{self.base_api_url}/repositories/{remote_repository.remote_id}")

    # GitHub also answers 403 when a primary or secondary rate limit is hit.
    # That says nothing about the user's access, so don't remove the relation.
    rate_limited = resp.status_code == 403 and (
        resp.headers.get("x-ratelimit-remaining") == "0" or "retry-after" in resp.headers
    )
    if rate_limited:
        log.warning(
            "Rate limited while fetching repository from GitHub, skipping update.",
            remote_repository=remote_repository.remote_id,
            status_code=resp.status_code,
        )
        return

    # The repo was deleted, or the user does not have access to it.
    # In any case, we remove the user relationship.
    if resp.status_code in [403, 404]:
        log.info(
            "User no longer has access to the repository, removing remote relationship.",
            remote_repository=remote_repository.remote_id,
        )
        remote_repository.get_remote_repository_relation(self.user, self.account).delete()
        return

    if resp.status_code != 200:
        log.warning(
            "Error fetching repository from GitHub",
            remote_repository=remote_repository.remote_id,
            status_code=resp.status_code,
        )
        return

    data = resp.json()
    self._update_repository_from_fields(remote_repository, data)

    has_access, is_admin = self._has_access_to_repository(data)
    relation = remote_repository.get_remote_repository_relation(
        self.user,
        self.account,
    )
    if not has_access:
        # If the user no longer has access to the repository,
        # we remove the remote relationship.
        log.info(
            "User no longer has access to the repository, removing remote relationship.",
            remote_repository=remote_repository.remote_id,
        )
        relation.delete()
    else:
        relation.admin = is_admin
        relation.save()
137+
85138
def create_repository(self, fields, privacy=None):
86139
"""
87140
Update or create a repository from GitHub API response.
@@ -106,7 +159,8 @@ def create_repository(self, fields, privacy=None):
106159
remote_repository_relation = repo.get_remote_repository_relation(
107160
self.user, self.account
108161
)
109-
remote_repository_relation.admin = fields.get("permissions", {}).get("admin", False)
162+
_, is_admin = self._has_access_to_repository(fields)
163+
remote_repository_relation.admin = is_admin
110164
remote_repository_relation.save()
111165

112166
return repo

readthedocs/oauth/services/githubapp.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -240,6 +240,18 @@ def sync(self):
240240
).values_list("remote_id", flat=True)
241241
self.installation.delete_repositories(repos_to_delete)
242242

243+
def update_repository(self, remote_repository: RemoteRepository):
    """
    Update one repository, identified by the given remote repository.

    This delegates to ``update_or_create_repositories`` with the remote ID
    of the repository as the only element.

    .. note::

       Unlike the other providers, the ``remote_repository`` object passed in
       isn't updated by this method. Fetch it again from the database
       if you need the updated object.

    :param remote_repository: The repository to update.
    """
    repository_ids = [remote_repository.remote_id]
    self.update_or_create_repositories(repository_ids)
254+
243255
def update_or_create_repositories(self, repository_ids: list[int]):
244256
"""Update or create repositories from the given list of repository IDs."""
245257
repositories_to_delete = []

readthedocs/oauth/services/gitlab.py

Lines changed: 58 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,62 @@ def sync_organizations(self):
139139

140140
return organization_remote_ids, []
141141

142+
def _has_access_to_repository(self, fields):
    """
    Check if the user has access to the repository, and if they are an admin.

    Access levels are read from ``permissions.project_access`` and
    ``permissions.group_access``. A missing or ``null`` value at any of these
    levels counts as ``PERMISSION_NO_ACCESS``.

    :param fields: Project data from the GitLab API.
    :returns: A ``(has_access, is_admin)`` tuple of booleans. The user is
        considered an admin when either access level is maintainer or owner.
    """
    admin_levels = (self.PERMISSION_MAINTAINER, self.PERMISSION_OWNER)
    # ``or {}`` guards against explicit ``null`` values, not only missing keys.
    permissions = fields.get("permissions") or {}
    project_access = permissions.get("project_access") or {}
    project_access_level = project_access.get("access_level", self.PERMISSION_NO_ACCESS)
    group_access = permissions.get("group_access") or {}
    group_access_level = group_access.get("access_level", self.PERMISSION_NO_ACCESS)
    has_access = (
        group_access_level != self.PERMISSION_NO_ACCESS
        or project_access_level != self.PERMISSION_NO_ACCESS
    )
    is_admin = project_access_level in admin_levels or group_access_level in admin_levels
    return has_access, is_admin
156+
157+
def update_repository(self, remote_repository: RemoteRepository):
    """
    Update a single repository, and the user's relation to it, from GitLab.

    The relation between the user and the repository is deleted when the
    API answers 403/404, or when the returned permissions show the user
    no longer has access. Any other non-200 response is logged and ignored.

    :param remote_repository: The repository to update.
    """
    resp = self.session.get(
        f"{self.base_api_url}/api/v4/projects/{remote_repository.remote_id}"
    )

    # The project is gone or not visible to the user anymore.
    if resp.status_code in [403, 404]:
        log.info(
            "User no longer has access to the repository, removing remote relationship.",
            remote_repository_id=remote_repository.remote_id,
        )
        remote_repository.get_remote_repository_relation(self.user, self.account).delete()
        return

    if resp.status_code != 200:
        log.warning(
            "Error fetching repository from GitLab",
            remote_repository_id=remote_repository.remote_id,
            status_code=resp.status_code,
        )
        return

    data = resp.json()
    self._update_repository_from_fields(remote_repository, data)

    has_access, is_admin = self._has_access_to_repository(data)
    relation = remote_repository.get_remote_repository_relation(
        self.user,
        self.account,
    )
    if not has_access:
        # If the user no longer has access to the repository,
        # we remove the remote relationship.
        log.info(
            "User no longer has access to the repository, removing remote relationship.",
            # Same key as the other log calls in this method.
            remote_repository_id=remote_repository.remote_id,
        )
        relation.delete()
    else:
        relation.admin = is_admin
        relation.save()
197+
142198
def create_repository(self, fields, privacy=None):
143199
"""
144200
Update or create a repository from GitLab API response.
@@ -168,23 +224,8 @@ def create_repository(self, fields, privacy=None):
168224
remote_repository_relation = repo.get_remote_repository_relation(
169225
self.user, self.account
170226
)
171-
172-
project_access_level = group_access_level = self.PERMISSION_NO_ACCESS
173-
174-
project_access = fields.get("permissions", {}).get("project_access", {})
175-
if project_access:
176-
project_access_level = project_access.get("access_level", self.PERMISSION_NO_ACCESS)
177-
178-
group_access = fields.get("permissions", {}).get("group_access", {})
179-
if group_access:
180-
group_access_level = group_access.get("access_level", self.PERMISSION_NO_ACCESS)
181-
182-
remote_repository_relation.admin = any(
183-
[
184-
project_access_level in (self.PERMISSION_MAINTAINER, self.PERMISSION_OWNER),
185-
group_access_level in (self.PERMISSION_MAINTAINER, self.PERMISSION_OWNER),
186-
]
187-
)
227+
_, is_admin = self._has_access_to_repository(fields)
228+
remote_repository_relation.admin = is_admin
188229
remote_repository_relation.save()
189230

190231
return repo

readthedocs/oauth/tasks.py

Lines changed: 31 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@
1010
from django.utils import timezone
1111

1212
from readthedocs.api.v2.views.integrations import ExternalVersionData
13-
from readthedocs.core.permissions import AdminPermission
1413
from readthedocs.core.utils.tasks import PublicTask
1514
from readthedocs.core.utils.tasks import user_id_matches_or_superuser
1615
from readthedocs.core.views.hooks import VersionInfo
@@ -21,13 +20,14 @@
2120
from readthedocs.core.views.hooks import trigger_sync_versions
2221
from readthedocs.notifications.models import Notification
2322
from readthedocs.oauth.clients import get_gh_app_client
23+
from readthedocs.oauth.constants import GITHUB_APP
2424
from readthedocs.oauth.models import GitHubAppInstallation
25+
from readthedocs.oauth.models import RemoteRepository
2526
from readthedocs.oauth.notifications import MESSAGE_OAUTH_WEBHOOK_INVALID
2627
from readthedocs.oauth.notifications import MESSAGE_OAUTH_WEBHOOK_NO_ACCOUNT
2728
from readthedocs.oauth.notifications import MESSAGE_OAUTH_WEBHOOK_NO_PERMISSIONS
2829
from readthedocs.oauth.services.base import SyncServiceError
2930
from readthedocs.oauth.utils import SERVICE_MAP
30-
from readthedocs.organizations.models import Organization
3131
from readthedocs.projects.models import Project
3232
from readthedocs.sso.models import SSOIntegration
3333
from readthedocs.vcs_support.backends.git import parse_version_from_ref
@@ -70,49 +70,40 @@ def sync_remote_repositories(user_id):
7070

7171

7272
@app.task(queue="web")
def sync_remote_repositories_from_sso_organizations():
    """
    Re-sync all remote repositories from organizations with SSO enabled.

    This is useful, so all the remote repositories are up to date with the
    latest permissions from their providers.

    We ignore repositories from GitHub App installations, since they are kept
    up to date via webhooks. For all the other services, we need to sync the
    repository for each user that has access to it, since we need to check for
    their permissions individually.

    A failure while updating a repository for one user is logged and doesn't
    stop the remaining users and repositories from being processed.
    """
    repositories = (
        RemoteRepository.objects.filter(
            projects__organizations__ssointegration__provider=SSOIntegration.PROVIDER_ALLAUTH,
        )
        .exclude(vcs_provider=GITHUB_APP)
        .distinct()
    )
    for repository in repositories.iterator():
        service_class = repository.get_service_class()
        relations = repository.remote_repository_relations.select_related("user", "account")
        for relation in relations.iterator():
            service = service_class(user=relation.user, account=relation.account)
            try:
                service.update_repository(repository)
            except Exception:
                # Best effort: keep going with the other relations,
                # but record the exception so the failure can be diagnosed.
                log.info(
                    "There was a problem updating repository for user.",
                    user_username=relation.user.username,
                    account_uid=relation.account.uid,
                    repository_remote_id=repository.remote_id,
                    repository_name=repository.full_name,
                    exc_info=True,
                )
116107

117108

118109
@app.task(

0 commit comments

Comments
 (0)