Skip to content

Commit cc69f23

Browse files
stsewdCopilot
andauthored
Git services: improve sync repos (#12428)
Common changes across providers: - A new method to just update a remote repository from the data returned by the API was added, this is in order to make it easy to re-use this method when support for updating a single repo is added (will do this in another PR on top of this one). - We were iterating over repositories twice for GitHub and BB, once by listing all the repositories the user has access to, and then again by iterating over all the repositories from the organizations the user has access to, this was also incorrectly adding a relationship between the user and repositories the use didn't actually have access, as that endpoint will return all public repositories, even if the user doesn't belong to it. - sync_organizations now basically just creates the user-organization relation only. - Since several repositories can belong to the same organization, we don't really need to update the same organization over and over again, just once, so a cache was introduced for it. This is also a reason why the creation of the user-organization relationship was moved outside `create_organization`. Specific changes: - For Bitbucket, we weren't updating a repository if the workspace it belongs was changed, we now update those. This is the same problem we had with Gitlab #12233. - For Bitbucket, we were creating the repo-user relationship and then updating that relationship for the repositories where the user is an admin. This wasn't resetting the admin status to false for repos the user no longer had that permission, we now always default to admin=False, and then we update the repositories where the user is admin in the other call. - In Bitbucket, we don't have organizations, we have projects and workspaces, we are using workspaces as our organizations. But in BB, every repository is linked to a workspace (a user can be a workspace), so we always create an organization for each repository. - Since all other providers are fetching all repositories in the `sync_repositories` method, I changed Gitlab do also do that. In order to do that I reverted to use the previous /projects endpoint (#12233). - GitLab sometimes returns avatars with a relative URL `/~/uploads...` instead of including the domain as well, so we are normalizing all URLs from Gitlab now. --------- Co-authored-by: Copilot <[email protected]>
1 parent 8a089a3 commit cc69f23

File tree

6 files changed

+344
-333
lines changed

6 files changed

+344
-333
lines changed

readthedocs/oauth/services/base.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,10 @@ class UserService(Service):
144144
def __init__(self, user, account):
145145
self.user = user
146146
self.account = account
147+
# Cache organizations to avoid multiple DB hits
148+
# when syncing repositories that belong to the same organization.
149+
# Used by `create_organization` method in subclasses.
150+
self._organizations_cache = {}
147151
structlog.contextvars.bind_contextvars(
148152
user_username=self.user.username,
149153
social_provider=self.allauth_provider.id,

readthedocs/oauth/services/bitbucket.py

Lines changed: 65 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -63,24 +63,26 @@ def sync_repositories(self):
6363
"https://bitbucket.org/api/2.0/repositories/",
6464
role="admin",
6565
)
66-
admin_repo_relations = RemoteRepositoryRelation.objects.filter(
66+
RemoteRepositoryRelation.objects.filter(
6767
user=self.user,
6868
account=self.account,
6969
remote_repository__vcs_provider=self.vcs_provider_slug,
7070
remote_repository__remote_id__in=[r["uuid"] for r in resp],
71-
)
72-
for remote_repository_relation in admin_repo_relations:
73-
remote_repository_relation.admin = True
74-
remote_repository_relation.save()
71+
).update(admin=True)
7572
except (TypeError, ValueError):
76-
pass
73+
log.warning("Error syncing Bitbucket admin repositories")
7774

7875
return remote_ids
7976

8077
def sync_organizations(self):
81-
"""Sync Bitbucket workspaces(our RemoteOrganization) and workspace repositories."""
78+
"""
79+
Sync Bitbucket workspaces (organizations).
80+
81+
This method only creates the relationships between the
82+
organizations and the user, as all the repositories
83+
are already created in the sync_repositories method.
84+
"""
8285
organization_remote_ids = []
83-
repository_remote_ids = []
8486

8587
try:
8688
workspaces = self.paginate(
@@ -89,18 +91,8 @@ def sync_organizations(self):
8991
)
9092
for workspace in workspaces:
9193
remote_organization = self.create_organization(workspace)
92-
repos = self.paginate(workspace["links"]["repositories"]["href"])
93-
94+
remote_organization.get_remote_organization_relation(self.user, self.account)
9495
organization_remote_ids.append(remote_organization.remote_id)
95-
96-
for repo in repos:
97-
remote_repository = self.create_repository(
98-
repo,
99-
organization=remote_organization,
100-
)
101-
if remote_repository:
102-
repository_remote_ids.append(remote_repository.remote_id)
103-
10496
except ValueError:
10597
log.warning("Error syncing Bitbucket organizations")
10698
raise SyncServiceError(
@@ -109,9 +101,9 @@ def sync_organizations(self):
109101
)
110102
)
111103

112-
return organization_remote_ids, repository_remote_ids
104+
return organization_remote_ids, []
113105

114-
def create_repository(self, fields, privacy=None, organization=None):
106+
def create_repository(self, fields, privacy=None):
115107
"""
116108
Update or create a repository from Bitbucket API response.
117109
@@ -136,42 +128,15 @@ def create_repository(self, fields, privacy=None, organization=None):
136128
repo, _ = RemoteRepository.objects.get_or_create(
137129
remote_id=fields["uuid"], vcs_provider=self.vcs_provider_slug
138130
)
139-
repo.get_remote_repository_relation(self.user, self.account)
131+
self._update_repository_from_fields(repo, fields)
140132

141-
if repo.organization and repo.organization != organization:
142-
log.debug(
143-
"Not importing repository because mismatched orgs.",
144-
repository=fields["name"],
145-
)
146-
return None
147-
148-
repo.organization = organization
149-
repo.name = fields["name"]
150-
repo.full_name = fields["full_name"]
151-
repo.description = fields["description"]
152-
repo.private = fields["is_private"]
153-
154-
# Default to HTTPS, use SSH for private repositories
155-
clone_urls = {u["name"]: u["href"] for u in fields["links"]["clone"]}
156-
repo.clone_url = self.https_url_pattern.sub(
157-
"https://bitbucket.org/",
158-
clone_urls.get("https"),
133+
# The repositories API doesn't return the admin status of the user,
134+
# so we default to False, and then update it later using another API call.
135+
remote_repository_relation = repo.get_remote_repository_relation(
136+
self.user, self.account
159137
)
160-
repo.ssh_url = clone_urls.get("ssh")
161-
if repo.private:
162-
repo.clone_url = repo.ssh_url
163-
164-
repo.html_url = fields["links"]["html"]["href"]
165-
repo.vcs = fields["scm"]
166-
mainbranch = fields.get("mainbranch") or {}
167-
repo.default_branch = mainbranch.get("name")
168-
169-
avatar_url = fields["links"]["avatar"]["href"] or ""
170-
repo.avatar_url = re.sub(r"\/16\/$", r"/32/", avatar_url)
171-
if not repo.avatar_url:
172-
repo.avatar_url = self.default_user_avatar_url
173-
174-
repo.save()
138+
remote_repository_relation.admin = False
139+
remote_repository_relation.save()
175140

176141
return repo
177142

@@ -180,17 +145,59 @@ def create_repository(self, fields, privacy=None, organization=None):
180145
repository=fields["name"],
181146
)
182147

148+
def _update_repository_from_fields(self, repo, fields):
149+
# All repositories are created under a workspace,
150+
# which we consider an organization.
151+
organization = self.create_organization(fields["workspace"])
152+
repo.organization = organization
153+
repo.name = fields["name"]
154+
repo.full_name = fields["full_name"]
155+
repo.description = fields["description"]
156+
repo.private = fields["is_private"]
157+
158+
# Default to HTTPS, use SSH for private repositories
159+
clone_urls = {u["name"]: u["href"] for u in fields["links"]["clone"]}
160+
repo.clone_url = self.https_url_pattern.sub(
161+
"https://bitbucket.org/",
162+
clone_urls.get("https"),
163+
)
164+
repo.ssh_url = clone_urls.get("ssh")
165+
if repo.private:
166+
repo.clone_url = repo.ssh_url
167+
168+
repo.html_url = fields["links"]["html"]["href"]
169+
repo.vcs = fields["scm"]
170+
mainbranch = fields.get("mainbranch") or {}
171+
repo.default_branch = mainbranch.get("name")
172+
173+
avatar_url = fields["links"]["avatar"]["href"] or ""
174+
repo.avatar_url = re.sub(r"\/16\/$", r"/32/", avatar_url)
175+
if not repo.avatar_url:
176+
repo.avatar_url = self.default_user_avatar_url
177+
178+
repo.save()
179+
183180
def create_organization(self, fields):
184181
"""
185182
Update or create remote organization from Bitbucket API response.
186183
187184
:param fields: dictionary response of data from API
188185
:rtype: RemoteOrganization
186+
187+
.. note::
188+
189+
This method caches organizations by their remote ID to avoid
190+
unnecessary database queries, specially when creating
191+
multiple repositories that belong to the same organization.
189192
"""
193+
organization_id = fields["uuid"]
194+
if organization_id in self._organizations_cache:
195+
return self._organizations_cache[organization_id]
196+
190197
organization, _ = RemoteOrganization.objects.get_or_create(
191-
remote_id=fields["uuid"], vcs_provider=self.vcs_provider_slug
198+
remote_id=organization_id,
199+
vcs_provider=self.vcs_provider_slug,
192200
)
193-
organization.get_remote_organization_relation(self.user, self.account)
194201

195202
organization.slug = fields.get("slug")
196203
organization.name = fields.get("name")
@@ -201,6 +208,7 @@ def create_organization(self, fields):
201208

202209
organization.save()
203210

211+
self._organizations_cache[organization_id] = organization
204212
return organization
205213

206214
def get_next_url_to_paginate(self, response):

readthedocs/oauth/services/github.py

Lines changed: 60 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -56,30 +56,22 @@ def sync_repositories(self):
5656
return remote_ids
5757

5858
def sync_organizations(self):
59-
"""Sync organizations from GitHub API."""
59+
"""
60+
Sync organizations from GitHub API.
61+
62+
This method only creates the relationships between the
63+
organizations and the user, as all the repositories
64+
are already created in the sync_repositories method.
65+
"""
6066
organization_remote_ids = []
61-
repository_remote_ids = []
6267

6368
try:
6469
orgs = self.paginate(f"{self.base_api_url}/user/orgs", per_page=100)
6570
for org in orgs:
6671
org_details = self.session.get(org["url"]).json()
67-
remote_organization = self.create_organization(
68-
org_details,
69-
create_user_relationship=True,
70-
)
72+
remote_organization = self.create_organization(org_details)
73+
remote_organization.get_remote_organization_relation(self.user, self.account)
7174
organization_remote_ids.append(remote_organization.remote_id)
72-
73-
org_url = org["url"]
74-
org_repos = self.paginate(
75-
f"{org_url}/repos",
76-
per_page=100,
77-
)
78-
for repo in org_repos:
79-
remote_repository = self.create_repository(repo)
80-
if remote_repository:
81-
repository_remote_ids.append(remote_repository.remote_id)
82-
8375
except (TypeError, ValueError):
8476
log.warning("Error syncing GitHub organizations")
8577
raise SyncServiceError(
@@ -88,16 +80,14 @@ def sync_organizations(self):
8880
)
8981
)
9082

91-
return organization_remote_ids, repository_remote_ids
83+
return organization_remote_ids, []
9284

9385
def create_repository(self, fields, privacy=None):
9486
"""
9587
Update or create a repository from GitHub API response.
9688
9789
:param fields: dictionary of response data from API
9890
:param privacy: privacy level to support
99-
:param organization: remote organization to associate with
100-
:type organization: RemoteOrganization
10191
:rtype: RemoteRepository
10292
"""
10393
privacy = privacy or settings.DEFAULT_PRIVACY_LEVEL
@@ -111,47 +101,7 @@ def create_repository(self, fields, privacy=None):
111101
remote_id=str(fields["id"]),
112102
vcs_provider=self.vcs_provider_slug,
113103
)
114-
115-
owner_type = fields["owner"]["type"]
116-
organization = None
117-
if owner_type == "Organization":
118-
# We aren't creating a remote relationship between the current user
119-
# and the organization, since the user can have access to the repository,
120-
# but not to the organization.
121-
organization = self.create_organization(
122-
fields=fields["owner"],
123-
create_user_relationship=False,
124-
)
125-
126-
# If there is an organization associated with this repository,
127-
# attach the organization to the repository.
128-
if organization and owner_type == "Organization":
129-
repo.organization = organization
130-
131-
# If the repository belongs to a user,
132-
# remove the organization linked to the repository.
133-
if owner_type == "User":
134-
repo.organization = None
135-
136-
repo.name = fields["name"]
137-
repo.full_name = fields["full_name"]
138-
repo.description = fields["description"]
139-
repo.ssh_url = fields["ssh_url"]
140-
repo.html_url = fields["html_url"]
141-
repo.private = fields["private"]
142-
repo.vcs = "git"
143-
repo.avatar_url = fields.get("owner", {}).get("avatar_url")
144-
repo.default_branch = fields.get("default_branch")
145-
146-
if repo.private:
147-
repo.clone_url = fields["ssh_url"]
148-
else:
149-
repo.clone_url = fields["clone_url"]
150-
151-
if not repo.avatar_url:
152-
repo.avatar_url = self.default_user_avatar_url
153-
154-
repo.save()
104+
self._update_repository_from_fields(repo, fields)
155105

156106
remote_repository_relation = repo.get_remote_repository_relation(
157107
self.user, self.account
@@ -166,7 +116,43 @@ def create_repository(self, fields, privacy=None):
166116
repository=fields["name"],
167117
)
168118

169-
def create_organization(self, fields, create_user_relationship=False):
119+
def _update_repository_from_fields(self, repo, fields):
120+
owner_type = fields["owner"]["type"]
121+
organization = None
122+
if owner_type == "Organization":
123+
organization = self.create_organization(fields=fields["owner"])
124+
125+
# If there is an organization associated with this repository,
126+
# attach the organization to the repository.
127+
if organization and owner_type == "Organization":
128+
repo.organization = organization
129+
130+
# If the repository belongs to a user,
131+
# remove the organization linked to the repository.
132+
if owner_type == "User":
133+
repo.organization = None
134+
135+
repo.name = fields["name"]
136+
repo.full_name = fields["full_name"]
137+
repo.description = fields["description"]
138+
repo.ssh_url = fields["ssh_url"]
139+
repo.html_url = fields["html_url"]
140+
repo.private = fields["private"]
141+
repo.vcs = "git"
142+
repo.avatar_url = fields.get("owner", {}).get("avatar_url")
143+
repo.default_branch = fields.get("default_branch")
144+
145+
if repo.private:
146+
repo.clone_url = fields["ssh_url"]
147+
else:
148+
repo.clone_url = fields["clone_url"]
149+
150+
if not repo.avatar_url:
151+
repo.avatar_url = self.default_user_avatar_url
152+
153+
repo.save()
154+
155+
def create_organization(self, fields):
170156
"""
171157
Update or create remote organization from GitHub API response.
172158
@@ -175,13 +161,21 @@ def create_organization(self, fields, create_user_relationship=False):
175161
organization and the current user. If `False`, only the `RemoteOrganization` object
176162
will be created/updated.
177163
:rtype: RemoteOrganization
164+
165+
.. note::
166+
167+
This method caches organizations by their remote ID to avoid
168+
unnecessary database queries, specially when creating
169+
multiple repositories that belong to the same organization.
178170
"""
171+
organization_id = str(fields["id"])
172+
if organization_id in self._organizations_cache:
173+
return self._organizations_cache[organization_id]
174+
179175
organization, _ = RemoteOrganization.objects.get_or_create(
180-
remote_id=str(fields["id"]),
176+
remote_id=organization_id,
181177
vcs_provider=self.vcs_provider_slug,
182178
)
183-
if create_user_relationship:
184-
organization.get_remote_organization_relation(self.user, self.account)
185179

186180
organization.url = fields.get("html_url")
187181
# fields['login'] contains GitHub Organization slug
@@ -195,6 +189,7 @@ def create_organization(self, fields, create_user_relationship=False):
195189

196190
organization.save()
197191

192+
self._organizations_cache[organization_id] = organization
198193
return organization
199194

200195
def get_next_url_to_paginate(self, response):

0 commit comments

Comments
 (0)