Skip to content

Commit 54ef16e

Browse files
authored
Addons: improve performance when versions have downloads (#12360)
- This call for getting the project domain wasn't cached, so it was creating a new resolver for each download type of each version, resulting in several queries being done. This was extremely slow on .com, where the resolver needs to check for the organization subscription. Now the resolver is shared across all calls. - The resolver cache was updated to not have a limit, so we can cache more results per instance. - When getting the version from a project, we were calling Versions.objects instead of projects.versions, when using the later, the project object will be shared across all versions, when using the former, a new instance will be created for each version, which is slow and doesn't preserve any cached attributes. We should remove the `project` option from the version querysets, so we are forced to always use project.versions (will do that in another PR). All these improvements resulted in going down from 71 queries for a project with 10 versions with downloads to just 13, and this number is constant, if more versions are added, we will always have 13 queries.
1 parent bc63837 commit 54ef16e

File tree

8 files changed

+184
-134
lines changed

8 files changed

+184
-134
lines changed

readthedocs/api/v3/serializers.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -362,7 +362,7 @@ def __init__(self, *args, resolver=None, **kwargs):
362362
super().__init__(*args, **kwargs)
363363

364364
def get_downloads(self, obj):
365-
downloads = obj.get_downloads()
365+
downloads = obj.get_downloads(resolver=self.resolver)
366366
data = {}
367367

368368
for k, v in downloads.items():

readthedocs/builds/models.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -489,7 +489,7 @@ def get_subdomain_url(self):
489489
external=external,
490490
)
491491

492-
def get_downloads(self, pretty=False):
492+
def get_downloads(self, pretty=False, resolver=None):
493493
project = self.project
494494
data = {}
495495

@@ -500,17 +500,20 @@ def prettify(k):
500500
data[prettify("PDF")] = project.get_production_media_url(
501501
"pdf",
502502
self.slug,
503+
resolver=resolver,
503504
)
504505

505506
if self.has_htmlzip:
506507
data[prettify("HTML")] = project.get_production_media_url(
507508
"htmlzip",
508509
self.slug,
510+
resolver=resolver,
509511
)
510512
if self.has_epub:
511513
data[prettify("Epub")] = project.get_production_media_url(
512514
"epub",
513515
self.slug,
516+
resolver=resolver,
514517
)
515518
return data
516519

readthedocs/core/resolver.py

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
"""URL resolver for documentation."""
22

3-
from functools import lru_cache
3+
from functools import cache
44
from urllib.parse import urlunparse
55

66
import structlog
@@ -56,6 +56,16 @@ class Resolver:
5656
/docs/<project_slug>/projects/<subproject_slug>/<lang>/<version>/<filename>
5757
# Subproject Single Version
5858
/docs/<project_slug>/projects/<subproject_slug>/<filename>
59+
60+
.. note::
61+
62+
Several methods ara cached per each instance of the resolver
63+
to avoid hitting the database multiple times for the same project.
64+
65+
A global instance of the resolver shouldn't be used,
66+
as resources can change, and results from the resolver will be out of date.
67+
Instead, a shared instance of the resolver should be used
68+
when doing multiple resolutions for the same set of projects/versions.
5969
"""
6070

6171
def base_resolve_path(
@@ -169,7 +179,7 @@ def resolve_project(self, project, filename="/"):
169179
protocol = "https" if use_https else "http"
170180
return urlunparse((protocol, domain, filename, "", "", ""))
171181

172-
@lru_cache(maxsize=1)
182+
@cache
173183
def _get_project_domain(self, project, external_version_slug=None, use_canonical_domain=True):
174184
"""
175185
Get the domain from where the documentation of ``project`` is served from.
@@ -178,7 +188,7 @@ def _get_project_domain(self, project, external_version_slug=None, use_canonical
178188
:param bool use_canonical_domain: If `True` use its canonical custom domain if available.
179189
:returns: Tuple of ``(domain, use_https)``.
180190
181-
Note that we are using ``lru_cache`` decorator on this function.
191+
Note that we are using ``cache`` decorator on this function.
182192
This is useful when generating the flyout addons response since we call
183193
``resolver.resolve`` multi times for the same ``Project``.
184194
This cache avoids hitting the DB to get the canonical custom domain over and over again.
@@ -267,7 +277,7 @@ def get_subproject_url_prefix(self, project, external_version_slug=None):
267277
path = project.subproject_prefix
268278
return urlunparse((protocol, domain, path, "", "", ""))
269279

270-
@lru_cache(maxsize=1)
280+
@cache
271281
def _get_canonical_project(self, project):
272282
"""
273283
Get the parent project and subproject relationship from the canonical project of `project`.
@@ -339,7 +349,7 @@ def _get_project_subdomain(self, project):
339349
subdomain_slug = project.slug.replace("_", "-")
340350
return "{}.{}".format(subdomain_slug, settings.PUBLIC_DOMAIN)
341351

342-
@lru_cache(maxsize=1)
352+
@cache
343353
def _is_external(self, project, version_slug):
344354
type_ = project.versions.values_list("type", flat=True).filter(slug=version_slug).first()
345355
return type_ == EXTERNAL

readthedocs/projects/models.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -763,11 +763,11 @@ def get_storage_path(self, type_, version_slug=LATEST, include_file=True, versio
763763
)
764764
return folder_path
765765

766-
def get_production_media_url(self, type_, version_slug):
766+
def get_production_media_url(self, type_, version_slug, resolver=None):
767767
"""Get the URL for downloading a specific media file."""
768768
# Use project domain for full path --same domain as docs
769769
# (project-slug.{PUBLIC_DOMAIN} or docs.project.com)
770-
domain = self.subdomain()
770+
domain = self.subdomain(resolver=resolver)
771771

772772
# NOTE: we can't use ``reverse('project_download_media')`` here
773773
# because this URL only exists in El Proxito and this method is
@@ -885,20 +885,20 @@ def supports_translations(self):
885885
"""Return whether or not this project supports translations."""
886886
return self.versioning_scheme == MULTIPLE_VERSIONS_WITH_TRANSLATIONS
887887

888-
def subdomain(self, use_canonical_domain=True):
888+
def subdomain(self, use_canonical_domain=True, resolver=None):
889889
"""Get project subdomain from resolver."""
890-
return Resolver().get_domain_without_protocol(
891-
self, use_canonical_domain=use_canonical_domain
892-
)
890+
resolver = resolver or Resolver()
891+
return resolver.get_domain_without_protocol(self, use_canonical_domain=use_canonical_domain)
893892

894-
def get_downloads(self):
893+
def get_downloads(self, resolver=None):
895894
downloads = {}
896895
default_version = self.get_default_version()
897896

898897
for type_ in ("htmlzip", "epub", "pdf"):
899898
downloads[type_] = self.get_production_media_url(
900899
type_,
901900
default_version,
901+
resolver=resolver,
902902
)
903903

904904
return downloads

readthedocs/proxito/tests/test_hosting.py

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -384,6 +384,36 @@ def test_flyout_downloads(self):
384384
}
385385
assert r.json()["versions"]["current"]["downloads"] == expected
386386

387+
def test_number_of_queries_versions_with_downloads(self):
388+
for i in range(10):
389+
fixture.get(
390+
Version,
391+
project=self.project,
392+
privacy_level=PUBLIC,
393+
slug=f"offline-{i}",
394+
verbose_name=f"offline-{i}",
395+
built=True,
396+
has_pdf=True,
397+
has_epub=True,
398+
has_htmlzip=True,
399+
active=True,
400+
)
401+
402+
with self.assertNumQueries(13):
403+
r = self.client.get(
404+
reverse("proxito_readthedocs_docs_addons"),
405+
{
406+
"url": "https://project.dev.readthedocs.io/en/offline/",
407+
"client-version": "0.6.0",
408+
"api-version": "1.0.0",
409+
},
410+
secure=True,
411+
headers={
412+
"host": "project.dev.readthedocs.io",
413+
},
414+
)
415+
assert r.status_code == 200
416+
387417
def test_flyout_single_version_project(self):
388418
self.version.has_pdf = True
389419
self.version.has_epub = True
@@ -1042,7 +1072,7 @@ def test_file_tree_diff(self, get_manifest):
10421072
],
10431073
),
10441074
]
1045-
with self.assertNumQueries(22):
1075+
with self.assertNumQueries(19):
10461076
r = self.client.get(
10471077
reverse("proxito_readthedocs_docs_addons"),
10481078
{

readthedocs/proxito/views/hosting.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
from readthedocs.api.v3.serializers import RelatedProjectSerializer
2323
from readthedocs.api.v3.serializers import VersionSerializer
2424
from readthedocs.builds.constants import BUILD_STATE_FINISHED
25+
from readthedocs.builds.constants import INTERNAL
2526
from readthedocs.builds.constants import LATEST
2627
from readthedocs.builds.models import Build
2728
from readthedocs.builds.models import Version
@@ -307,7 +308,9 @@ def _get_versions(self, request, project):
307308
- They are active
308309
- They are not hidden
309310
"""
310-
return Version.internal.public(
311+
# NOTE: Use project.versions, not Version.objects,
312+
# so all results share the same instance of project.
313+
return project.versions(manager=INTERNAL).public(
311314
project=project,
312315
user=request.user,
313316
only_active=True,
@@ -340,9 +343,7 @@ def _v1(self, project, version, build, filename, url, request):
340343
versions_active_built_not_hidden = Version.objects.none()
341344
sorted_versions_active_built_not_hidden = Version.objects.none()
342345

343-
versions_active_built_not_hidden = (
344-
self._get_versions(request, project).select_related("project").order_by("-slug")
345-
)
346+
versions_active_built_not_hidden = self._get_versions(request, project).order_by("-slug")
346347
sorted_versions_active_built_not_hidden = versions_active_built_not_hidden
347348
if not project.supports_multiple_versions:
348349
# Return only one version when the project doesn't support multiple versions.

0 commit comments

Comments
 (0)