diff --git a/docs/dev/settings.rst b/docs/dev/settings.rst
index 6b82c61af6a..38a20c20ed1 100644
--- a/docs/dev/settings.rst
+++ b/docs/dev/settings.rst
@@ -1,23 +1,19 @@
 Interesting settings
 ====================
 
-DOCKER_LIMITS
--------------
+BUILD_MEMORY_LIMIT
+------------------
 
-A dictionary of limits to virtual machines. These limits include:
+The maximum memory allocated to the virtual machine.
+If this limit is hit, build processes will be automatically killed.
+Examples: '200m' for 200MB of total memory, or '2g' for 2GB of total memory.
 
-time
-    An integer representing the total allowed time limit (in
-    seconds) of build processes. This time limit affects the parent
-    process to the virtual machine and will force a virtual machine
-    to die if a build is still running after the allotted time
-    expires.
+BUILD_TIME_LIMIT
+----------------
 
-memory
-    The maximum memory allocated to the virtual machine. If this
-    limit is hit, build processes will be automatically killed.
-    Examples: '200m' for 200MB of total memory, or '2g' for 2GB of
-    total memory.
+An integer representing the total allowed time limit (in seconds) of build processes.
+This time limit affects the parent process to the virtual machine and will force a virtual machine
+to die if a build is still running after the allotted time expires.
 
 PRODUCTION_DOMAIN
 -----------------
diff --git a/readthedocs/api/v2/models.py b/readthedocs/api/v2/models.py
index 0d0a751eed7..82baf8ab269 100644
--- a/readthedocs/api/v2/models.py
+++ b/readthedocs/api/v2/models.py
@@ -1,5 +1,6 @@
 from datetime import timedelta
 
+from django.conf import settings
 from django.db import models
 from django.utils import timezone
 from django.utils.translation import gettext_lazy as _
@@ -18,7 +19,9 @@ def create_key(self, project):
-        Build API keys are valid for 3 hours, and can be revoked at any time
-        by hitting the /api/v2/revoke/ endpoint.
+        Build API keys are valid for the build time limit plus 25%, and can be
+        revoked at any time by hitting the /api/v2/revoke/ endpoint.
         """
-        expiry_date = timezone.now() + timedelta(hours=3)
+        # Use the project or default build time limit + 25% for the API token
+        delta = (project.container_time_limit or settings.BUILD_TIME_LIMIT) * 1.25
+        expiry_date = timezone.now() + timedelta(seconds=delta)
         name_max_length = self.model._meta.get_field("name").max_length
         return super().create_key(
             # Name is required, so we use the project slug for it.
diff --git a/readthedocs/core/utils/__init__.py b/readthedocs/core/utils/__init__.py
index 59d48efc4f0..fc03bf7041c 100644
--- a/readthedocs/core/utils/__init__.py
+++ b/readthedocs/core/utils/__init__.py
@@ -86,18 +86,7 @@ def prepare_build(
         options["queue"] = project.build_queue
 
     # Set per-task time limit
-    # TODO remove the use of Docker limits or replace the logic here. This
-    # was pulling the Docker limits that were set on each stack, but we moved
-    # to dynamic setting of the Docker limits. This sets a failsafe higher
-    # limit, but if no builds hit this limit, it should be safe to remove and
-    # rely on Docker to terminate things on time.
-    # time_limit = DOCKER_LIMITS['time']
-    time_limit = 7200
-    try:
-        if project.container_time_limit:
-            time_limit = int(project.container_time_limit)
-    except ValueError:
-        log.warning("Invalid time_limit for project.")
+    time_limit = project.container_time_limit or settings.BUILD_TIME_LIMIT
 
     # Add 20% overhead to task, to ensure the build can timeout and the task
     # will cleanly finish.
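The resolve-then-pad pattern above appears three times in this change: Celery task limits, and build API key expiry. A minimal standalone sketch of that arithmetic, with `project` and `settings` reduced to plain values (the 900-second default comes from `readthedocs/settings/base.py`; everything else here is illustrative, not the real models):

from datetime import timedelta

BUILD_TIME_LIMIT = 900  # default from readthedocs/settings/base.py, in seconds


def effective_time_limit(container_time_limit):
    # The per-project override wins; otherwise fall back to the global setting.
    return container_time_limit or BUILD_TIME_LIMIT


# Celery task limits in prepare_build(): the hard limit is the soft limit
# plus 20%, so the task can clean up after the build itself times out.
soft_time_limit = effective_time_limit(None)   # 900
time_limit = int(soft_time_limit * 1.2)        # 1080

# Build API key expiry in create_key(): time limit plus 25%.
expiry = timedelta(seconds=effective_time_limit(None) * 1.25)
print(soft_time_limit, time_limit, expiry)     # 900 1080 0:18:45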
diff --git a/readthedocs/doc_builder/constants.py b/readthedocs/doc_builder/constants.py
index 843e0d00a2e..73dad8c1bd6 100644
--- a/readthedocs/doc_builder/constants.py
+++ b/readthedocs/doc_builder/constants.py
@@ -14,7 +14,6 @@
 DOCKER_SOCKET = settings.DOCKER_SOCKET
 DOCKER_VERSION = settings.DOCKER_VERSION
 DOCKER_IMAGE = settings.DOCKER_IMAGE
-DOCKER_LIMITS = settings.DOCKER_LIMITS
 
 DOCKER_TIMEOUT_EXIT_CODE = 42
 DOCKER_OOM_EXIT_CODE = 137
diff --git a/readthedocs/doc_builder/environments.py b/readthedocs/doc_builder/environments.py
index a933937278b..7c90c528fe0 100644
--- a/readthedocs/doc_builder/environments.py
+++ b/readthedocs/doc_builder/environments.py
@@ -25,7 +25,6 @@
 from .constants import DOCKER_HOSTNAME_MAX_LEN
 from .constants import DOCKER_IMAGE
-from .constants import DOCKER_LIMITS
 from .constants import DOCKER_OOM_EXIT_CODE
 from .constants import DOCKER_SOCKET
 from .constants import DOCKER_TIMEOUT_EXIT_CODE
 from .constants import DOCKER_VERSION
@@ -581,8 +580,6 @@ class DockerBuildEnvironment(BaseBuildEnvironment):
 
     command_class = DockerBuildCommand
     container_image = DOCKER_IMAGE
-    container_mem_limit = DOCKER_LIMITS.get("memory")
-    container_time_limit = DOCKER_LIMITS.get("time")
 
     def __init__(self, *args, **kwargs):
         container_image = kwargs.pop("container_image", None)
@@ -609,10 +606,8 @@ def __init__(self, *args, **kwargs):
         if container_image:
             self.container_image = container_image
 
-        if self.project.container_mem_limit:
-            self.container_mem_limit = self.project.container_mem_limit
-        if self.project.container_time_limit:
-            self.container_time_limit = self.project.container_time_limit
+        self.container_mem_limit = self.project.container_mem_limit or settings.BUILD_MEMORY_LIMIT
+        self.container_time_limit = self.project.container_time_limit or settings.BUILD_TIME_LIMIT
 
         structlog.contextvars.bind_contextvars(
             project_slug=self.project.slug,
diff --git a/readthedocs/projects/tasks/utils.py b/readthedocs/projects/tasks/utils.py
index ed1dc60cc5e..69effcaa768 100644
--- a/readthedocs/projects/tasks/utils.py
+++ b/readthedocs/projects/tasks/utils.py
@@ -162,17 +162,13 @@ def finish_inactive_builds():
 
-    A build is consider inactive if it's not in a final state and it has been
-    "running" for more time that the allowed one (``Project.container_time_limit``
-    or ``DOCKER_LIMITS['time']`` plus a 20% of it).
+    A build is considered inactive if it's not in a final state and it has been
+    "running" for longer than allowed (``Project.container_time_limit``
+    or ``BUILD_TIME_LIMIT``, plus 20% of it).
 
     These inactive builds will be marked as ``success`` and ``CANCELLED`` with an
     ``error`` to be communicated to the user.
     """
-    # TODO similar to the celery task time limit, we can't infer this from
-    # Docker settings anymore, because Docker settings are determined on the
-    # build servers dynamically.
-    # time_limit = int(DOCKER_LIMITS['time'] * 1.2)
-    # Set time as maximum celery task time limit + 5m
-    time_limit = 7200 + 300
+    # TODO: delete this task once we are fully migrated to ``BUILD_HEALTHCHECK``
+    time_limit = settings.BUILD_TIME_LIMIT * 1.2
     delta = datetime.timedelta(seconds=time_limit)
     query = (
         ~Q(state__in=BUILD_FINAL_STATES)
diff --git a/readthedocs/rtd_tests/tests/test_core_utils.py b/readthedocs/rtd_tests/tests/test_core_utils.py
index 42f61315088..b8d80335f80 100644
--- a/readthedocs/rtd_tests/tests/test_core_utils.py
+++ b/readthedocs/rtd_tests/tests/test_core_utils.py
@@ -2,6 +2,7 @@
 from unittest import mock
 
 import pytest
+from django.conf import settings
 from django.test import TestCase, override_settings
 from django_dynamic_fixture import get
 
@@ -189,8 +190,8 @@ def test_trigger_max_concurrency_reached(self, update_docs, app):
         trigger_build(project=self.project, version=self.version)
         kwargs = {"build_commit": None, "build_api_key": mock.ANY}
         options = {
-            "time_limit": int(7200 * 1.2),
-            "soft_time_limit": 7200,
+            "time_limit": settings.BUILD_TIME_LIMIT * 1.2,
+            "soft_time_limit": settings.BUILD_TIME_LIMIT,
             "countdown": 5 * 60,
             "max_retries": 25,
         }
diff --git a/readthedocs/settings/base.py b/readthedocs/settings/base.py
index 3891e8eea57..c3bd5a57cc4 100644
--- a/readthedocs/settings/base.py
+++ b/readthedocs/settings/base.py
@@ -589,6 +589,34 @@ def TEMPLATES(self):
     USE_I18N = True
     USE_L10N = True
 
+    BUILD_TIME_LIMIT = 900  # seconds
+
+    @property
+    def BUILD_MEMORY_LIMIT(self):
+        """
+        Set build memory limit dynamically, if in production, based on system memory.
+
+        We do this to avoid having separate build images. This assumes 1 build
+        process per server, which will be allowed to consume all available
+        memory.
+        """
+        # Our normal default
+        default_memory_limit = "7g"
+        memory_limit = None
+
+        # Only run on our servers
+        if self.RTD_IS_PRODUCTION:
+            total_memory, memory_limit = self._get_build_memory_limit()
+
+        memory_limit = memory_limit or default_memory_limit
+        log.info(
+            "Using dynamic build limits.",
+            hostname=socket.gethostname(),
+            memory=memory_limit,
+        )
+        return memory_limit
+
+
     # Celery
     CELERY_APP_NAME = "readthedocs"
     CELERY_ALWAYS_EAGER = True
@@ -605,7 +633,7 @@ def TEMPLATES(self):
     # https://github.com/readthedocs/readthedocs.org/issues/12317#issuecomment-3070950434
     # https://docs.celeryq.dev/en/stable/getting-started/backends-and-brokers/redis.html#visibility-timeout
     BROKER_TRANSPORT_OPTIONS = {
-        'visibility_timeout': 18000,  # 5 hours
+        'visibility_timeout': BUILD_TIME_LIMIT * 1.15,  # 15% more than the build time limit
     }
 
     CELERY_DEFAULT_QUEUE = "celery"
@@ -721,7 +749,13 @@ def TEMPLATES(self):
     # since we can't read their config file image choice before cloning
     RTD_DOCKER_CLONE_IMAGE = RTD_DOCKER_BUILD_SETTINGS["os"]["ubuntu-22.04"]
 
-    def _get_docker_memory_limit(self):
+    def _get_build_memory_limit(self):
+        """
+        Return the build memory limit based on available system memory.
+
+        We subtract ~1000MB for the overhead of processes and the base
+        system, leaving the rest available to the build.
+        """
         try:
             total_memory = int(
                 subprocess.check_output(
@@ -735,47 +769,6 @@ def TEMPLATES(self):
             # int and raise a ValueError
             log.exception("Failed to get memory size, using defaults Docker limits.")
 
-    # Coefficient used to determine build time limit, as a percentage of total
-    # memory. Historical values here were 0.225 to 0.3.
-    DOCKER_TIME_LIMIT_COEFF = 0.25
-
-    @property
-    def DOCKER_LIMITS(self):
-        """
-        Set docker limits dynamically, if in production, based on system memory.
-
-        We do this to avoid having separate build images. This assumes 1 build
-        process per server, which will be allowed to consume all available
-        memory.
-
-        We subtract 750MiB for overhead of processes and base system, and set
-        the build time as proportional to the memory limit.
-        """
-        # Our normal default
-        limits = {
-            "memory": "2g",
-            "time": 900,
-        }
-
-        # Only run on our servers
-        if self.RTD_IS_PRODUCTION:
-            total_memory, memory_limit = self._get_docker_memory_limit()
-            if memory_limit:
-                limits = {
-                    "memory": f"{memory_limit}m",
-                    "time": max(
-                        limits["time"],
-                        round(total_memory * self.DOCKER_TIME_LIMIT_COEFF, -2),
-                    ),
-                }
-            log.info(
-                "Using dynamic docker limits.",
-                hostname=socket.gethostname(),
-                memory=limits["memory"],
-                time=limits["time"],
-            )
-        return limits
-
     # Allauth
     ACCOUNT_ADAPTER = "readthedocs.core.adapters.AccountAdapter"
     SOCIALACCOUNT_ADAPTER = 'readthedocs.core.adapters.SocialAccountAdapter'
diff --git a/readthedocs/settings/docker_compose.py b/readthedocs/settings/docker_compose.py
index ec5a0d909db..8b580a60e75 100644
--- a/readthedocs/settings/docker_compose.py
+++ b/readthedocs/settings/docker_compose.py
@@ -15,7 +15,7 @@ class DockerBaseSettings(CommunityBaseSettings):
     RTD_DOCKER_COMPOSE_NETWORK = "community_readthedocs"
     RTD_DOCKER_COMPOSE_VOLUME = "community_build-user-builds"
     RTD_DOCKER_USER = f"{os.geteuid()}:{os.getegid()}"
-    DOCKER_LIMITS = {"memory": "2g", "time": 900}
+    BUILD_MEMORY_LIMIT = "2g"
 
     PRODUCTION_DOMAIN = os.environ.get("RTD_PRODUCTION_DOMAIN", "devthedocs.org")
     PUBLIC_DOMAIN = os.environ.get("RTD_PUBLIC_DOMAIN", "devthedocs.org")
diff --git a/readthedocs/settings/test.py b/readthedocs/settings/test.py
index c32d2befab9..cd804524336 100644
--- a/readthedocs/settings/test.py
+++ b/readthedocs/settings/test.py
@@ -26,7 +26,8 @@ class CommunityTestSettings(CommunityBaseSettings):
     CELERY_ALWAYS_EAGER = True
 
-    # Skip automatic detection of Docker limits for testing
-    DOCKER_LIMITS = {"memory": "200m", "time": 600}
+    # Skip automatic detection of build limits for testing
+    BUILD_TIME_LIMIT = 600
+    BUILD_MEMORY_LIMIT = "200m"
 
     CACHES = {
         "default": {
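For deployments that previously pinned `DOCKER_LIMITS`, a minimal sketch of the equivalent override under the new names, following the same pattern `docker_compose.py` and `test.py` use above (the class name and values are illustrative, not part of this change):

from readthedocs.settings.base import CommunityBaseSettings


class CustomBuildSettings(CommunityBaseSettings):
    # A plain class attribute shadows the BUILD_MEMORY_LIMIT property on the
    # base class, so the dynamic production detection never runs here.
    BUILD_MEMORY_LIMIT = "4g"   # was DOCKER_LIMITS["memory"]
    BUILD_TIME_LIMIT = 1800     # seconds; was DOCKER_LIMITS["time"]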