|
9 | 9 |
|
10 | 10 | import structlog |
11 | 11 | from django.conf import settings |
| 12 | +from django.urls import reverse |
12 | 13 | from django.utils.translation import gettext_lazy as _ |
13 | 14 | from docker import APIClient |
14 | 15 | from docker.errors import APIError as DockerAPIError |
|
20 | 21 |
|
21 | 22 | from readthedocs.builds.models import BuildCommandResultMixin |
22 | 23 | from readthedocs.core.utils import slugify |
| 24 | +from readthedocs.projects.models import Feature |
23 | 25 |
|
24 | 26 | from .constants import DOCKER_HOSTNAME_MAX_LEN |
25 | 27 | from .constants import DOCKER_IMAGE |
@@ -583,6 +585,7 @@ class DockerBuildEnvironment(BaseBuildEnvironment): |
583 | 585 | container_time_limit = DOCKER_LIMITS.get("time") |
584 | 586 |
|
585 | 587 | def __init__(self, *args, **kwargs): |
| 588 | + self.build_api_key = kwargs.pop("build_api_key", None) |
586 | 589 | container_image = kwargs.pop("container_image", None) |
587 | 590 | super().__init__(*args, **kwargs) |
588 | 591 | self.client = None |
@@ -839,7 +842,48 @@ def create_container(self): |
839 | 842 | runtime="runsc", # gVisor runtime |
840 | 843 | ) |
841 | 844 | client.start(container=self.container_id) |
| 845 | + |
| 846 | + if self.project.has_feature(Feature.BUILD_HEALTHCHECK): |
| 847 | + self._run_background_healthcheck() |
| 848 | + |
842 | 849 | except (DockerAPIError, ConnectionError) as exc: |
843 | 850 | raise BuildAppError( |
844 | 851 | BuildAppError.GENERIC_WITH_BUILD_ID, exception_messag=exc.explanation |
845 | 852 | ) from exc |
| 853 | + |
def _run_background_healthcheck(self):
    """
    Run a cURL loop in the background to ping the build healthcheck API.

    The loop is started detached inside the build container. It POSTs to the
    ``build-healthcheck`` endpoint for this build, authenticating with
    ``self.build_api_key``, and sleeps ``settings.RTD_BUILD_HEALTHCHECK_DELAY``
    between calls.

    The API saves the last ping timestamp on each call. Then a periodic Celery task
    checks this value for all the running builds and decides if the build is stalled or not.
    If it's stalled, it terminates those builds and marks them as failed.
    """
    # Imported locally so this method does not depend on the module's import block.
    import shlex

    log.debug("Running build with healthcheck.")

    build_id = self.build.get("id")
    healthcheck_url = reverse("build-healthcheck", kwargs={"pk": build_id})
    if settings.RTD_DOCKER_COMPOSE and "ngrok" in settings.PRODUCTION_DOMAIN:
        # NOTE: we do require using NGROK here to go over internet because I
        # didn't find a way to access the `web` container from inside the
        # container the `build` container created for this particular build
        # (there are 3 containers involved locally here: web, build, and user's build)
        #
        # This shouldn't happen in production, because we are not doing Docker in Docker.
        url = f"http://readthedocs.ngrok.io{healthcheck_url}"
    else:
        url = f"{settings.SLUMBER_API_HOST}{healthcheck_url}"

    def build_script(token):
        """Return the bash loop, with every interpolated value shell-quoted."""
        header = shlex.quote(f"Authorization: Token {token}")
        delay = shlex.quote(str(settings.RTD_BUILD_HEALTHCHECK_DELAY))
        return (
            f"while true; do curl --max-time 2 -H {header} -X POST {shlex.quote(url)}; "
            f"sleep {delay}; done;"
        )

    # Never log the real API key: render the command with a placeholder instead.
    log.debug("Healthcheck command to run.", command=build_script("<redacted>"))

    # Pass an argv list so Docker does not have to re-split a quoted shell string.
    cmd = ["/bin/bash", "-c", build_script(self.build_api_key)]

    client = self.get_client()
    exec_cmd = client.exec_create(
        container=self.container_id,
        cmd=cmd,
        user=settings.RTD_DOCKER_USER,
        stdout=True,
        stderr=True,
    )
    # `detach=True` allows us to run this command in the background
    client.exec_start(exec_id=exec_cmd["Id"], stream=False, detach=True)
0 commit comments