Skip to content

Commit 1b040a6

Browse files
authored
Merge pull request #1598 from manics/health-customisable
Add `health_handler_class` to make the `/health` handler customisable
2 parents a6f2701 + 0d11aa4 commit 1b040a6

File tree

2 files changed

+97
-71
lines changed

2 files changed

+97
-71
lines changed

binderhub/app.py

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -41,11 +41,11 @@
4141
from traitlets.config import Application
4242

4343
from .base import AboutHandler, Custom404, VersionHandler
44-
from .build import Build, BuildExecutor
44+
from .build import Build, BuildExecutor, KubernetesBuildExecutor
4545
from .builder import BuildHandler
4646
from .config import ConfigHandler
4747
from .events import EventLog
48-
from .health import HealthHandler
48+
from .health import HealthHandler, KubernetesHealthHandler
4949
from .launcher import Launcher
5050
from .log import log_request
5151
from .main import LegacyRedirectHandler, MainHandler, ParameterizedMainHandler
@@ -290,6 +290,18 @@ def _valid_badge_base_url(self, proposal):
290290
config=True,
291291
)
292292

293+
health_handler_class = Type(
294+
HealthHandler,
295+
help="The Tornado /health handler class",
296+
config=True,
297+
)
298+
299+
@default("health_handler_class")
300+
def _default_health_handler_class(self):
301+
if issubclass(self.build_class, KubernetesBuildExecutor):
302+
return KubernetesHealthHandler
303+
return HealthHandler
304+
293305
per_repo_quota = Integer(
294306
0,
295307
help="""
@@ -924,7 +936,7 @@ def initialize(self, *args, **kwargs):
924936
{"path": os.path.join(self.tornado_settings["static_path"], "images")},
925937
),
926938
(r"/about", AboutHandler),
927-
(r"/health", HealthHandler, {"hub_url": self.hub_url_local}),
939+
(r"/health", self.health_handler_class, {"hub_url": self.hub_url_local}),
928940
(r"/_config", ConfigHandler),
929941
(r"/", MainHandler),
930942
(r".*", Custom404),

binderhub/health.py

Lines changed: 82 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -105,34 +105,6 @@ class HealthHandler(BaseHandler):
105105
def initialize(self, hub_url=None):
106106
self.hub_url = hub_url
107107

108-
@at_most_every
109-
async def _get_pods(self):
110-
"""Get information about build and user pods"""
111-
namespace = self.settings["build_namespace"]
112-
k8s = self.settings["kubernetes_client"]
113-
pool = self.settings["executor"]
114-
115-
app_log.info(f"Getting pod statistics for {namespace}")
116-
117-
label_selectors = [
118-
"app=jupyterhub,component=singleuser-server",
119-
"component=binderhub-build",
120-
]
121-
requests = [
122-
asyncio.wrap_future(
123-
pool.submit(
124-
k8s.list_namespaced_pod,
125-
namespace,
126-
label_selector=label_selector,
127-
_preload_content=False,
128-
_request_timeout=KUBE_REQUEST_TIMEOUT,
129-
)
130-
)
131-
for label_selector in label_selectors
132-
]
133-
responses = await asyncio.gather(*requests)
134-
return [json.loads(resp.read())["items"] for resp in responses]
135-
136108
@false_if_raises
137109
@retry
138110
async def check_jupyterhub_api(self, hub_url):
@@ -155,58 +127,43 @@ async def check_docker_registry(self):
155127
)
156128
return True
157129

158-
async def check_pod_quota(self):
159-
"""Compare number of active pods to available quota"""
160-
user_pods, build_pods = await self._get_pods()
161-
162-
n_user_pods = len(user_pods)
163-
n_build_pods = len(build_pods)
130+
def get_checks(self, checks):
131+
"""Add health checks to the `checks` dict
164132
165-
quota = self.settings["pod_quota"]
166-
total_pods = n_user_pods + n_build_pods
167-
usage = {
168-
"total_pods": total_pods,
169-
"build_pods": n_build_pods,
170-
"user_pods": n_user_pods,
171-
"quota": quota,
172-
"ok": total_pods <= quota if quota is not None else True,
173-
}
174-
return usage
133+
checks: Dictionary, updated in-place:
134+
key: service name
135+
value: a future that resolves to either:
136+
- a bool (success/fail)
137+
- a dict with the field `"ok": bool` plus other information
138+
"""
139+
if self.settings["use_registry"]:
140+
checks["Docker registry"] = self.check_docker_registry()
141+
checks["JupyterHub API"] = self.check_jupyterhub_api(self.hub_url)
175142

176143
async def check_all(self):
177-
"""Runs all health checks and returns a tuple (overall, checks).
144+
"""Runs all health checks and returns a tuple (overall, results).
178145
179146
`overall` is a bool representing the overall status of the service
180-
`checks` contains detailed information on each check's result
147+
`results` contains detailed information on each check's result
181148
"""
182-
checks = []
183-
check_futures = []
184-
185-
if self.settings["use_registry"]:
186-
check_futures.append(self.check_docker_registry())
187-
checks.append({"service": "Docker registry", "ok": False})
188-
189-
check_futures.append(self.check_jupyterhub_api(self.hub_url))
190-
checks.append({"service": "JupyterHub API", "ok": False})
191-
192-
check_futures.append(self.check_pod_quota())
193-
checks.append({"service": "Pod quota", "ok": False})
149+
checks = {}
150+
results = []
151+
self.get_checks(checks)
194152

195-
for result, check in zip(await asyncio.gather(*check_futures), checks):
153+
for result, service in zip(
154+
await asyncio.gather(*checks.values()), checks.keys()
155+
):
196156
if isinstance(result, bool):
197-
check["ok"] = result
157+
results.append({"service": service, "ok": result})
198158
else:
199-
check.update(result)
159+
results.append(dict({"service": service}, **result))
200160

201-
# The pod quota is treated as a soft quota this means being above
202-
# quota doesn't mean the service is unhealthy
203-
overall = all(
204-
check["ok"] for check in checks if check["service"] != "Pod quota"
205-
)
161+
# Some checks are for information but do not count as a health failure
162+
overall = all(r["ok"] for r in results if not r.get("_ignore_failure", False))
206163
if not overall:
207-
unhealthy = [check for check in checks if not check["ok"]]
164+
unhealthy = [r for r in results if not r["ok"]]
208165
app_log.warning(f"Unhealthy services: {unhealthy}")
209-
return overall, checks
166+
return overall, results
210167

211168
async def get(self):
212169
overall, checks = await self.check_all()
@@ -218,3 +175,60 @@ async def head(self):
218175
overall, checks = await self.check_all()
219176
if not overall:
220177
self.set_status(503)
178+
179+
180+
class KubernetesHealthHandler(HealthHandler):
181+
"""Serve health status on Kubernetes"""
182+
183+
@at_most_every
184+
async def _get_pods(self):
185+
"""Get information about build and user pods"""
186+
namespace = self.settings["build_namespace"]
187+
k8s = self.settings["kubernetes_client"]
188+
pool = self.settings["executor"]
189+
190+
app_log.info(f"Getting pod statistics for {namespace}")
191+
192+
label_selectors = [
193+
"app=jupyterhub,component=singleuser-server",
194+
"component=binderhub-build",
195+
]
196+
requests = [
197+
asyncio.wrap_future(
198+
pool.submit(
199+
k8s.list_namespaced_pod,
200+
namespace,
201+
label_selector=label_selector,
202+
_preload_content=False,
203+
_request_timeout=KUBE_REQUEST_TIMEOUT,
204+
)
205+
)
206+
for label_selector in label_selectors
207+
]
208+
responses = await asyncio.gather(*requests)
209+
return [json.loads(resp.read())["items"] for resp in responses]
210+
211+
def get_checks(self, checks):
212+
super().get_checks(checks)
213+
checks["Pod quota"] = self._check_pod_quotas()
214+
215+
async def _check_pod_quotas(self):
216+
"""Compare number of active pods to available quota"""
217+
user_pods, build_pods = await self._get_pods()
218+
219+
n_user_pods = len(user_pods)
220+
n_build_pods = len(build_pods)
221+
222+
quota = self.settings["pod_quota"]
223+
total_pods = n_user_pods + n_build_pods
224+
usage = {
225+
"total_pods": total_pods,
226+
"build_pods": n_build_pods,
227+
"user_pods": n_user_pods,
228+
"quota": quota,
229+
"ok": total_pods <= quota if quota is not None else True,
230+
# The pod quota is treated as a soft quota
231+
# Being above quota doesn't mean the service is unhealthy
232+
"_ignore_failure": True,
233+
}
234+
return usage

0 commit comments

Comments
 (0)