@@ -105,34 +105,6 @@ class HealthHandler(BaseHandler):
def initialize(self, hub_url=None):
    """Store the JupyterHub URL on the handler.

    The stored value is later passed to ``check_jupyterhub_api`` when the
    health checks are collected.

    hub_url: URL of the JupyterHub instance to probe, or ``None``.
    """
    self.hub_url = hub_url
107107
108- @at_most_every
109- async def _get_pods (self ):
110- """Get information about build and user pods"""
111- namespace = self .settings ["build_namespace" ]
112- k8s = self .settings ["kubernetes_client" ]
113- pool = self .settings ["executor" ]
114-
115- app_log .info (f"Getting pod statistics for { namespace } " )
116-
117- label_selectors = [
118- "app=jupyterhub,component=singleuser-server" ,
119- "component=binderhub-build" ,
120- ]
121- requests = [
122- asyncio .wrap_future (
123- pool .submit (
124- k8s .list_namespaced_pod ,
125- namespace ,
126- label_selector = label_selector ,
127- _preload_content = False ,
128- _request_timeout = KUBE_REQUEST_TIMEOUT ,
129- )
130- )
131- for label_selector in label_selectors
132- ]
133- responses = await asyncio .gather (* requests )
134- return [json .loads (resp .read ())["items" ] for resp in responses ]
135-
136108 @false_if_raises
137109 @retry
138110 async def check_jupyterhub_api (self , hub_url ):
@@ -155,58 +127,43 @@ async def check_docker_registry(self):
155127 )
156128 return True
157129
def get_checks(self, checks):
    """Add health checks to the `checks` dict.

    Subclasses can override this method, call ``super().get_checks(checks)``
    and register further entries.

    checks: Dictionary, updated in-place:
        key: service name
        value: an awaitable that resolves to either:
            - a bool (success/fail)
            - a dict with the field `"ok": bool` plus other information
    """
    # The registry is only probed when one is configured.
    if self.settings["use_registry"]:
        checks["Docker registry"] = self.check_docker_registry()
    checks["JupyterHub API"] = self.check_jupyterhub_api(self.hub_url)
175142
async def check_all(self):
    """Runs all health checks and returns a tuple (overall, results).

    `overall` is a bool representing the overall status of the service
    `results` is a list with one dict per check, holding the `"service"`
    name, an `"ok"` bool and any extra fields the check reported
    """
    checks = {}
    self.get_checks(checks)

    # gather() returns results in the order the awaitables were passed,
    # which is the dict's insertion order, so names and outcomes line up.
    outcomes = await asyncio.gather(*checks.values())

    results = []
    for service, outcome in zip(checks, outcomes):
        if isinstance(outcome, bool):
            results.append({"service": service, "ok": outcome})
        else:
            # Dict-style outcome: keep every field the check reported.
            results.append({"service": service, **outcome})

    # Some checks are for information but do not count as a health failure
    overall = all(r["ok"] for r in results if not r.get("_ignore_failure", False))
    if not overall:
        unhealthy = [r for r in results if not r["ok"]]
        app_log.warning(f"Unhealthy services: {unhealthy}")
    return overall, results
210167
211168 async def get (self ):
212169 overall , checks = await self .check_all ()
@@ -218,3 +175,60 @@ async def head(self):
218175 overall , checks = await self .check_all ()
219176 if not overall :
220177 self .set_status (503 )
178+
179+
class KubernetesHealthHandler(HealthHandler):
    """Serve health status on Kubernetes

    Extends the base health checks with a "Pod quota" entry that reports
    how many user and build pods exist in the build namespace.
    """

    # NOTE(review): `at_most_every` is defined outside this view; presumably
    # it limits how often the cluster is actually queried - confirm there.
    @at_most_every
    async def _get_pods(self):
        """Get information about build and user pods

        Returns a two-element list: the `items` of the pod listing for
        single-user servers, followed by the `items` for build pods.
        """
        namespace = self.settings["build_namespace"]
        k8s = self.settings["kubernetes_client"]
        pool = self.settings["executor"]

        app_log.info(f"Getting pod statistics for {namespace}")

        # Order matters: callers unpack the result as (user_pods, build_pods).
        label_selectors = [
            "app=jupyterhub,component=singleuser-server",
            "component=binderhub-build",
        ]
        # The client call is submitted to the executor so both listings run
        # off the event loop; wrap_future makes the results awaitable.
        pending = [
            asyncio.wrap_future(
                pool.submit(
                    k8s.list_namespaced_pod,
                    namespace,
                    label_selector=label_selector,
                    _preload_content=False,
                    _request_timeout=KUBE_REQUEST_TIMEOUT,
                )
            )
            for label_selector in label_selectors
        ]
        responses = await asyncio.gather(*pending)
        # _preload_content=False yields raw responses, decoded here as JSON.
        return [json.loads(resp.read())["items"] for resp in responses]

    def get_checks(self, checks):
        """Add the base health checks plus the "Pod quota" check to `checks`"""
        super().get_checks(checks)
        checks["Pod quota"] = self._check_pod_quotas()

    async def _check_pod_quotas(self):
        """Compare number of active pods to available quota

        Returns a dict with the pod counts, the configured quota, an `"ok"`
        flag and `"_ignore_failure": True`.
        """
        user_pods, build_pods = await self._get_pods()

        n_user_pods = len(user_pods)
        n_build_pods = len(build_pods)

        quota = self.settings["pod_quota"]
        total_pods = n_user_pods + n_build_pods
        return {
            "total_pods": total_pods,
            "build_pods": n_build_pods,
            "user_pods": n_user_pods,
            "quota": quota,
            # No quota configured means there is nothing to exceed.
            "ok": quota is None or total_pods <= quota,
            # The pod quota is treated as a soft quota
            # Being above quota doesn't mean the service is unhealthy
            "_ignore_failure": True,
        }
0 commit comments