Skip to content

Commit e2f23e9

Browse files
authored
Merge pull request #1225 from minrk/kube-api-timeout
set _request_timeout on most kubernetes API requests
2 parents 90b4394 + 6a2b398 commit e2f23e9

File tree

4 files changed: 36 additions (+36) and 13 deletions (-13)

binderhub/build.py

Lines changed: 22 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
from tornado.ioloop import IOLoop
1313
from tornado.log import app_log
1414

15-
from .utils import rendezvous_rank
15+
from .utils import rendezvous_rank, KUBE_REQUEST_TIMEOUT
1616

1717

1818
class Build:
@@ -186,6 +186,7 @@ def get_affinity(self):
186186
dind_pods = self.api.list_namespaced_pod(
187187
self.namespace,
188188
label_selector="component=dind,app=binder",
189+
_request_timeout=KUBE_REQUEST_TIMEOUT,
189190
)
190191

191192
if self.sticky_builds and dind_pods:
@@ -304,7 +305,11 @@ def submit(self):
304305
)
305306

306307
try:
307-
ret = self.api.create_namespaced_pod(self.namespace, self.pod)
308+
ret = self.api.create_namespaced_pod(
309+
self.namespace,
310+
self.pod,
311+
_request_timeout=KUBE_REQUEST_TIMEOUT,
312+
)
308313
except client.rest.ApiException as e:
309314
if e.status == 409:
310315
# Someone else created it!
@@ -320,10 +325,11 @@ def submit(self):
320325
w = watch.Watch()
321326
try:
322327
for f in w.stream(
323-
self.api.list_namespaced_pod,
324-
self.namespace,
325-
label_selector="name={}".format(self.name),
326-
timeout_seconds=30,
328+
self.api.list_namespaced_pod,
329+
self.namespace,
330+
label_selector="name={}".format(self.name),
331+
timeout_seconds=30,
332+
_request_timeout=KUBE_REQUEST_TIMEOUT,
327333
):
328334
if f['type'] == 'DELETED':
329335
self.progress('pod.phasechange', 'Deleted')
@@ -348,11 +354,13 @@ def stream_logs(self):
348354
"""Stream a pod's logs"""
349355
app_log.info("Watching logs of %s", self.name)
350356
for line in self.api.read_namespaced_pod_log(
351-
self.name,
352-
self.namespace,
353-
follow=True,
354-
tail_lines=self.log_tail_lines,
355-
_preload_content=False):
357+
self.name,
358+
self.namespace,
359+
follow=True,
360+
tail_lines=self.log_tail_lines,
361+
_request_timeout=(3, None),
362+
_preload_content=False,
363+
):
356364
if self.stop_event.is_set():
357365
app_log.info("Stopping logs of %s", self.name)
358366
return
@@ -382,7 +390,9 @@ def cleanup(self):
382390
self.api.delete_namespaced_pod(
383391
name=self.name,
384392
namespace=self.namespace,
385-
body=client.V1DeleteOptions(grace_period_seconds=0))
393+
body=client.V1DeleteOptions(grace_period_seconds=0),
394+
_request_timeout=KUBE_REQUEST_TIMEOUT,
395+
)
386396
except client.rest.ApiException as e:
387397
if e.status == 404:
388398
# Is ok, someone else has already deleted it

binderhub/builder.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222

2323
from .base import BaseHandler
2424
from .build import Build, FakeBuild
25+
from .utils import KUBE_REQUEST_TIMEOUT
2526

2627
# Separate buckets for builds and launches.
2728
# Builds and launches have very different characteristic times,
@@ -475,9 +476,11 @@ async def launch(self, kube, provider):
475476

476477
# TODO: run a watch to keep this up to date in the background
477478
pool = self.settings['executor']
478-
f = pool.submit(kube.list_namespaced_pod,
479+
f = pool.submit(
480+
kube.list_namespaced_pod,
479481
self.settings["build_namespace"],
480482
label_selector='app=jupyterhub,component=singleuser-server',
483+
_request_timeout=KUBE_REQUEST_TIMEOUT,
481484
)
482485
# concurrent.futures.Future isn't awaitable
483486
# wrap in tornado Future

binderhub/health.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from tornado.log import app_log
99

1010
from .base import BaseHandler
11+
from .utils import KUBE_REQUEST_TIMEOUT
1112

1213

1314
def retry(_f=None, *, delay=1, attempts=3):
@@ -120,6 +121,7 @@ async def _get_pods(self):
120121
namespace,
121122
label_selector=label_selector,
122123
_preload_content=False,
124+
_request_timeout=KUBE_REQUEST_TIMEOUT,
123125
)
124126
)
125127
for label_selector in label_selectors

binderhub/utils.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,14 @@
66
from traitlets import Integer, TraitError
77

88

9+
# default _request_timeout for kubernetes api requests
10+
# tuple of two timeouts, in seconds: (connect_timeout, read_timeout)
11+
# the most important of these is the connect_timeout,
12+
# because a connection attempt can hang for a *very* long time when there are internal
13+
# kubernetes connection issues
14+
KUBE_REQUEST_TIMEOUT = (3, 30)
15+
16+
917
def blake2b_hash_as_int(b):
1018
"""Compute digest of the bytes `b` using the Blake2 hash function.
1119

0 commit comments

Comments
 (0)