Skip to content

Commit 71e402f

Browse files
authored
Merge pull request #5924 from oyvsyo/gateway_connection_retry
Add reconnection to Gateway (from nb2kg)
2 parents 1672777 + d95f251 commit 71e402f

File tree

2 files changed

+46
-5
lines changed

2 files changed

+46
-5
lines changed

notebook/gateway/handlers.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import os
55
import logging
66
import mimetypes
7+
import random
78

89
from ..base.handlers import APIHandler, IPythonHandler
910
from ..utils import url_path_join
@@ -134,6 +135,7 @@ def __init__(self, **kwargs):
134135
self.ws = None
135136
self.ws_future = Future()
136137
self.disconnected = False
138+
self.retry = 0
137139

138140
@gen.coroutine
139141
def _connect(self, kernel_id):
@@ -155,6 +157,7 @@ def _connect(self, kernel_id):
155157
def _connection_done(self, fut):
156158
if not self.disconnected and fut.exception() is None: # prevent concurrent.futures._base.CancelledError
157159
self.ws = fut.result()
160+
self.retry = 0
158161
self.log.debug("Connection is ready: ws: {}".format(self.ws))
159162
else:
160163
self.log.warning("Websocket connection has been closed via client disconnect or due to error. "
@@ -189,8 +192,15 @@ def _read_messages(self, callback):
189192
else: # ws cancelled - stop reading
190193
break
191194

192-
if not self.disconnected: # if websocket is not disconnected by client, attept to reconnect to Gateway
193-
self.log.info("Attempting to re-establish the connection to Gateway: {}".format(self.kernel_id))
195+
# NOTE(esevan): if websocket is not disconnected by client, try to reconnect.
196+
if not self.disconnected and self.retry < GatewayClient.instance().gateway_retry_max:
197+
jitter = random.randint(10, 100) * 0.01
198+
retry_interval = min(GatewayClient.instance().gateway_retry_interval * (2 ** self.retry),
199+
GatewayClient.instance().gateway_retry_interval_max) + jitter
200+
self.retry += 1
201+
self.log.info("Attempting to re-establish the connection to Gateway in %s secs (%s/%s): %s",
202+
retry_interval, self.retry, GatewayClient.instance().gateway_retry_max, self.kernel_id)
203+
yield gen.sleep(retry_interval)
194204
self._connect(self.kernel_id)
195205
loop = IOLoop.current()
196206
loop.add_future(self.ws_future, lambda future: self._read_messages(callback))

notebook/gateway/managers.py

Lines changed: 34 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,14 +15,14 @@
1515
from jupyter_client.kernelspec import KernelSpecManager
1616
from ..utils import url_path_join
1717

18-
from traitlets import Instance, Unicode, Float, Bool, default, validate, TraitError
18+
from traitlets import Instance, Unicode, Int, Float, Bool, default, validate, TraitError
1919
from traitlets.config import SingletonConfigurable
2020

2121

2222
class GatewayClient(SingletonConfigurable):
2323
"""This class manages the configuration. It's its own singleton class so that we
2424
can share these values across all objects. It also contains some helper methods
25-
to build request arguments out of the various config options.
25+
to build request arguments out of the various config options.
2626
2727
"""
2828

@@ -220,6 +220,38 @@ def __init__(self, **kwargs):
220220
def _env_whitelist_default(self):
221221
return os.environ.get(self.env_whitelist_env, self.env_whitelist_default_value)
222222

223+
gateway_retry_interval_default_value = 1.0
224+
gateway_retry_interval_env = 'JUPYTER_GATEWAY_RETRY_INTERVAL'
225+
gateway_retry_interval = Float(default_value=gateway_retry_interval_default_value, config=True,
226+
help="""The time allowed for HTTP reconnection with the Gateway server for the first time.
227+
Next will be JUPYTER_GATEWAY_RETRY_INTERVAL multiplied by two in factor of numbers of retries
228+
but less than JUPYTER_GATEWAY_RETRY_INTERVAL_MAX.
229+
(JUPYTER_GATEWAY_RETRY_INTERVAL env var)""")
230+
231+
@default('gateway_retry_interval')
232+
def gateway_retry_interval_default(self):
233+
return float(os.environ.get('JUPYTER_GATEWAY_RETRY_INTERVAL', self.gateway_retry_interval_default_value))
234+
235+
gateway_retry_interval_max_default_value = 30.0
236+
gateway_retry_interval_max_env = 'JUPYTER_GATEWAY_RETRY_INTERVAL_MAX'
237+
gateway_retry_interval_max = Float(default_value=gateway_retry_interval_max_default_value, config=True,
238+
help="""The maximum time allowed for HTTP reconnection retry with the Gateway server.
239+
(JUPYTER_GATEWAY_RETRY_INTERVAL_MAX env var)""")
240+
241+
@default('gateway_retry_interval_max')
242+
def gateway_retry_interval_max_default(self):
243+
return float(os.environ.get('JUPYTER_GATEWAY_RETRY_INTERVAL_MAX', self.gateway_retry_interval_max_default_value))
244+
245+
gateway_retry_max_default_value = 5
246+
gateway_retry_max_env = 'JUPYTER_GATEWAY_RETRY_MAX'
247+
gateway_retry_max = Int(default_value=gateway_retry_max_default_value, config=True,
248+
help="""The maximum retries allowed for HTTP reconnection with the Gateway server.
249+
(JUPYTER_GATEWAY_RETRY_MAX env var)""")
250+
251+
@default('gateway_retry_max')
252+
def gateway_retry_max_default(self):
253+
return int(os.environ.get('JUPYTER_GATEWAY_RETRY_MAX', self.gateway_retry_max_default_value))
254+
223255
@property
224256
def gateway_enabled(self):
225257
return bool(self.url is not None and len(self.url) > 0)
@@ -503,7 +535,6 @@ def shutdown_all(self, now=False):
503535
self.remove_kernel(kernel_id)
504536

505537

506-
507538
class GatewayKernelSpecManager(KernelSpecManager):
508539

509540
def __init__(self, **kwargs):

0 commit comments

Comments
 (0)