Skip to content

Commit ee2b12d

Browse files
author
sd
committed
Add reconnection to Gateway
1 parent d4438a2 commit ee2b12d

File tree

2 files changed

+46
-3
lines changed

2 files changed

+46
-3
lines changed

jupyter_server/gateway/handlers.py

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44
import os
55
import logging
66
import mimetypes
7+
import random
8+
import asyncio
79

810
from ..base.handlers import APIHandler, JupyterHandler
911
from ..utils import url_path_join
@@ -133,6 +135,7 @@ def __init__(self, **kwargs):
133135
self.ws = None
134136
self.ws_future = Future()
135137
self.disconnected = False
138+
self.retry = 0
136139

137140
async def _connect(self, kernel_id, message_callback):
138141
# websocket is initialized before connection
@@ -159,6 +162,7 @@ async def _connect(self, kernel_id, message_callback):
159162
def _connection_done(self, fut):
160163
if not self.disconnected and fut.exception() is None: # prevent concurrent.futures._base.CancelledError
161164
self.ws = fut.result()
165+
self.retry = 0
162166
self.log.debug("Connection is ready: ws: {}".format(self.ws))
163167
else:
164168
self.log.warning("Websocket connection has been closed via client disconnect or due to error. "
@@ -192,8 +196,15 @@ async def _read_messages(self, callback):
192196
else: # ws cancelled - stop reading
193197
break
194198

195-
if not self.disconnected: # if websocket is not disconnected by client, attempt to reconnect to Gateway
196-
self.log.info("Attempting to re-establish the connection to Gateway: {}".format(self.kernel_id))
199+
# NOTE(esevan): if websocket is not disconnected by client, try to reconnect.
200+
if not self.disconnected and self.retry < GatewayClient.instance().gateway_retry_max:
201+
jitter = random.randint(10, 100) * 0.01
202+
retry_interval = min(GatewayClient.instance().gateway_retry_interval * (2 ** self.retry),
203+
GatewayClient.instance().gateway_retry_interval_max) + jitter
204+
self.retry += 1
205+
self.log.info("Attempting to re-establish the connection to Gateway in %s secs (%s/%s): %s",
206+
retry_interval, self.retry, GatewayClient.instance().gateway_retry_max, self.kernel_id)
207+
await asyncio.sleep(retry_interval)
197208
loop = IOLoop.current()
198209
loop.spawn_callback(self._connect, self.kernel_id, callback)
199210

jupyter_server/gateway/managers.py

Lines changed: 33 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
from jupyter_client.kernelspec import KernelSpecManager
1616
from ..utils import url_path_join
1717

18-
from traitlets import Instance, Unicode, Float, Bool, default, validate, TraitError
18+
from traitlets import Instance, Unicode, Int, Float, Bool, default, validate, TraitError
1919
from traitlets.config import SingletonConfigurable
2020

2121

@@ -220,6 +220,38 @@ def __init__(self, **kwargs):
220220
def _env_whitelist_default(self):
221221
return os.environ.get(self.env_whitelist_env, self.env_whitelist_default_value)
222222

223+
gateway_retry_interval_default_value = 1.0
224+
gateway_retry_interval_env = 'JUPYTER_GATEWAY_RETRY_INTERVAL'
225+
gateway_retry_interval = Float(default_value=gateway_retry_interval_default_value, config=True,
226+
help="""The time allowed for HTTP reconnection with the Gateway server for the first time.
227+
Each subsequent interval is JUPYTER_GATEWAY_RETRY_INTERVAL multiplied by two to the power of the number of retries,
228+
but less than JUPYTER_GATEWAY_RETRY_INTERVAL_MAX.
229+
(JUPYTER_GATEWAY_RETRY_INTERVAL env var)""")
230+
231+
@default('gateway_retry_interval')
232+
def gateway_retry_interval_default(self):
233+
return float(os.environ.get('JUPYTER_GATEWAY_RETRY_INTERVAL', self.gateway_retry_interval_default_value))
234+
235+
gateway_retry_interval_max_default_value = 30.0
236+
gateway_retry_interval_max_env = 'JUPYTER_GATEWAY_RETRY_INTERVAL_MAX'
237+
gateway_retry_interval_max = Float(default_value=gateway_retry_interval_max_default_value, config=True,
238+
help="""The maximum time allowed for HTTP reconnection retry with the Gateway server.
239+
(JUPYTER_GATEWAY_RETRY_INTERVAL_MAX env var)""")
240+
241+
@default('gateway_retry_interval_max')
242+
def gateway_retry_interval_max_default(self):
243+
return float(os.environ.get('JUPYTER_GATEWAY_RETRY_INTERVAL_MAX', self.gateway_retry_interval_max_default_value))
244+
245+
gateway_retry_max_default_value = 5
246+
gateway_retry_max_env = 'JUPYTER_GATEWAY_RETRY_MAX'
247+
gateway_retry_max = Int(default_value=gateway_retry_max_default_value, config=True,
248+
help="""The maximum retries allowed for HTTP reconnection with the Gateway server.
249+
(JUPYTER_GATEWAY_RETRY_MAX env var)""")
250+
251+
@default('gateway_retry_max')
252+
def gateway_retry_max_default(self):
253+
return int(os.environ.get('JUPYTER_GATEWAY_RETRY_MAX', self.gateway_retry_max_default_value))
254+
223255
@property
224256
def gateway_enabled(self):
225257
return bool(self.url is not None and len(self.url) > 0)

0 commit comments

Comments
 (0)