Skip to content

Commit 1aa1250

Browse files
Backport PR #874 on branch 1.x (Fix gateway kernel shutdown) (#875)
Co-authored-by: Kevin Bates <[email protected]>
1 parent 7bbc200 commit 1aa1250

File tree

2 files changed

+48
-14
lines changed

2 files changed

+48
-14
lines changed

jupyter_server/gateway/managers.py

Lines changed: 22 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@ async def kernel_model(self, kernel_id):
9292
The uuid of the kernel.
9393
"""
9494
model = None
95-
km = self.get_kernel(kernel_id)
95+
km = self.get_kernel(str(kernel_id))
9696
if km:
9797
model = km.kernel
9898
return model
@@ -166,13 +166,14 @@ async def interrupt_kernel(self, kernel_id, **kwargs):
166166

167167
async def shutdown_all(self, now=False):
168168
"""Shutdown all kernels."""
169-
for kernel_id in self._kernels:
169+
kids = list(self._kernels)
170+
for kernel_id in kids:
170171
km = self.get_kernel(kernel_id)
171172
await km.shutdown_kernel(now=now)
172173
self.remove_kernel(kernel_id)
173174

174175
async def cull_kernels(self):
175-
"""Override cull_kernels so we can be sure their state is current."""
176+
"""Override cull_kernels, so we can be sure their state is current."""
176177
await self.list_kernels()
177178
await super().cull_kernels()
178179

@@ -295,7 +296,7 @@ class GatewaySessionManager(SessionManager):
295296
kernel_manager = Instance("jupyter_server.gateway.managers.GatewayMappingKernelManager")
296297

297298
async def kernel_culled(self, kernel_id):
298-
"""Checks if the kernel is still considered alive and returns true if its not found."""
299+
"""Checks if the kernel is still considered alive and returns true if it's not found."""
299300
kernel = None
300301
try:
301302
km = self.kernel_manager.get_kernel(kernel_id)
@@ -387,7 +388,7 @@ async def refresh_model(self, model=None):
387388
if isinstance(self.parent, AsyncMappingKernelManager):
388389
# Update connections only if there's a mapping kernel manager parent for
389390
# this kernel manager. The current kernel manager instance may not have
390-
# an parent instance if, say, a server extension is using another application
391+
# a parent instance if, say, a server extension is using another application
391392
# (e.g., papermill) that uses a KernelManager instance directly.
392393
self.parent._kernel_connections[self.kernel_id] = int(model["connections"])
393394

@@ -448,8 +449,14 @@ async def shutdown_kernel(self, now=False, restart=False):
448449

449450
if self.has_kernel:
450451
self.log.debug("Request shutdown kernel at: %s", self.kernel_url)
451-
response = await gateway_request(self.kernel_url, method="DELETE")
452-
self.log.debug("Shutdown kernel response: %d %s", response.code, response.reason)
452+
try:
453+
response = await gateway_request(self.kernel_url, method="DELETE")
454+
self.log.debug("Shutdown kernel response: %d %s", response.code, response.reason)
455+
except web.HTTPError as error:
456+
if error.status_code == 404:
457+
self.log.debug("Shutdown kernel response: kernel not found (ignored)")
458+
else:
459+
raise
453460

454461
async def restart_kernel(self, **kw):
455462
"""Restarts a kernel via HTTP."""
@@ -518,7 +525,7 @@ def send(self, msg: dict) -> None:
518525

519526
@staticmethod
520527
def serialize_datetime(dt):
521-
if isinstance(dt, (datetime.datetime)):
528+
if isinstance(dt, datetime.datetime):
522529
return dt.timestamp()
523530
return None
524531

@@ -595,7 +602,7 @@ async def start_channels(self, shell=True, iopub=True, stdin=True, hb=True, cont
595602
"""Starts the channels for this kernel.
596603
597604
For this class, we establish a websocket connection to the destination
598-
and setup the channel-based queues on which applicable messages will
605+
and set up the channel-based queues on which applicable messages will
599606
be posted.
600607
"""
601608

@@ -606,10 +613,11 @@ async def start_channels(self, shell=True, iopub=True, stdin=True, hb=True, cont
606613
"channels",
607614
)
608615
# Gather cert info in case where ssl is desired...
609-
ssl_options = {}
610-
ssl_options["ca_certs"] = GatewayClient.instance().ca_certs
611-
ssl_options["certfile"] = GatewayClient.instance().client_cert
612-
ssl_options["keyfile"] = GatewayClient.instance().client_key
616+
ssl_options = {
617+
"ca_certs": GatewayClient.instance().ca_certs,
618+
"certfile": GatewayClient.instance().client_cert,
619+
"keyfile": GatewayClient.instance().client_key,
620+
}
613621

614622
self.channel_socket = websocket.create_connection(
615623
ws_url,
@@ -720,7 +728,7 @@ def _route_responses(self):
720728
self._channel_queues[channel].put_nowait(response_message)
721729

722730
except websocket.WebSocketConnectionClosedException:
723-
pass # websocket closure most likely due to shutdown
731+
pass # websocket closure most likely due to shut down
724732

725733
except BaseException as be:
726734
if not self._channels_stopped:

tests/test_gateway.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,9 @@ async def mock_gateway_request(url, **kwargs):
135135
# Shutdown existing kernel
136136
if endpoint.rfind("/api/kernels/") >= 0 and method == "DELETE":
137137
requested_kernel_id = endpoint.rpartition("/")[2]
138+
if requested_kernel_id not in running_kernels:
139+
raise HTTPError(404, message="Kernel does not exist: %s" % requested_kernel_id)
140+
138141
running_kernels.pop(
139142
requested_kernel_id
140143
) # Simulate shutdown by removing kernel from running set
@@ -292,6 +295,29 @@ async def test_gateway_kernel_lifecycle(init_gateway, jp_fetch):
292295
assert await is_kernel_running(jp_fetch, kernel_id) is False
293296

294297

298+
@pytest.mark.parametrize("missing_kernel", [True, False])
299+
async def test_gateway_shutdown(init_gateway, jp_serverapp, jp_fetch, missing_kernel):
300+
# Validate server shutdown when multiple gateway kernels are present or
301+
# we've lost track of at least one (missing) kernel
302+
303+
# create two kernels
304+
k1 = await create_kernel(jp_fetch, "kspec_bar")
305+
k2 = await create_kernel(jp_fetch, "kspec_bar")
306+
307+
# ensure they're considered running
308+
assert await is_kernel_running(jp_fetch, k1) is True
309+
assert await is_kernel_running(jp_fetch, k2) is True
310+
311+
if missing_kernel:
312+
running_kernels.pop(k1) # "terminate" kernel w/o our knowledge
313+
314+
with mocked_gateway:
315+
await jp_serverapp.kernel_manager.shutdown_all()
316+
317+
assert await is_kernel_running(jp_fetch, k1) is False
318+
assert await is_kernel_running(jp_fetch, k2) is False
319+
320+
295321
#
296322
# Test methods below...
297323
#

0 commit comments

Comments
 (0)