Skip to content

Commit d901415

Browse files
authored
Fix gateway kernel shutdown (#874)
* Fix gateway kernel shutdown * Add test for shutdown_all scenarios
1 parent 29911b1 commit d901415

File tree

2 files changed

+48
-14
lines changed

2 files changed

+48
-14
lines changed

jupyter_server/gateway/managers.py

Lines changed: 22 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@ async def kernel_model(self, kernel_id):
9292
The uuid of the kernel.
9393
"""
9494
model = None
95-
km = self.get_kernel(kernel_id)
95+
km = self.get_kernel(str(kernel_id))
9696
if km:
9797
model = km.kernel
9898
return model
@@ -166,13 +166,14 @@ async def interrupt_kernel(self, kernel_id, **kwargs):
166166

167167
async def shutdown_all(self, now=False):
168168
"""Shutdown all kernels."""
169-
for kernel_id in self._kernels:
169+
kids = list(self._kernels)
170+
for kernel_id in kids:
170171
km = self.get_kernel(kernel_id)
171172
await km.shutdown_kernel(now=now)
172173
self.remove_kernel(kernel_id)
173174

174175
async def cull_kernels(self):
175-
"""Override cull_kernels so we can be sure their state is current."""
176+
"""Override cull_kernels, so we can be sure their state is current."""
176177
await self.list_kernels()
177178
await super().cull_kernels()
178179

@@ -295,7 +296,7 @@ class GatewaySessionManager(SessionManager):
295296
kernel_manager = Instance("jupyter_server.gateway.managers.GatewayMappingKernelManager")
296297

297298
async def kernel_culled(self, kernel_id):
298-
"""Checks if the kernel is still considered alive and returns true if its not found."""
299+
"""Checks if the kernel is still considered alive and returns true if it's not found."""
299300
kernel = None
300301
try:
301302
km = self.kernel_manager.get_kernel(kernel_id)
@@ -387,7 +388,7 @@ async def refresh_model(self, model=None):
387388
if isinstance(self.parent, AsyncMappingKernelManager):
388389
# Update connections only if there's a mapping kernel manager parent for
389390
# this kernel manager. The current kernel manager instance may not have
390-
# an parent instance if, say, a server extension is using another application
391+
# a parent instance if, say, a server extension is using another application
391392
# (e.g., papermill) that uses a KernelManager instance directly.
392393
self.parent._kernel_connections[self.kernel_id] = int(model["connections"])
393394

@@ -448,8 +449,14 @@ async def shutdown_kernel(self, now=False, restart=False):
448449

449450
if self.has_kernel:
450451
self.log.debug("Request shutdown kernel at: %s", self.kernel_url)
451-
response = await gateway_request(self.kernel_url, method="DELETE")
452-
self.log.debug("Shutdown kernel response: %d %s", response.code, response.reason)
452+
try:
453+
response = await gateway_request(self.kernel_url, method="DELETE")
454+
self.log.debug("Shutdown kernel response: %d %s", response.code, response.reason)
455+
except web.HTTPError as error:
456+
if error.status_code == 404:
457+
self.log.debug("Shutdown kernel response: kernel not found (ignored)")
458+
else:
459+
raise
453460

454461
async def restart_kernel(self, **kw):
455462
"""Restarts a kernel via HTTP."""
@@ -518,7 +525,7 @@ def send(self, msg: dict) -> None:
518525

519526
@staticmethod
520527
def serialize_datetime(dt):
521-
if isinstance(dt, (datetime.datetime)):
528+
if isinstance(dt, datetime.datetime):
522529
return dt.timestamp()
523530
return None
524531

@@ -597,7 +604,7 @@ async def start_channels(self, shell=True, iopub=True, stdin=True, hb=True, cont
597604
"""Starts the channels for this kernel.
598605
599606
For this class, we establish a websocket connection to the destination
600-
and setup the channel-based queues on which applicable messages will
607+
and set up the channel-based queues on which applicable messages will
601608
be posted.
602609
"""
603610

@@ -608,10 +615,11 @@ async def start_channels(self, shell=True, iopub=True, stdin=True, hb=True, cont
608615
"channels",
609616
)
610617
# Gather cert info in case where ssl is desired...
611-
ssl_options = {}
612-
ssl_options["ca_certs"] = GatewayClient.instance().ca_certs
613-
ssl_options["certfile"] = GatewayClient.instance().client_cert
614-
ssl_options["keyfile"] = GatewayClient.instance().client_key
618+
ssl_options = {
619+
"ca_certs": GatewayClient.instance().ca_certs,
620+
"certfile": GatewayClient.instance().client_cert,
621+
"keyfile": GatewayClient.instance().client_key,
622+
}
615623

616624
self.channel_socket = websocket.create_connection(
617625
ws_url,
@@ -722,7 +730,7 @@ def _route_responses(self):
722730
self._channel_queues[channel].put_nowait(response_message)
723731

724732
except websocket.WebSocketConnectionClosedException:
725-
pass # websocket closure most likely due to shutdown
733+
pass # websocket closure most likely due to shut down
726734

727735
except BaseException as be:
728736
if not self._channels_stopped:

tests/test_gateway.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,9 @@ async def mock_gateway_request(url, **kwargs):
135135
# Shutdown existing kernel
136136
if endpoint.rfind("/api/kernels/") >= 0 and method == "DELETE":
137137
requested_kernel_id = endpoint.rpartition("/")[2]
138+
if requested_kernel_id not in running_kernels:
139+
raise HTTPError(404, message="Kernel does not exist: %s" % requested_kernel_id)
140+
138141
running_kernels.pop(
139142
requested_kernel_id
140143
) # Simulate shutdown by removing kernel from running set
@@ -292,6 +295,29 @@ async def test_gateway_kernel_lifecycle(init_gateway, jp_fetch):
292295
assert await is_kernel_running(jp_fetch, kernel_id) is False
293296

294297

298+
@pytest.mark.parametrize("missing_kernel", [True, False])
299+
async def test_gateway_shutdown(init_gateway, jp_serverapp, jp_fetch, missing_kernel):
300+
# Validate server shutdown when multiple gateway kernels are present or
301+
# we've lost track of at least one (missing) kernel
302+
303+
# create two kernels
304+
k1 = await create_kernel(jp_fetch, "kspec_bar")
305+
k2 = await create_kernel(jp_fetch, "kspec_bar")
306+
307+
# ensure they're considered running
308+
assert await is_kernel_running(jp_fetch, k1) is True
309+
assert await is_kernel_running(jp_fetch, k2) is True
310+
311+
if missing_kernel:
312+
running_kernels.pop(k1) # "terminate" kernel w/o our knowledge
313+
314+
with mocked_gateway:
315+
await jp_serverapp.kernel_manager.shutdown_all()
316+
317+
assert await is_kernel_running(jp_fetch, k1) is False
318+
assert await is_kernel_running(jp_fetch, k2) is False
319+
320+
295321
#
296322
# Test methods below...
297323
#

0 commit comments

Comments
 (0)