@@ -294,15 +294,19 @@ def shutdown_kernel(self, kernel_id, now=False, restart=False):
294294 kernel ._activity_stream .close ()
295295 kernel ._activity_stream = None
296296 self .stop_buffering (kernel_id )
297- self ._kernel_connections .pop (kernel_id , None )
298297
299298 # Decrease the metric of number of kernels
300299 # running for the relevant kernel type by 1
301300 KERNEL_CURRENTLY_RUNNING_TOTAL .labels (
302301 type = self ._kernels [kernel_id ].kernel_name
303302 ).dec ()
304303
305- return self .pinned_superclass .shutdown_kernel (self , kernel_id , now = now , restart = restart )
304+ self .pinned_superclass .shutdown_kernel (self , kernel_id , now = now , restart = restart )
305+ # Unlike its async sibling method in AsyncMappingKernelManager, removing the kernel_id
306+ # from the connections dictionary isn't as problematic before the shutdown since the
307+ # method is synchronous. However, we'll keep the relative call orders the same from
308+ # a maintenance perspective.
309+ self ._kernel_connections .pop (kernel_id , None )
306310
307311 async def restart_kernel (self , kernel_id , now = False ):
308312 """Restart a kernel by kernel_id"""
@@ -376,8 +380,11 @@ def list_kernels(self):
376380 kernels = []
377381 kernel_ids = self .pinned_superclass .list_kernel_ids (self )
378382 for kernel_id in kernel_ids :
379- model = self .kernel_model (kernel_id )
380- kernels .append (model )
383+ try :
384+ model = self .kernel_model (kernel_id )
385+ kernels .append (model )
386+ except (web .HTTPError , KeyError ):
387+ pass # Probably due to a (now) non-existent kernel, continue building the list
381388 return kernels
382389
383390 # override _check_kernel_id to raise 404 instead of KeyError
@@ -498,12 +505,16 @@ async def shutdown_kernel(self, kernel_id, now=False, restart=False):
498505 kernel ._activity_stream .close ()
499506 kernel ._activity_stream = None
500507 self .stop_buffering (kernel_id )
501- self ._kernel_connections .pop (kernel_id , None )
502508
503509 # Decrease the metric of number of kernels
504510 # running for the relevant kernel type by 1
505511 KERNEL_CURRENTLY_RUNNING_TOTAL .labels (
506512 type = self ._kernels [kernel_id ].kernel_name
507513 ).dec ()
508514
509- return await self .pinned_superclass .shutdown_kernel (self , kernel_id , now = now , restart = restart )
515+ await self .pinned_superclass .shutdown_kernel (self , kernel_id , now = now , restart = restart )
516+ # Remove kernel_id from the connections dictionary only after kernel has been shutdown,
517+ # otherwise a race condition can occur since the shutdown may take a while - allowing
518+ # list/fetch kernel operations to access _kernel_connections for a non-existent key
519+ # (kernel_id) while "awaiting" the result of the shutdown.
520+ self ._kernel_connections .pop (kernel_id , None )
0 commit comments