Skip to content

Commit 2d05335

Browse files
authored
Merge pull request #34 from MSeal/zombieProcFix
Zombie Process Fix
2 parents ab0162f + e91436b commit 2d05335

File tree

9 files changed

+165
-61
lines changed

9 files changed

+165
-61
lines changed

.travis.yml

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,6 @@ language: python
22
sudo: false
33
matrix:
44
include:
5-
- python: 3.5
6-
env: TOXENV=py35
75
- python: 3.6
86
env: TOXENV=py36
97
- python: 3.7

README.md

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
[![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/jupyter/nbclient/master?filepath=binder%2Frun_nbclient.ipynb)
22
[![Travis Build Status](https://travis-ci.org/jupyter/nbclient.svg?branch=master)](https://travis-ci.org/jupyter/nbclient)
33
[![image](https://codecov.io/github/jupyter/nbclient/coverage.svg?branch=master)](https://codecov.io/github/jupyter/nbclient?branch=master)
4-
[![Python 3.5](https://img.shields.io/badge/python-3.5-blue.svg)](https://www.python.org/downloads/release/python-350/)
54
[![Python 3.6](https://img.shields.io/badge/python-3.6-blue.svg)](https://www.python.org/downloads/release/python-360/)
65
[![Python 3.7](https://img.shields.io/badge/python-3.7-blue.svg)](https://www.python.org/downloads/release/python-370/)
76
[![Python 3.8](https://img.shields.io/badge/python-3.8-blue.svg)](https://www.python.org/downloads/release/python-380/)
@@ -26,7 +25,7 @@ This library used to be part of [nbconvert](https://nbconvert.readthedocs.io/en/
2625

2726
## Python Version Support
2827

29-
This library currently supports python 3.5+ versions. As minor python
28+
This library currently supports python 3.6+ versions. As minor python
3029
versions are officially sunset by the python org nbclient will similarly
3130
drop support in the future.
3231

docs/index.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ applications.
2525
Python Version Support
2626
----------------------
2727

28-
This library currently supports python 3.5+ versions. As minor python
28+
This library currently supports python 3.6+ versions. As minor python
2929
versions are officially sunset by the python org nbclient will similarly
3030
drop support in the future.
3131

nbclient/client.py

Lines changed: 107 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,8 @@
22
import base64
33
from textwrap import dedent
44

5-
# For python 3.5 compatibility we import asynccontextmanager from async_generator instead of
6-
# contextlib, and we `await yield_()` instead of just `yield`
7-
from async_generator import asynccontextmanager, async_generator, yield_
5+
from async_generator import asynccontextmanager
6+
from contextlib import contextmanager
87

98
from time import monotonic
109
from queue import Empty
@@ -15,8 +14,14 @@
1514

1615
from nbformat.v4 import output_from_msg
1716

18-
from .exceptions import CellTimeoutError, DeadKernelError, CellExecutionComplete, CellExecutionError
19-
from .util import run_sync
17+
from .exceptions import (
18+
CellControlSignal,
19+
CellTimeoutError,
20+
DeadKernelError,
21+
CellExecutionComplete,
22+
CellExecutionError
23+
)
24+
from .util import run_sync, ensure_async
2025

2126

2227
def timestamp():
@@ -324,7 +329,28 @@ def start_kernel_manager(self):
324329
self.km.client_class = 'jupyter_client.asynchronous.AsyncKernelClient'
325330
return self.km
326331

327-
async def start_new_kernel_client(self, **kwargs):
332+
async def _async_cleanup_kernel(self):
333+
try:
334+
# Send a polite shutdown request
335+
await ensure_async(self.kc.shutdown())
336+
try:
337+
# Queue the manager to kill the process, sometimes the built-in and above
338+
# shutdowns have not been successful or called yet, so give a direct kill
339+
# call here and recover gracefully if it's already dead.
340+
await ensure_async(self.km.shutdown_kernel(now=True))
341+
except RuntimeError as e:
342+
# The error isn't specialized, so we have to check the message
343+
if 'No kernel is running!' not in str(e):
344+
raise
345+
finally:
346+
# Remove any state left over even if we failed to stop the kernel
347+
await ensure_async(self.km.cleanup())
348+
await ensure_async(self.kc.stop_channels())
349+
self.kc = None
350+
351+
_cleanup_kernel = run_sync(_async_cleanup_kernel)
352+
353+
async def async_start_new_kernel_client(self, **kwargs):
328354
"""Creates a new kernel client.
329355
330356
Parameters
@@ -346,22 +372,43 @@ async def start_new_kernel_client(self, **kwargs):
346372
if self.km.ipykernel and self.ipython_hist_file:
347373
self.extra_arguments += ['--HistoryManager.hist_file={}'.format(self.ipython_hist_file)]
348374

349-
await self.km.start_kernel(extra_arguments=self.extra_arguments, **kwargs)
375+
await ensure_async(self.km.start_kernel(extra_arguments=self.extra_arguments, **kwargs))
350376

351377
self.kc = self.km.client()
352-
self.kc.start_channels()
378+
await ensure_async(self.kc.start_channels())
353379
try:
354-
await self.kc.wait_for_ready(timeout=self.startup_timeout)
380+
await ensure_async(self.kc.wait_for_ready(timeout=self.startup_timeout))
355381
except RuntimeError:
356-
self.kc.stop_channels()
357-
await self.km.shutdown_kernel()
382+
await self._async_cleanup_kernel()
358383
raise
359384
self.kc.allow_stdin = False
360385
return self.kc
361386

387+
start_new_kernel_client = run_sync(async_start_new_kernel_client)
388+
389+
@contextmanager
390+
def setup_kernel(self, **kwargs):
391+
"""
392+
Context manager for setting up the kernel to execute a notebook.
393+
394+
This assigns the Kernel Manager (`self.km`) if missing and the Kernel Client (`self.kc`).
395+
396+
When control returns from the yield it stops the client's zmq channels, and shuts
397+
down the kernel.
398+
"""
399+
# Can't use run_until_complete on an asynccontextmanager function :(
400+
if self.km is None:
401+
self.start_kernel_manager()
402+
403+
if not self.km.has_kernel:
404+
self.start_new_kernel_client(**kwargs)
405+
try:
406+
yield
407+
finally:
408+
self._cleanup_kernel()
409+
362410
@asynccontextmanager
363-
@async_generator # needed for python 3.5 compatibility
364-
async def setup_kernel(self, **kwargs):
411+
async def async_setup_kernel(self, **kwargs):
365412
"""
366413
Context manager for setting up the kernel to execute a notebook.
367414
@@ -374,12 +421,11 @@ async def setup_kernel(self, **kwargs):
374421
self.start_kernel_manager()
375422

376423
if not self.km.has_kernel:
377-
await self.start_new_kernel_client(**kwargs)
424+
await self.async_start_new_kernel_client(**kwargs)
378425
try:
379-
await yield_(None) # would just yield in python >3.5
426+
yield
380427
finally:
381-
self.kc.stop_channels()
382-
self.kc = None
428+
await self._async_cleanup_kernel()
383429

384430
async def async_execute(self, **kwargs):
385431
"""
@@ -392,15 +438,16 @@ async def async_execute(self, **kwargs):
392438
"""
393439
self.reset_execution_trackers()
394440

395-
async with self.setup_kernel(**kwargs):
441+
async with self.async_setup_kernel(**kwargs):
396442
self.log.info("Executing notebook with kernel: %s" % self.kernel_name)
397443
for index, cell in enumerate(self.nb.cells):
398444
# Ignore `'execution_count' in content` as it's always 1
399445
# when store_history is False
400446
await self.async_execute_cell(
401447
cell, index, execution_count=self.code_cells_executed + 1
402448
)
403-
info_msg = await self._wait_for_reply(self.kc.kernel_info())
449+
msg_id = await ensure_async(self.kc.kernel_info())
450+
info_msg = await self.async_wait_for_reply(msg_id)
404451
self.nb.metadata['language_info'] = info_msg['content']['language_info']
405452
self.set_widgets_metadata()
406453

@@ -450,12 +497,12 @@ def _update_display_id(self, display_id, msg):
450497
outputs[output_idx]['data'] = out['data']
451498
outputs[output_idx]['metadata'] = out['metadata']
452499

453-
async def _poll_for_reply(self, msg_id, cell, timeout, task_poll_output_msg):
500+
async def _async_poll_for_reply(self, msg_id, cell, timeout, task_poll_output_msg):
454501
if timeout is not None:
455502
deadline = monotonic() + timeout
456503
while True:
457504
try:
458-
msg = await self.kc.shell_channel.get_msg(timeout=timeout)
505+
msg = await ensure_async(self.kc.shell_channel.get_msg(timeout=timeout))
459506
if msg['parent_header'].get('msg_id') == msg_id:
460507
if self.record_timing:
461508
cell['metadata']['execution']['shell.execute_reply'] = timestamp()
@@ -474,12 +521,12 @@ async def _poll_for_reply(self, msg_id, cell, timeout, task_poll_output_msg):
474521
timeout = max(0, deadline - monotonic())
475522
except Empty:
476523
# received no message, check if kernel is still alive
477-
await self._check_alive()
478-
await self._handle_timeout(timeout, cell)
524+
await self._async_check_alive()
525+
await self._async_handle_timeout(timeout, cell)
479526

480-
async def _poll_output_msg(self, parent_msg_id, cell, cell_index):
527+
async def _async_poll_output_msg(self, parent_msg_id, cell, cell_index):
481528
while True:
482-
msg = await self.kc.iopub_channel.get_msg(timeout=None)
529+
msg = await ensure_async(self.kc.iopub_channel.get_msg(timeout=None))
483530
if msg['parent_header'].get('msg_id') == parent_msg_id:
484531
try:
485532
# Will raise CellExecutionComplete when completed
@@ -498,39 +545,46 @@ def _get_timeout(self, cell):
498545

499546
return timeout
500547

501-
async def _handle_timeout(self, timeout, cell=None):
548+
async def _async_handle_timeout(self, timeout, cell=None):
502549
self.log.error("Timeout waiting for execute reply (%is)." % timeout)
503550
if self.interrupt_on_timeout:
504551
self.log.error("Interrupting kernel")
505-
await self.km.interrupt_kernel()
552+
await ensure_async(self.km.interrupt_kernel())
506553
else:
507554
raise CellTimeoutError.error_from_timeout_and_cell(
508555
"Cell execution timed out", timeout, cell
509556
)
510557

511-
async def _check_alive(self):
512-
if not await self.kc.is_alive():
558+
async def _async_check_alive(self):
559+
if not await ensure_async(self.kc.is_alive()):
513560
self.log.error("Kernel died while waiting for execute reply.")
514561
raise DeadKernelError("Kernel died")
515562

516-
async def _wait_for_reply(self, msg_id, cell=None):
563+
async def async_wait_for_reply(self, msg_id, cell=None):
517564
# wait for finish, with timeout
518565
timeout = self._get_timeout(cell)
519566
cummulative_time = 0
520-
self.shell_timeout_interval = 5
521567
while True:
522568
try:
523-
msg = await self.kc.shell_channel.get_msg(timeout=self.shell_timeout_interval)
569+
msg = await ensure_async(
570+
self.kc.shell_channel.get_msg(
571+
timeout=self.shell_timeout_interval
572+
)
573+
)
524574
except Empty:
525-
await self._check_alive()
575+
await self._async_check_alive()
526576
cummulative_time += self.shell_timeout_interval
527577
if timeout and cummulative_time > timeout:
528-
await self._handle_timeout(timeout, cell)
578+
await self._async_async_handle_timeout(timeout, cell)
529579
break
530580
else:
531581
if msg['parent_header'].get('msg_id') == msg_id:
532582
return msg
533583

584+
wait_for_reply = run_sync(async_wait_for_reply)
585+
# Backwards compatibility naming for papermill
586+
_wait_for_reply = wait_for_reply
587+
534588
def _timeout_with_deadline(self, timeout, deadline):
535589
if deadline is not None and deadline - monotonic() < timeout:
536590
timeout = deadline - monotonic()
@@ -596,8 +650,12 @@ async def async_execute_cell(self, cell, cell_index, execution_count=None, store
596650
cell['metadata']['execution'] = {}
597651

598652
self.log.debug("Executing cell:\n%s", cell.source)
599-
parent_msg_id = self.kc.execute(
600-
cell.source, store_history=store_history, stop_on_error=not self.allow_errors
653+
parent_msg_id = await ensure_async(
654+
self.kc.execute(
655+
cell.source,
656+
store_history=store_history,
657+
stop_on_error=not self.allow_errors
658+
)
601659
)
602660
# We launched a code cell to execute
603661
self.code_cells_executed += 1
@@ -607,11 +665,20 @@ async def async_execute_cell(self, cell, cell_index, execution_count=None, store
607665
self.clear_before_next_output = False
608666

609667
task_poll_output_msg = asyncio.ensure_future(
610-
self._poll_output_msg(parent_msg_id, cell, cell_index)
611-
)
612-
exec_reply = await self._poll_for_reply(
613-
parent_msg_id, cell, exec_timeout, task_poll_output_msg
668+
self._async_poll_output_msg(parent_msg_id, cell, cell_index)
614669
)
670+
try:
671+
exec_reply = await self._async_poll_for_reply(
672+
parent_msg_id, cell, exec_timeout, task_poll_output_msg
673+
)
674+
except Exception as e:
675+
# Best effort to cancel request if it hasn't been resolved
676+
try:
677+
# Check if the task_poll_output is doing the raising for us
678+
if not isinstance(e, CellControlSignal):
679+
task_poll_output_msg.cancel()
680+
finally:
681+
raise
615682

616683
if execution_count:
617684
cell['execution_count'] = execution_count

nbclient/exceptions.py

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,13 @@
1-
class CellTimeoutError(TimeoutError):
1+
class CellControlSignal(Exception):
2+
"""
3+
A custom exception used to indicate that the exception is used for cell
4+
control actions (not the best model, but it's needed to cover existing
5+
behavior without major refactors).
6+
"""
7+
pass
8+
9+
10+
class CellTimeoutError(TimeoutError, CellControlSignal):
211
"""
312
A custom exception to capture when a cell has timed out during execution.
413
"""
@@ -21,7 +30,7 @@ class DeadKernelError(RuntimeError):
2130
pass
2231

2332

24-
class CellExecutionComplete(Exception):
33+
class CellExecutionComplete(CellControlSignal):
2534
"""
2635
Used as a control signal for cell execution across execute_cell and
2736
process_message function calls. Raised when all execution requests
@@ -32,7 +41,7 @@ class CellExecutionComplete(Exception):
3241
pass
3342

3443

35-
class CellExecutionError(Exception):
44+
class CellExecutionError(CellControlSignal):
3645
"""
3746
Custom exception to propagate exceptions that are raised during
3847
notebook execution to the caller. This is mostly useful when

0 commit comments

Comments
 (0)