Skip to content

Commit dcde5cf

Browse files
authored
Merge pull request #874 from minrk/safer-killpg
Only kill children in process group at shutdown
2 parents 78c83ad + 5c16fde commit dcde5cf

File tree

6 files changed

+132
-68
lines changed

6 files changed

+132
-68
lines changed

ipykernel/debugger.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,10 +19,14 @@
1919

2020
from .compiler import (get_file_name, get_tmp_directory, get_tmp_hash_seed)
2121

22-
# This import is required to have the next ones working...
23-
from debugpy.server import api # noqa
24-
from _pydevd_bundle import pydevd_frame_utils
25-
from _pydevd_bundle.pydevd_suspended_frames import SuspendedFramesManager, _FramesTracker
22+
try:
23+
# This import is required to have the next ones working...
24+
from debugpy.server import api # noqa
25+
from _pydevd_bundle import pydevd_frame_utils
26+
from _pydevd_bundle.pydevd_suspended_frames import SuspendedFramesManager, _FramesTracker
27+
_is_debugpy_available = True
28+
except ImportError:
29+
_is_debugpy_available = False
2630

2731
# Required for backwards compatibility
2832
ROUTING_ID = getattr(zmq, 'ROUTING_ID', None) or zmq.IDENTITY

ipykernel/ipkernel.py

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
from .zmqshell import ZMQInteractiveShell
1919
from .eventloops import _use_appnope
2020
from .compiler import XCachingCompiler
21+
from .debugger import Debugger, _is_debugpy_available
2122

2223
try:
2324
from IPython.core.interactiveshell import _asyncio_runner
@@ -33,12 +34,6 @@
3334
except ImportError:
3435
_use_experimental_60_completion = False
3536

36-
try:
37-
import debugpy
38-
from .debugger import Debugger
39-
_is_debugpy_available = True
40-
except ImportError:
41-
_is_debugpy_available = False
4237

4338
_EXPERIMENTAL_KEY_NAME = '_jupyter_types_experimental'
4439

ipykernel/kernelbase.py

Lines changed: 46 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -24,10 +24,6 @@
2424
SIGKILL = "windown-SIGKILL-sentinel"
2525

2626

27-
try:
28-
import psutil
29-
except ImportError:
30-
psutil = None
3127

3228

3329
try:
@@ -37,6 +33,7 @@
3733
# jupyter_client < 5, use local now()
3834
now = datetime.now
3935

36+
import psutil
4037
import zmq
4138
from IPython.core.error import StdinNotImplementedError
4239
from jupyter_client.session import Session
@@ -808,7 +805,8 @@ def _send_interupt_children(self):
808805
pid = os.getpid()
809806
pgid = os.getpgid(pid)
810807
# Prefer process-group over process
811-
if pgid and hasattr(os, "killpg"):
808+
# but only if the kernel is the leader of the process group
809+
if pgid and pgid == pid and hasattr(os, "killpg"):
812810
try:
813811
os.killpg(pgid, SIGINT)
814812
return
@@ -897,8 +895,6 @@ async def usage_request(self, stream, ident, parent):
897895
reply_content = {
898896
'hostname': socket.gethostname()
899897
}
900-
if psutil is None:
901-
return reply_content
902898
current_process = psutil.Process()
903899
all_processes = [current_process] + current_process.children(recursive=True)
904900
process_metric_value = self.get_process_metric_value
@@ -1136,67 +1132,62 @@ def _input_request(self, prompt, ident, parent, password=False):
11361132
raise EOFError
11371133
return value
11381134

1139-
def _killpg(self, signal):
1135+
def _signal_children(self, signum):
11401136
"""
1141-
similar to killpg but use psutil if it can on windows
1142-
or if pgid is none
1137+
Send a signal to all our children
11431138
1139+
Like `killpg`, but does not include the current process
1140+
(or possible parents).
11441141
"""
1145-
pgid = os.getpgid(os.getpid())
1146-
if pgid and hasattr(os, "killpg"):
1142+
for p in self._process_children():
1143+
self.log.debug(f"Sending {Signals(signum)!r} to subprocess {p}")
11471144
try:
1148-
os.killpg(pgid, signal)
1149-
except (OSError) as e:
1150-
self.log.exception(f"OSError running killpg, not killing children.")
1151-
return
1152-
elif psutil is not None:
1153-
children = parent.children(recursive=True)
1154-
for p in children:
1155-
try:
1156-
if signal == SIGTERM:
1157-
p.terminate()
1158-
elif signal == SIGKILL:
1159-
p.kill()
1160-
except psutil.NoSuchProcess:
1161-
pass
1145+
if signum == SIGTERM:
1146+
p.terminate()
1147+
elif signum == SIGKILL:
1148+
p.kill()
1149+
else:
1150+
p.send_signal(signum)
1151+
except psutil.NoSuchProcess:
1152+
pass
11621153

1163-
async def _progressively_terminate_all_children(self):
1154+
def _process_children(self):
1155+
"""Retrieve child processes in the kernel's process group
11641156
1165-
pgid = os.getpgid(os.getpid())
1166-
if psutil is None:
1167-
# blindly send quickly sigterm/sigkill to processes if psutil not there.
1168-
self.log.info("Please install psutil for a cleaner subprocess shutdown.")
1169-
self._send_interupt_children()
1170-
await asyncio.sleep(0.05)
1171-
self.log.debug("Sending SIGTERM to {pgid}")
1172-
self._killpg(SIGTERM)
1173-
await asyncio.sleep(0.05)
1174-
self.log.debug("Sending SIGKILL to {pgid}")
1175-
self._killpg(pgid, SIGKILL)
1157+
Avoids:
1158+
- including parents and self with killpg
1159+
- including all children that may have forked-off a new group
1160+
"""
1161+
kernel_process = psutil.Process()
1162+
all_children = kernel_process.children(recursive=True)
1163+
if os.name == "nt":
1164+
return all_children
1165+
kernel_pgid = os.getpgrp()
1166+
process_group_children = []
1167+
for child in all_children:
1168+
try:
1169+
child_pgid = os.getpgid(child.pid)
1170+
except OSError:
1171+
pass
1172+
else:
1173+
if child_pgid == kernel_pgid:
1174+
process_group_children.append(child)
1175+
return process_group_children
11761176

1177+
async def _progressively_terminate_all_children(self):
11771178
sleeps = (0.01, 0.03, 0.1, 0.3, 1, 3, 10)
1178-
children = psutil.Process().children(recursive=True)
1179-
if not children:
1179+
if not self._process_children():
11801180
self.log.debug("Kernel has no children.")
11811181
return
1182-
self.log.debug(f"Trying to interrupt then kill subprocesses : {children}")
1183-
self._send_interupt_children()
11841182

11851183
for signum in (SIGTERM, SIGKILL):
1186-
self.log.debug(
1187-
f"Will try to send {signum} ({Signals(signum)!r}) to subprocesses :{children}"
1188-
)
11891184
for delay in sleeps:
1190-
children = psutil.Process().children(recursive=True)
1191-
try:
1192-
if not children:
1193-
self.log.warning(
1194-
"No more children, continuing shutdown routine."
1195-
)
1196-
return
1197-
except psutil.NoSuchProcess:
1198-
pass
1199-
self._killpg(15)
1185+
children = self._process_children()
1186+
if not children:
1187+
self.log.debug("No more children, continuing shutdown routine.")
1188+
return
1189+
# signals only children, not current process
1190+
self._signal_children(signum)
12001191
self.log.debug(
12011192
f"Will sleep {delay}s before checking for children and retrying. {children}"
12021193
)

ipykernel/kernelspec.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313

1414
from jupyter_client.kernelspec import KernelSpecManager
1515

16-
from .ipkernel import _is_debugpy_available
16+
from .debugger import _is_debugpy_available
1717

1818
pjoin = os.path.join
1919

ipykernel/tests/test_kernel.py

Lines changed: 75 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,14 +6,16 @@
66
import ast
77
import os.path
88
import platform
9+
import signal
910
import subprocess
1011
import sys
1112
import time
13+
from subprocess import Popen
1214
from tempfile import TemporaryDirectory
1315

1416
from flaky import flaky
17+
import psutil
1518
import pytest
16-
from packaging import version
1719

1820
import IPython
1921
from IPython.paths import locate_profile
@@ -496,3 +498,75 @@ def test_control_thread_priority():
496498
# comparing first to last ought to be enough, since queues preserve order
497499
# use <= in case of very-fast handling and/or low resolution timers
498500
assert control_dates[-1] <= shell_dates[0]
501+
502+
503+
def _child():
504+
print("in child", os.getpid())
505+
506+
def _print_and_exit(sig, frame):
507+
print(f"Received signal {sig}")
508+
# take some time so retries are triggered
509+
time.sleep(0.5)
510+
sys.exit(-sig)
511+
512+
signal.signal(signal.SIGTERM, _print_and_exit)
513+
time.sleep(30)
514+
515+
516+
def _start_children():
517+
ip = IPython.get_ipython()
518+
ns = ip.user_ns
519+
520+
cmd = [sys.executable, "-c", f"from {__name__} import _child; _child()"]
521+
child_pg = Popen(cmd, start_new_session=False)
522+
child_newpg = Popen(cmd, start_new_session=True)
523+
ns["pid"] = os.getpid()
524+
ns["child_pg"] = child_pg.pid
525+
ns["child_newpg"] = child_newpg.pid
526+
# give them time to start up and register signal handlers
527+
time.sleep(1)
528+
529+
530+
@pytest.mark.skipif(
531+
platform.python_implementation() == "PyPy",
532+
reason="does not work on PyPy",
533+
)
534+
def test_shutdown_subprocesses():
535+
"""Kernel exits after polite shutdown_request"""
536+
with new_kernel() as kc:
537+
km = kc.parent
538+
msg_id, reply = execute(
539+
f"from {__name__} import _start_children\n_start_children()",
540+
kc=kc,
541+
user_expressions={
542+
"pid": "pid",
543+
"child_pg": "child_pg",
544+
"child_newpg": "child_newpg",
545+
},
546+
)
547+
print(reply)
548+
expressions = reply["user_expressions"]
549+
kernel_process = psutil.Process(int(expressions["pid"]["data"]["text/plain"]))
550+
child_pg = psutil.Process(int(expressions["child_pg"]["data"]["text/plain"]))
551+
child_newpg = psutil.Process(
552+
int(expressions["child_newpg"]["data"]["text/plain"])
553+
)
554+
wait_for_idle(kc)
555+
556+
kc.shutdown()
557+
for i in range(300): # 30s timeout
558+
if km.is_alive():
559+
time.sleep(0.1)
560+
else:
561+
break
562+
assert not km.is_alive()
563+
assert not kernel_process.is_running()
564+
# child in the process group shut down
565+
assert not child_pg.is_running()
566+
# child outside the process group was not shut down (unix only)
567+
if os.name != 'nt':
568+
assert child_newpg.is_running()
569+
try:
570+
child_newpg.terminate()
571+
except psutil.NoSuchProcess:
572+
pass

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ def run(self):
6868
'tornado>=4.2,<7.0',
6969
'matplotlib-inline>=0.1.0,<0.2.0',
7070
'appnope;platform_system=="Darwin"',
71-
'psutil;platform_system=="Windows"',
71+
'psutil',
7272
'nest_asyncio',
7373
],
7474
extras_require={

0 commit comments

Comments
 (0)