Skip to content

Commit 511e8e0

Browse files
committed
Only kill children in process group at shutdown
- not the whole process group, which includes self and possibly parents - not children which have started their own process groups
1 parent 78c83ad commit 511e8e0

File tree

2 files changed

+137
-48
lines changed

2 files changed

+137
-48
lines changed

ipykernel/kernelbase.py

Lines changed: 54 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -808,7 +808,8 @@ def _send_interupt_children(self):
808808
pid = os.getpid()
809809
pgid = os.getpgid(pid)
810810
# Prefer process-group over process
811-
if pgid and hasattr(os, "killpg"):
811+
# but only if the kernel is the leader of the process group
812+
if pgid and pgid == pid and hasattr(os, "killpg"):
812813
try:
813814
os.killpg(pgid, SIGINT)
814815
return
@@ -1136,67 +1137,73 @@ def _input_request(self, prompt, ident, parent, password=False):
11361137
raise EOFError
11371138
return value
11381139

1139-
def _killpg(self, signal):
1140+
def _signal_children(self, signum):
11401141
"""
1141-
similar to killpg but use psutil if it can on windows
1142-
or if pgid is none
1142+
Send a signal to all our children
11431143
1144+
Like `killpg`, but does not include the current process
1145+
(or possible parents).
11441146
"""
1145-
pgid = os.getpgid(os.getpid())
1146-
if pgid and hasattr(os, "killpg"):
1147-
try:
1148-
os.killpg(pgid, signal)
1149-
except (OSError) as e:
1150-
self.log.exception(f"OSError running killpg, not killing children.")
1147+
if psutil is None:
1148+
self.log.info("Need psutil to signal children")
11511149
return
1152-
elif psutil is not None:
1153-
children = parent.children(recursive=True)
1154-
for p in children:
1155-
try:
1156-
if signal == SIGTERM:
1157-
p.terminate()
1158-
elif signal == SIGKILL:
1159-
p.kill()
1160-
except psutil.NoSuchProcess:
1161-
pass
11621150

1163-
async def _progressively_terminate_all_children(self):
1151+
for p in self._process_children():
1152+
self.log.debug(f"Sending {Signals(signum)!r} to subprocess {p}")
1153+
try:
1154+
if signum == SIGTERM:
1155+
p.terminate()
1156+
elif signum == SIGKILL:
1157+
p.kill()
1158+
else:
1159+
p.send_signal(signum)
1160+
except psutil.NoSuchProcess:
1161+
pass
1162+
1163+
def _process_children(self):
1164+
"""Retrieve child processes in the kernel's process group
1165+
1166+
Avoids:
1167+
- including parents and self with killpg
1168+
- including all children that may have forked-off a new group
1169+
"""
1170+
if psutil is None:
1171+
return []
1172+
kernel_process = psutil.Process()
1173+
all_children = kernel_process.children(recursive=True)
1174+
if os.name == "nt":
1175+
return all_children
1176+
kernel_pgid = os.getpgrp()
1177+
process_group_children = []
1178+
for child in all_children:
1179+
try:
1180+
child_pgid = os.getpgid(child.pid)
1181+
except OSError:
1182+
pass
1183+
else:
1184+
if child_pgid == kernel_pgid:
1185+
process_group_children.append(child)
1186+
return process_group_children
11641187

1165-
pgid = os.getpgid(os.getpid())
1188+
async def _progressively_terminate_all_children(self):
11661189
if psutil is None:
1167-
# blindly send quickly sigterm/sigkill to processes if psutil not there.
1190+
# we need psutil to safely clean up children
11681191
self.log.info("Please install psutil for a cleaner subprocess shutdown.")
1169-
self._send_interupt_children()
1170-
await asyncio.sleep(0.05)
1171-
self.log.debug("Sending SIGTERM to {pgid}")
1172-
self._killpg(SIGTERM)
1173-
await asyncio.sleep(0.05)
1174-
self.log.debug("Sending SIGKILL to {pgid}")
1175-
self._killpg(pgid, SIGKILL)
1192+
return
11761193

11771194
sleeps = (0.01, 0.03, 0.1, 0.3, 1, 3, 10)
1178-
children = psutil.Process().children(recursive=True)
1179-
if not children:
1195+
if not self._process_children():
11801196
self.log.debug("Kernel has no children.")
11811197
return
1182-
self.log.debug(f"Trying to interrupt then kill subprocesses : {children}")
1183-
self._send_interupt_children()
11841198

11851199
for signum in (SIGTERM, SIGKILL):
1186-
self.log.debug(
1187-
f"Will try to send {signum} ({Signals(signum)!r}) to subprocesses :{children}"
1188-
)
11891200
for delay in sleeps:
1190-
children = psutil.Process().children(recursive=True)
1191-
try:
1192-
if not children:
1193-
self.log.warning(
1194-
"No more children, continuing shutdown routine."
1195-
)
1196-
return
1197-
except psutil.NoSuchProcess:
1198-
pass
1199-
self._killpg(15)
1201+
children = self._process_children()
1202+
if not children:
1203+
self.log.debug("No more children, continuing shutdown routine.")
1204+
return
1205+
# signals only children, not current process
1206+
self._signal_children(signum)
12001207
self.log.debug(
12011208
f"Will sleep {delay}s before checking for children and retrying. {children}"
12021209
)

ipykernel/tests/test_kernel.py

Lines changed: 83 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,14 +6,20 @@
66
import ast
77
import os.path
88
import platform
9+
import signal
910
import subprocess
1011
import sys
1112
import time
13+
from subprocess import Popen
1214
from tempfile import TemporaryDirectory
1315

1416
from flaky import flaky
1517
import pytest
16-
from packaging import version
18+
19+
try:
20+
import psutil
21+
except ImportError:
22+
psutil = None
1723

1824
import IPython
1925
from IPython.paths import locate_profile
@@ -496,3 +502,79 @@ def test_control_thread_priority():
496502
# comparing first to last ought to be enough, since queues preserve order
497503
# use <= in case of very-fast handling and/or low resolution timers
498504
assert control_dates[-1] <= shell_dates[0]
505+
506+
507+
def _child():
508+
print("in child", os.getpid())
509+
510+
def _print_and_exit(sig, frame):
511+
print(f"Received signal {sig}")
512+
# take some time so retries are triggered
513+
time.sleep(0.5)
514+
sys.exit(-sig)
515+
516+
signal.signal(signal.SIGTERM, _print_and_exit)
517+
time.sleep(30)
518+
519+
520+
def _start_children():
521+
ip = IPython.get_ipython()
522+
ns = ip.user_ns
523+
524+
cmd = [sys.executable, "-c", f"from {__name__} import _child; _child()"]
525+
child_pg = Popen(cmd, start_new_session=False)
526+
child_newpg = Popen(cmd, start_new_session=True)
527+
ns["pid"] = os.getpid()
528+
ns["child_pg"] = child_pg.pid
529+
ns["child_newpg"] = child_newpg.pid
530+
# give them time to start up and register signal handlers
531+
time.sleep(1)
532+
533+
534+
@pytest.mark.skipif(
535+
platform.python_implementation() == "PyPy",
536+
reason="does not work on PyPy",
537+
)
538+
@pytest.mark.skipif(
539+
psutil is None,
540+
reason="requires psutil",
541+
)
542+
def test_shutdown_subprocesses():
543+
"""Kernel exits after polite shutdown_request"""
544+
with new_kernel() as kc:
545+
km = kc.parent
546+
msg_id, reply = execute(
547+
f"from {__name__} import _start_children\n_start_children()",
548+
kc=kc,
549+
user_expressions={
550+
"pid": "pid",
551+
"child_pg": "child_pg",
552+
"child_newpg": "child_newpg",
553+
},
554+
)
555+
print(reply)
556+
expressions = reply["user_expressions"]
557+
kernel_process = psutil.Process(int(expressions["pid"]["data"]["text/plain"]))
558+
child_pg = psutil.Process(int(expressions["child_pg"]["data"]["text/plain"]))
559+
child_newpg = psutil.Process(
560+
int(expressions["child_newpg"]["data"]["text/plain"])
561+
)
562+
wait_for_idle(kc)
563+
564+
kc.shutdown()
565+
for i in range(300): # 30s timeout
566+
if km.is_alive():
567+
time.sleep(0.1)
568+
else:
569+
break
570+
assert not km.is_alive()
571+
assert not kernel_process.is_running()
572+
# child in the process group shut down
573+
assert not child_pg.is_running()
574+
# child outside the process group was not shut down (unix only)
575+
if os.name != 'nt':
576+
assert child_newpg.is_running()
577+
try:
578+
child_newpg.terminate()
579+
except psutil.NoSuchProcess:
580+
pass

0 commit comments

Comments
 (0)