Skip to content

Commit 6c803a7

Browse files
committed
Add tests for restarter
One simple test for recovery after a single "crash" + one test of handling "infinitely" crashing kernels, i.e. that the restart limit is respected.
1 parent f453b51 commit 6c803a7

File tree

2 files changed

+303
-0
lines changed

2 files changed

+303
-0
lines changed

jupyter_client/tests/problemkernel.py

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
"""Test kernel that can start slowly or fail on startup, for testing kernel problems"""
2+
# Copyright (c) Jupyter Development Team.
3+
# Distributed under the terms of the Modified BSD License.
4+
import os
5+
import signal
6+
import time
7+
from subprocess import PIPE
8+
from subprocess import Popen
9+
10+
from ipykernel.displayhook import ZMQDisplayHook
11+
from ipykernel.kernelapp import IPKernelApp
12+
from ipykernel.kernelbase import Kernel
13+
14+
15+
class ProblemTestKernel(Kernel):
    """Kernel class used when testing how clients cope with kernel problems.

    Only the identifying metadata is overridden here.
    """

    # Identification strings for this test kernel.
    implementation = "problemtest"
    implementation_version = "0.0"
    banner = ""
21+
22+
23+
class ProblemTestApp(IPKernelApp):
    """Kernel application that launches ``ProblemTestKernel``.

    Startup can be made to fail on purpose by setting the ``FAIL_ON_START``
    environment variable to ``"1"``.
    """

    kernel_class = ProblemTestKernel

    def init_io(self):
        """Install only the display hook.

        Overridden to disable stdout/stderr capture.
        """
        hook = ZMQDisplayHook(self.session, self.iopub_socket)
        self.displayhook = hook

    def init_sockets(self):
        """Initialise the sockets unless a startup failure was requested.

        Raises:
            RuntimeError: if ``FAIL_ON_START`` is set to ``"1"``.
        """
        failure_requested = os.environ.get("FAIL_ON_START") == "1"
        if not failure_requested:
            return super().init_sockets()
        # Simulates e.g. a port binding issue (Address already in use)
        raise RuntimeError("Failed for testing purposes")
35+
36+
37+
if __name__ == "__main__":
    # Delay startup on purpose so that client logic for slow-starting
    # kernels gets exercised. STARTUP_DELAY is passed to time.sleep and
    # defaults to 2 when the variable is not set.
    delay_seconds = int(os.environ.get("STARTUP_DELAY", "2"))
    time.sleep(delay_seconds)
    ProblemTestApp.launch_instance()
Lines changed: 261 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,261 @@
1+
"""Tests for kernel restarting (KernelRestarter) with the IOLoop kernel managers"""
2+
# Copyright (c) Jupyter Development Team.
3+
# Distributed under the terms of the Modified BSD License.
4+
5+
import asyncio
6+
import concurrent.futures
7+
import json
8+
import os
9+
import signal
10+
import sys
11+
import time
12+
from subprocess import PIPE
13+
14+
import pytest
15+
from jupyter_core import paths
16+
from traitlets.config.loader import Config
17+
from traitlets.log import get_logger
18+
19+
from jupyter_client import AsyncKernelManager
20+
from jupyter_client.ioloop import AsyncIOLoopKernelManager, IOLoopKernelManager
21+
from ..manager import start_new_async_kernel
22+
from ..manager import start_new_kernel
23+
24+
pjoin = os.path.join
25+
26+
def _install_kernel(name="problemtest", extra_env=None):
    """Write a kernel spec for the problem test kernel and return its name.

    The spec is stored as ``kernel.json`` under
    ``<jupyter data dir>/kernels/<name>`` and launches
    ``jupyter_client.tests.problemkernel`` with the current interpreter.

    Args:
        name: Name of the kernel spec directory to create.
        extra_env: Optional mapping of additional environment variables to
            add to the spec's ``env`` section.

    Returns:
        The ``name`` that was passed in.
    """
    kernel_dir = pjoin(paths.jupyter_data_dir(), "kernels", name)
    # Tolerate a spec directory that already exists (e.g. the same kernel
    # installed twice); kernel.json is rewritten below either way.
    os.makedirs(kernel_dir, exist_ok=True)
    spec = {
        "argv": [
            sys.executable,
            "-m",
            "jupyter_client.tests.problemkernel",
            "-f",
            "{connection_file}",
        ],
        "display_name": "Problematic Test Kernel",
        "env": {"TEST_VARS": "${TEST_VARS}:test_var_2", **(extra_env or {})},
    }
    with open(pjoin(kernel_dir, "kernel.json"), "w") as f:
        json.dump(spec, f)
    return name
48+
49+
@pytest.fixture
def install_kernel():
    """Install the ``problemtest`` kernel spec and return its name."""
    kernel_name = _install_kernel("problemtest")
    return kernel_name
52+
53+
@pytest.fixture
def install_fail_kernel():
    """Install a kernel spec with ``FAIL_ON_START`` set to ``"1"``; return its name."""
    failing_env = {"FAIL_ON_START": "1"}
    return _install_kernel("problemtest-fail", extra_env=failing_env)
58+
59+
@pytest.fixture
def install_slow_fail_kernel():
    """Install a kernel spec with ``STARTUP_DELAY`` and ``FAIL_ON_START`` set.

    Returns the name of the installed spec.
    """
    slow_failing_env = {
        "STARTUP_DELAY": "5",
        "FAIL_ON_START": "1",
    }
    return _install_kernel("problemtest-slow", extra_env=slow_failing_env)
65+
66+
@pytest.fixture(params=["tcp", "ipc"])
def transport(request):
    """Provide each transport under test, skipping ``ipc`` on Windows."""
    chosen = request.param
    on_windows = sys.platform == "win32"
    if on_windows and chosen == "ipc":
        pytest.skip("Transport 'ipc' not supported on Windows.")
    return chosen
71+
72+
@pytest.fixture
def config(transport):
    """Build a ``Config`` whose ``KernelManager`` section uses ``transport``.

    For the ``ipc`` transport the ``ip`` trait is additionally set to
    ``"test"``.
    """
    cfg = Config()
    manager_section = cfg.KernelManager
    manager_section.transport = transport
    if transport == "ipc":
        manager_section.ip = "test"
    return cfg
79+
80+
@pytest.fixture
def debug_logging():
    """Set the level of the logger returned by ``get_logger()`` to DEBUG."""
    logger = get_logger()
    logger.setLevel("DEBUG")
83+
84+
85+
@pytest.mark.asyncio
async def test_restart_check(config, install_kernel):
    """Check that a killed kernel is restarted and becomes ready again."""
    # If this test fails, run it with --log-cli-level=DEBUG to inspect
    max_restarts = 1
    config.KernelRestarter.restart_limit = max_restarts
    config.KernelRestarter.debug = True
    manager = IOLoopKernelManager(kernel_name=install_kernel, config=config)

    # One future per expected restart; each is resolved by the callback below.
    restart_events = [asyncio.Future() for _ in range(max_restarts)]
    seen = 0

    def on_restart():
        nonlocal seen
        if seen >= max_restarts:
            raise RuntimeError("Kernel restarted more than %d times!" % max_restarts)
        restart_events[seen].set_result(True)
        seen += 1

    try:
        manager.start_kernel()
        manager.add_restart_callback(on_restart, "restart")
    except BaseException:
        # Do not leave a kernel behind if setup failed, then re-raise.
        if manager.has_kernel:
            manager.shutdown_kernel()
        raise

    try:
        # Every round waits for the kernel to be ready; all rounds except the
        # last one (marked by None) then kill it and wait for the restart.
        for pending in [*restart_events, None]:
            client = manager.client()
            client.start_channels()
            client.wait_for_ready(timeout=60)
            client.stop_channels()
            if pending is None:
                continue
            # Kill without cleanup to simulate crash:
            await manager.provisioner.kill()
            await pending

        assert seen == max_restarts
        assert manager.is_alive()
    finally:
        manager.shutdown_kernel(now=True)
        assert manager.context.closed
129+
130+
@pytest.mark.asyncio
async def test_restarter_gives_up(config, install_fail_kernel):
    """Check that the restarter stops once the restart limit is reached."""
    # If this test fails, run it with --log-cli-level=DEBUG to inspect
    max_restarts = 1
    config.KernelRestarter.restart_limit = max_restarts
    config.KernelRestarter.debug = True
    manager = IOLoopKernelManager(kernel_name=install_fail_kernel, config=config)

    # One future per expected restart; each is resolved by the callback below.
    restart_events = [asyncio.Future() for _ in range(max_restarts)]
    seen = 0

    def on_restart():
        nonlocal seen
        if seen >= max_restarts:
            raise RuntimeError("Kernel restarted more than %d times!" % max_restarts)
        restart_events[seen].set_result(True)
        seen += 1

    # Resolved by the 'dead' callback.
    gave_up = asyncio.Future()

    def on_dead():
        gave_up.set_result(True)

    try:
        manager.start_kernel()
        manager.add_restart_callback(on_restart, "restart")
        manager.add_restart_callback(on_dead, "dead")
    except BaseException:
        # Do not leave a kernel behind if setup failed, then re-raise.
        if manager.has_kernel:
            manager.shutdown_kernel()
        raise

    try:
        # Wait for every expected restart, in order.
        for pending in restart_events:
            await pending

        assert await gave_up
        assert seen == max_restarts
    finally:
        manager.shutdown_kernel(now=True)
        assert manager.context.closed
172+
173+
174+
@pytest.mark.asyncio
async def test_async_restart_check(config, install_kernel):
    """Check that a killed kernel is restarted and becomes ready again (async manager)."""
    # If this test fails, run it with --log-cli-level=DEBUG to inspect
    max_restarts = 1
    config.KernelRestarter.restart_limit = max_restarts
    config.KernelRestarter.debug = True
    manager = AsyncIOLoopKernelManager(kernel_name=install_kernel, config=config)

    # One future per expected restart; each is resolved by the callback below.
    restart_events = [asyncio.Future() for _ in range(max_restarts)]
    seen = 0

    def on_restart():
        nonlocal seen
        if seen >= max_restarts:
            raise RuntimeError("Kernel restarted more than %d times!" % max_restarts)
        restart_events[seen].set_result(True)
        seen += 1

    try:
        await manager.start_kernel()
        manager.add_restart_callback(on_restart, "restart")
    except BaseException:
        # Do not leave a kernel behind if setup failed, then re-raise.
        if manager.has_kernel:
            await manager.shutdown_kernel()
        raise

    try:
        # Every round waits for the kernel to be ready; all rounds except the
        # last one (marked by None) then kill it and wait for the restart.
        for pending in [*restart_events, None]:
            client = manager.client()
            client.start_channels()
            await client.wait_for_ready(timeout=60)
            client.stop_channels()
            if pending is None:
                continue
            # Kill without cleanup to simulate crash:
            await manager.provisioner.kill()
            await pending

        assert seen == max_restarts
        assert await manager.is_alive()
    finally:
        await manager.shutdown_kernel(now=True)
        assert manager.context.closed
218+
219+
@pytest.mark.asyncio
async def test_async_restarter_gives_up(config, install_slow_fail_kernel):
    """Check that the restarter stops once the restart limit is reached (async manager)."""
    # If this test fails, run it with --log-cli-level=DEBUG to inspect
    max_restarts = 2
    config.KernelRestarter.restart_limit = max_restarts
    config.KernelRestarter.debug = True
    config.KernelRestarter.stable_start_time = 30.0
    manager = AsyncIOLoopKernelManager(
        kernel_name=install_slow_fail_kernel, config=config
    )

    # One future per expected restart; each is resolved by the callback below.
    restart_events = [asyncio.Future() for _ in range(max_restarts)]
    seen = 0

    def on_restart():
        nonlocal seen
        if seen >= max_restarts:
            raise RuntimeError("Kernel restarted more than %d times!" % max_restarts)
        restart_events[seen].set_result(True)
        seen += 1

    # Resolved by the 'dead' callback.
    gave_up = asyncio.Future()

    def on_dead():
        gave_up.set_result(True)

    try:
        await manager.start_kernel()
        manager.add_restart_callback(on_restart, "restart")
        manager.add_restart_callback(on_dead, "dead")
    except BaseException:
        # Do not leave a kernel behind if setup failed, then re-raise.
        if manager.has_kernel:
            await manager.shutdown_kernel()
        raise

    try:
        # Wait until every expected restart has been reported.
        await asyncio.gather(*restart_events)

        assert await gave_up
        assert seen == max_restarts
    finally:
        await manager.shutdown_kernel(now=True)
        assert manager.context.closed
261+

0 commit comments

Comments
 (0)