15
15
"""
16
16
import asyncio
17
17
import logging
18
+ import multiprocessing
18
19
import os
19
20
import signal
21
+ import sys
20
22
from multiprocessing import Pipe
21
- from multiprocessing import Process
23
+ from multiprocessing . connection import Connection
22
24
from threading import Thread
23
25
24
26
import psutil
25
- import tornado .ioloop
26
27
import zmq
27
28
from jupyter_client .session import Session
29
+ from tornado .ioloop import IOLoop
28
30
from traitlets .config import Config
29
31
from zmq .eventloop .zmqstream import ZMQStream
30
32
35
37
class KernelNanny :
36
38
"""Object for monitoring
37
39
38
- Must be handling signal messages"""
40
+ Must be child of engine
41
+
42
+ Handles signal messages and watches Engine process for exiting
43
+ """
39
44
40
45
def __init__ (
41
46
self ,
@@ -65,6 +70,7 @@ def __init__(
65
70
self .control_handlers = {
66
71
"signal_request" : self .signal_request ,
67
72
}
73
+ self ._finish_called = False
68
74
69
75
def wait_for_parent_thread (self ):
70
76
"""Wait for my parent to exit, then I'll notify the controller and shut down"""
@@ -79,6 +85,20 @@ def wait_for_parent_thread(self):
79
85
self .log .critical (f"Parent { self .pid } exited with status { exit_code } ." )
80
86
self .loop .add_callback (self .finish )
81
87
88
def pipe_handler(self, fd, events):
    """Handle an event on the start pipe, treated as "parent is going away".

    ``start`` registers this callback (on non-Windows platforms) for
    read/error events on ``start_pipe``. It stops watching the pipe,
    logs the parent's exit code or current status, and calls ``finish``.

    ``fd`` is the handle given back by the event loop; ``events`` is the
    reported event mask, which is only logged.
    """
    self.log.debug(f"Pipe event {events}")
    # only the first event matters; stop watching the pipe
    self.loop.remove_handler(fd)
    # NOTE(review): assumes self.parent_process is a psutil.Process;
    # its assignment is not visible from here -- confirm in __init__
    try:
        # zero timeout: do not block, just collect the exit code if there is one
        status = self.parent_process.wait(0)
    except psutil.TimeoutExpired:
        # parent has not exited yet; report its current status instead
        try:
            status = self.parent_process.status()
        except psutil.NoSuchProcess:
            # psutil's exception is NoSuchProcess; there is no
            # psutil.NoSuchProcessError, so the old clause could never match
            status = "exited"

    self.log.critical(f"Pipe closed, parent {self.pid} has status: {status}")
    self.finish()
101
+
82
102
def notify_exit (self ):
83
103
"""Notify the Hub that our parent has exited"""
84
104
self .log .info ("Notifying Hub that our parent has shut down" )
@@ -91,6 +111,9 @@ def notify_exit(self):
91
111
92
112
def finish(self):
    """Notify the Hub and schedule the event loop to stop, at most once.

    Calls after the first are no-ops, so several callers can trigger
    shutdown without sending duplicate notifications.
    """
    if not self._finish_called:
        # mark first, so any later call falls through without doing work
        self._finish_called = True
        self.notify_exit()
        # stop via a callback rather than calling loop.stop() directly
        self.loop.add_callback(self.loop.stop)
96
119
@@ -160,7 +183,7 @@ def start(self):
160
183
# ignore SIGINT sent to parent
161
184
signal .signal (signal .SIGINT , signal .SIG_IGN )
162
185
163
- self .loop = tornado . ioloop . IOLoop .current ()
186
+ self .loop = IOLoop .current ()
164
187
self .context = zmq .Context ()
165
188
166
189
# set up control socket (connection to Scheduler)
@@ -176,7 +199,15 @@ def start(self):
176
199
177
200
# now that we've bound, pass our URL to the parent via the start pipe
178
201
self .start_pipe .send (f"tcp://127.0.0.1:{ port } " )
179
- self .loop .add_timeout (self .loop .time () + 10 , self .start_pipe .close )
202
+ if sys .platform .startswith ("win" ):
203
+ # close the pipe on Windows
204
+ self .loop .add_timeout (self .loop .time () + 10 , self .start_pipe .close )
205
+ else :
206
+ # otherwise, watch for the pipe to close
207
+ # as a signal that our parent is shutting down
208
+ self .loop .add_handler (
209
+ self .start_pipe , self .pipe_handler , IOLoop .READ | IOLoop .ERROR
210
+ )
180
211
self .parent_stream = ZMQStream (self .parent_socket )
181
212
self .parent_stream .on_recv_stream (self .dispatch_parent )
182
213
try :
@@ -197,7 +228,7 @@ def main(cls, *args, **kwargs):
197
228
"""
198
229
# start a new event loop for the forked process
199
230
asyncio .set_event_loop (asyncio .new_event_loop ())
200
- tornado . ioloop . IOLoop ().make_current ()
231
+ IOLoop ().make_current ()
201
232
self = cls (* args , ** kwargs )
202
233
self .start ()
203
234
@@ -214,12 +245,22 @@ def start_nanny(**kwargs):
214
245
"""
215
246
216
247
pipe_r , pipe_w = Pipe (duplex = False )
248
+
217
249
kwargs ['start_pipe' ] = pipe_w
218
250
kwargs ['pid' ] = os .getpid ()
219
- p = Process (target = KernelNanny .main , kwargs = kwargs , name = "KernelNanny" , daemon = True )
251
+ # make sure to not use fork, which can be an issue for MPI
252
+ p = multiprocessing .get_context ("spawn" ).Process (
253
+ target = KernelNanny .main ,
254
+ kwargs = kwargs ,
255
+ name = "KernelNanny" ,
256
+ daemon = True ,
257
+ )
220
258
p .start ()
221
259
# close our copy of the write pipe
222
260
pipe_w .close ()
223
261
nanny_url = pipe_r .recv ()
224
- pipe_r .close ()
225
- return nanny_url
262
+ if sys .platform .startswith ("win" ):
263
+ pipe_r .close ()
264
+ # return the handle on the read pipe
265
+ # need to keep this open for the nanny
266
+ return nanny_url , pipe_r
0 commit comments