16
16
import asyncio
17
17
import logging
18
18
import os
19
+ import pickle
19
20
import signal
20
- from multiprocessing import Pipe
21
- from multiprocessing import Process
21
+ import sys
22
+ from subprocess import PIPE
23
+ from subprocess import Popen
22
24
from threading import Thread
23
25
24
26
import psutil
25
- import tornado .ioloop
26
27
import zmq
27
28
from jupyter_client .session import Session
29
+ from tornado .ioloop import IOLoop
28
30
from traitlets .config import Config
29
31
from zmq .eventloop .zmqstream import ZMQStream
30
32
35
37
class KernelNanny :
36
38
"""Object for monitoring
37
39
38
- Must be handling signal messages"""
40
+ Must be child of engine
41
+
42
+ Handles signal messages and watches Engine process for exiting
43
+ """
39
44
40
45
def __init__ (
41
46
self ,
@@ -46,7 +51,7 @@ def __init__(
46
51
registration_url : str ,
47
52
identity : bytes ,
48
53
config : Config ,
49
- start_pipe : Pipe ,
54
+ pipe ,
50
55
log_level : int = logging .INFO ,
51
56
):
52
57
self .pid = pid
@@ -55,16 +60,18 @@ def __init__(
55
60
self .control_url = control_url
56
61
self .registration_url = registration_url
57
62
self .identity = identity
58
- self .start_pipe = start_pipe
59
63
self .config = config
64
+ self .pipe = pipe
60
65
self .session = Session (config = self .config )
66
+ log_level = 10
61
67
62
68
self .log = local_logger (f"{ self .__class__ .__name__ } .{ engine_id } " , log_level )
63
69
self .log .propagate = False
64
70
65
71
self .control_handlers = {
66
72
"signal_request" : self .signal_request ,
67
73
}
74
+ self ._finish_called = False
68
75
69
76
def wait_for_parent_thread (self ):
70
77
"""Wait for my parent to exit, then I'll notify the controller and shut down"""
@@ -79,6 +86,24 @@ def wait_for_parent_thread(self):
79
86
self .log .critical (f"Parent { self .pid } exited with status { exit_code } ." )
80
87
self .loop .add_callback (self .finish )
81
88
89
def pipe_handler(self, fd, events):
    """Handle an event on the pipe shared with the parent process.

    Any event on the pipe is treated as the parent closing its end:
    the handler is unregistered, the pipe is closed, the parent's
    status is logged, and shutdown is started via ``self.finish()``.

    Parameters
    ----------
    fd
        The pipe object that was registered with the event loop.
    events
        The event mask reported by the loop (only logged).
    """
    self.log.debug(f"Pipe event {events}")
    # unregister first so this handler cannot fire again for the same pipe
    self.loop.remove_handler(fd)
    try:
        fd.close()
    except BrokenPipeError:
        # the other end is already gone; nothing left to flush
        pass
    try:
        # timeout of 0: just poll for an exit status, do not block the loop
        status = self.parent_process.wait(0)
    except psutil.TimeoutExpired:
        # no exit status available yet; fall back to the process status
        try:
            status = self.parent_process.status()
        except psutil.NoSuchProcess:
            # psutil's exception is NoSuchProcess (there is no
            # NoSuchProcessError); the process vanished between the calls
            status = "exited"

    self.log.critical(f"Pipe closed, parent {self.pid} has status: {status}")
    self.finish()
106
+
82
107
def notify_exit (self ):
83
108
"""Notify the Hub that our parent has exited"""
84
109
self .log .info ("Notifying Hub that our parent has shut down" )
@@ -91,6 +116,9 @@ def notify_exit(self):
91
116
92
117
def finish(self):
    """Notify about the exit and schedule the event loop to stop.

    Only the first call does anything; later calls return immediately.
    """
    if not self._finish_called:
        # mark before notifying so a re-entrant call is a no-op
        self._finish_called = True
        self.notify_exit()
        ioloop = self.loop
        ioloop.add_callback(ioloop.stop)
96
124
@@ -160,7 +188,7 @@ def start(self):
160
188
# ignore SIGINT sent to parent
161
189
signal .signal (signal .SIGINT , signal .SIG_IGN )
162
190
163
- self .loop = tornado . ioloop . IOLoop .current ()
191
+ self .loop = IOLoop .current ()
164
192
self .context = zmq .Context ()
165
193
166
194
# set up control socket (connection to Scheduler)
@@ -175,15 +203,25 @@ def start(self):
175
203
port = self .parent_socket .bind_to_random_port ("tcp://127.0.0.1" )
176
204
177
205
# now that we've bound, pass port to parent via AsyncResult
178
- self .start_pipe .send (f"tcp://127.0.0.1:{ port } " )
179
- self .loop .add_timeout (self .loop .time () + 10 , self .start_pipe .close )
206
+ self .pipe .write (f"tcp://127.0.0.1:{ port } \n " )
207
+ if not sys .platform .startswith ("win" ):
208
+ # watch for the stdout pipe to close
209
+ # as a signal that our parent is shutting down
210
+ self .loop .add_handler (
211
+ self .pipe , self .pipe_handler , IOLoop .READ | IOLoop .ERROR
212
+ )
180
213
self .parent_stream = ZMQStream (self .parent_socket )
181
214
self .parent_stream .on_recv_stream (self .dispatch_parent )
182
215
try :
183
216
self .loop .start ()
184
217
finally :
185
218
self .loop .close (all_fds = True )
186
219
self .context .term ()
220
+ try :
221
+ self .pipe .close ()
222
+ except BrokenPipeError :
223
+ pass
224
+ self .log .debug ("exiting" )
187
225
188
226
@classmethod
189
227
def main (cls , * args , ** kwargs ):
@@ -197,7 +235,7 @@ def main(cls, *args, **kwargs):
197
235
"""
198
236
# start a new event loop for the forked process
199
237
asyncio .set_event_loop (asyncio .new_event_loop ())
200
- tornado . ioloop . IOLoop ().make_current ()
238
+ IOLoop ().make_current ()
201
239
self = cls (* args , ** kwargs )
202
240
self .start ()
203
241
@@ -213,13 +251,39 @@ def start_nanny(**kwargs):
213
251
instead of connecting directly to the control Scheduler.
214
252
"""
215
253
216
- pipe_r , pipe_w = Pipe (duplex = False )
217
- kwargs ['start_pipe' ] = pipe_w
218
254
kwargs ['pid' ] = os .getpid ()
219
- p = Process (target = KernelNanny .main , kwargs = kwargs , name = "KernelNanny" , daemon = True )
220
- p .start ()
221
- # close our copy of the write pipe
222
- pipe_w .close ()
223
- nanny_url = pipe_r .recv ()
224
- pipe_r .close ()
225
- return nanny_url
255
+
256
+ env = os .environ .copy ()
257
+ env ['PYTHONUNBUFFERED' ] = '1'
258
+ p = Popen (
259
+ [sys .executable , '-m' , __name__ ],
260
+ stdin = PIPE ,
261
+ stdout = PIPE ,
262
+ env = env ,
263
+ start_new_session = True , # don't inherit signals
264
+ )
265
+ p .stdin .write (pickle .dumps (kwargs ))
266
+ p .stdin .close ()
267
+ out = p .stdout .readline ()
268
+ nanny_url = out .decode ("utf8" ).strip ()
269
+ if not nanny_url :
270
+ p .terminate ()
271
+ raise RuntimeError ("nanny failed" )
272
+ # return the handle on the process
273
+ # need to keep the pipe open for the nanny
274
+ return nanny_url , p
275
+
276
+
277
def main():
    """Entrypoint from the command-line

    Loads the pickled KernelNanny keyword arguments from stdin,
    sets pipe to stdout, and runs the nanny.
    """
    # NOTE: unpickling is only acceptable here because stdin is written by
    # the process that launched us; never point this at untrusted input.
    # The context manager closes the binary reader deterministically
    # instead of leaving it to the garbage collector.
    with os.fdopen(sys.stdin.fileno(), mode='rb') as stdin_bytes:
        kwargs = pickle.load(stdin_bytes)
    kwargs['pipe'] = sys.stdout
    KernelNanny.main(**kwargs)


# run the nanny only when this module is executed as a script
if __name__ == "__main__":
    main()
0 commit comments