88
99from __future__ import annotations
1010
11+ import os
1112import subprocess
1213import tempfile
1314import threading
@@ -54,6 +55,8 @@ def __init__(
5455 self ._tasks : dict [str , "Task" ] = {}
5556 self ._service_id : int = Service ._service_count
5657 Service ._service_count += 1
58+ self ._invalid_lines : list [str ] = []
59+ self ._error_lines : list [str ] = []
5760 self ._process : subprocess .Popen | None = None
5861 self ._stdout_thread : threading .Thread | None = None
5962 self ._stderr_thread : threading .Thread | None = None
@@ -298,7 +301,60 @@ def close(self) -> None:
298301 raise RuntimeError ("Service has not been started" )
299302 self ._process .stdin .close ()
300303
def kill(self) -> None:
    """
    Force the service's worker process to begin shutting down. Any tasks still
    pending completion will be interrupted, reporting TaskStatus.CRASHED.

    To shut down the service more gently, allowing any pending tasks to run to
    completion, use close() instead.

    To wait until the service's worker process has completely shut down
    and all output has been reported, call wait_for() afterward.

    Raises:
        RuntimeError: If the service has not been started.
    """
    if self._process is None:
        # Same failure mode as close(): report the real problem instead of
        # letting an AttributeError on None escape.
        raise RuntimeError("Service has not been started")
    self._process.kill()
316+
def wait_for(self) -> int:
    """
    Wait for the service's worker process to terminate.

    Blocks until the worker process has exited and the stdout, stderr,
    and monitor threads have finished.

    Returns:
        Exit value of the worker process.

    Raises:
        RuntimeError: If the service has not been started.
    """
    if self._process is None:
        # Same failure mode as close(): report the real problem instead of
        # letting an AttributeError on None escape.
        raise RuntimeError("Service has not been started")
    self._process.wait()

    # Wait for worker output processing threads to finish up.
    # The thread attributes are Optional, so skip any that were never created.
    for thread in (
        self._stdout_thread,
        self._stderr_thread,
        self._monitor_thread,
    ):
        if thread is not None:
            thread.join()

    return self._process.returncode
332+
def is_alive(self) -> bool:
    """
    Report whether the service's worker process is currently running.

    Returns:
        True while the worker process is running; False if it has not yet
        started or has already shut down or crashed.
    """
    process = self._process
    if process is None:
        # Never started: nothing can be running.
        return False
    # poll() yields None for as long as the process has not exited.
    return process.poll() is None
342+
def invalid_lines(self) -> list[str]:
    """
    Unparseable lines emitted by the worker process on its stdout stream,
    collected over the lifetime of the service.
    Can be useful for analyzing why a worker process has crashed.

    Returns:
        A snapshot copy of the lines collected so far. Mutating the
        returned list does not affect the service's own record, which
        is also used to build the crash report for pending tasks.
    """
    # Copy rather than hand out the list the stdout reader appends to.
    return list(self._invalid_lines)
350+
def error_lines(self) -> list[str]:
    """
    Lines emitted by the worker process on its stderr stream,
    collected over the lifetime of the service.
    Can be useful for analyzing why a worker process has crashed.

    Returns:
        A snapshot copy of the lines collected so far. Mutating the
        returned list does not affect the service's own record, which
        is also used to build the crash report for pending tasks.
    """
    # Copy rather than hand out the list the stderr reader appends to.
    return list(self._error_lines)
302358
303359 def _stdout_loop (self ) -> None :
304360 """
@@ -310,7 +366,7 @@ def _stdout_loop(self) -> None:
310366 try :
311367 line = None if stdout is None else stdout .readline ()
312368 except Exception :
313- # Something went wrong reading the line. Panic!
369+ # Something went wrong reading the stdout line. Panic!
314370 self ._debug_service (format_exc ())
315371 break
316372
@@ -336,22 +392,26 @@ def _stdout_loop(self) -> None:
336392 # Something went wrong decoding the line of JSON.
337393 # Skip it and keep going, but log it first.
338394 self ._debug_service (f"<INVALID> { line } " )
395+ self ._invalid_lines .append (line .rstrip ("\n \r " ))
339396
340397 def _stderr_loop (self ) -> None :
341398 """
342399 Input loop processing lines from the worker's stderr stream.
343400 """
344- # noinspection PyBroadException
345- try :
346- while True :
347- stderr = self . _process . stderr
401+ while True :
402+ stderr = self . _process . stderr
403+ # noinspection PyBroadException
404+ try :
348405 line = None if stderr is None else stderr .readline ()
349- if not line : # readline returns empty string upon EOF
350- self ._debug_service ("<worker stderr closed>" )
351- return
352- self ._debug_worker (line )
353- except Exception :
354- self ._debug_service (format_exc ())
406+ except Exception :
407+ # Something went wrong reading the stderr line. Panic!
408+ self ._debug_service (format_exc ())
409+ break
410+ if not line : # readline returns empty string upon EOF
411+ self ._debug_service ("<worker stderr closed>" )
412+ break
413+ self ._debug_worker (line )
414+ self ._error_lines .append (line .rstrip ("\n \r " ))
355415
356416 def _monitor_loop (self ) -> None :
357417 # Wait until the worker process terminates.
@@ -364,15 +424,33 @@ def _monitor_loop(self) -> None:
364424 f"<worker process terminated with exit code { exit_code } >"
365425 )
366426 task_count = len (self ._tasks )
367- if task_count > 0 :
368- self ._debug_service (
369- f"<worker process terminated with { task_count } pending tasks>"
370- )
427+ if task_count == 0 :
428+ # No hanging tasks to clean up.
429+ return
430+
431+ self ._debug_service (
432+ "<worker process terminated with "
433+ + f"{ task_count } pending task{ '' if task_count == 1 else 's' } >"
434+ )
371435
372436 # Notify any remaining tasks about the process crash.
437+ nl = os .linesep
438+ error_parts = [f"Worker crashed with exit code { exit_code } ." ]
439+ error_parts .append ("" )
440+ error_parts .append ("[stdout]" )
441+ if len (self ._invalid_lines ) == 0 :
442+ error_parts .append ("<none>" )
443+ else :
444+ error_parts .extend (self ._invalid_lines )
445+ error_parts .append ("" )
446+ error_parts .append ("[stderr]" )
447+ if len (self ._error_lines ) == 0 :
448+ error_parts .append ("<none>" )
449+ else :
450+ error_parts .extend (self ._error_lines )
451+ error = nl .join (error_parts ) + nl
373452 for task in self ._tasks .values ():
374- task ._crash ()
375-
453+ task ._crash (error )
376454 self ._tasks .clear ()
377455
378456 def _debug_service (self , message : str ) -> None :
@@ -405,12 +483,17 @@ class TaskStatus(Enum):
405483 FAILED = "FAILED"
406484 CRASHED = "CRASHED"
407485
408- def is_finished (self ):
def is_finished(self) -> bool:
    """
    True iff status is COMPLETE, CANCELED, FAILED, or CRASHED.
    """
    # Successful completion is finished; otherwise only the error
    # statuses (as decided by is_error) count as finished.
    if self == TaskStatus.COMPLETE:
        return True
    return self.is_error()
491+
492+ def is_error (self ) -> bool :
493+ """
494+ True iff status is CANCELED, FAILED, or CRASHED.
495+ """
412496 return self in (
413- TaskStatus .COMPLETE ,
414497 TaskStatus .CANCELED ,
415498 TaskStatus .FAILED ,
416499 TaskStatus .CRASHED ,
@@ -434,7 +517,7 @@ class ResponseType(Enum):
434517 True iff response type is COMPLETE, CANCELED, FAILED, or CRASHED.
435518 """
436519
437- def is_terminal (self ):
520+ def is_terminal (self ) -> bool :
438521 return self in (
439522 ResponseType .COMPLETION ,
440523 ResponseType .CANCELATION ,
@@ -629,9 +712,10 @@ def _handle(self, response: Args) -> None:
629712 with self .cv :
630713 self .cv .notify_all ()
631714
632- def _crash (self ):
715+ def _crash (self , error : str ):
633716 event = TaskEvent (self , ResponseType .CRASH )
634717 self .status = TaskStatus .CRASHED
718+ self .error = error
635719 for listener in self .listeners :
636720 listener (event )
637721 with self .cv :
0 commit comments