@@ -43,14 +43,15 @@ def handle(
4343 :param action_queue: queue to send events to.
4444 """
4545 for worker_id in range (workers_num ):
46- action_queue .put (ReloadOneAction (worker_num = worker_id ))
46+ action_queue .put (ReloadOneAction (worker_num = worker_id , is_reload_all = True ))
4747
4848
4949@dataclass
5050class ReloadOneAction (ProcessActionBase ):
5151 """This action reloads single worker with particular id."""
5252
5353 worker_num : int
54+ is_reload_all : bool
5455
5556 def handle (
5657 self ,
@@ -153,6 +154,7 @@ def __init__(
153154 args : WorkerArgs ,
154155 worker_function : Callable [[WorkerArgs , EventType ], None ],
155156 observer : Optional [Observer ] = None , # type: ignore[valid-type]
157+ max_restarts : Optional [int ] = None ,
156158 ) -> None :
157159 self .worker_function = worker_function
158160 self .action_queue : "Queue[ProcessActionBase]" = Queue (- 1 )
@@ -198,7 +200,7 @@ def prepare_workers(self) -> None:
198200 for worker , event in zip (self .workers , events ):
199201 _wait_for_worker_startup (worker , event )
200202
201- def start (self ) -> None : # noqa: C901, WPS213
203+ def start (self ) -> Optional [ int ] : # noqa: C901, WPS213
202204 """
203205 Start managing child processes.
204206
@@ -223,7 +225,10 @@ def start(self) -> None: # noqa: C901, WPS213
223225 After all events are handled, it iterates over all child processes and
224226 checks that all processes are healthy. If process was terminated for
225227 some reason, it schedules a restart for dead process.
228+
229+ :returns: -1 when the restart limit is reached, None on shutdown.
226230 """
231+ restarts = 0
227232 self .prepare_workers ()
228233 while True :
229234 sleep (1 )
@@ -238,16 +243,30 @@ def start(self) -> None: # noqa: C901, WPS213
238243 action_queue = self .action_queue ,
239244 )
240245 elif isinstance (action , ReloadOneAction ):
246+ # Count this restart only when max_fails is set (>= 1)
247+ # and the reload is not part of a reload-all request.
248+ # Once the count reaches max_fails, stop with an error status.
249+ if not action .is_reload_all and self .args .max_fails >= 1 :
250+ restarts += 1
251+ if restarts >= self .args .max_fails :
252+ logger .warning ("Max restarts reached. Exiting." )
253+ # Returning error status.
254+ return - 1
241255 # If we just reloaded this worker, skip handling.
242256 if action .worker_num in reloaded_workers :
243257 continue
244258 action .handle (self .workers , self .args , self .worker_function )
245259 reloaded_workers .add (action .worker_num )
246260 elif isinstance (action , ShutdownAction ):
247261 logger .debug ("Process manager closed." )
248- return
262+ return None
249263
250264 for worker_num , worker in enumerate (self .workers ):
251265 if not worker .is_alive ():
252266 logger .info (f"{ worker .name } is dead. Scheduling reload." )
253- self .action_queue .put (ReloadOneAction (worker_num = worker_num ))
267+ self .action_queue .put (
268+ ReloadOneAction (
269+ worker_num = worker_num ,
270+ is_reload_all = False ,
271+ ),
272+ )
0 commit comments