fix: final PR comments

AlonKellner-Jounce · AlonKellner-Jounce · commit 47e71551f0da · 2025-07-23T08:57:47.000Z
diff --git a/src/guidellm/scheduler/worker.py b/src/guidellm/scheduler/worker.py
@@ -9,7 +9,6 @@
 from collections.abc import AsyncGenerator
 from dataclasses import dataclass
 from multiprocessing.synchronize import Event as MultiprocessingEvent
-from threading import Event
 from typing import (
     Any,
     Generic,
@@ -131,25 +130,30 @@ async def resolve(
     async def get_request(
         self,
         requests_queue: multiprocessing.Queue,
-        shutdown_event: threading.Event,
         process_id: int,
         shutdown_poll_interval_seconds: float,
     ) -> WorkerProcessRequest[RequestT]:
+        shutdown_event = threading.Event()
+
         # We need to check shutdown_event intermittently cause
         # if we simply use asyncio.to_thread(requests_queue.get)
         # the cancellation task doesn't propagate because the
         # asyncio.to_thread is blocking
         def _get_queue_intermittently():
-            while True:
+            while not shutdown_event.is_set():
                 try:
-                    return requests_queue.get(timeout=shutdown_poll_interval_seconds)
-                except queue.Empty as e:
-                    logger.info("Checking shutdown even is set in get_request")
-                    if shutdown_event.is_set():
-                        logger.info(f"Shutdown signal received in future {process_id}")
-                        raise asyncio.CancelledError from e
+                    request = requests_queue.get(timeout=shutdown_poll_interval_seconds)
+                    logger.debug(f"Got request in future {process_id}")
+                    return request
+                except queue.Empty:
+                    logger.trace(f"Queue was empty in future {process_id}")
+            logger.info(f"Shutdown signal received in future {process_id}")
+            return None
 
-        return await asyncio.to_thread(_get_queue_intermittently)  # type: ignore[attr-defined]
+        try:
+            return await asyncio.to_thread(_get_queue_intermittently)
+        finally:
+            shutdown_event.set()
 
     async def send_result(
         self,
@@ -175,15 +179,17 @@ async def resolve_scheduler_request(
             scheduled_time=time.time(),
             process_id=process_id,
         )
-        request_scheduled_result: WorkerProcessResult[RequestT, ResponseT] = (
-            WorkerProcessResult(
-                type_="request_scheduled",
-                request=request,
-                response=None,
-                info=info,
+        asyncio.create_task(
+            self.send_result(
+                results_queue,
+                WorkerProcessResult(
+                    type_="request_scheduled",
+                    request=request,
+                    response=None,
+                    info=info,
+                ),
             )
         )
-        asyncio.create_task(self.send_result(results_queue, request_scheduled_result))
 
         if (wait_time := start_time - time.time()) > 0:
             await asyncio.sleep(wait_time)
@@ -223,37 +229,26 @@ def run_process(
         shutdown_event: MultiprocessingEvent,
         shutdown_poll_interval_seconds: float,
         process_id: int,
-        max_concurrency: Optional[int] = None,
+        max_concurrency: int,
     ):
         async def _process_runner():
-            # We are using a separate internal event
-            # because if we're using the shutdown_event
-            # there's a race condition between the get_request
-            # loop which checks for shutdown and the .cancel() in this
-            # method which causes the asyncio.CancelledError
-            # to propagate and crash the worker
-            internal_shutdown_event: threading.Event = Event()
             if type_ == "sync":
                 loop_task = asyncio.create_task(
                     self._process_synchronous_requests_loop(
                         requests_queue=requests_queue,
                         results_queue=results_queue,
                         process_id=process_id,
-                        shutdown_event=internal_shutdown_event,
                         shutdown_poll_interval_seconds=shutdown_poll_interval_seconds,
                     ),
                     name="request_loop_processor_task",
                 )
             elif type_ == "async":
-                if max_concurrency is None:
-                    raise ValueError("max_concurrency must be set for async processor")
                 loop_task = asyncio.create_task(
                     self._process_asynchronous_requests_loop(
                         requests_queue=requests_queue,
                         results_queue=results_queue,
                         max_concurrency=max_concurrency,
                         process_id=process_id,
-                        shutdown_event=internal_shutdown_event,
                         shutdown_poll_interval_seconds=shutdown_poll_interval_seconds,
                     ),
                     name="request_loop_processor_task",
@@ -286,7 +281,6 @@ async def _process_runner():
                     f"Cancelling task {task.get_name()}|| Process {process_id}"
                 )
                 task.cancel()
-                internal_shutdown_event.set()
                 try:  # noqa: SIM105
                     await task
                 except asyncio.CancelledError:
@@ -317,9 +311,6 @@ async def _wait_for_shutdown(
         while not shutdown_event.is_set():  # noqa: ASYNC110
             await asyncio.sleep(shutdown_poll_interval)
 
-        # Raising asyncio.CancelledError instead would
-        # cause the asyncio.wait above to wait
-        # forever, couldn't find a reasonable reason why
         raise ShutdownSignalReceivedError(
             f"Shutdown event set for process {process_id}, cancelling process loop."
         )
@@ -329,13 +320,11 @@ async def _process_synchronous_requests_loop(
         requests_queue: multiprocessing.Queue,
         results_queue: multiprocessing.Queue,
         process_id: int,
-        shutdown_event: threading.Event,
         shutdown_poll_interval_seconds: float,
     ):
         while True:
             process_request = await self.get_request(
                 requests_queue=requests_queue,
-                shutdown_event=shutdown_event,
                 process_id=process_id,
                 shutdown_poll_interval_seconds=shutdown_poll_interval_seconds,
             )
@@ -358,7 +347,6 @@ async def _process_asynchronous_requests_loop(
         results_queue: multiprocessing.Queue,
         max_concurrency: int,
         process_id: int,
-        shutdown_event: threading.Event,
         shutdown_poll_interval_seconds: float,
     ):
         pending = asyncio.Semaphore(max_concurrency)
@@ -369,7 +357,6 @@ async def _process_asynchronous_requests_loop(
         while True:
             process_request = await self.get_request(
                 requests_queue=requests_queue,
-                shutdown_event=shutdown_event,
                 process_id=process_id,
                 shutdown_poll_interval_seconds=shutdown_poll_interval_seconds,
             )
@@ -461,7 +448,7 @@ def run_process(
         shutdown_event: MultiprocessingEvent,
         shutdown_poll_interval_seconds: float,
         process_id: int,
-        max_concurrency: Optional[int] = None,
+        max_concurrency: int,
     ):
         asyncio.run(self.backend.validate())
         super().run_process(