Skip to content

Commit 883593a

Browse files
author
markvaykhansky
committed
wip
1 parent 464ebe3 commit 883593a

File tree

5 files changed

+168
-18
lines changed

5 files changed

+168
-18
lines changed

src/guidellm/backend/openai.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -572,12 +572,12 @@ async def _iterative_completions_request(
572572

573573
async for line in stream.aiter_lines():
574574
iter_time = time.time()
575-
logger.debug(
576-
"{} request: {} recieved iter response line: {}",
577-
self.__class__.__name__,
578-
request_id,
579-
line,
580-
)
575+
# logger.debug(
576+
# "{} request: {} recieved iter response line: {}",
577+
# self.__class__.__name__,
578+
# request_id,
579+
# line,
580+
# )
581581

582582
if not line or not line.strip().startswith("data:"):
583583
continue

src/guidellm/objects/pydantic.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -21,11 +21,11 @@ class StandardBaseModel(BaseModel):
2121

2222
def __init__(self, /, **data: Any) -> None:
2323
super().__init__(**data)
24-
logger.debug(
25-
"Initialized new instance of {} with data: {}",
26-
self.__class__.__name__,
27-
data,
28-
)
24+
# logger.debug(
25+
# "Initialized new instance of {} with data: {}",
26+
# self.__class__.__name__,
27+
# data,
28+
# )
2929

3030

3131
SuccessfulT = TypeVar("SuccessfulT")

src/guidellm/scheduler/repro.py

Lines changed: 138 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,138 @@
1+
import asyncio
2+
import multiprocessing
3+
import time
4+
import logging
5+
import threading
6+
7+
# Logging setup: timestamped, level-tagged records that include the thread
# name — relevant here because asyncio.to_thread runs the blocking queue
# read off the event-loop thread.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - [%(threadName)s] - %(message)s',
    datefmt='%H:%M:%S',
    level=logging.INFO,
)

# Shared multiprocessing queue that is intentionally never fed, so any
# consumer blocks indefinitely.  Named mp_queue to distinguish it from
# asyncio.Queue.
mp_queue = multiprocessing.Queue()
19+
async def get_item_from_mp_queue(q: multiprocessing.Queue, worker_id: int):
    """
    Pull one item off *q* without blocking the event loop.

    The blocking ``q.get()`` is handed to a worker thread via
    ``asyncio.to_thread``; every entry/exit path is logged so the
    cancellation experiment can be traced in the output.
    """
    logging.info(f"Worker {worker_id}: get_item_from_mp_queue: ENTERED. Awaiting asyncio.to_thread(q.get).")
    try:
        # Blocking call, executed in a separate thread so the loop stays free.
        payload = await asyncio.to_thread(q.get)
    except asyncio.CancelledError:
        # Expected path when the task awaiting this coroutine is cancelled and
        # asyncio.to_thread propagates the cancellation to its awaiter.
        logging.error(
            f"Worker {worker_id}: get_item_from_mp_queue: CAUGHT CancelledError from asyncio.to_thread directly!")
        raise  # keep the cancellation moving up the stack
    except Exception as exc:
        logging.error(f"Worker {worker_id}: get_item_from_mp_queue: CAUGHT an UNEXPECTED EXCEPTION {type(exc)}: {exc}",
                      exc_info=True)
        raise
    else:
        # Unreached while the queue stays empty.
        logging.info(
            f"Worker {worker_id}: get_item_from_mp_queue: asyncio.to_thread RETURNED NORMALLY with item: {payload}.")
        return payload
    finally:
        # Runs on every exit path; whether the CancelledError was caught above
        # is the interesting part of the experiment.
        logging.info(f"Worker {worker_id}: get_item_from_mp_queue: EXITED (finally block).")
47+
async def worker_coroutine(worker_id: int, q: multiprocessing.Queue):
    """
    Top-level coroutine for one worker task.

    Awaits a queue item and logs which exception path (if any) the task takes
    when it is cancelled from the outside.
    """
    logging.info(f"Worker {worker_id}: worker_coroutine: STARTED.")
    try:
        logging.info(f"Worker {worker_id}: worker_coroutine: About to await get_item_from_mp_queue.")
        # Suspension point: if this task is cancelled, the CancelledError is
        # injected right here.
        await get_item_from_mp_queue(q, worker_id)
        logging.info(f"Worker {worker_id}: worker_coroutine: get_item_from_mp_queue completed (unexpectedly).")
    except asyncio.CancelledError:
        # Deliberately swallowed: the task then ends "normally" after
        # cancellation, which is what the orchestrator's wait_for observes.
        logging.error(f"Worker {worker_id}: worker_coroutine: SUCCESSFULLY CAUGHT CancelledError.")
    except Exception as err:
        logging.error(f"Worker {worker_id}: worker_coroutine: CAUGHT UNEXPECTED EXCEPTION {type(err)}: {err}",
                      exc_info=True)
    finally:
        logging.info(f"Worker {worker_id}: worker_coroutine: FINISHED (finally block).")
69+
async def main_orchestrator():
    """
    Orchestrate the cancellation test.

    Starts a worker task that blocks on the (empty) multiprocessing queue,
    cancels it after one second, and reports — with a timeout — whether the
    cancellation actually unblocked the task.  Finally, a sentinel is pushed
    into the queue so the thread stuck in ``q.get()`` can exit.
    """
    logging.info("Main Orchestrator: Starting worker task.")
    worker_task = asyncio.create_task(worker_coroutine(1, mp_queue), name="WorkerCoroutine-1")

    # Give the worker task a moment to start and block on the queue.
    logging.info("Main Orchestrator: Sleeping for 1 second to let worker block...")
    await asyncio.sleep(1)

    # BUG FIX: the original comprehension bound the loop variable as `t_`
    # while reading `t.name`, which raises NameError when the f-string is
    # evaluated.  Bind and read the same name.
    logging.info(f"Main Orchestrator: Current active threads: {[t.name for t in threading.enumerate()]}...")

    # Cancel the worker task.  (Use logging, not print, consistent with the
    # rest of this script.)
    logging.info("Main Orchestrator: Cancelling worker_task...")
    worker_task.cancel()

    # Wait for the worker task to finish, with a timeout.
    # If cancellation works as expected, worker_task should complete (by
    # handling CancelledError) well before the timeout.
    # If it gets stuck, asyncio.TimeoutError will be raised.
    timeout_seconds = 5.0
    logging.info(f"Main Orchestrator: Awaiting worker_task with timeout {timeout_seconds}s...")
    try:
        await asyncio.wait_for(worker_task, timeout=timeout_seconds)
        logging.info("Main Orchestrator: worker_task completed WITHOUT timeout.")
    except asyncio.TimeoutError:
        logging.error(
            f"Main Orchestrator: TIMEOUT! worker_task did not finish within {timeout_seconds}s after cancellation.")
        logging.error(
            f"Main Orchestrator: worker_task.done() = {worker_task.done()}, worker_task.cancelled() = {worker_task.cancelled()}")
        # At this point, the thread running mp_queue.get() is likely still blocked.
    except asyncio.CancelledError:
        # This would happen if main_orchestrator itself was cancelled, not expected here.
        logging.error("Main Orchestrator: main_orchestrator itself was cancelled (unexpected).")
    except Exception as e:
        logging.error(f"Main Orchestrator: An unexpected error occurred while waiting for worker_task: {e}",
                      exc_info=True)
    finally:
        logging.info("Main Orchestrator: Test finished.")
        # Note: The thread started by asyncio.to_thread for mp_queue.get()
        # might still be alive and blocked if q.get() wasn't unblocked.
        # It's a daemon thread by default, so it won't prevent program exit.
        # To clean it up, one would typically put a sentinel into mp_queue.
        # For this test, we are focused on the asyncio task cancellation.
        logging.info(
            f"Main Orchestrator: Final check: worker_task.done() = {worker_task.done()}, worker_task.cancelled() = {worker_task.cancelled()}")

        # Attempt to unblock the queue to allow the thread to exit,
        # though the test's focus is on the asyncio cancellation.
        try:
            mp_queue.put_nowait(None)  # Sentinel
            logging.info("Main Orchestrator: Put sentinel in mp_queue to unblock thread.")
        except Exception:
            logging.warning("Main Orchestrator: Could not put sentinel in mp_queue.")
126+
def _release_queue() -> None:
    # Close the queue and join its feeder thread so the process exits cleanly.
    mp_queue.close()
    mp_queue.join_thread()  # Ensure queue's feeder thread is joined
    logging.info("Main Orchestrator: mp_queue resources released.")


if __name__ == "__main__":
    # Entry point.  The multiprocessing queue is created at module scope,
    # which is fine for this single-process repro script (and keeps it valid
    # under the 'spawn'/'forkserver' start methods on Windows/macOS).
    try:
        asyncio.run(main_orchestrator())
    except KeyboardInterrupt:
        logging.info("Main Orchestrator: Keyboard interrupt received.")
    finally:
        _release_queue()

src/guidellm/scheduler/scheduler.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,7 @@ async def run(
154154
):
155155
# we've exhausted all requests we've wanted to run
156156
# and yielded all responses
157+
logger.info("run_info.completed_requests >= run_info.created_requests")
157158
break
158159

159160
requests_iter = self._add_requests(
@@ -198,7 +199,7 @@ async def run(
198199
run_info=run_info,
199200
)
200201

201-
await self._stop_processes(futures, requests_queue)
202+
await self._stop_processes(futures, shutdown_event, requests_queue)
202203

203204
def _validate_scheduler_params(
204205
self,
@@ -457,10 +458,9 @@ def _check_result_ready(
457458
async def _stop_processes(
458459
self,
459460
futures: list[asyncio.Future],
461+
shutdown_event: MultiprocessingEvent,
460462
requests_queue: multiprocessing.Queue,
461463
):
462-
for _ in futures:
463-
requests_queue.put(None)
464-
464+
shutdown_event.set()
465465
logger.debug("Waiting for futures to shut down")
466466
await asyncio.gather(*futures)

src/guidellm/scheduler/worker.py

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -223,7 +223,10 @@ async def _process_runner():
223223
raise ValueError(f"Invalid process type: {type_}")
224224

225225
shutdown_task = asyncio.create_task(
226-
self._wait_for_shutdown(shutdown_event, shutdown_poll_interval_seconds),
226+
self._wait_for_shutdown(
227+
shutdown_event=shutdown_event,
228+
shutdown_poll_interval=shutdown_poll_interval_seconds
229+
),
227230
name="shutdown_task",
228231
)
229232

@@ -236,7 +239,9 @@ async def _process_runner():
236239
)
237240

238241
for task in pending:
239-
task.cancel()
242+
logger.debug(f"Cancelling task {task.get_name()}")
243+
cancel_result = task.cancel()
244+
logger.debug(f"{'Task is already done or canceled' if not cancel_result else 'sent cancel signal'}")
240245
try:
241246
await task
242247
except asyncio.CancelledError:
@@ -265,6 +270,8 @@ async def _wait_for_shutdown(
265270
while not shutdown_event.is_set():
266271
await asyncio.sleep(shutdown_poll_interval)
267272

273+
logger.debug("Shutdown signal received")
274+
raise ValueError("kaki")
268275
raise asyncio.CancelledError("Shutdown event set, cancelling process loop.")
269276

270277
async def _process_synchronous_requests_loop(
@@ -290,6 +297,9 @@ async def _process_synchronous_requests_loop(
290297
process_id=process_id,
291298
)
292299

300+
logger.debug("Done processing synchronous loop")
301+
302+
293303
async def _process_asynchronous_requests_loop(
294304
self,
295305
requests_queue: multiprocessing.Queue,
@@ -303,6 +313,7 @@ async def _process_asynchronous_requests_loop(
303313
raise ValueError("Async worker called with max_concurrency < 1")
304314

305315
while True:
316+
logger.info("Awaiting request...")
306317
process_request = await self.get_request(
307318
requests_queue=requests_queue,
308319
)
@@ -315,7 +326,6 @@ async def _process_asynchronous_requests_loop(
315326
)
316327

317328
await pending.acquire()
318-
319329
lock_acquired_at = time.time()
320330
logger.debug(
321331
f"Lock acquired Process ID {process_id} ||"
@@ -341,6 +351,8 @@ def _task_done(_: asyncio.Task):
341351
task.add_done_callback(_task_done)
342352
await asyncio.sleep(0) # enable start task immediately
343353

354+
logger.debug("Done processing asynchronous loop")
355+
344356

345357
class GenerativeRequestsWorkerDescription(WorkerDescription):
346358
type_: Literal["generative_requests_worker"] = "generative_requests_worker" # type: ignore[assignment]

0 commit comments

Comments
 (0)