Lightning-AI
diff --git a/‎src/litserve/loops/base.py‎
Lines changed: 19 additions & 15 deletions b/‎src/litserve/loops/base.py‎
Lines changed: 19 additions & 15 deletions
diff --git a/‎src/litserve/loops/loops.py‎
Lines changed: 4 additions & 14 deletions b/‎src/litserve/loops/loops.py‎
Lines changed: 4 additions & 14 deletions
diff --git a/‎src/litserve/loops/simple_loops.py‎
Lines changed: 14 additions & 128 deletions b/‎src/litserve/loops/simple_loops.py‎
Lines changed: 14 additions & 128 deletions
@@ -14,19 +14,20 @@
 import asyncio
 import inspect
 import logging
+import signal
 import sys
 import time
 from abc import ABC
 from queue import Empty, Queue
 from typing import Any, Dict, List, Optional, Tuple, Union
 
-import zmq
 from starlette.formparsers import MultiPartParser
 
 from litserve import LitAPI
 from litserve.callbacks import CallbackRunner
 from litserve.specs.base import LitSpec
 from litserve.utils import LitAPIStatus
+from litserve.zmq_queue import Producer
 
 logger = logging.getLogger(__name__)
 # FastAPI writes form files to disk over 1MB by default, which prevents serialization by multiprocessing
@@ -129,9 +130,6 @@ def run(
 
     """
 
-    zmq_ctx: Optional[zmq.Context] = None
-    socket: Optional[zmq.Socket] = None
-
     def pre_setup(self, lit_api: LitAPI, spec: Optional[LitSpec]):
         pass
 
@@ -159,9 +157,7 @@ def __call__(
         stream: bool,
         workers_setup_status: Dict[int, str],
         callback_runner: CallbackRunner,
-        socket: Optional[zmq.Socket],
     ):
-        self.socket = socket
         if asyncio.iscoroutinefunction(self.run):
             event_loop = asyncio.new_event_loop()
 
@@ -226,7 +222,9 @@ def run(
 
 class LitLoop(_BaseLoop):
     def __init__(self):
+        self.producer: Optional[Producer] = None
         self._context = {}
+        self._setup_signal_handlers()
 
     def get_batch_requests(self, lit_api: LitAPI, request_queue: Queue, max_batch_size: int, batch_timeout: float):
         batches, timed_out_uids = collate_requests(
@@ -250,23 +248,39 @@ def populate_context(self, lit_spec: LitSpec, request: Any):
     def put_response(
         self, response_queues: List[Queue], response_queue_id: int, uid: str, response_data: Any, status: LitAPIStatus
     ) -> None:
-        if self.socket:
-            self.socket.send_pyobj((uid, (response_data, status)))
+        """Send ``(uid, (response_data, status))`` through the producer if one is set, else through the queue."""
+        if self.producer:
+            self.producer.put((uid, (response_data, status)), consumer_id=response_queue_id)
         else:
             response_queues[response_queue_id].put((uid, (response_data, status)), block=False)
 
     def put_error_response(
         self, response_queues: List[Queue], response_queue_id: int, uid: str, error: Exception
     ) -> None:
-        if self.socket:
-            self.socket.send_pyobj((uid, (error, LitAPIStatus.ERROR)))
-        else:
-            response_queues[response_queue_id].put((uid, (error, LitAPIStatus.ERROR)), block=False)
+        """Send ``error`` for ``uid`` with ``LitAPIStatus.ERROR`` by delegating to ``put_response``."""
+        self.put_response(response_queues, response_queue_id, uid, error, LitAPIStatus.ERROR)
 
     def __del__(self):
-        if self.socket:
-            self.socket.close(linger=0)
-            self.zmq_ctx.term()
+        # ``__del__`` can run on an instance whose ``__init__`` never finished, so the attribute may be missing.
+        producer = getattr(self, "producer", None)
+        if producer:
+            producer.close()
+
+    def _setup_signal_handlers(self):
+        """Install SIGINT/SIGTERM handlers that close the producer (if any) and exit with status 0."""
+
+        def cleanup_handler(signum=None, frame=None):
+            logger.debug("Worker process received shutdown signal")
+            if self.producer:
+                self.producer.close()
+            sys.exit(0)
+
+        try:
+            signal.signal(signal.SIGINT, cleanup_handler)
+            signal.signal(signal.SIGTERM, cleanup_handler)
+        except ValueError as err:
+            # ``signal.signal`` raises ValueError outside the main thread; skip setup instead of failing __init__.
+            logger.debug("Skipping signal handler setup: %s", err)
 
 
 class DefaultLoop(LitLoop):
 
@@ -15,16 +15,14 @@
 from queue import Queue
 from typing import Dict, List, Optional, Union
 
-import zmq
-import zmq.asyncio
-
 from litserve import LitAPI
 from litserve.callbacks import CallbackRunner, EventTypes
 from litserve.loops.base import _BaseLoop
 from litserve.loops.simple_loops import BatchedLoop, SingleLoop
 from litserve.loops.streaming_loops import BatchedStreamingLoop, StreamingLoop
 from litserve.specs.base import LitSpec
 from litserve.utils import WorkerSetupStatus
+from litserve.zmq_queue import Producer
 
 logger = logging.getLogger(__name__)
 
@@ -78,14 +76,10 @@ def inference_worker(
     if loop == "auto":
         loop = get_default_loop(stream, max_batch_size)
 
-    socket = None
     if use_zmq:
-        ctx = zmq.Context()
-        socket = ctx.socket(zmq.PUB)
-        logger.debug(f"Inference worker binding to {zmq_addr}")
-        socket.bind(zmq_addr)
-        loop.socket = socket
-        loop.zmq_context = ctx
+        producer = Producer(address=zmq_addr)
+        producer.wait_for_subscribers(timeout=5)
+        loop.producer = producer
 
     loop(
         lit_api,
@@ -99,8 +93,4 @@ def inference_worker(
         stream,
         workers_setup_status,
         callback_runner,
-        socket,
     )
-    if use_zmq:
-        socket.close()
-        loop.zmq_context.term()
@@ -16,7 +16,6 @@
 from queue import Empty, Queue
 from typing import Dict, List, Optional
 
-import zmq
 from fastapi import HTTPException
 
 from litserve import LitAPI
@@ -28,116 +27,14 @@
 logger = logging.getLogger(__name__)
 
 
-def run_batched_loop(
-    lit_api: LitAPI,
-    lit_spec: LitSpec,
-    request_queue: Queue,
-    response_queues: List[Queue],
-    max_batch_size: int,
-    batch_timeout: float,
-    callback_runner: CallbackRunner,
-    socket: Optional[zmq.Socket],
-):
-    while True:
-        batches, timed_out_uids = collate_requests(
-            lit_api,
-            request_queue,
-            max_batch_size,
-            batch_timeout,
-        )
-
-        for response_queue_id, uid in timed_out_uids:
-            logger.error(
-                f"Request {uid} was waiting in the queue for too long ({lit_api.request_timeout} seconds) and "
-                "has been timed out. "
-                "You can adjust the timeout by providing the `timeout` argument to LitServe(..., timeout=30)."
-            )
-            if socket:
-                socket.send_pyobj((uid, (HTTPException(504, "Request timed out"), LitAPIStatus.ERROR)))
-            else:
-                response_queues[response_queue_id].put((
-                    uid,
-                    (HTTPException(504, "Request timed out"), LitAPIStatus.ERROR),
-                ))
-
-        if not batches:
-            continue
-        logger.debug(f"{len(batches)} batched requests received")
-        response_queue_ids, uids, inputs = zip(*batches)
-        num_inputs = len(inputs)
-        try:
-            contexts = [{}] * num_inputs
-            if hasattr(lit_spec, "populate_context"):
-                for input, context in zip(inputs, contexts):
-                    lit_spec.populate_context(context, input)
-
-            callback_runner.trigger_event(EventTypes.BEFORE_DECODE_REQUEST, lit_api=lit_api)
-            x = [
-                _inject_context(
-                    context,
-                    lit_api.decode_request,
-                    input,
-                )
-                for input, context in zip(inputs, contexts)
-            ]
-            callback_runner.trigger_event(EventTypes.AFTER_DECODE_REQUEST, lit_api=lit_api)
-
-            x = lit_api.batch(x)
-
-            callback_runner.trigger_event(EventTypes.BEFORE_PREDICT, lit_api=lit_api)
-            y = _inject_context(contexts, lit_api.predict, x)
-            callback_runner.trigger_event(EventTypes.AFTER_PREDICT, lit_api=lit_api)
-
-            outputs = lit_api.unbatch(y)
-
-            if len(outputs) != num_inputs:
-                logger.error(
-                    "LitAPI.predict/unbatch returned {len(outputs)} outputs, but expected {num_inputs}. "
-                    "Please check the predict/unbatch method of the LitAPI implementation."
-                )
-                raise HTTPException(500, "Batch size mismatch")
-
-            callback_runner.trigger_event(EventTypes.BEFORE_ENCODE_RESPONSE, lit_api=lit_api)
-            y_enc_list = []
-            for response_queue_id, y, uid, context in zip(response_queue_ids, outputs, uids, contexts):
-                y_enc = _inject_context(context, lit_api.encode_response, y)
-                y_enc_list.append((response_queue_id, uid, y_enc))
-            callback_runner.trigger_event(EventTypes.AFTER_ENCODE_RESPONSE, lit_api=lit_api)
-
-            for response_queue_id, uid, y_enc in y_enc_list:
-                response_queues[response_queue_id].put((uid, (y_enc, LitAPIStatus.OK)))
-
-        except HTTPException as e:
-            for response_queue_id, uid in zip(response_queue_ids, uids):
-                if socket:
-                    socket.send_pyobj((uid, (PickleableHTTPException.from_exception(e), LitAPIStatus.ERROR)))
-                else:
-                    response_queues[response_queue_id].put((
-                        uid,
-                        (PickleableHTTPException.from_exception(e), LitAPIStatus.ERROR),
-                    ))
-
-        except Exception as e:
-            logger.exception(
-                "LitAPI ran into an error while processing the batched request.\n"
-                "Please check the error trace for more details."
-            )
-            for response_queue_id, uid in zip(response_queue_ids, uids):
-                if socket:
-                    socket.send_pyobj((uid, (e, LitAPIStatus.ERROR)))
-                else:
-                    response_queues[response_queue_id].put((uid, (e, LitAPIStatus.ERROR)))
-
-
 class SingleLoop(DefaultLoop):
     def run_single_loop(
         self,
         lit_api: LitAPI,
-        lit_spec: LitSpec,
+        lit_spec: Optional[LitSpec],
         request_queue: Queue,
         response_queues: List[Queue],
         callback_runner: CallbackRunner,
-        socket: Optional[zmq.Socket],
     ):
         while True:
             try:
@@ -233,9 +130,8 @@ def __call__(
         stream: bool,
         workers_setup_status: Dict[int, str],
         callback_runner: CallbackRunner,
-        socket: Optional[zmq.Socket],
     ):
-        self.run_single_loop(lit_api, lit_spec, request_queue, response_queues, callback_runner, socket)
+        self.run_single_loop(lit_api, lit_spec, request_queue, response_queues, callback_runner)
 
 
 class BatchedLoop(DefaultLoop):
@@ -248,7 +144,6 @@ def run_batched_loop(
         max_batch_size: int,
         batch_timeout: float,
         callback_runner: CallbackRunner,
-        socket: Optional[zmq.Socket],
     ):
         while True:
             batches, timed_out_uids = collate_requests(
@@ -264,13 +159,9 @@ def run_batched_loop(
                     "has been timed out. "
                     "You can adjust the timeout by providing the `timeout` argument to LitServe(..., timeout=30)."
                 )
-                if socket:
-                    socket.send_pyobj((uid, (HTTPException(504, "Request timed out"), LitAPIStatus.ERROR)))
-                else:
-                    response_queues[response_queue_id].put((
-                        uid,
-                        (HTTPException(504, "Request timed out"), LitAPIStatus.ERROR),
-                    ))
+                self.put_response(
+                    response_queues, response_queue_id, uid, HTTPException(504, "Request timed out"), LitAPIStatus.ERROR
+                )
 
             if not batches:
                 continue
@@ -317,28 +208,25 @@ def run_batched_loop(
                 callback_runner.trigger_event(EventTypes.AFTER_ENCODE_RESPONSE, lit_api=lit_api)
 
                 for response_queue_id, uid, y_enc in y_enc_list:
-                    response_queues[response_queue_id].put((uid, (y_enc, LitAPIStatus.OK)))
+                    self.put_response(response_queues, response_queue_id, uid, y_enc, LitAPIStatus.OK)
 
             except HTTPException as e:
                 for response_queue_id, uid in zip(response_queue_ids, uids):
-                    if socket:
-                        socket.send_pyobj((uid, (PickleableHTTPException.from_exception(e), LitAPIStatus.ERROR)))
-                    else:
-                        response_queues[response_queue_id].put((
-                            uid,
-                            (PickleableHTTPException.from_exception(e), LitAPIStatus.ERROR),
-                        ))
+                    self.put_response(
+                        response_queues,
+                        response_queue_id,
+                        uid,
+                        PickleableHTTPException.from_exception(e),
+                        LitAPIStatus.ERROR,
+                    )
 
             except Exception as e:
                 logger.exception(
                     "LitAPI ran into an error while processing the batched request.\n"
                     "Please check the error trace for more details."
                 )
                 for response_queue_id, uid in zip(response_queue_ids, uids):
-                    if socket:
-                        socket.send_pyobj((uid, (e, LitAPIStatus.ERROR)))
-                    else:
-                        response_queues[response_queue_id].put((uid, (e, LitAPIStatus.ERROR)))
+                    self.put_response(response_queues, response_queue_id, uid, e, LitAPIStatus.ERROR)
 
     def __call__(
         self,
@@ -353,7 +241,6 @@ def __call__(
         stream: bool,
         workers_setup_status: Dict[int, str],
         callback_runner: CallbackRunner,
-        socket: Optional[zmq.Socket],
     ):
         self.run_batched_loop(
             lit_api,
@@ -363,5 +250,4 @@ def __call__(
             max_batch_size,
             batch_timeout,
             callback_runner,
-            socket,
         )