✅ Custom inference logic ✅ 2× faster than FastAPI ✅ Agents, RAG, pipelines, more
✅ Custom logic + control ✅ Any PyTorch model ✅ Self-host or managed
✅ Multi-GPU autoscaling ✅ Batching + streaming ✅ BYO model or vLLM
✅ No MLOps glue code ✅ Easy setup in Python ✅ Serverless support
# Why LitServe?
Most serving tools (vLLM, for example) are built for a single model type and enforce rigid abstractions. They work well until you need custom logic, multiple models, agents, or non-standard pipelines. LitServe lets you write your own inference engine in Python: you define how requests are handled, how models are loaded, how batching and routing work, and how outputs are produced, while LitServe handles performance, concurrency, scaling, and deployment. Use LitServe to build inference APIs, agents, chatbots, RAG systems, MCP servers, or multi-model pipelines.
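The lifecycle described above (decode the request, run the model, encode the response) can be sketched in plain Python. The hook names mirror LitServe's `LitAPI` (`setup`, `decode_request`, `predict`, `encode_response`), but the class, the stand-in model, and the `handle` driver here are illustrative, not LitServe's actual server wiring:

```python
# Conceptual sketch of the inference lifecycle you define with LitServe.
# EchoSquareAPI and handle() are illustrative stand-ins, not LitServe code.

class EchoSquareAPI:
    def setup(self):
        # Load the model once at startup; here a toy callable stands in.
        self.model = lambda x: x ** 2

    def decode_request(self, request: dict):
        # Map the raw request payload to model input.
        return request["input"]

    def predict(self, x):
        # Run inference; batching and routing hooks would plug in here.
        return self.model(x)

    def encode_response(self, output) -> dict:
        # Map model output back to a response payload.
        return {"output": output}


def handle(api, request: dict) -> dict:
    # The server drives every request through the same pipeline.
    return api.encode_response(api.predict(api.decode_request(request)))


api = EchoSquareAPI()
api.setup()
print(handle(api, {"input": 4}))  # {'output': 16}
```

Because each stage is an ordinary method, swapping in a real model, custom batching, or multi-model routing is a matter of overriding the relevant hook.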
Run it locally, self-host anywhere, or deploy with one click on [Lightning AI](https://lightning.ai/litserve?utm_source=litserve_readme&utm_medium=referral&utm_campaign=litserve_readme).
# Want the easiest way to host inference?
Over 380,000 developers use [Lightning Cloud](https://lightning.ai/?utm_source=ptl_readme&utm_medium=referral&utm_campaign=ptl_readme), the simplest way to run LitServe without managing infrastructure. Deploy with one command and get autoscaling GPUs, monitoring, and a free tier. No cloud setup required, or self-host anywhere.
# Quick start
Install LitServe via pip ([more options](https://lightning.ai/docs/litserve/home/install)):
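The standard command, assuming the package name on PyPI is `litserve`:

```shell
pip install litserve
```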