Improve developer experience (#519)

aniketmaurya · web-flow · commit 145daeab719a · 2025-05-29T17:02:27.000+01:00
* Enhance LitAPI and LitServer initialization documentation

- Updated docstrings for LitAPI and LitServer constructors to provide detailed descriptions of parameters and their usage.
- Improved clarity on deprecated parameters and their replacements, ensuring better guidance for users.
- Added examples for instantiating LitAPI and LitServer, demonstrating various configurations for enhanced usability.

* bump version

* fix tests
diff --git a/src/litserve/__about__.py b/src/litserve/__about__.py
@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-__version__ = "0.2.11a2"
+__version__ = "0.2.11"
 __author__ = "Lightning-AI et al."
 __author_email__ = "community@lightning.ai"
 __license__ = "Apache-2.0"
diff --git a/src/litserve/api.py b/src/litserve/api.py
@@ -45,16 +45,52 @@ def __init__(
         spec: Optional[LitSpec] = None,
         enable_async: bool = False,
     ):
-        """Initialize a LitAPI instance.
+        """Initialize a LitAPI instance that defines the model's inference behavior.
 
         Args:
-            max_batch_size: Maximum number of requests to process in a batch.
-            batch_timeout: Maximum time to wait for a batch to fill before processing.
-            api_path: URL path for the prediction endpoint.
-            stream: Whether to enable streaming responses.
-            loop: Inference loop to use, or 'auto' to select based on settings.
-            spec: Specification for the API, such as OpenAISpec or custom specs.
-            enable_async: Enable async support.
+            max_batch_size (int, optional):
+                Maximum requests to batch together for inference. Higher values improve throughput
+                for models that benefit from batching but use more memory. Defaults to 1.
+
+            batch_timeout (float, optional):
+                Maximum seconds to wait for a batch to fill before processing incomplete batches.
+                Lower values reduce latency, higher values improve batching efficiency. Defaults to 0.0.
+
+            api_path (str, optional):
+                URL endpoint path for predictions (e.g., "/predict", "/v1/chat"). Defaults to "/predict".
+
+            stream (bool, optional):
+                Enable streaming responses for real-time output (useful for LLMs, long-running tasks).
+                Requires implementing encode_response() for streaming. Defaults to False.
+
+            loop (Union[str, LitLoop], optional):
+                Inference loop strategy. "auto" selects optimal loop based on batching/streaming settings,
+                or provide custom LitLoop instance for advanced control. Defaults to "auto".
+
+            spec (LitSpec, optional):
+                API specification defining input/output schemas and behavior. Use OpenAISpec for
+                OpenAI-compatible APIs or custom LitSpec implementations. Defaults to None.
+
+            enable_async (bool, optional):
+                Enable async/await support for non-blocking operations in predict() method.
+                Useful for I/O-bound inference or external API calls. Defaults to False.
+
+        Example:
+            >>> # Simple API
+            >>> api = LitAPI()
+
+            >>> # Batched inference
+            >>> api = LitAPI(max_batch_size=8, batch_timeout=0.1)
+
+            >>> # OpenAI-compatible API
+            >>> api = LitAPI(spec=OpenAISpec())
+
+            >>> # Async processing
+            >>> api = LitAPI(enable_async=True)
+
+        Note:
+            You must implement setup() and predict(). Implementing decode_request() and
+            encode_response() is optional. Together, these methods define your model's behavior.
 
         """
 
diff --git a/src/litserve/python_client.py b/src/litserve/python_client.py
@@ -1,16 +1,6 @@
-client_template = """# Copyright The Lightning AI team.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
+client_template = """# This file is auto-generated by LitServe.
+# Disable auto-generation by setting `generate_client_file=False` in `LitServer.run()`.
+
 import requests
 
 response = requests.post("http://127.0.0.1:{PORT}/predict", json={{"input": 4.0}})
diff --git a/src/litserve/server.py b/src/litserve/server.py
@@ -30,7 +30,7 @@
 from multiprocessing.context import Process
 from queue import Queue
 from threading import Thread
-from typing import Callable, Dict, Iterable, List, Optional, Sequence, Tuple, Union
+from typing import Callable, Dict, Iterable, List, Literal, Optional, Sequence, Tuple, Union
 
 import uvicorn
 import uvicorn.server
@@ -50,7 +50,7 @@
 from litserve.specs.base import LitSpec
 from litserve.transport.base import MessageTransport
 from litserve.transport.factory import TransportConfig, create_transport_from_config
-from litserve.utils import LitAPIStatus, LoopResponseType, WorkerSetupStatus, call_after_stream
+from litserve.utils import LitAPIStatus, LoopResponseType, WorkerSetupStatus, call_after_stream, configure_logging
 
 mp.allow_connection_pickling()
 
@@ -371,29 +371,66 @@ def __init__(
         api_path: Optional[str] = None,
         loop: Optional[Union[str, LitLoop]] = None,
     ):
-        """Initialize a LitServer instance.
+        """Initialize a LitServer instance for high-performance model inference.
 
         Args:
-            lit_api: The API instance that handles requests and responses.
-            accelerator: Type of hardware to use, like 'cpu', 'cuda', or 'mps'. 'auto' selects the best available.
-            devices: Number of devices to use, or 'auto' to select automatically.
-            workers_per_device: Number of worker processes per device.
-            max_batch_size: Deprecated. Use `lit_api.max_batch_size` instead.
-            batch_timeout: Deprecated. Use `lit_api.batch_timeout` instead.
-            timeout: Maximum time to wait for a request to complete. Set to False for no timeout.
-            api_path: Deprecated. Use `LitAPI(api_path=...)` instead.
-            healthcheck_path: URL path for the health check endpoint.
-            info_path: URL path for the server and model information endpoint.
-            model_metadata: Metadata about the model, shown at the info endpoint.
-            stream: Whether to enable streaming responses.
-            spec: Specification for the API, such as OpenAISpec or custom specs.
-            max_payload_size: Maximum size of request payloads.
-            track_requests: Whether to track the number of active requests.
-            loop: Inference loop to use, or 'auto' to select based on settings.
-            callbacks: List of callback classes to execute at various stages.
-            middlewares: List of middleware classes to apply to the server.
-            loggers: List of loggers to use for recording server activity.
-            fast_queue: Whether to use ZeroMQ for faster response handling.
+            lit_api (Union[LitAPI, List[LitAPI]]):
+                API instance(s) defining model inference logic. Single instance or list for multi-model serving.
+
+            accelerator (str, optional):
+                Hardware type: 'cpu', 'cuda', 'mps', or 'auto' (detects best available). Defaults to 'auto'.
+
+            devices (Union[int, str], optional):
+                Number of devices to use, or 'auto' for all available. Defaults to 'auto'.
+
+            workers_per_device (int, optional):
+                Worker processes per device. Higher values improve throughput but use more memory. Defaults to 1.
+
+            timeout (Union[float, bool], optional):
+                Request timeout in seconds, or False to disable. Defaults to 30.
+
+            healthcheck_path (str, optional):
+                Health check endpoint path for load balancers. Defaults to "/health".
+
+            info_path (str, optional):
+                Server info endpoint path showing metadata and configuration. Defaults to "/info".
+
+            model_metadata (dict, optional):
+                Model metadata displayed at info endpoint (e.g., {"version": "1.0"}). Defaults to None.
+
+            max_payload_size (Union[int, str], optional):
+                Maximum request size as bytes or string ("10MB"). Defaults to "100MB".
+
+            track_requests (bool, optional):
+                Enable request tracking for monitoring. Recommended for production. Defaults to False.
+
+            callbacks (List[Callback], optional):
+                Callback instances for lifecycle events (logging, metrics). Defaults to None.
+
+            middlewares (List[Middleware], optional):
+                HTTP middleware for auth, CORS, rate limiting, etc. Defaults to None.
+
+            loggers (List[Logger], optional):
+                Custom loggers for server activity. Defaults to standard logging.
+
+            fast_queue (bool, optional):
+                Enable ZeroMQ for high-throughput (>100 RPS). Requires ZeroMQ installation. Defaults to False.
+
+            max_batch_size, batch_timeout, stream, spec, api_path, loop:
+                **Deprecated**: Configure these in your LitAPI implementation instead.
+
+        Example:
+            >>> # Basic
+            >>> server = LitServer(MyLitAPI())
+
+            >>> # Production
+            >>> server = LitServer(
+            ...     lit_api=MyLitAPI(max_batch_size=4),
+            ...     accelerator="cuda",
+            ...     devices=2,
+            ...     fast_queue=True,
+            ...     track_requests=True
+            ... )
 
         """
         if max_batch_size is not None:
@@ -754,9 +791,55 @@ def run(
         num_api_servers: Optional[int] = None,
         log_level: str = "info",
         generate_client_file: bool = True,
-        api_server_worker_type: Optional[str] = None,
+        api_server_worker_type: Literal["process", "thread"] = "process",
+        pretty_logs: bool = False,
         **kwargs,
     ):
+        """Run the LitServe server to handle API requests and distribute them to inference workers.
+
+        Args:
+            host (str, optional):
+                Host address to bind to. "0.0.0.0" for all IPs, "127.0.0.1" for localhost only. Defaults to "0.0.0.0".
+
+            port (Union[str, int], optional):
+                Port number to bind to. Must be available. Defaults to 8000.
+
+            num_api_servers (Optional[int], optional):
+                Number of uvicorn server instances for parallel API handling. Higher values improve
+                throughput but use more resources. Defaults to None (single instance).
+
+            log_level (str, optional):
+                Logging level: "critical", "error", "warning", "info", "debug".
+                Use "debug" for development. Defaults to "info".
+
+            generate_client_file (bool, optional):
+                Auto-generate a sample Python client script that sends a request to the server. Defaults to True.
+
+            api_server_worker_type (Literal["process", "thread"], optional):
+                Worker type. "process" for better isolation/CPU usage, "thread" for less memory. Defaults to "process".
+
+            pretty_logs (bool, optional):
+                Enhanced log formatting with colors using rich library. Good for development. Defaults to False.
+
+            **kwargs:
+                Additional uvicorn server options (ssl_keyfile, ssl_certfile, etc.). See uvicorn docs.
+
+        Example:
+            >>> server.run()  # Basic
+
+            >>> server.run(  # Production
+            ...     port=8080,
+            ...     num_api_servers=4,
+            ...     log_level="warning"
+            ... )
+
+            >>> server.run(  # Development
+            ...     log_level="debug",
+            ...     pretty_logs=True,
+            ...     generate_client_file=True
+            ... )
+
+        """
         if generate_client_file:
             LitServer.generate_client_file(port=port)
 
@@ -773,6 +856,7 @@ def run(
         if host not in ["0.0.0.0", "127.0.0.1", "::"]:
             raise ValueError(host_msg)
 
+        configure_logging(log_level, use_rich=pretty_logs)
         config = uvicorn.Config(app=self.app, host=host, port=port, log_level=log_level, **kwargs)
         sockets = [config.bind_socket()]
 
diff --git a/src/litserve/utils.py b/src/litserve/utils.py
@@ -21,7 +21,7 @@
 import uuid
 from contextlib import contextmanager
 from enum import Enum
-from typing import TYPE_CHECKING, Any, AsyncIterator
+from typing import TYPE_CHECKING, Any, AsyncIterator, TextIO, Union
 
 from fastapi import HTTPException
 
@@ -118,17 +118,29 @@ def _get_default_handler(stream, format):
 
 
 def configure_logging(
-    level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", stream=sys.stdout, use_rich=False
+    level: Union[str, int] = logging.INFO,
+    format: str = "%(asctime)s - %(processName)s[%(process)d] - %(name)s - %(levelname)s - %(message)s",
+    stream: TextIO = sys.stdout,
+    use_rich: bool = False,
 ):
     """Configure logging for the entire library with sensible defaults.
 
     Args:
         level (int): Logging level (default: logging.INFO)
         format (str): Log message format string
         stream (file-like): Output stream for logs
-        use_rich (bool): Whether to use rich for logging
+        use_rich (bool): Makes the logs more readable by using rich, useful for debugging. Defaults to False.
 
     """
+    if isinstance(level, str):
+        level = level.upper()
+        level = getattr(logging, level)
+
+    # Clear any existing handlers to prevent duplicates
+    library_logger = logging.getLogger("litserve")
+    for handler in library_logger.handlers[:]:
+        library_logger.removeHandler(handler)
+
     if use_rich:
         try:
             from rich.logging import RichHandler
@@ -139,16 +151,12 @@ def configure_logging(
         except ImportError:
             logger.warning("Rich is not installed, using default logging")
             handler = _get_default_handler(stream, format)
-
     else:
         handler = _get_default_handler(stream, format)
 
-    # Configure root library logger
-    library_logger = logging.getLogger("litserve")
+    # Configure library logger
     library_logger.setLevel(level)
     library_logger.addHandler(handler)
-
-    # Prevent propagation to root logger to avoid duplicate logs
     library_logger.propagate = False