
Commit 145daea

Improve developer experience (#519)
* Enhance LitAPI and LitServer initialization documentation
  - Updated docstrings for the LitAPI and LitServer constructors to provide detailed descriptions of parameters and their usage.
  - Improved clarity on deprecated parameters and their replacements, ensuring better guidance for users.
  - Added examples for instantiating LitAPI and LitServer, demonstrating various configurations for enhanced usability.
* bump version
* fix tests
1 parent 4ac9010 commit 145daea

File tree

5 files changed: +172 −54 lines


src/litserve/__about__.py

Lines changed: 1 addition & 1 deletion
@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-__version__ = "0.2.11a2"
+__version__ = "0.2.11"
 __author__ = "Lightning-AI et al."
 __author_email__ = "community@lightning.ai"
 __license__ = "Apache-2.0"

src/litserve/api.py

Lines changed: 44 additions & 8 deletions
@@ -45,16 +45,52 @@ def __init__(
     spec: Optional[LitSpec] = None,
     enable_async: bool = False,
 ):
-    """Initialize a LitAPI instance.
+    """Initialize a LitAPI instance that defines the model's inference behavior.

     Args:
-        max_batch_size: Maximum number of requests to process in a batch.
-        batch_timeout: Maximum time to wait for a batch to fill before processing.
-        api_path: URL path for the prediction endpoint.
-        stream: Whether to enable streaming responses.
-        loop: Inference loop to use, or 'auto' to select based on settings.
-        spec: Specification for the API, such as OpenAISpec or custom specs.
-        enable_async: Enable async support.
+        max_batch_size (int, optional):
+            Maximum number of requests to batch together for inference. Higher values improve
+            throughput for models that benefit from batching but use more memory. Defaults to 1.
+
+        batch_timeout (float, optional):
+            Maximum seconds to wait for a batch to fill before processing an incomplete batch.
+            Lower values reduce latency; higher values improve batching efficiency. Defaults to 0.0.
+
+        api_path (str, optional):
+            URL endpoint path for predictions (e.g., "/predict", "/v1/chat"). Defaults to "/predict".
+
+        stream (bool, optional):
+            Enable streaming responses for real-time output (useful for LLMs and long-running tasks).
+            Requires implementing encode_response() for streaming. Defaults to False.
+
+        loop (Union[str, LitLoop], optional):
+            Inference loop strategy. "auto" selects the optimal loop based on batching/streaming
+            settings; pass a custom LitLoop instance for advanced control. Defaults to "auto".
+
+        spec (LitSpec, optional):
+            API specification defining input/output schemas and behavior. Use OpenAISpec for
+            OpenAI-compatible APIs or a custom LitSpec implementation. Defaults to None.
+
+        enable_async (bool, optional):
+            Enable async/await support for non-blocking operations in the predict() method.
+            Useful for I/O-bound inference or external API calls. Defaults to False.
+
+    Example:
+        >>> # Simple API
+        >>> api = LitAPI()
+
+        >>> # Batched inference
+        >>> api = LitAPI(max_batch_size=8, batch_timeout=0.1)
+
+        >>> # OpenAI-compatible API
+        >>> api = LitAPI(spec=OpenAISpec())
+
+        >>> # Async processing
+        >>> api = LitAPI(enable_async=True)
+
+    Note:
+        Implement setup() and predict(), and optionally decode_request()/encode_response(),
+        to define your model's behavior.

     """
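
For orientation, here is a minimal sketch of the LitAPI subclass these constructor options attach to. The hook names (setup, decode_request, predict, encode_response) are LitServe's standard API; the SimpleLitAPI name and the doubling stub model are illustrative only, not part of this commit.

import litserve as ls


class SimpleLitAPI(ls.LitAPI):
    def setup(self, device):
        # Runs once per worker: load weights and move the model to `device`.
        # A trivial stand-in model is used here.
        self.model = lambda x: x * 2

    def decode_request(self, request):
        # Pull the model input out of the JSON payload.
        return request["input"]

    def predict(self, x):
        # Run inference on the decoded input.
        return self.model(x)

    def encode_response(self, output):
        # Wrap the raw output in a JSON-serializable response.
        return {"output": output}


api = SimpleLitAPI()  # add max_batch_size/batch_timeout once predict() is batch-aware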

src/litserve/python_client.py

Lines changed: 3 additions & 13 deletions
@@ -1,16 +1,6 @@
-client_template = """# Copyright The Lightning AI team.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
+client_template = """# This file is auto-generated by LitServe.
+# Disable auto-generation by setting `generate_client_file=False` in `LitServer.run()`.

 import requests

 response = requests.post("http://127.0.0.1:{PORT}/predict", json={{"input": 4.0}})
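
Rendered with a concrete port (8000 assumed here), the generated client reduces to a plain requests call; the final print is added for illustration and is not shown in the truncated template above.

import requests

# The {PORT} placeholder is filled in by LitServer.generate_client_file().
response = requests.post("http://127.0.0.1:8000/predict", json={"input": 4.0})
print(response.json())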

src/litserve/server.py

Lines changed: 108 additions & 24 deletions
@@ -30,7 +30,7 @@
 from multiprocessing.context import Process
 from queue import Queue
 from threading import Thread
-from typing import Callable, Dict, Iterable, List, Optional, Sequence, Tuple, Union
+from typing import Callable, Dict, Iterable, List, Literal, Optional, Sequence, Tuple, Union

 import uvicorn
 import uvicorn.server

@@ -50,7 +50,7 @@
 from litserve.specs.base import LitSpec
 from litserve.transport.base import MessageTransport
 from litserve.transport.factory import TransportConfig, create_transport_from_config
-from litserve.utils import LitAPIStatus, LoopResponseType, WorkerSetupStatus, call_after_stream
+from litserve.utils import LitAPIStatus, LoopResponseType, WorkerSetupStatus, call_after_stream, configure_logging

 mp.allow_connection_pickling()
@@ -371,29 +371,66 @@ def __init__(
     api_path: Optional[str] = None,
     loop: Optional[Union[str, LitLoop]] = None,
 ):
-    """Initialize a LitServer instance.
+    """Initialize a LitServer instance for high-performance model inference.

     Args:
-        lit_api: The API instance that handles requests and responses.
-        accelerator: Type of hardware to use, like 'cpu', 'cuda', or 'mps'. 'auto' selects the best available.
-        devices: Number of devices to use, or 'auto' to select automatically.
-        workers_per_device: Number of worker processes per device.
-        max_batch_size: Deprecated. Use `lit_api.max_batch_size` instead.
-        batch_timeout: Deprecated. Use `lit_api.batch_timeout` instead.
-        timeout: Maximum time to wait for a request to complete. Set to False for no timeout.
-        api_path: Deprecated. Use `LitAPI(api_path=...)` instead.
-        healthcheck_path: URL path for the health check endpoint.
-        info_path: URL path for the server and model information endpoint.
-        model_metadata: Metadata about the model, shown at the info endpoint.
-        stream: Whether to enable streaming responses.
-        spec: Specification for the API, such as OpenAISpec or custom specs.
-        max_payload_size: Maximum size of request payloads.
-        track_requests: Whether to track the number of active requests.
-        loop: Inference loop to use, or 'auto' to select based on settings.
-        callbacks: List of callback classes to execute at various stages.
-        middlewares: List of middleware classes to apply to the server.
-        loggers: List of loggers to use for recording server activity.
-        fast_queue: Whether to use ZeroMQ for faster response handling.
+        lit_api (Union[LitAPI, List[LitAPI]]):
+            API instance(s) defining the model inference logic: a single instance, or a list for
+            multi-model serving.
+
+        accelerator (str, optional):
+            Hardware type: 'cpu', 'cuda', 'mps', or 'auto' (detects the best available). Defaults to 'auto'.
+
+        devices (Union[int, str], optional):
+            Number of devices to use, or 'auto' for all available. Defaults to 'auto'.
+
+        workers_per_device (int, optional):
+            Worker processes per device. Higher values improve throughput but use more memory. Defaults to 1.
+
+        timeout (Union[float, bool], optional):
+            Request timeout in seconds, or False to disable. Defaults to 30.
+
+        healthcheck_path (str, optional):
+            Health check endpoint path for load balancers. Defaults to "/health".
+
+        info_path (str, optional):
+            Server info endpoint path showing metadata and configuration. Defaults to "/info".
+
+        model_metadata (dict, optional):
+            Model metadata displayed at the info endpoint (e.g., {"version": "1.0"}). Defaults to None.
+
+        max_payload_size (Union[int, str], optional):
+            Maximum request size, as bytes or a string like "10MB". Defaults to "100MB".
+
+        track_requests (bool, optional):
+            Enable active-request tracking for monitoring. Recommended for production. Defaults to False.
+
+        callbacks (List[Callback], optional):
+            Callback instances for lifecycle events (logging, metrics). Defaults to None.
+
+        middlewares (List[Middleware], optional):
+            HTTP middleware for auth, CORS, rate limiting, etc. Defaults to None.
+
+        loggers (List[Logger], optional):
+            Custom loggers for server activity. Defaults to standard logging.
+
+        fast_queue (bool, optional):
+            Enable ZeroMQ for high throughput (>100 RPS). Requires a ZeroMQ installation. Defaults to False.
+
+        max_batch_size, batch_timeout, stream, spec, api_path, loop:
+            **Deprecated**: configure these on your LitAPI instance instead.
+
+    Example:
+        >>> # Basic
+        >>> server = LitServer(MyLitAPI())
+
+        >>> # Production
+        >>> server = LitServer(
+        ...     lit_api=MyLitAPI(max_batch_size=4),
+        ...     accelerator="cuda",
+        ...     devices=2,
+        ...     fast_queue=True,
+        ...     track_requests=True
+        ... )

     """
     if max_batch_size is not None:
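
Since lit_api now documents list input for multi-model serving, here is a short sketch under that assumption; ChatAPI and EmbedAPI are hypothetical LitAPI subclasses, and api_path comes from the LitAPI constructor documented earlier in this commit.

import litserve as ls

# Hypothetical LitAPI subclasses, each mounted on its own endpoint path.
chat_api = ChatAPI(api_path="/v1/chat")
embed_api = EmbedAPI(api_path="/v1/embed")

server = ls.LitServer([chat_api, embed_api], accelerator="auto")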
@@ -754,9 +791,55 @@ def run(
     num_api_servers: Optional[int] = None,
     log_level: str = "info",
     generate_client_file: bool = True,
-    api_server_worker_type: Optional[str] = None,
+    api_server_worker_type: Literal["process", "thread"] = "process",
+    pretty_logs: bool = False,
     **kwargs,
 ):
+    """Run the LitServe server to handle API requests and distribute them to inference workers.
+
+    Args:
+        host (str, optional):
+            Host address to bind to: "0.0.0.0" for all interfaces, "127.0.0.1" for localhost only.
+            Defaults to "0.0.0.0".
+
+        port (Union[str, int], optional):
+            Port number to bind to. Must be available. Defaults to 8000.
+
+        num_api_servers (Optional[int], optional):
+            Number of uvicorn server instances for parallel API handling. Higher values improve
+            throughput but use more resources. Defaults to None (single instance).
+
+        log_level (str, optional):
+            Logging level: "critical", "error", "warning", "info", "debug", or "trace".
+            Use "debug" for development. Defaults to "info".
+
+        generate_client_file (bool, optional):
+            Auto-generate a simple Python client file for calling the API. Defaults to True.
+
+        api_server_worker_type (Literal["process", "thread"], optional):
+            Worker type: "process" for better isolation and CPU usage, "thread" for lower memory use.
+            Defaults to "process".
+
+        pretty_logs (bool, optional):
+            Enhanced log formatting with colors via the rich library. Good for development.
+            Defaults to False.
+
+        **kwargs:
+            Additional uvicorn server options (ssl_keyfile, ssl_certfile, etc.). See the uvicorn docs.
+
+    Example:
+        >>> server.run()  # Basic
+
+        >>> server.run(  # Production
+        ...     port=8080,
+        ...     num_api_servers=4,
+        ...     log_level="warning"
+        ... )
+
+        >>> server.run(  # Development
+        ...     log_level="debug",
+        ...     pretty_logs=True,
+        ...     generate_client_file=True
+        ... )
+
+    """
     if generate_client_file:
         LitServer.generate_client_file(port=port)

@@ -773,6 +856,7 @@ def run(
     if host not in ["0.0.0.0", "127.0.0.1", "::"]:
         raise ValueError(host_msg)

+    configure_logging(log_level, use_rich=pretty_logs)
     config = uvicorn.Config(app=self.app, host=host, port=port, log_level=log_level, **kwargs)
     sockets = [config.bind_socket()]
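
Putting the pieces together, a minimal end-to-end serving script consistent with these docstrings; SimpleLitAPI is the illustrative subclass sketched earlier, and the port and log settings are arbitrary.

import litserve as ls

if __name__ == "__main__":
    server = ls.LitServer(SimpleLitAPI(), accelerator="auto", track_requests=True)
    # pretty_logs feeds configure_logging(), as wired up in the hunk above
    server.run(port=8000, log_level="debug", pretty_logs=True)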

src/litserve/utils.py

Lines changed: 16 additions & 8 deletions
@@ -21,7 +21,7 @@
 import uuid
 from contextlib import contextmanager
 from enum import Enum
-from typing import TYPE_CHECKING, Any, AsyncIterator
+from typing import TYPE_CHECKING, Any, AsyncIterator, TextIO, Union

 from fastapi import HTTPException

@@ -118,17 +118,29 @@ def _get_default_handler(stream, format):

 def configure_logging(
-    level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", stream=sys.stdout, use_rich=False
+    level: Union[str, int] = logging.INFO,
+    format: str = "%(asctime)s - %(processName)s[%(process)d] - %(name)s - %(levelname)s - %(message)s",
+    stream: TextIO = sys.stdout,
+    use_rich: bool = False,
 ):
     """Configure logging for the entire library with sensible defaults.

     Args:
         level (int): Logging level (default: logging.INFO)
         format (str): Log message format string
         stream (file-like): Output stream for logs
-        use_rich (bool): Whether to use rich for logging
+        use_rich (bool): Makes the logs more readable by using rich, useful for debugging. Defaults to False.

     """
+    if isinstance(level, str):
+        level = level.upper()
+        level = getattr(logging, level)
+
+    # Clear any existing handlers to prevent duplicates
+    library_logger = logging.getLogger("litserve")
+    for handler in library_logger.handlers[:]:
+        library_logger.removeHandler(handler)
+
     if use_rich:
         try:
             from rich.logging import RichHandler
@@ -139,16 +151,12 @@ def configure_logging(
         except ImportError:
             logger.warning("Rich is not installed, using default logging")
             handler = _get_default_handler(stream, format)
-
     else:
         handler = _get_default_handler(stream, format)

-    # Configure root library logger
-    library_logger = logging.getLogger("litserve")
+    # Configure library logger
     library_logger.setLevel(level)
     library_logger.addHandler(handler)
-
-    # Prevent propagation to root logger to avoid duplicate logs
     library_logger.propagate = False
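
A quick sketch of the new string-level handling, assuming configure_logging is imported from litserve.utils, where this diff defines it.

from litserve.utils import configure_logging

# "debug" is upper-cased and resolved to logging.DEBUG via getattr,
# per the isinstance(level, str) branch added above.
configure_logging(level="debug", use_rich=True)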
