3030from multiprocessing .context import Process
3131from queue import Queue
3232from threading import Thread
33- from typing import Callable , Dict , Iterable , List , Optional , Sequence , Tuple , Union
33+ from typing import Callable , Dict , Iterable , List , Literal , Optional , Sequence , Tuple , Union
3434
3535import uvicorn
3636import uvicorn .server
5050from litserve .specs .base import LitSpec
5151from litserve .transport .base import MessageTransport
5252from litserve .transport .factory import TransportConfig , create_transport_from_config
53- from litserve .utils import LitAPIStatus , LoopResponseType , WorkerSetupStatus , call_after_stream
53+ from litserve .utils import LitAPIStatus , LoopResponseType , WorkerSetupStatus , call_after_stream , configure_logging
5454
5555mp .allow_connection_pickling ()
5656
@@ -371,29 +371,66 @@ def __init__(
371371 api_path : Optional [str ] = None ,
372372 loop : Optional [Union [str , LitLoop ]] = None ,
373373 ):
374- """Initialize a LitServer instance.
374+ """Initialize a LitServer instance for high-performance model inference.
375375
376376 Args:
377- lit_api: The API instance that handles requests and responses.
378- accelerator: Type of hardware to use, like 'cpu', 'cuda', or 'mps'. 'auto' selects the best available.
379- devices: Number of devices to use, or 'auto' to select automatically.
380- workers_per_device: Number of worker processes per device.
381- max_batch_size: Deprecated. Use `lit_api.max_batch_size` instead.
382- batch_timeout: Deprecated. Use `lit_api.batch_timeout` instead.
383- timeout: Maximum time to wait for a request to complete. Set to False for no timeout.
384- api_path: Deprecated. Use `LitAPI(api_path=...)` instead.
385- healthcheck_path: URL path for the health check endpoint.
386- info_path: URL path for the server and model information endpoint.
387- model_metadata: Metadata about the model, shown at the info endpoint.
388- stream: Whether to enable streaming responses.
389- spec: Specification for the API, such as OpenAISpec or custom specs.
390- max_payload_size: Maximum size of request payloads.
391- track_requests: Whether to track the number of active requests.
392- loop: Inference loop to use, or 'auto' to select based on settings.
393- callbacks: List of callback classes to execute at various stages.
394- middlewares: List of middleware classes to apply to the server.
395- loggers: List of loggers to use for recording server activity.
396- fast_queue: Whether to use ZeroMQ for faster response handling.
377+ lit_api (Union[LitAPI, List[LitAPI]]):
378+ API instance(s) defining model inference logic. Single instance or list for multi-model serving.
379+
380+ accelerator (str, optional):
381+ Hardware type: 'cpu', 'cuda', 'mps', or 'auto' (detects best available). Defaults to 'auto'.
382+
383+ devices (Union[int, str], optional):
384+ Number of devices to use, or 'auto' for all available. Defaults to 'auto'.
385+
386+ workers_per_device (int, optional):
387+ Worker processes per device. Higher values improve throughput but use more memory. Defaults to 1.
388+
389+ timeout (Union[float, bool], optional):
390+ Request timeout in seconds, or False to disable. Defaults to 30.
391+
392+ healthcheck_path (str, optional):
393+ Health check endpoint path for load balancers. Defaults to "/health".
394+
395+ info_path (str, optional):
396+ Server info endpoint path showing metadata and configuration. Defaults to "/info".
397+
398+ model_metadata (dict, optional):
399+ Model metadata displayed at info endpoint (e.g., {"version": "1.0"}). Defaults to None.
400+
401+ max_payload_size (Union[int, str], optional):
402+ Maximum request size as bytes or string ("10MB"). Defaults to "100MB".
403+
404+ track_requests (bool, optional):
405+ Enable request tracking for monitoring. Recommended for production. Defaults to False.
406+
407+ callbacks (List[Callback], optional):
408+ Callback instances for lifecycle events (logging, metrics). Defaults to None.
409+
410+ middlewares (List[Middleware], optional):
411+ HTTP middleware for auth, CORS, rate limiting, etc. Defaults to None.
412+
413+ loggers (List[Logger], optional):
414+ Custom loggers for server activity. Defaults to standard logging.
415+
416+ fast_queue (bool, optional):
417+ Enable ZeroMQ for high-throughput (>100 RPS). Requires ZeroMQ installation. Defaults to False.
418+
419+ max_batch_size, batch_timeout, stream, spec, api_path, loop:
420+ **Deprecated**: Configure these in your LitAPI implementation instead.
421+
422+ Example:
423+ >>> # Basic
424+ >>> server = LitServer(MyLitAPI())
425+
426+ >>> # Production
427+ >>> server = LitServer(
428+ ... lit_api=MyLitAPI(max_batch_size=4),
429+ ... accelerator="cuda",
430+ ... devices=2,
431+ ... fast_queue=True,
432+ ... track_requests=True
433+ ... )
397434
398435 """
399436 if max_batch_size is not None :
@@ -754,9 +791,55 @@ def run(
754791 num_api_servers : Optional [int ] = None ,
755792 log_level : str = "info" ,
756793 generate_client_file : bool = True ,
757- api_server_worker_type : Optional [str ] = None ,
794+ api_server_worker_type : Literal ["process" , "thread" ] = "process" ,
795+ pretty_logs : bool = False ,
758796 ** kwargs ,
759797 ):
798+ """Run the LitServe server to handle API requests and distribute them to inference workers.
799+
800+ Args:
801+ host (str, optional):
802+ Host address to bind to. "0.0.0.0" for all IPv4 addresses, "::" for all IPv6 addresses, "127.0.0.1" for localhost only. Defaults to "0.0.0.0".
803+
804+ port (Union[str, int], optional):
805+ Port number to bind to. Must be available. Defaults to 8000.
806+
807+ num_api_servers (Optional[int], optional):
808+ Number of uvicorn server instances for parallel API handling. Higher values improve
809+ throughput but use more resources. Defaults to None (single instance).
810+
811+ log_level (str, optional):
812+ Logging level: "critical", "error", "warning", "info", "debug", "trace".
813+ Use "debug" for development. Defaults to "info".
814+
815+ generate_client_file (bool, optional):
816+ Auto-generate Python client file with typed methods for API interaction. Defaults to True.
817+
818+ api_server_worker_type (Literal["process", "thread"], optional):
819+ Worker type. "process" for better isolation/CPU usage, "thread" for less memory. Defaults to "process".
820+
821+ pretty_logs (bool, optional):
822+ Enhanced log formatting with colors using rich library. Good for development. Defaults to False.
823+
824+ **kwargs:
825+ Additional uvicorn server options (ssl_keyfile, ssl_certfile, etc.). See uvicorn docs.
826+
827+ Example:
828+ >>> server.run() # Basic
829+
830+ >>> server.run( # Production
831+ ... port=8080,
832+ ... num_api_servers=4,
833+ ... log_level="warning"
834+ ... )
835+
836+ >>> server.run( # Development
837+ ... log_level="debug",
838+ ... pretty_logs=True,
839+ ... generate_client_file=True
840+ ... )
841+
842+ """
760843 if generate_client_file :
761844 LitServer .generate_client_file (port = port )
762845
@@ -773,6 +856,7 @@ def run(
773856 if host not in ["0.0.0.0" , "127.0.0.1" , "::" ]:
774857 raise ValueError (host_msg )
775858
859+ configure_logging (log_level , use_rich = pretty_logs )
776860 config = uvicorn .Config (app = self .app , host = host , port = port , log_level = log_level , ** kwargs )
777861 sockets = [config .bind_socket ()]
778862
0 commit comments