1- """Command line interface for Vector Inference."""
1+ """Command line interface for Vector Inference.
2+
3+ This module provides the command-line interface for interacting with Vector
4+ Inference services, including model launching, status checking, metrics
5+ monitoring, and shutdown operations.
6+
7+ Commands
8+ --------
9+ launch
10+ Launch a model on the cluster
11+ status
12+ Check the status of a running model
13+ shutdown
14+ Stop a running model
15+ list
16+ List available models or get specific model configuration
17+ metrics
18+ Stream real-time performance metrics
19+ """
220
321import time
422from typing import Optional , Union
@@ -83,7 +101,44 @@ def launch(
83101 model_name : str ,
84102 ** cli_kwargs : Optional [Union [str , int , float , bool ]],
85103) -> None :
86- """Launch a model on the cluster."""
104+ """Launch a model on the cluster.
105+
106+ Parameters
107+ ----------
108+ model_name : str
109+ Name of the model to launch
110+     **cli_kwargs : str, int, float, or bool, optional
111+ Additional launch options including:
112+ - model_family : str, optional
113+ Family/architecture of the model
114+ - model_variant : str, optional
115+ Specific variant of the model
116+ - partition : str, optional
117+ Type of compute partition
118+ - num_nodes : int, optional
119+ Number of nodes to use
120+ - gpus_per_node : int, optional
121+ Number of GPUs per node
122+ - qos : str, optional
123+ Quality of service tier
124+ - time : str, optional
125+ Time limit for job
126+ - venv : str, optional
127+ Path to virtual environment
128+ - log_dir : str, optional
129+ Path to SLURM log directory
130+ - model_weights_parent_dir : str, optional
131+ Path to model weights directory
132+ - vllm_args : str, optional
133+ vLLM engine arguments
134+ - json_mode : bool, optional
135+ Output in JSON format
136+
137+ Raises
138+ ------
139+ click.ClickException
140+ If launch fails for any reason
141+ """
87142 try :
88143 # Convert cli_kwargs to LaunchOptions
89144 kwargs = {k : v for k , v in cli_kwargs .items () if k != "json_mode" }
@@ -124,7 +179,22 @@ def launch(
124179def status (
125180 slurm_job_id : int , log_dir : Optional [str ] = None , json_mode : bool = False
126181) -> None :
127- """Get the status of a running model on the cluster."""
182+ """Get the status of a running model on the cluster.
183+
184+ Parameters
185+ ----------
186+ slurm_job_id : int
187+ ID of the SLURM job to check
188+ log_dir : str, optional
189+ Path to SLURM log directory
190+ json_mode : bool, default=False
191+ Whether to output in JSON format
192+
193+ Raises
194+ ------
195+ click.ClickException
196+ If status check fails
197+ """
128198 try :
129199 # Start the client and get model inference server status
130200 client = VecInfClient ()
@@ -146,7 +216,18 @@ def status(
146216@cli .command ("shutdown" )
147217@click .argument ("slurm_job_id" , type = int , nargs = 1 )
148218def shutdown (slurm_job_id : int ) -> None :
149- """Shutdown a running model on the cluster."""
219+ """Shutdown a running model on the cluster.
220+
221+ Parameters
222+ ----------
223+ slurm_job_id : int
224+ ID of the SLURM job to shut down
225+
226+ Raises
227+ ------
228+ click.ClickException
229+ If shutdown operation fails
230+ """
150231 try :
151232 client = VecInfClient ()
152233 client .shutdown_model (slurm_job_id )
@@ -163,7 +244,20 @@ def shutdown(slurm_job_id: int) -> None:
163244 help = "Output in JSON string" ,
164245)
165246def list_models (model_name : Optional [str ] = None , json_mode : bool = False ) -> None :
166- """List all available models, or get default setup of a specific model."""
247+ """List all available models, or get default setup of a specific model.
248+
249+ Parameters
250+ ----------
251+ model_name : str, optional
252+ Name of specific model to get information for
253+ json_mode : bool, default=False
254+ Whether to output in JSON format
255+
256+ Raises
257+ ------
258+ click.ClickException
259+ If list operation fails
260+ """
167261 try :
168262 # Start the client
169263 client = VecInfClient ()
@@ -186,7 +280,26 @@ def list_models(model_name: Optional[str] = None, json_mode: bool = False) -> No
186280 "--log-dir" , type = str , help = "Path to slurm log directory (if used during launch)"
187281)
188282def metrics (slurm_job_id : int , log_dir : Optional [str ] = None ) -> None :
189- """Stream real-time performance metrics from the model endpoint."""
283+ """Stream real-time performance metrics from the model endpoint.
284+
285+ Parameters
286+ ----------
287+ slurm_job_id : int
288+ ID of the SLURM job to monitor
289+ log_dir : str, optional
290+ Path to SLURM log directory
291+
292+ Raises
293+ ------
294+ click.ClickException
295+ If metrics collection fails
296+
297+ Notes
298+ -----
299+ This command continuously streams metrics with a 2-second refresh interval
300+ until interrupted. If metrics are not available, it will display status
301+ information instead.
302+ """
190303 try :
191304 # Start the client and get inference server metrics
192305 client = VecInfClient ()
0 commit comments