1- """Command line interface for Vector Inference."""
1+ """Command line interface for Vector Inference.
2+
3+ This module provides the command-line interface for interacting with Vector
4+ Inference services, including model launching, status checking, metrics
5+ monitoring, and shutdown operations.
6+
7+ Commands
8+ --------
9+ launch
10+ Launch a model on the cluster
11+ status
12+ Check the status of a running model
13+ shutdown
14+ Stop a running model
15+ list
16+ List available models or get specific model configuration
17+ metrics
18+ Stream real-time performance metrics
19+ """
220
321import time
422from typing import Optional , Union
@@ -83,7 +101,44 @@ def launch(
83101 model_name : str ,
84102 ** cli_kwargs : Optional [Union [str , int , float , bool ]],
85103) -> None :
86- """Launch a model on the cluster."""
104+ """Launch a model on the cluster.
105+
106+ Parameters
107+ ----------
108+ model_name : str
109+ Name of the model to launch
110+     **cli_kwargs : str, int, float, or bool, optional
111+ Additional launch options including:
112+ - model_family : str, optional
113+ Family/architecture of the model
114+ - model_variant : str, optional
115+ Specific variant of the model
116+ - partition : str, optional
117+ Type of compute partition
118+ - num_nodes : int, optional
119+ Number of nodes to use
120+ - gpus_per_node : int, optional
121+ Number of GPUs per node
122+ - qos : str, optional
123+ Quality of service tier
124+ - time : str, optional
125+ Time limit for job
126+ - venv : str, optional
127+ Path to virtual environment
128+ - log_dir : str, optional
129+ Path to SLURM log directory
130+ - model_weights_parent_dir : str, optional
131+ Path to model weights directory
132+ - vllm_args : str, optional
133+ vLLM engine arguments
134+ - json_mode : bool, optional
135+ Output in JSON format
136+
137+ Raises
138+ ------
139+ click.ClickException
140+ If launch fails for any reason
141+ """
87142 try :
88143 # Convert cli_kwargs to LaunchOptions
89144 kwargs = {k : v for k , v in cli_kwargs .items () if k != "json_mode" }
@@ -124,7 +179,22 @@ def launch(
124179def status (
125180 slurm_job_id : int , log_dir : Optional [str ] = None , json_mode : bool = False
126181) -> None :
127- """Get the status of a running model on the cluster."""
182+ """Get the status of a running model on the cluster.
183+
184+ Parameters
185+ ----------
186+ slurm_job_id : int
187+ ID of the SLURM job to check
188+ log_dir : str, optional
189+ Path to SLURM log directory
190+ json_mode : bool, default=False
191+ Whether to output in JSON format
192+
193+ Raises
194+ ------
195+ click.ClickException
196+ If status check fails
197+ """
128198 try :
129199 # Start the client and get model inference server status
130200 client = VecInfClient ()
@@ -146,7 +216,18 @@ def status(
146216@cli .command ("shutdown" )
147217@click .argument ("slurm_job_id" , type = int , nargs = 1 )
148218def shutdown (slurm_job_id : int ) -> None :
149- """Shutdown a running model on the cluster."""
219+ """Shutdown a running model on the cluster.
220+
221+ Parameters
222+ ----------
223+ slurm_job_id : int
224+ ID of the SLURM job to shut down
225+
226+ Raises
227+ ------
228+ click.ClickException
229+ If shutdown operation fails
230+ """
150231 try :
151232 client = VecInfClient ()
152233 client .shutdown_model (slurm_job_id )
@@ -163,7 +244,20 @@ def shutdown(slurm_job_id: int) -> None:
163244 help = "Output in JSON string" ,
164245)
165246def list_models (model_name : Optional [str ] = None , json_mode : bool = False ) -> None :
166- """List all available models, or get default setup of a specific model."""
247+ """List all available models, or get default setup of a specific model.
248+
249+ Parameters
250+ ----------
251+ model_name : str, optional
252+ Name of specific model to get information for
253+ json_mode : bool, default=False
254+ Whether to output in JSON format
255+
256+ Raises
257+ ------
258+ click.ClickException
259+ If list operation fails
260+ """
167261 try :
168262 # Start the client
169263 client = VecInfClient ()
@@ -186,7 +280,26 @@ def list_models(model_name: Optional[str] = None, json_mode: bool = False) -> No
186280 "--log-dir" , type = str , help = "Path to slurm log directory (if used during launch)"
187281)
188282def metrics (slurm_job_id : int , log_dir : Optional [str ] = None ) -> None :
189- """Stream real-time performance metrics from the model endpoint."""
283+ """Stream real-time performance metrics from the model endpoint.
284+
285+ Parameters
286+ ----------
287+ slurm_job_id : int
288+ ID of the SLURM job to monitor
289+ log_dir : str, optional
290+ Path to SLURM log directory
291+
292+ Raises
293+ ------
294+ click.ClickException
295+ If metrics collection fails
296+
297+ Notes
298+ -----
299+ This command continuously streams metrics with a 2-second refresh interval
300+ until interrupted. If metrics are not available, it will display status
301+ information instead.
302+ """
190303 try :
191304 # Start the client and get inference server metrics
192305 client = VecInfClient ()
0 commit comments