Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 13 additions & 13 deletions vec_inf/cli/_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,14 +40,14 @@ def cli() -> None:
help="GPU memory utilization, default to 0.9",
)
@click.option(
- "--enable-prefix-caching",
- is_flag=True,
- help="Enables automatic prefix caching",
+ "--enable-prefix-caching/--disable-prefix-caching",
+ default=None,
+ help="Enable or disable automatic prefix caching",
)
@click.option(
- "--enable-chunked-prefill",
- is_flag=True,
- help="Enable chunked prefill, enabled by default if max number of sequences > 32k",
+ "--enable-chunked-prefill/--disable-chunked-prefill",
+ default=None,
+ help="Enable or disable chunked prefill, enabled by default if max number of sequences > 32k",
)
@click.option(
"--max-num-batched-tokens",
Expand Down Expand Up @@ -101,24 +101,24 @@ def cli() -> None:
help="Path to parent directory containing model weights",
)
@click.option(
- "--pipeline-parallelism",
- is_flag=True,
- help="Enable pipeline parallelism, enabled by default for supported models",
+ "--pipeline-parallelism/--no-pipeline-parallelism",
+ default=None,
+ help="Enable or disable pipeline parallelism, enabled by default for supported models",
)
@click.option(
"--compilation-config",
type=click.Choice(["0", "3"]),
help="torch.compile optimization level, accepts '0' or '3', default to '0', which means no optimization is applied",
)
@click.option(
- "--enforce-eager",
- is_flag=True,
- help="Always use eager-mode PyTorch",
+ "--enforce-eager/--no-enforce-eager",
+ default=None,
+ help="If enabled, always use eager-mode PyTorch",
)
@click.option(
"--json-mode",
is_flag=True,
- help="Output in JSON string",
+ help="If enabled, output will be in JSON string format instead of a table",
)
def launch(
model_name: str,
Expand Down
2 changes: 1 addition & 1 deletion vec_inf/cli/_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ class ModelConfig(BaseModel):
default=0.9, gt=0.0, le=1.0, description="GPU memory utilization"
)
pipeline_parallelism: bool = Field(
- default=True, description="Enable pipeline parallelism"
+ default=False, description="Enable pipeline parallelism"
)
enforce_eager: bool = Field(default=False, description="Force eager mode execution")
qos: Union[QOS, str] = Field(default="m2", description="Quality of Service tier")
Expand Down
4 changes: 2 additions & 2 deletions vec_inf/cli/_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,8 +98,8 @@ def _get_launch_params(self) -> dict[str, Any]:

# Process boolean fields
for bool_field in BOOLEAN_FIELDS:
- if self.cli_kwargs[bool_field]:
- params[bool_field] = True
+ if self.cli_kwargs.get(bool_field) is not None:
+ params[bool_field] = self.cli_kwargs[bool_field]

# Merge other overrides
for key, value in self.cli_kwargs.items():
Expand Down
Loading