Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/together/cli/api/endpoints.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,11 +115,13 @@ def endpoints(ctx: click.Context) -> None:
@click.option(
"--no-prompt-cache",
is_flag=True,
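# TODO: temporary default until the endpoint configs are fixed.
# NOTE(review): with is_flag=True and default=True, confirm that passing
# --no-prompt-cache still results in the cache being disabled.
default=True,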
help="Disable the prompt cache for this endpoint",
)
@click.option(
"--no-speculative-decoding",
is_flag=True,
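# TODO: temporary default until the endpoint configs are fixed.
# NOTE(review): with is_flag=True and default=True, confirm that passing
# --no-speculative-decoding still results in speculative decoding being disabled.
default=True,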
help="Disable speculative decoding for this endpoint",
)
@click.option(
Expand Down
8 changes: 4 additions & 4 deletions src/together/resources/endpoints.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,8 @@ def create(
min_replicas: int,
max_replicas: int,
display_name: Optional[str] = None,
disable_prompt_cache: bool = False,
disable_speculative_decoding: bool = False,
disable_prompt_cache: bool = True,
disable_speculative_decoding: bool = True,
state: Literal["STARTED", "STOPPED"] = "STARTED",
inactive_timeout: Optional[int] = None,
) -> DedicatedEndpoint:
Expand Down Expand Up @@ -304,8 +304,8 @@ async def create(
min_replicas: int,
max_replicas: int,
display_name: Optional[str] = None,
disable_prompt_cache: bool = False,
disable_speculative_decoding: bool = False,
disable_prompt_cache: bool = True,
disable_speculative_decoding: bool = True,
state: Literal["STARTED", "STOPPED"] = "STARTED",
inactive_timeout: Optional[int] = None,
) -> DedicatedEndpoint:
Expand Down
Loading