
Commit 2d7b835

release: 2.0.0-alpha.17 (#217)
* feat: Improve usage of models list cli command (#216)
* feat: Improve usage of models list cli command
* bring back context length and improve pricing field
* cleanup
* feat(cli): add b200 and h200 GPU options for endpoint creation (#218)
* chore: Deprecate CLI usage for endpoints create flag --no-promopt-cache (#219)
* chore: Mark disable_prompt_cache as deprecated for endpoint creation
* release: 2.0.0-alpha.17

---------

Co-authored-by: Blaine Kasten <blainekasten@gmail.com>
Co-authored-by: stainless-app[bot] <142633134+stainless-app[bot]@users.noreply.github.com>
1 parent 345b121 commit 2d7b835

File tree

9 files changed: 48 additions, 27 deletions


.release-please-manifest.json

Lines changed: 1 addition & 1 deletion
@@ -1,3 +1,3 @@
 {
-  ".": "2.0.0-alpha.16"
+  ".": "2.0.0-alpha.17"
 }

.stats.yml

Lines changed: 2 additions & 2 deletions
@@ -1,4 +1,4 @@
 configured_endpoints: 55
-openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/togetherai%2Ftogetherai-585412004e70865cc6e32fdda4177eabcffd0f165e485da320bad9514960ebe3.yml
-openapi_spec_hash: 70b0de2b3a0eaa3dca00722ba76d5e54
+openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/togetherai%2Ftogetherai-cea384db9edf6057ebc5c198a155955b97771430e7afe3be910842c734bb9812.yml
+openapi_spec_hash: 9c2c575baec9b59add4b2e91c14089ad
 config_hash: a955366d5f659d70d9e6b26116e119bf

CHANGELOG.md

Lines changed: 15 additions & 0 deletions
@@ -1,5 +1,20 @@
 # Changelog
 
+## 2.0.0-alpha.17 (2026-01-21)
+
+Full Changelog: [v2.0.0-alpha.16...v2.0.0-alpha.17](https://github.com/togethercomputer/together-py/compare/v2.0.0-alpha.16...v2.0.0-alpha.17)
+
+### Features
+
+* **cli:** add b200 and h200 GPU options for endpoint creation ([#218](https://github.com/togethercomputer/together-py/issues/218)) ([b514912](https://github.com/togethercomputer/together-py/commit/b514912a281922fefbf8a9f62b936ed1de243718))
+* Improve usage of models list cli command ([#216](https://github.com/togethercomputer/together-py/issues/216)) ([430e6c1](https://github.com/togethercomputer/together-py/commit/430e6c1e030749be474f020b677d91014ba4482c))
+
+
+### Chores
+
+* Deprecate CLI usage for endpoints create flag --no-promopt-cache ([#219](https://github.com/togethercomputer/together-py/issues/219)) ([55e9700](https://github.com/togethercomputer/together-py/commit/55e9700187b42f8baff6f567a3a657b46577ed88))
+* Mark disable_prompt_cache as deprecated for endpoint creation ([6a629b2](https://github.com/togethercomputer/together-py/commit/6a629b29e53b4374503d30ca75456184ef313b67))
+
 ## 2.0.0-alpha.16 (2026-01-18)
 
 Full Changelog: [v2.0.0-alpha.15...v2.0.0-alpha.16](https://github.com/togethercomputer/together-py/compare/v2.0.0-alpha.15...v2.0.0-alpha.16)

pyproject.toml

Lines changed: 1 addition & 1 deletion
@@ -1,6 +1,6 @@
 [project]
 name = "together"
-version = "2.0.0-alpha.16"
+version = "2.0.0-alpha.17"
 description = "The official Python library for the together API"
 dynamic = ["readme"]
 license = "Apache-2.0"

src/together/_version.py

Lines changed: 1 addition & 1 deletion
@@ -1,4 +1,4 @@
 # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
 
 __title__ = "together"
-__version__ = "2.0.0-alpha.16"  # x-release-please-version
+__version__ = "2.0.0-alpha.17"  # x-release-please-version

src/together/lib/cli/api/endpoints/create.py

Lines changed: 7 additions & 5 deletions
@@ -31,7 +31,7 @@
 )
 @click.option(
     "--gpu",
-    type=click.Choice(["h100", "a100", "l40", "l40s", "rtx-6000"]),
+    type=click.Choice(["b200", "h200", "h100", "a100", "l40", "l40s", "rtx-6000"]),
     required=True,
     help="GPU type to use for inference",
 )
@@ -48,7 +48,7 @@
 @click.option(
     "--no-prompt-cache",
     is_flag=True,
-    help="Disable the prompt cache for this endpoint",
+    help="Deprecated and no longer has any effect.",
 )
 @click.option(
     "--no-speculative-decoding",
@@ -95,13 +95,18 @@ def create(
     client: Together = ctx.obj
     # Map GPU types to their full hardware ID names
     gpu_map = {
+        "b200": "nvidia_b200_180gb_sxm",
+        "h200": "nvidia_h200_140gb_sxm",
         "h100": "nvidia_h100_80gb_sxm",
         "a100": "nvidia_a100_80gb_pcie" if gpu_count == 1 else "nvidia_a100_80gb_sxm",
         "l40": "nvidia_l40",
         "l40s": "nvidia_l40s",
         "rtx-6000": "nvidia_rtx_6000_ada",
     }
 
+    if no_prompt_cache is not None:
+        click.echo("Warning: --no-prompt-cache is deprecated and no longer has any effect.", err=True)
+
     hardware_id = f"{gpu_count}x_{gpu_map[gpu]}"
 
     try:
@@ -113,7 +118,6 @@ def create(
                 "max_replicas": max_replicas,
             },
             display_name=display_name or omit,
-            disable_prompt_cache=no_prompt_cache or omit,
             disable_speculative_decoding=no_speculative_decoding or omit,
             state="STOPPED" if no_auto_start else "STARTED",
             inactive_timeout=inactive_timeout,
@@ -134,8 +138,6 @@ def create(
         click.echo(f"  Hardware: {hardware_id}", err=True)
     if display_name:
         click.echo(f"  Display name: {display_name}", err=True)
-    if no_prompt_cache:
-        click.echo("  Prompt cache: disabled", err=True)
     if no_speculative_decoding:
         click.echo("  Speculative decoding: disabled", err=True)
     if no_auto_start:
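For reference, a minimal sketch of the hardware-ID resolution the updated command performs, using the GPU-to-hardware strings visible in this diff. The standalone resolve_hardware_id helper below is illustrative only and is not part of the SDK; only the mapping values and the "{count}x_{name}" format come from the code above.

# Illustrative sketch: mirrors the gpu_map / hardware_id logic in create.py above.
# resolve_hardware_id is not an SDK function; the mapping strings match the diff.
GPU_MAP = {
    "b200": "nvidia_b200_180gb_sxm",
    "h200": "nvidia_h200_140gb_sxm",
    "h100": "nvidia_h100_80gb_sxm",
    "l40": "nvidia_l40",
    "l40s": "nvidia_l40s",
    "rtx-6000": "nvidia_rtx_6000_ada",
}

def resolve_hardware_id(gpu: str, gpu_count: int) -> str:
    """Build the hardware ID string the CLI passes to the endpoints API."""
    if gpu == "a100":
        # a100 uses PCIe for a single GPU and SXM for multi-GPU endpoints.
        name = "nvidia_a100_80gb_pcie" if gpu_count == 1 else "nvidia_a100_80gb_sxm"
    else:
        name = GPU_MAP[gpu]
    return f"{gpu_count}x_{name}"

print(resolve_hardware_id("h200", 2))  # 2x_nvidia_h200_140gb_sxm
print(resolve_hardware_id("b200", 1))  # 1x_nvidia_b200_180gb_sxm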

src/together/lib/cli/api/models/list.py

Lines changed: 18 additions & 14 deletions
@@ -5,9 +5,9 @@
 from tabulate import tabulate
 
 from together import Together, omit
-from together._models import BaseModel
 from together._response import APIResponse as APIResponse
 from together.lib.cli.api._utils import handle_api_errors
+from together.lib.utils.serializer import datetime_serializer
 
 
 @click.command()
@@ -29,23 +29,27 @@ def list(ctx: click.Context, type: Optional[str], json: bool) -> None:
 
     models_list = client.models.list(dedicated=type == "dedicated" if type else omit)
 
+    if json:
+        items = [model.model_dump() for model in models_list]
+        click.echo(json_lib.dumps(items, indent=2, default=datetime_serializer))
+        return
+
     display_list: List[Dict[str, Any]] = []
-    model: BaseModel
-    for model in models_list:
+    for model in sorted(models_list, key=lambda x: x.type):
+        price_parts: List[str] = []
+
+        # Only show pricing if a value actually exists
+        if model.pricing and model.pricing.input > 0 and model.pricing.output > 0:
+            price_parts.append(f"${model.pricing.input:.2f}")
+            price_parts.append(f"${model.pricing.output:.2f}")
+
         display_list.append(
             {
-                "ID": model.id,
-                "Name": model.display_name,
-                "Organization": model.organization,
+                "Model": model.id,
                 "Type": model.type,
-                "Context Length": model.context_length,
-                "License": model.license,
-                "Input per 1M token": model.pricing.input if model.pricing else None,
-                "Output per 1M token": model.pricing.output if model.pricing else None,
+                "Context length": model.context_length if model.context_length else None,
+                "Price per 1M Tokens (input/output)": "/".join(price_parts),
            }
         )
 
-    if json:
-        click.echo(json_lib.dumps(display_list, indent=2))
-    else:
-        click.echo(tabulate(display_list, headers="keys", tablefmt="plain"))
+    click.echo(tabulate(display_list, headers="keys"))
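For reference, a minimal standalone sketch of the row and pricing formatting the table path now uses. The format_row helper and the sample model data are illustrative assumptions; only the column names and the rule "show a price string only when both input and output prices are non-zero" come from the diff above.

from typing import Any, Dict, List, Optional

from tabulate import tabulate


def format_row(model_id: str, model_type: str, context_length: Optional[int],
               price_input: Optional[float], price_output: Optional[float]) -> Dict[str, Any]:
    """Illustrative helper mirroring the row-building logic in the diff above."""
    price_parts: List[str] = []
    # Only show pricing when both values exist and are greater than zero.
    if price_input and price_output and price_input > 0 and price_output > 0:
        price_parts.append(f"${price_input:.2f}")
        price_parts.append(f"${price_output:.2f}")
    return {
        "Model": model_id,
        "Type": model_type,
        "Context length": context_length if context_length else None,
        "Price per 1M Tokens (input/output)": "/".join(price_parts),
    }


# Sample data for illustration only; real rows come from client.models.list().
rows = [
    format_row("meta-llama/Llama-3-8b-chat-hf", "chat", 8192, 0.20, 0.20),
    format_row("example-org/free-model", "chat", 4096, 0.0, 0.0),
]
print(tabulate(rows, headers="keys"))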

src/together/resources/endpoints.py

Lines changed: 2 additions & 2 deletions
@@ -81,7 +81,7 @@ def create(
 
           availability_zone: Create the endpoint in a specified availability zone (e.g., us-central-4b)
 
-          disable_prompt_cache: Whether to disable the prompt cache for this endpoint
+          disable_prompt_cache: This parameter is deprecated and no longer has any effect.
 
           disable_speculative_decoding: Whether to disable speculative decoding for this endpoint
 
@@ -375,7 +375,7 @@ async def create(
 
           availability_zone: Create the endpoint in a specified availability zone (e.g., us-central-4b)
 
-          disable_prompt_cache: Whether to disable the prompt cache for this endpoint
+          disable_prompt_cache: This parameter is deprecated and no longer has any effect.
 
           disable_speculative_decoding: Whether to disable speculative decoding for this endpoint

src/together/types/endpoint_create_params.py

Lines changed: 1 addition & 1 deletion
@@ -24,7 +24,7 @@ class EndpointCreateParams(TypedDict, total=False):
     """Create the endpoint in a specified availability zone (e.g., us-central-4b)"""
 
     disable_prompt_cache: bool
-    """Whether to disable the prompt cache for this endpoint"""
+    """This parameter is deprecated and no longer has any effect."""
 
     disable_speculative_decoding: bool
     """Whether to disable speculative decoding for this endpoint"""
