Skip to content

Commit a10e18a

Browse files
committed
add hardware call and --wait option
1 parent 33b6c81 commit a10e18a

25 files changed

+340
-1191
lines changed

scripts/openapi.yaml

Lines changed: 22 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -934,75 +934,40 @@ paths:
934934
get:
935935
tags: ["Hardware"]
936936
summary: List available hardware configurations
937-
description: Returns a list of available hardware configurations for deploying models. When a model parameter is provided, it returns only hardware configurations compatible with that model, including their current availability status.
937+
description: >
938+
Returns a list of available hardware configurations for deploying models.
939+
When a model parameter is provided, it returns only hardware configurations compatible
940+
with that model, including their current availability status.
938941
operationId: listHardware
939942
parameters:
940943
- name: model
941944
in: query
942945
required: false
943946
schema:
944947
type: string
945-
description: Filter hardware configurations by model compatibility
948+
description: >
949+
Filter hardware configurations by model compatibility. When provided,
950+
the response includes availability status for each compatible configuration.
946951
example: meta-llama/Llama-3-70b-chat-hf
947952
responses:
948953
"200":
949954
description: "List of available hardware configurations"
950955
content:
951956
application/json:
952957
schema:
953-
oneOf:
954-
- type: object
955-
description: Response when no model filter is provided
956-
required:
957-
- object
958-
- data
959-
properties:
960-
object:
961-
type: string
962-
enum:
963-
- list
964-
data:
965-
type: array
966-
items:
967-
allOf:
968-
- $ref: "#/components/schemas/HardwareWithStatus"
969-
- type: object
970-
properties:
971-
availability:
972-
not: {}
973-
- type: object
974-
description: Response when model filter is provided
975-
required:
976-
- object
977-
- data
978-
properties:
979-
object:
980-
type: string
981-
enum:
982-
- list
983-
data:
984-
type: array
985-
items:
986-
allOf:
987-
- $ref: "#/components/schemas/HardwareWithStatus"
988-
- type: object
989-
required:
990-
- availability
991-
example:
992-
object: "list"
958+
type: object
959+
required:
960+
- object
961+
- data
962+
properties:
963+
object:
964+
type: string
965+
enum:
966+
- list
993967
data:
994-
- object: "hardware"
995-
name: "2x_nvidia_a100_80gb_sxm"
996-
pricing:
997-
input: 0
998-
output: 0
999-
cents_per_minute: 5.42
1000-
specs:
1001-
gpu_type: "a100-80gb"
1002-
gpu_link: "sxm"
1003-
gpu_memory: 80
1004-
gpu_count: 2
1005-
updated_at: "2024-01-01T00:00:00Z"
968+
type: array
969+
items:
970+
$ref: "#/components/schemas/HardwareWithStatus"
1006971
"403":
1007972
description: "Unauthorized"
1008973
content:
@@ -2646,10 +2611,10 @@ components:
26462611

26472612
HardwareWithStatus:
26482613
type: object
2649-
description: Hardware configuration details including current availability status
2614+
description: Hardware configuration details with optional availability status
26502615
required:
26512616
- object
2652-
- name
2617+
- id
26532618
- pricing
26542619
- specs
26552620
- updated_at
@@ -2658,7 +2623,7 @@ components:
26582623
type: string
26592624
enum:
26602625
- hardware
2661-
name:
2626+
id:
26622627
type: string
26632628
description: Unique identifier for the hardware configuration
26642629
examples:

src/together/cli/api/endpoints.py

Lines changed: 151 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from __future__ import annotations
22

3+
import json
34
import sys
45
from functools import wraps
56
from typing import Any, Callable, Dict, List, Literal, TypeVar, Union
@@ -8,7 +9,12 @@
89

910
from together import Together
1011
from together.error import AuthenticationError, InvalidRequestError
11-
from together.generated.exceptions import ForbiddenException, ServiceException
12+
from together.generated.exceptions import (
13+
BadRequestException,
14+
ForbiddenException,
15+
NotFoundException,
16+
ServiceException,
17+
)
1218
from together.types import DedicatedEndpoint, ListEndpoint
1319

1420

@@ -67,27 +73,43 @@ def print_endpoint(
6773
F = TypeVar("F", bound=Callable[..., Any])
6874

6975

76+
def print_api_error(
    e: Union[
        ForbiddenException, NotFoundException, BadRequestException, ServiceException
    ],
) -> None:
    """Print a user-facing message for a generated-client API exception to stderr.

    The message is taken from the exception's structured payload
    (``e.data``) when present, otherwise from the raw response body
    (``e.body``) parsed as JSON. If neither yields a usable
    ``error.message`` string, ``str(e)`` is printed instead.

    Messages mentioning "credentials" or "authentication" are replaced by a
    fixed invalid-API-key hint.

    Args:
        e: The API exception raised by the generated client.
    """
    error_details: Any = ""
    try:
        if e.data is not None:
            error_details = e.data.to_dict()["error"]["message"]
        elif e.body:
            error_details = json.loads(e.body)["error"]["message"]
    except (ValueError, KeyError, TypeError):
        # The body may not be JSON (json.JSONDecodeError is a ValueError) or may
        # not follow the {"error": {"message": ...}} shape. Reporting the error
        # must never raise a second, unrelated exception.
        error_details = ""

    if not isinstance(error_details, str) or not error_details:
        error_details = str(e)

    lowered = error_details.lower()
    if "credentials" in lowered or "authentication" in lowered:
        click.echo("Error: Invalid API key or authentication failed", err=True)
    else:
        click.echo(f"Error: {error_details}", err=True)
97+
7098
def handle_api_errors(f: F) -> F:
7199
"""Decorator to handle common API errors in CLI commands."""
72100

73101
@wraps(f)
74102
def wrapper(*args: Any, **kwargs: Any) -> Any:
75103
try:
76104
return f(*args, **kwargs)
77-
except (ForbiddenException, ServiceException) as e:
78-
error_details = ""
79-
if e.data is not None:
80-
error_details = e.data.to_dict()["error"]["message"]
81-
else:
82-
error_details = str(e)
83-
84-
if (
85-
"credentials" in error_details.lower()
86-
or "authentication" in error_details.lower()
87-
):
88-
click.echo("Error: Invalid API key or authentication failed", err=True)
89-
else:
90-
click.echo(f"Error: {error_details}", err=True)
105+
except (
106+
ForbiddenException,
107+
NotFoundException,
108+
BadRequestException,
109+
ServiceException,
110+
) as e:
111+
print_api_error(e)
112+
91113
sys.exit(1)
92114
except AuthenticationError as e:
93115
click.echo(f"Error details: {str(e)}", err=True)
@@ -160,6 +182,12 @@ def endpoints(ctx: click.Context) -> None:
160182
is_flag=True,
161183
help="Create the endpoint in STOPPED state instead of auto-starting it",
162184
)
185+
@click.option(
186+
"--wait",
187+
is_flag=True,
188+
default=True,
189+
help="Wait for the endpoint to be ready after creation",
190+
)
163191
@click.pass_obj
164192
@handle_api_errors
165193
def create(
@@ -173,6 +201,7 @@ def create(
173201
no_prompt_cache: bool,
174202
no_speculative_decoding: bool,
175203
no_auto_start: bool,
204+
wait: bool,
176205
) -> None:
177206
"""Create a new dedicated inference endpoint."""
178207
# Map GPU types to their full hardware ID names
@@ -186,16 +215,26 @@ def create(
186215

187216
hardware_id = f"{gpu_count}x_{gpu_map[gpu]}"
188217

189-
response = client.endpoints.create(
190-
model=model,
191-
hardware=hardware_id,
192-
min_replicas=min_replicas,
193-
max_replicas=max_replicas,
194-
display_name=display_name,
195-
disable_prompt_cache=no_prompt_cache,
196-
disable_speculative_decoding=no_speculative_decoding,
197-
state="STOPPED" if no_auto_start else "STARTED",
198-
)
218+
try:
219+
response = client.endpoints.create(
220+
model=model,
221+
hardware=hardware_id,
222+
min_replicas=min_replicas,
223+
max_replicas=max_replicas,
224+
display_name=display_name,
225+
disable_prompt_cache=no_prompt_cache,
226+
disable_speculative_decoding=no_speculative_decoding,
227+
state="STOPPED" if no_auto_start else "STARTED",
228+
)
229+
except NotFoundException as e:
230+
if "check the hardware api" in str(e).lower():
231+
print_api_error(e)
232+
fetch_and_print_hardware_options(
233+
client=client, model=model, print_json=False, available=True
234+
)
235+
sys.exit(1)
236+
237+
raise e
199238

200239
# Print detailed information to stderr
201240
click.echo("Created dedicated endpoint with:", err=True)
@@ -212,7 +251,16 @@ def create(
212251
if no_auto_start:
213252
click.echo(" Auto-start: disabled", err=True)
214253

215-
click.echo("Endpoint created successfully, id: ", err=True)
254+
click.echo("Endpoint created successfully", err=True)
255+
256+
if wait:
257+
import time
258+
259+
click.echo("Waiting for endpoint to be ready...", err=True)
260+
while client.endpoints.get(response.id).state != "STARTED":
261+
time.sleep(1)
262+
click.echo("Endpoint ready", err=True)
263+
216264
# Print only the endpoint ID to stdout
217265
click.echo(response.id)
218266

@@ -228,25 +276,98 @@ def get(client: Together, endpoint_id: str, json: bool) -> None:
228276
print_endpoint(endpoint, json=json)
229277

230278

279+
@endpoints.command()
@click.option("--model", help="Filter hardware options by model")
@click.option("--json", is_flag=True, help="Print output in JSON format")
@click.option(
    "--available",
    is_flag=True,
    help="Print only available hardware options (can only be used if model is passed in)",
)
@click.pass_obj
@handle_api_errors
def hardware(client: Together, model: str | None, json: bool, available: bool) -> None:
    """List all available hardware options, optionally filtered by model."""
    # `json` here is the --json flag value; it shadows the json module inside
    # this function, so the actual work is delegated to the helper.
    fetch_and_print_hardware_options(
        client=client,
        model=model,
        print_json=json,
        available=available,
    )
292+
293+
294+
def fetch_and_print_hardware_options(
    client: Together, model: str | None, print_json: bool, available: bool
) -> None:
    """Fetch hardware configurations and print them.

    A header line is written to stderr first. The list comes from
    ``client.endpoints.list_hardware(model)``. When ``available`` is set,
    only entries whose availability status is ``"available"`` are kept.
    With ``print_json`` the entries are written to stdout as a JSON array;
    otherwise one id per line is written to stderr.
    """
    if available:
        header = "Available hardware options:"
    else:
        header = "All hardware options:"
    click.echo(header, err=True)

    options = client.endpoints.list_hardware(model)
    if available:
        # Entries without availability info cannot be confirmed as available.
        options = [
            option
            for option in options
            if option.availability is not None
            and option.availability.status == "available"
        ]

    if not print_json:
        for option in options:
            click.echo(f" {option.id}", err=True)
        return

    rows = []
    for option in options:
        rows.append(
            {
                "id": option.id,
                "pricing": option.pricing.to_dict(),
                "specs": option.specs.to_dict(),
                "availability": (
                    option.availability.to_dict() if option.availability else None
                ),
            }
        )
    click.echo(json.dumps(rows, indent=2))
326+
327+
231328
@endpoints.command()
@click.argument("endpoint-id", required=True)
@click.option(
    # A bare `--wait` flag with default=True gives no clear way to opt out of
    # waiting; the paired form keeps the default and adds `--no-wait`.
    "--wait/--no-wait",
    default=True,
    help="Wait for the endpoint to stop",
)
@click.pass_obj
@handle_api_errors
def stop(client: Together, endpoint_id: str, wait: bool) -> None:
    """Stop a dedicated inference endpoint."""
    # Request the state change; the endpoint may not be stopped yet when this
    # call returns, which is why the optional polling below exists.
    client.endpoints.update(endpoint_id, state="STOPPED")
    click.echo("Successfully marked endpoint as stopping", err=True)

    if wait:
        import time

        click.echo("Waiting for endpoint to stop...", err=True)
        # NOTE: polls once per second with no timeout; use --no-wait to skip.
        while client.endpoints.get(endpoint_id).state != "STOPPED":
            time.sleep(1)
        click.echo("Endpoint stopped", err=True)

    # Only the endpoint ID goes to stdout so the output stays script-friendly.
    click.echo(endpoint_id)
240349

241350

242351
@endpoints.command()
@click.argument("endpoint-id", required=True)
@click.option(
    # A bare `--wait` flag with default=True gives no clear way to opt out of
    # waiting; the paired form keeps the default and adds `--no-wait`.
    "--wait/--no-wait",
    default=True,
    help="Wait for the endpoint to start",
)
@click.pass_obj
@handle_api_errors
def start(client: Together, endpoint_id: str, wait: bool) -> None:
    """Start a dedicated inference endpoint."""
    # Request the state change; the endpoint may not be started yet when this
    # call returns, which is why the optional polling below exists.
    client.endpoints.update(endpoint_id, state="STARTED")
    click.echo("Successfully marked endpoint as starting", err=True)

    if wait:
        import time

        click.echo("Waiting for endpoint to start...", err=True)
        # NOTE: polls once per second with no timeout; use --no-wait to skip.
        while client.endpoints.get(endpoint_id).state != "STARTED":
            time.sleep(1)
        click.echo("Endpoint started", err=True)

    # Only the endpoint ID goes to stdout so the output stays script-friendly.
    click.echo(endpoint_id)
251372

252373

src/together/generated/__init__.py

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -188,18 +188,6 @@
188188
ListEndpoints200Response,
189189
)
190190
from together.generated.models.list_hardware200_response import ListHardware200Response
191-
from together.generated.models.list_hardware200_response_one_of import (
192-
ListHardware200ResponseOneOf,
193-
)
194-
from together.generated.models.list_hardware200_response_one_of1 import (
195-
ListHardware200ResponseOneOf1,
196-
)
197-
from together.generated.models.list_hardware200_response_one_of1_data_inner import (
198-
ListHardware200ResponseOneOf1DataInner,
199-
)
200-
from together.generated.models.list_hardware200_response_one_of_data_inner import (
201-
ListHardware200ResponseOneOfDataInner,
202-
)
203191
from together.generated.models.lo_ra_training_type import LoRATrainingType
204192
from together.generated.models.logprobs_part import LogprobsPart
205193
from together.generated.models.model_info import ModelInfo

0 commit comments

Comments
 (0)