togethercomputer
diff --git a/‎scripts/openapi.yaml‎
Lines changed: 22 additions & 57 deletions b/‎scripts/openapi.yaml‎
Lines changed: 22 additions & 57 deletions
diff --git a/‎src/together/cli/api/endpoints.py‎
Lines changed: 151 additions & 30 deletions b/‎src/together/cli/api/endpoints.py‎
Lines changed: 151 additions & 30 deletions
diff --git a/‎src/together/generated/__init__.py‎
Lines changed: 0 additions & 12 deletions b/‎src/together/generated/__init__.py‎
Lines changed: 0 additions & 12 deletions
@@ -934,75 +934,40 @@ paths:
     get:
       tags: ["Hardware"]
       summary: List available hardware configurations
-      description: Returns a list of available hardware configurations for deploying models. When a model parameter is provided, it returns only hardware configurations compatible with that model, including their current availability status.
+      description: >
+        Returns a list of available hardware configurations for deploying models.
+        When a model parameter is provided, it returns only hardware configurations compatible
+        with that model, including their current availability status.
       operationId: listHardware
       parameters:
         - name: model
           in: query
           required: false
           schema:
             type: string
-          description: Filter hardware configurations by model compatibility
+          description: >
+            Filter hardware configurations by model compatibility. When provided,
+            the response includes availability status for each compatible configuration.
           example: meta-llama/Llama-3-70b-chat-hf
       responses:
         "200":
           description: "List of available hardware configurations"
           content:
             application/json:
               schema:
-                oneOf:
-                  - type: object
-                    description: Response when no model filter is provided
-                    required:
-                      - object
-                      - data
-                    properties:
-                      object:
-                        type: string
-                        enum:
-                          - list
-                      data:
-                        type: array
-                        items:
-                          allOf:
-                            - $ref: "#/components/schemas/HardwareWithStatus"
-                            - type: object
-                              properties:
-                                availability:
-                                  not: {}
-                  - type: object
-                    description: Response when model filter is provided
-                    required:
-                      - object
-                      - data
-                    properties:
-                      object:
-                        type: string
-                        enum:
-                          - list
-                      data:
-                        type: array
-                        items:
-                          allOf:
-                            - $ref: "#/components/schemas/HardwareWithStatus"
-                            - type: object
-                              required:
-                                - availability
-                example:
-                  object: "list"
+                type: object
+                required:
+                  - object
+                  - data
+                properties:
+                  object:
+                    type: string
+                    enum:
+                      - list
                   data:
-                    - object: "hardware"
-                      name: "2x_nvidia_a100_80gb_sxm"
-                      pricing:
-                        input: 0
-                        output: 0
-                        cents_per_minute: 5.42
-                      specs:
-                        gpu_type: "a100-80gb"
-                        gpu_link: "sxm"
-                        gpu_memory: 80
-                        gpu_count: 2
-                      updated_at: "2024-01-01T00:00:00Z"
+                    type: array
+                    items:
+                      $ref: "#/components/schemas/HardwareWithStatus"
         "403":
           description: "Unauthorized"
           content:
@@ -2646,10 +2611,10 @@ components:
 
     HardwareWithStatus:
       type: object
-      description: Hardware configuration details including current availability status
+      description: Hardware configuration details with optional availability status
       required:
         - object
-        - name
+        - id
         - pricing
         - specs
         - updated_at
@@ -2658,7 +2623,7 @@ components:
           type: string
           enum:
             - hardware
-        name:
+        id:
           type: string
           description: Unique identifier for the hardware configuration
           examples:
 
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+import json
 import sys
 from functools import wraps
 from typing import Any, Callable, Dict, List, Literal, TypeVar, Union
@@ -8,7 +9,12 @@
 
 from together import Together
 from together.error import AuthenticationError, InvalidRequestError
-from together.generated.exceptions import ForbiddenException, ServiceException
+from together.generated.exceptions import (
+    BadRequestException,
+    ForbiddenException,
+    NotFoundException,
+    ServiceException,
+)
 from together.types import DedicatedEndpoint, ListEndpoint
 
 
@@ -67,27 +73,43 @@ def print_endpoint(
 F = TypeVar("F", bound=Callable[..., Any])
 
 
+def print_api_error(
+    e: Union[
+        ForbiddenException, NotFoundException, BadRequestException, ServiceException
+    ],
+) -> None:
+    """Print a user-facing message for an API exception to stderr.
+
+    The message is read from the parsed error model (``e.data``) when it is
+    set, otherwise from the raw JSON body (``e.body``). If neither yields a
+    non-empty string, ``str(e)`` is used instead. Messages that mention
+    credentials or authentication are replaced by a generic API-key hint.
+
+    Args:
+        e: The exception raised by the generated API client.
+    """
+    message: Any = None
+    try:
+        if e.data is not None:
+            message = e.data.to_dict()["error"]["message"]
+        elif e.body:
+            message = json.loads(e.body)["error"]["message"]
+    except (ValueError, KeyError, TypeError):
+        # The payload is not JSON (json.JSONDecodeError is a ValueError) or is
+        # not shaped like {"error": {"message": ...}}. Reporting an error must
+        # not raise a second one, so fall back to the exception text below.
+        message = None
+
+    # Guard against a missing, empty or non-string message before lowercasing.
+    error_details = message if isinstance(message, str) and message else str(e)
+
+    lowered = error_details.lower()
+    if "credentials" in lowered or "authentication" in lowered:
+        click.echo("Error: Invalid API key or authentication failed", err=True)
+    else:
+        click.echo(f"Error: {error_details}", err=True)
+
+
 def handle_api_errors(f: F) -> F:
     """Decorator to handle common API errors in CLI commands."""
 
     @wraps(f)
     def wrapper(*args: Any, **kwargs: Any) -> Any:
         try:
             return f(*args, **kwargs)
-        except (ForbiddenException, ServiceException) as e:
-            error_details = ""
-            if e.data is not None:
-                error_details = e.data.to_dict()["error"]["message"]
-            else:
-                error_details = str(e)
-
-            if (
-                "credentials" in error_details.lower()
-                or "authentication" in error_details.lower()
-            ):
-                click.echo("Error: Invalid API key or authentication failed", err=True)
-            else:
-                click.echo(f"Error: {error_details}", err=True)
+        except (
+            ForbiddenException,
+            NotFoundException,
+            BadRequestException,
+            ServiceException,
+        ) as e:
+            print_api_error(e)
+
             sys.exit(1)
         except AuthenticationError as e:
             click.echo(f"Error details: {str(e)}", err=True)
@@ -160,6 +182,12 @@ def endpoints(ctx: click.Context) -> None:
     is_flag=True,
     help="Create the endpoint in STOPPED state instead of auto-starting it",
 )
+@click.option(
+    "--wait",
+    is_flag=True,
+    default=True,
+    help="Wait for the endpoint to be ready after creation",
+)
 @click.pass_obj
 @handle_api_errors
 def create(
@@ -173,6 +201,7 @@ def create(
     no_prompt_cache: bool,
     no_speculative_decoding: bool,
     no_auto_start: bool,
+    wait: bool,
 ) -> None:
     """Create a new dedicated inference endpoint."""
     # Map GPU types to their full hardware ID names
@@ -186,16 +215,26 @@ def create(
 
     hardware_id = f"{gpu_count}x_{gpu_map[gpu]}"
 
-    response = client.endpoints.create(
-        model=model,
-        hardware=hardware_id,
-        min_replicas=min_replicas,
-        max_replicas=max_replicas,
-        display_name=display_name,
-        disable_prompt_cache=no_prompt_cache,
-        disable_speculative_decoding=no_speculative_decoding,
-        state="STOPPED" if no_auto_start else "STARTED",
-    )
+    try:
+        response = client.endpoints.create(
+            model=model,
+            hardware=hardware_id,
+            min_replicas=min_replicas,
+            max_replicas=max_replicas,
+            display_name=display_name,
+            disable_prompt_cache=no_prompt_cache,
+            disable_speculative_decoding=no_speculative_decoding,
+            state="STOPPED" if no_auto_start else "STARTED",
+        )
+    except NotFoundException as e:
+        if "check the hardware api" in str(e).lower():
+            print_api_error(e)
+            fetch_and_print_hardware_options(
+                client=client, model=model, print_json=False, available=True
+            )
+            sys.exit(1)
+
+        raise e
 
     # Print detailed information to stderr
     click.echo("Created dedicated endpoint with:", err=True)
@@ -212,7 +251,16 @@ def create(
     if no_auto_start:
         click.echo("  Auto-start: disabled", err=True)
 
-    click.echo("Endpoint created successfully, id: ", err=True)
+    click.echo("Endpoint created successfully", err=True)
+
+    if wait:
+        import time
+
+        click.echo("Waiting for endpoint to be ready...", err=True)
+        while client.endpoints.get(response.id).state != "STARTED":
+            time.sleep(1)
+        click.echo("Endpoint ready", err=True)
+
     # Print only the endpoint ID to stdout
     click.echo(response.id)
 
@@ -228,25 +276,98 @@ def get(client: Together, endpoint_id: str, json: bool) -> None:
     print_endpoint(endpoint, json=json)
 
 
+@endpoints.command()
+@click.option("--model", help="Filter hardware options by model")
+@click.option("--json", is_flag=True, help="Print output in JSON format")
+@click.option(
+    "--available",
+    is_flag=True,
+    help="Print only available hardware options (can only be used if model is passed in)",
+)
+@click.pass_obj
+@handle_api_errors
+def hardware(client: Together, model: str | None, json: bool, available: bool) -> None:
+    """List all available hardware options, optionally filtered by model."""
+    # `json` is the --json flag here and shadows the json module inside this
+    # function; the helper below does the fetching and printing.
+    fetch_and_print_hardware_options(
+        client=client, model=model, print_json=json, available=available
+    )
+
+
+def fetch_and_print_hardware_options(
+    client: Together, model: str | None, print_json: bool, available: bool
+) -> None:
+    """Fetch hardware options and print them.
+
+    A heading is written to stderr first. The options are then fetched with
+    ``client.endpoints.list_hardware(model)`` and, when ``available`` is set,
+    narrowed to entries whose availability status equals ``"available"``.
+
+    Args:
+        client: API client used to list hardware.
+        model: Optional model name passed through to the hardware listing.
+        print_json: When true, print a JSON array to stdout; otherwise print
+            one hardware id per line to stderr.
+        available: When true, keep only options reported as available.
+    """
+    if available:
+        click.echo("Available hardware options:", err=True)
+    else:
+        click.echo("All hardware options:", err=True)
+
+    options = client.endpoints.list_hardware(model)
+    if available:
+        # Entries without availability information are dropped as well.
+        options = [
+            option
+            for option in options
+            if option.availability is not None
+            and option.availability.status == "available"
+        ]
+
+    if not print_json:
+        for option in options:
+            click.echo(f"  {option.id}", err=True)
+        return
+
+    payload = []
+    for option in options:
+        payload.append(
+            {
+                "id": option.id,
+                "pricing": option.pricing.to_dict(),
+                "specs": option.specs.to_dict(),
+                "availability": (
+                    option.availability.to_dict() if option.availability else None
+                ),
+            }
+        )
+    click.echo(json.dumps(payload, indent=2))
+
+
 @endpoints.command()
 @click.argument("endpoint-id", required=True)
+# Declared as an on/off pair: a plain flag with default=True could never be
+# turned off, so the command would always block.
+@click.option(
+    "--wait/--no-wait",
+    default=True,
+    help="Wait for the endpoint to stop",
+)
 @click.pass_obj
 @handle_api_errors
-def stop(client: Together, endpoint_id: str) -> None:
+def stop(client: Together, endpoint_id: str, wait: bool) -> None:
     """Stop a dedicated inference endpoint."""
     client.endpoints.update(endpoint_id, state="STOPPED")
-    click.echo("Successfully stopped endpoint", err=True)
+    click.echo("Successfully marked endpoint as stopping", err=True)
+
+    if wait:
+        import time
+
+        click.echo("Waiting for endpoint to stop...", err=True)
+        # Poll once per second until the reported state is STOPPED.
+        # NOTE: there is no timeout; this loops until the state matches.
+        while client.endpoints.get(endpoint_id).state != "STOPPED":
+            time.sleep(1)
+        click.echo("Endpoint stopped", err=True)
+
     click.echo(endpoint_id)
 
 
 @endpoints.command()
 @click.argument("endpoint-id", required=True)
+# Declared as an on/off pair: a plain flag with default=True could never be
+# turned off, so the command would always block.
+@click.option(
+    "--wait/--no-wait",
+    default=True,
+    help="Wait for the endpoint to start",
+)
 @click.pass_obj
 @handle_api_errors
-def start(client: Together, endpoint_id: str) -> None:
+def start(client: Together, endpoint_id: str, wait: bool) -> None:
     """Start a dedicated inference endpoint."""
     client.endpoints.update(endpoint_id, state="STARTED")
-    click.echo("Successfully started endpoint", err=True)
+    click.echo("Successfully marked endpoint as starting", err=True)
+
+    if wait:
+        import time
+
+        click.echo("Waiting for endpoint to start...", err=True)
+        # Poll once per second until the reported state is STARTED.
+        # NOTE: there is no timeout; this loops until the state matches.
+        while client.endpoints.get(endpoint_id).state != "STARTED":
+            time.sleep(1)
+        click.echo("Endpoint started", err=True)
+
     click.echo(endpoint_id)
 
 
 
@@ -188,18 +188,6 @@
     ListEndpoints200Response,
 )
 from together.generated.models.list_hardware200_response import ListHardware200Response
-from together.generated.models.list_hardware200_response_one_of import (
-    ListHardware200ResponseOneOf,
-)
-from together.generated.models.list_hardware200_response_one_of1 import (
-    ListHardware200ResponseOneOf1,
-)
-from together.generated.models.list_hardware200_response_one_of1_data_inner import (
-    ListHardware200ResponseOneOf1DataInner,
-)
-from together.generated.models.list_hardware200_response_one_of_data_inner import (
-    ListHardware200ResponseOneOfDataInner,
-)
 from together.generated.models.lo_ra_training_type import LoRATrainingType
 from together.generated.models.logprobs_part import LogprobsPart
 from together.generated.models.model_info import ModelInfo