diff --git a/src/together/cli/api/endpoints.py b/src/together/cli/api/endpoints.py
index 25c3cd2..271f864 100644
--- a/src/together/cli/api/endpoints.py
+++ b/src/together/cli/api/endpoints.py
@@ -132,6 +132,10 @@ def endpoints(ctx: click.Context) -> None:
     type=int,
     help="Number of minutes of inactivity after which the endpoint will be automatically stopped. Set to 0 to disable.",
 )
+@click.option(
+    "--availability-zone",
+    help="Start endpoint in specified availability zone (e.g., us-central-4b)",
+)
 @click.option(
     "--wait",
     is_flag=True,
@@ -152,6 +156,7 @@ def create(
     no_speculative_decoding: bool,
     no_auto_start: bool,
     inactive_timeout: int | None,
+    availability_zone: str | None,
     wait: bool,
 ) -> None:
     """Create a new dedicated inference endpoint."""
@@ -177,6 +182,7 @@ def create(
             disable_speculative_decoding=no_speculative_decoding,
             state="STOPPED" if no_auto_start else "STARTED",
             inactive_timeout=inactive_timeout,
+            availability_zone=availability_zone,
         )
     except InvalidRequestError as e:
         print_api_error(e)
@@ -203,6 +209,8 @@ def create(
         click.echo(" Auto-start: disabled", err=True)
     if inactive_timeout is not None:
         click.echo(f" Inactive timeout: {inactive_timeout} minutes", err=True)
+    if availability_zone:
+        click.echo(f" Availability zone: {availability_zone}", err=True)
 
     click.echo(f"Endpoint created successfully, id: {response.id}", err=True)
 
@@ -449,3 +457,28 @@ def update(
 
     click.echo("Successfully updated endpoint", err=True)
     click.echo(endpoint_id)
+
+
+@endpoints.command()
+@click.option("--json", is_flag=True, help="Print output in JSON format")
+@click.pass_obj
+@handle_api_errors
+def availability_zones(client: Together, json: bool) -> None:
+    """List all availability zones."""
+    avzones = client.endpoints.list_avzones()
+
+    if json:
+        # The ``json`` flag parameter shadows the stdlib module, so alias it.
+        import json as json_lib
+
+        # Always emit a parseable document on stdout, even for an empty list.
+        click.echo(json_lib.dumps({"avzones": avzones}, indent=2))
+        return
+
+    if not avzones:
+        click.echo("No availability zones found", err=True)
+        return
+
+    click.echo("Available zones:", err=True)
+    for availability_zone in sorted(avzones):
+        click.echo(f" {availability_zone}")
diff --git a/src/together/resources/endpoints.py b/src/together/resources/endpoints.py
index 993f73d..ba84c01 100644
--- a/src/together/resources/endpoints.py
+++ b/src/together/resources/endpoints.py
@@ -76,6 +76,7 @@ def create(
         disable_speculative_decoding: bool = True,
         state: Literal["STARTED", "STOPPED"] = "STARTED",
         inactive_timeout: Optional[int] = None,
+        availability_zone: Optional[str] = None,
     ) -> DedicatedEndpoint:
         """
         Create a new dedicated endpoint.
@@ -90,6 +91,7 @@ def create(
             disable_speculative_decoding (bool, optional): Whether to disable speculative decoding. Defaults to False.
             state (str, optional): The desired state of the endpoint. Defaults to "STARTED".
             inactive_timeout (int, optional): The number of minutes of inactivity after which the endpoint will be automatically stopped. Set to 0 to disable automatic timeout.
+            availability_zone (str, optional): Start endpoint in specified availability zone (e.g., us-central-4b).
 
         Returns:
             DedicatedEndpoint: Object containing endpoint information
@@ -116,6 +118,9 @@ def create(
         if inactive_timeout is not None:
             data["inactive_timeout"] = inactive_timeout
 
+        if availability_zone is not None:
+            data["availability_zone"] = availability_zone
+
         response, _, _ = requestor.request(
             options=TogetherRequest(
                 method="POST",
@@ -273,6 +278,31 @@ def list_hardware(self, model: Optional[str] = None) -> List[HardwareWithStatus]
 
         return [HardwareWithStatus(**item) for item in response.data["data"]]
 
+    def list_avzones(self) -> List[str]:
+        """
+        List all availability zones.
+
+        Returns:
+            List[str]: Availability zone names taken from the "avzones" field of the API response
+        """
+        requestor = api_requestor.APIRequestor(
+            client=self._client,
+        )
+
+        response, _, _ = requestor.request(
+            options=TogetherRequest(
+                method="GET",
+                url="clusters/availability-zones",
+            ),
+            stream=False,
+        )
+
+        assert isinstance(response, TogetherResponse)
+        assert isinstance(response.data, dict)
+        assert isinstance(response.data["avzones"], list)
+
+        return response.data["avzones"]
+
 
 class AsyncEndpoints:
     def __init__(self, client: TogetherClient) -> None:
@@ -340,6 +370,7 @@ async def create(
         disable_speculative_decoding: bool = True,
         state: Literal["STARTED", "STOPPED"] = "STARTED",
         inactive_timeout: Optional[int] = None,
+        availability_zone: Optional[str] = None,
     ) -> DedicatedEndpoint:
         """
         Create a new dedicated endpoint.
@@ -380,6 +411,9 @@ async def create(
         if inactive_timeout is not None:
             data["inactive_timeout"] = inactive_timeout
 
+        if availability_zone is not None:
+            data["availability_zone"] = availability_zone
+
         response, _, _ = await requestor.arequest(
             options=TogetherRequest(
                 method="POST",
@@ -538,3 +572,28 @@ async def list_hardware(
         assert isinstance(response.data["data"], list)
 
         return [HardwareWithStatus(**item) for item in response.data["data"]]
+
+    async def list_avzones(self) -> List[str]:
+        """
+        List all availability zones.
+
+        Returns:
+            List[str]: Availability zone names taken from the "avzones" field of the API response
+        """
+        requestor = api_requestor.APIRequestor(
+            client=self._client,
+        )
+
+        response, _, _ = await requestor.arequest(
+            options=TogetherRequest(
+                method="GET",
+                url="clusters/availability-zones",
+            ),
+            stream=False,
+        )
+
+        assert isinstance(response, TogetherResponse)
+        assert isinstance(response.data, dict)
+        assert isinstance(response.data["avzones"], list)
+
+        return response.data["avzones"]