2 changes: 1 addition & 1 deletion pyproject.toml
@@ -12,7 +12,7 @@ build-backend = "poetry.masonry.api"

[tool.poetry]
name = "together"
version = "1.5.20"
version = "1.5.21"
authors = ["Together AI <[email protected]>"]
description = "Python client for Together's Cloud Platform!"
readme = "README.md"
2 changes: 2 additions & 0 deletions src/together/client.py
@@ -103,6 +103,7 @@ class AsyncTogether:
models: resources.AsyncModels
fine_tuning: resources.AsyncFineTuning
rerank: resources.AsyncRerank
audio: resources.AsyncAudio
code_interpreter: CodeInterpreter
batches: resources.AsyncBatches
# client options
@@ -167,6 +168,7 @@ def __init__(
self.models = resources.AsyncModels(self.client)
self.fine_tuning = resources.AsyncFineTuning(self.client)
self.rerank = resources.AsyncRerank(self.client)
self.audio = resources.AsyncAudio(self.client)
self.code_interpreter = CodeInterpreter(self.client)
self.batches = resources.AsyncBatches(self.client)
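
With audio wired into AsyncTogether above, the transcriptions resource becomes reachable from the async client. A minimal usage sketch, assuming TOGETHER_API_KEY is set in the environment; the file name and the response's text field are illustrative, not guaranteed by this diff:

import asyncio

from together import AsyncTogether


async def main() -> None:
    client = AsyncTogether()  # picks up TOGETHER_API_KEY from the environment

    # New in this PR: audio.transcriptions on the async client.
    result = await client.audio.transcriptions.create(
        file="sample.mp3",  # illustrative file name
        model="openai/whisper-large-v3",
    )
    print(result.text)  # assumed field on AudioTranscriptionResponse


asyncio.run(main())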

18 changes: 18 additions & 0 deletions src/together/resources/audio/__init__.py
@@ -1,6 +1,8 @@
from functools import cached_property

from together.resources.audio.speech import AsyncSpeech, Speech
from together.resources.audio.transcriptions import AsyncTranscriptions, Transcriptions
from together.resources.audio.translations import AsyncTranslations, Translations
from together.types import (
TogetherClient,
)
@@ -14,6 +16,14 @@ def __init__(self, client: TogetherClient) -> None:
def speech(self) -> Speech:
return Speech(self._client)

@cached_property
def transcriptions(self) -> Transcriptions:
return Transcriptions(self._client)

@cached_property
def translations(self) -> Translations:
return Translations(self._client)


class AsyncAudio:
def __init__(self, client: TogetherClient) -> None:
@@ -22,3 +32,11 @@ def __init__(self, client: TogetherClient) -> None:
@cached_property
def speech(self) -> AsyncSpeech:
return AsyncSpeech(self._client)

@cached_property
def transcriptions(self) -> AsyncTranscriptions:
return AsyncTranscriptions(self._client)

@cached_property
def translations(self) -> AsyncTranslations:
return AsyncTranslations(self._client)
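
Both Audio and AsyncAudio build their sub-resources lazily via functools.cached_property, so repeated attribute access returns the same instance instead of constructing a new one. The same pattern in isolation (class and attribute names here are illustrative):

from functools import cached_property


class Resources:
    @cached_property
    def transcriptions(self) -> object:
        # Constructed once on first access, then cached on the instance.
        return object()


resources = Resources()
assert resources.transcriptions is resources.transcriptions  # cached, not rebuilt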
266 changes: 266 additions & 0 deletions src/together/resources/audio/transcriptions.py
@@ -0,0 +1,266 @@
from __future__ import annotations

from pathlib import Path
from typing import Any, BinaryIO, Dict, Optional, Tuple, Union

from together.abstract import api_requestor
from together.types import (
AudioTimestampGranularities,
AudioTranscriptionResponse,
AudioTranscriptionResponseFormat,
AudioTranscriptionVerboseResponse,
TogetherClient,
TogetherRequest,
)


class Transcriptions:
def __init__(self, client: TogetherClient) -> None:
self._client = client

def create(
self,
*,
file: Union[str, BinaryIO, Path],
model: str = "openai/whisper-large-v3",
language: Optional[str] = None,
prompt: Optional[str] = None,
response_format: Union[str, AudioTranscriptionResponseFormat] = "json",
temperature: float = 0.0,
timestamp_granularities: Optional[
Union[str, AudioTimestampGranularities]
] = None,
**kwargs: Any,
) -> Union[AudioTranscriptionResponse, AudioTranscriptionVerboseResponse]:
"""
Transcribes audio into the input language.

Args:
            file: The audio file to transcribe, in one of these formats:
                flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm.
                Can be a local file path (str/Path), an open file object (BinaryIO), or a URL (str).
model: ID of the model to use. Defaults to "openai/whisper-large-v3".
language: The language of the input audio. Supplying the input language in
ISO-639-1 format will improve accuracy and latency.
            prompt: Optional text to guide the model's style or to continue a previous
                audio segment. The prompt should match the audio language.
response_format: The format of the transcript output, in one of these options:
json, verbose_json.
temperature: The sampling temperature, between 0 and 1. Higher values like 0.8
will make the output more random, while lower values like 0.2 will make it
more focused and deterministic.
            timestamp_granularities: The timestamp granularities to populate for this
                transcription. response_format must be set to verbose_json to use
                timestamp granularities. Either or both of these options are supported:
                word, segment.

Returns:
The transcribed text in the requested format.
"""

requestor = api_requestor.APIRequestor(
client=self._client,
)

# Handle file input - could be a path, URL, or file object
files_data: Dict[str, Union[Tuple[None, str], BinaryIO]] = {}
params_data = {}

if isinstance(file, (str, Path)):
if isinstance(file, str) and file.startswith(("http://", "https://")):
# URL string - send as multipart field
files_data["file"] = (None, file)
else:
# Local file path
file_path = Path(file)
files_data["file"] = open(file_path, "rb")
else:
# File object
files_data["file"] = file

# Build request parameters
params_data.update(
{
"model": model,
"response_format": (
response_format.value
if hasattr(response_format, "value")
else response_format
),
"temperature": temperature,
}
)

if language is not None:
params_data["language"] = language

if prompt is not None:
params_data["prompt"] = prompt

if timestamp_granularities is not None:
params_data["timestamp_granularities"] = (
timestamp_granularities.value
if hasattr(timestamp_granularities, "value")
else timestamp_granularities
)

# Add any additional kwargs
params_data.update(kwargs)

try:
response, _, _ = requestor.request(
options=TogetherRequest(
method="POST",
url="audio/transcriptions",
params=params_data,
files=files_data,
),
)
finally:
# Close file if we opened it
if files_data and "file" in files_data:
try:
# Only close if it's a file object (not a tuple for URL)
file_obj = files_data["file"]
if hasattr(file_obj, "close") and not isinstance(file_obj, tuple):
file_obj.close()
            except Exception:
                pass

# Parse response based on format
if (
response_format == "verbose_json"
or response_format == AudioTranscriptionResponseFormat.VERBOSE_JSON
):
return AudioTranscriptionVerboseResponse(**response.data)
else:
return AudioTranscriptionResponse(**response.data)
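
A usage sketch for the synchronous path, exercising the verbose_json branch. The file name is illustrative, and the segments attribute is an assumption about AudioTranscriptionVerboseResponse rather than something this diff guarantees:

from together import Together

client = Together()  # assumes TOGETHER_API_KEY is set in the environment

# Timestamp granularities require response_format="verbose_json" (see docstring).
result = client.audio.transcriptions.create(
    file="meeting.wav",  # local path; an http(s) URL string also works
    language="en",  # ISO-639-1 hint improves accuracy and latency
    response_format="verbose_json",
    timestamp_granularities="segment",
)

for segment in result.segments or []:  # assumed field on the verbose response
    print(segment)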


class AsyncTranscriptions:
def __init__(self, client: TogetherClient) -> None:
self._client = client

async def create(
self,
*,
file: Union[str, BinaryIO, Path],
model: str = "openai/whisper-large-v3",
language: Optional[str] = None,
prompt: Optional[str] = None,
response_format: Union[str, AudioTranscriptionResponseFormat] = "json",
temperature: float = 0.0,
timestamp_granularities: Optional[
Union[str, AudioTimestampGranularities]
] = None,
**kwargs: Any,
) -> Union[AudioTranscriptionResponse, AudioTranscriptionVerboseResponse]:
"""
        Asynchronously transcribes audio into the input language.

Args:
            file: The audio file to transcribe, in one of these formats:
                flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm.
                Can be a local file path (str/Path), an open file object (BinaryIO), or a URL (str).
model: ID of the model to use. Defaults to "openai/whisper-large-v3".
language: The language of the input audio. Supplying the input language in
ISO-639-1 format will improve accuracy and latency.
            prompt: Optional text to guide the model's style or to continue a previous
                audio segment. The prompt should match the audio language.
response_format: The format of the transcript output, in one of these options:
json, verbose_json.
temperature: The sampling temperature, between 0 and 1. Higher values like 0.8
will make the output more random, while lower values like 0.2 will make it
more focused and deterministic.
            timestamp_granularities: The timestamp granularities to populate for this
                transcription. response_format must be set to verbose_json to use
                timestamp granularities. Either or both of these options are supported:
                word, segment.

Returns:
The transcribed text in the requested format.
"""

requestor = api_requestor.APIRequestor(
client=self._client,
)

# Handle file input - could be a path, URL, or file object
files_data: Dict[str, Union[Tuple[None, str], BinaryIO]] = {}
params_data = {}

if isinstance(file, (str, Path)):
if isinstance(file, str) and file.startswith(("http://", "https://")):
# URL string - send as multipart field
files_data["file"] = (None, file)
else:
# Local file path
file_path = Path(file)
files_data["file"] = open(file_path, "rb")
else:
# File object
files_data["file"] = file

# Build request parameters
params_data.update(
{
"model": model,
"response_format": (
response_format
if isinstance(response_format, str)
else (
response_format.value
if hasattr(response_format, "value")
else response_format
)
),
"temperature": temperature,
}
)

if language is not None:
params_data["language"] = language

if prompt is not None:
params_data["prompt"] = prompt

if timestamp_granularities is not None:
params_data["timestamp_granularities"] = (
timestamp_granularities
if isinstance(timestamp_granularities, str)
else (
timestamp_granularities.value
if hasattr(timestamp_granularities, "value")
else timestamp_granularities
)
)

# Add any additional kwargs
params_data.update(kwargs)

try:
response, _, _ = await requestor.arequest(
options=TogetherRequest(
method="POST",
url="audio/transcriptions",
params=params_data,
files=files_data,
),
)
finally:
# Close file if we opened it
if files_data and "file" in files_data:
try:
# Only close if it's a file object (not a tuple for URL)
file_obj = files_data["file"]
if hasattr(file_obj, "close") and not isinstance(file_obj, tuple):
file_obj.close()
            except Exception:
                pass

# Parse response based on format
if (
response_format == "verbose_json"
or response_format == AudioTranscriptionResponseFormat.VERBOSE_JSON
):
return AudioTranscriptionVerboseResponse(**response.data)
else:
return AudioTranscriptionResponse(**response.data)
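
A matching sketch for the async path. Because each create call is an independent POST, requests can be fanned out with asyncio.gather; the file names and the text field are again illustrative assumptions:

import asyncio

from together import AsyncTogether


async def transcribe_all(paths: list[str]) -> list[str]:
    client = AsyncTogether()  # assumes TOGETHER_API_KEY is set in the environment
    # Issue all transcription requests concurrently.
    results = await asyncio.gather(
        *(client.audio.transcriptions.create(file=path) for path in paths)
    )
    return [result.text for result in results]  # text assumed, per the json default


print(asyncio.run(transcribe_all(["a.mp3", "b.mp3"])))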