Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ build-backend = "poetry.masonry.api"

[tool.poetry]
name = "together"
version = "1.5.20"
version = "1.5.21"
authors = ["Together AI <[email protected]>"]
description = "Python client for Together's Cloud Platform!"
readme = "README.md"
Expand Down
2 changes: 2 additions & 0 deletions src/together/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@ class AsyncTogether:
models: resources.AsyncModels
fine_tuning: resources.AsyncFineTuning
rerank: resources.AsyncRerank
audio: resources.AsyncAudio
code_interpreter: CodeInterpreter
batches: resources.AsyncBatches
# client options
Expand Down Expand Up @@ -167,6 +168,7 @@ def __init__(
self.models = resources.AsyncModels(self.client)
self.fine_tuning = resources.AsyncFineTuning(self.client)
self.rerank = resources.AsyncRerank(self.client)
self.audio = resources.AsyncAudio(self.client)
self.code_interpreter = CodeInterpreter(self.client)
self.batches = resources.AsyncBatches(self.client)

Expand Down
18 changes: 18 additions & 0 deletions src/together/resources/audio/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
from functools import cached_property

from together.resources.audio.speech import AsyncSpeech, Speech
from together.resources.audio.transcriptions import AsyncTranscriptions, Transcriptions
from together.resources.audio.translations import AsyncTranslations, Translations
from together.types import (
TogetherClient,
)
Expand All @@ -14,6 +16,14 @@ def __init__(self, client: TogetherClient) -> None:
def speech(self) -> Speech:
return Speech(self._client)

@cached_property
def transcriptions(self) -> Transcriptions:
return Transcriptions(self._client)

@cached_property
def translations(self) -> Translations:
return Translations(self._client)


class AsyncAudio:
def __init__(self, client: TogetherClient) -> None:
Expand All @@ -22,3 +32,11 @@ def __init__(self, client: TogetherClient) -> None:
@cached_property
def speech(self) -> AsyncSpeech:
return AsyncSpeech(self._client)

@cached_property
def transcriptions(self) -> AsyncTranscriptions:
return AsyncTranscriptions(self._client)

@cached_property
def translations(self) -> AsyncTranslations:
return AsyncTranslations(self._client)
266 changes: 266 additions & 0 deletions src/together/resources/audio/transcriptions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,266 @@
from __future__ import annotations

from pathlib import Path
from typing import Any, BinaryIO, Dict, Optional, Tuple, Union

from together.abstract import api_requestor
from together.types import (
AudioTimestampGranularities,
AudioTranscriptionResponse,
AudioTranscriptionResponseFormat,
AudioTranscriptionVerboseResponse,
TogetherClient,
TogetherRequest,
)


class Transcriptions:
def __init__(self, client: TogetherClient) -> None:
self._client = client

def create(
self,
*,
file: Union[str, BinaryIO, Path],
model: str = "openai/whisper-large-v3",
language: Optional[str] = None,
prompt: Optional[str] = None,
response_format: Union[str, AudioTranscriptionResponseFormat] = "json",
temperature: float = 0.0,
timestamp_granularities: Optional[
Union[str, AudioTimestampGranularities]
] = None,
**kwargs: Any,
) -> Union[AudioTranscriptionResponse, AudioTranscriptionVerboseResponse]:
"""
Transcribes audio into the input language.

Args:
file: The audio file object (not file name) to transcribe, in one of these formats:
flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm.
Can be a file path (str/Path), file object (BinaryIO), or URL (str).
model: ID of the model to use. Defaults to "openai/whisper-large-v3".
language: The language of the input audio. Supplying the input language in
ISO-639-1 format will improve accuracy and latency.
prompt: An optional text to guide the model's style or continue a previous
audio segment. The prompt should match the audio language.
response_format: The format of the transcript output, in one of these options:
json, verbose_json.
temperature: The sampling temperature, between 0 and 1. Higher values like 0.8
will make the output more random, while lower values like 0.2 will make it
more focused and deterministic.
timestamp_granularities: The timestamp granularities to populate for this
transcription. response_format must be set verbose_json to use timestamp
granularities. Either or both of these options are supported: word, or segment.

Returns:
The transcribed text in the requested format.
"""

requestor = api_requestor.APIRequestor(
client=self._client,
)

# Handle file input - could be a path, URL, or file object
files_data: Dict[str, Union[Tuple[None, str], BinaryIO]] = {}
params_data = {}

if isinstance(file, (str, Path)):
if isinstance(file, str) and file.startswith(("http://", "https://")):
# URL string - send as multipart field
files_data["file"] = (None, file)
else:
# Local file path
file_path = Path(file)
files_data["file"] = open(file_path, "rb")
else:
# File object
files_data["file"] = file

# Build request parameters
params_data.update(
{
"model": model,
"response_format": (
response_format.value
if hasattr(response_format, "value")
else response_format
),
"temperature": temperature,
}
)

if language is not None:
params_data["language"] = language

if prompt is not None:
params_data["prompt"] = prompt

if timestamp_granularities is not None:
params_data["timestamp_granularities"] = (
timestamp_granularities.value
if hasattr(timestamp_granularities, "value")
else timestamp_granularities
)

# Add any additional kwargs
params_data.update(kwargs)

try:
response, _, _ = requestor.request(
options=TogetherRequest(
method="POST",
url="audio/transcriptions",
params=params_data,
files=files_data,
),
)
finally:
# Close file if we opened it
if files_data and "file" in files_data:
try:
# Only close if it's a file object (not a tuple for URL)
file_obj = files_data["file"]
if hasattr(file_obj, "close") and not isinstance(file_obj, tuple):
file_obj.close()
except:
pass

# Parse response based on format
if (
response_format == "verbose_json"
or response_format == AudioTranscriptionResponseFormat.VERBOSE_JSON
):
return AudioTranscriptionVerboseResponse(**response.data)
else:
return AudioTranscriptionResponse(**response.data)


class AsyncTranscriptions:
def __init__(self, client: TogetherClient) -> None:
self._client = client

async def create(
self,
*,
file: Union[str, BinaryIO, Path],
model: str = "openai/whisper-large-v3",
language: Optional[str] = None,
prompt: Optional[str] = None,
response_format: Union[str, AudioTranscriptionResponseFormat] = "json",
temperature: float = 0.0,
timestamp_granularities: Optional[
Union[str, AudioTimestampGranularities]
] = None,
**kwargs: Any,
) -> Union[AudioTranscriptionResponse, AudioTranscriptionVerboseResponse]:
"""
Async version of transcribe audio into the input language.

Args:
file: The audio file object (not file name) to transcribe, in one of these formats:
flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm.
Can be a file path (str/Path), file object (BinaryIO), or URL (str).
model: ID of the model to use. Defaults to "openai/whisper-large-v3".
language: The language of the input audio. Supplying the input language in
ISO-639-1 format will improve accuracy and latency.
prompt: An optional text to guide the model's style or continue a previous
audio segment. The prompt should match the audio language.
response_format: The format of the transcript output, in one of these options:
json, verbose_json.
temperature: The sampling temperature, between 0 and 1. Higher values like 0.8
will make the output more random, while lower values like 0.2 will make it
more focused and deterministic.
timestamp_granularities: The timestamp granularities to populate for this
transcription. response_format must be set verbose_json to use timestamp
granularities. Either or both of these options are supported: word, or segment.

Returns:
The transcribed text in the requested format.
"""

requestor = api_requestor.APIRequestor(
client=self._client,
)

# Handle file input - could be a path, URL, or file object
files_data: Dict[str, Union[Tuple[None, str], BinaryIO]] = {}
params_data = {}

if isinstance(file, (str, Path)):
if isinstance(file, str) and file.startswith(("http://", "https://")):
# URL string - send as multipart field
files_data["file"] = (None, file)
else:
# Local file path
file_path = Path(file)
files_data["file"] = open(file_path, "rb")
else:
# File object
files_data["file"] = file

# Build request parameters
params_data.update(
{
"model": model,
"response_format": (
response_format
if isinstance(response_format, str)
else (
response_format.value
if hasattr(response_format, "value")
else response_format
)
),
"temperature": temperature,
}
)

if language is not None:
params_data["language"] = language

if prompt is not None:
params_data["prompt"] = prompt

if timestamp_granularities is not None:
params_data["timestamp_granularities"] = (
timestamp_granularities
if isinstance(timestamp_granularities, str)
else (
timestamp_granularities.value
if hasattr(timestamp_granularities, "value")
else timestamp_granularities
)
)

# Add any additional kwargs
params_data.update(kwargs)

try:
response, _, _ = await requestor.arequest(
options=TogetherRequest(
method="POST",
url="audio/transcriptions",
params=params_data,
files=files_data,
),
)
finally:
# Close file if we opened it
if files_data and "file" in files_data:
try:
# Only close if it's a file object (not a tuple for URL)
file_obj = files_data["file"]
if hasattr(file_obj, "close") and not isinstance(file_obj, tuple):
file_obj.close()
except:
pass

# Parse response based on format
if (
response_format == "verbose_json"
or response_format == AudioTranscriptionResponseFormat.VERBOSE_JSON
):
return AudioTranscriptionVerboseResponse(**response.data)
else:
return AudioTranscriptionResponse(**response.data)
Loading