Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -502,6 +502,7 @@ API Reference
HuggingFaceEndpointTarget
limit_requests_per_minute
OpenAICompletionTarget
OpenAICompletionsAudioConfig
OpenAIImageTarget
OpenAIChatTarget
OpenAIResponseTarget
Expand Down
4 changes: 3 additions & 1 deletion pyrit/prompt_target/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
from pyrit.prompt_target.http_target.httpx_api_target import HTTPXAPITarget
from pyrit.prompt_target.hugging_face.hugging_face_chat_target import HuggingFaceChatTarget
from pyrit.prompt_target.hugging_face.hugging_face_endpoint_target import HuggingFaceEndpointTarget
from pyrit.prompt_target.openai.completions_audio_config import OpenAICompletionsAudioConfig
from pyrit.prompt_target.openai.openai_chat_target import OpenAIChatTarget
from pyrit.prompt_target.openai.openai_completion_target import OpenAICompletionTarget
from pyrit.prompt_target.openai.openai_image_target import OpenAIImageTarget
Expand Down Expand Up @@ -51,8 +52,9 @@
"HuggingFaceEndpointTarget",
"limit_requests_per_minute",
"OpenAICompletionTarget",
"OpenAIImageTarget",
"OpenAICompletionsAudioConfig",
"OpenAIChatTarget",
"OpenAIImageTarget",
"OpenAIResponseTarget",
"OpenAIVideoTarget",
"OpenAITTSTarget",
Expand Down
48 changes: 48 additions & 0 deletions pyrit/prompt_target/openai/completions_audio_config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

from dataclasses import dataclass
from typing import Literal

# Voice names accepted for audio output by the OpenAI Chat Completions API.
# See: https://platform.openai.com/docs/guides/text-to-speech#voice-options
CompletionsAudioVoice = Literal[
    "alloy",
    "ash",
    "ballad",
    "coral",
    "echo",
    "sage",
    "shimmer",
    "verse",
    "marin",
    "cedar",
]

# Audio formats that can be requested for the audio in the response.
CompletionsAudioFormat = Literal[
    "wav",
    "mp3",
    "flac",
    "opus",
    "pcm16",
]


@dataclass
class OpenAICompletionsAudioConfig:
    """
    Audio-output settings for the OpenAI Chat Completions API.

    Supplying an instance to OpenAIChatTarget turns on audio output for models
    that support it (e.g., gpt-4o-audio-preview).

    Note: These settings apply to the Chat Completions API only. The Responses API
    does not support audio input or output. For real-time audio, use RealtimeTarget
    instead.
    """

    # Voice used for the audio output. Supported voices are:
    # "alloy", "ash", "ballad", "coral", "echo", "sage", "shimmer", "verse", "marin", "cedar".
    voice: CompletionsAudioVoice

    # Format of the audio in the response. Supported formats are:
    # "wav", "mp3", "flac", "opus", "pcm16". Defaults to "wav".
    audio_format: CompletionsAudioFormat = "wav"

    # When True, historical user messages that carry both audio and text send only
    # the text (transcript), reducing bandwidth and token usage. The current (last)
    # user message still includes its audio. Defaults to True.
    prefer_transcript_for_history: bool = True

    def to_extra_body_parameters(self) -> dict:
        """
        Build the extra_body_parameters payload that requests audio output.

        Returns:
            dict: A mapping with a "modalities" entry listing "text" and "audio",
                and an "audio" entry holding the configured voice and format.
        """
        audio_settings = {"voice": self.voice, "format": self.audio_format}
        return {"modalities": ["text", "audio"], "audio": audio_settings}
Loading