Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -502,6 +502,7 @@ API Reference
HuggingFaceEndpointTarget
limit_requests_per_minute
OpenAICompletionTarget
OpenAICompletionsAudioConfig
OpenAIImageTarget
OpenAIChatTarget
OpenAIResponseTarget
Expand Down
4 changes: 3 additions & 1 deletion pyrit/prompt_target/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
from pyrit.prompt_target.http_target.httpx_api_target import HTTPXAPITarget
from pyrit.prompt_target.hugging_face.hugging_face_chat_target import HuggingFaceChatTarget
from pyrit.prompt_target.hugging_face.hugging_face_endpoint_target import HuggingFaceEndpointTarget
from pyrit.prompt_target.openai.completions_audio_config import OpenAICompletionsAudioConfig
from pyrit.prompt_target.openai.openai_chat_target import OpenAIChatTarget
from pyrit.prompt_target.openai.openai_completion_target import OpenAICompletionTarget
from pyrit.prompt_target.openai.openai_image_target import OpenAIImageTarget
Expand Down Expand Up @@ -51,8 +52,9 @@
"HuggingFaceEndpointTarget",
"limit_requests_per_minute",
"OpenAICompletionTarget",
"OpenAIImageTarget",
"OpenAICompletionsAudioConfig",
"OpenAIChatTarget",
"OpenAIImageTarget",
"OpenAIResponseTarget",
"OpenAIVideoTarget",
"OpenAITTSTarget",
Expand Down
48 changes: 48 additions & 0 deletions pyrit/prompt_target/openai/completions_audio_config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

from dataclasses import dataclass
from typing import Literal

# Voice names accepted for audio output by the OpenAI Chat Completions API.
# See: https://platform.openai.com/docs/guides/text-to-speech#voice-options
CompletionsAudioVoice = Literal[
    "alloy",
    "ash",
    "ballad",
    "coral",
    "echo",
    "sage",
    "shimmer",
    "verse",
    "marin",
    "cedar",
]
# Audio formats that can be requested for the response audio.
CompletionsAudioFormat = Literal[
    "wav",
    "mp3",
    "flac",
    "opus",
    "pcm16",
]


@dataclass
class OpenAICompletionsAudioConfig:
    """
    Audio-output settings for the OpenAI Chat Completions API.

    Passing an instance to OpenAIChatTarget turns on audio output for models
    that can produce it (e.g., gpt-4o-audio-preview).

    Note: Only the Chat Completions API is covered here. The Responses API does not
    support audio input or output. For real-time audio, use RealtimeTarget instead.
    """

    # Voice used for the audio output. Must be one of:
    # "alloy", "ash", "ballad", "coral", "echo", "sage", "shimmer", "verse", "marin", "cedar".
    voice: CompletionsAudioVoice

    # Format of the audio in the response. Must be one of:
    # "wav", "mp3", "flac", "opus", "pcm16". Defaults to "wav".
    audio_format: CompletionsAudioFormat = "wav"

    # When True, earlier user messages that carry both audio and text are sent as
    # text (transcript) only, to cut bandwidth and token usage; the current (last)
    # user message still includes its audio. Defaults to True.
    # This flag is not part of the dict built by to_extra_body_parameters.
    prefer_transcript_for_history: bool = True

    def to_extra_body_parameters(self) -> dict:
        """
        Build the extra_body_parameters payload that requests audio output.

        Returns:
            dict: Request-body parameters that ask for both the "text" and "audio"
            modalities, using the configured voice and audio format.
        """
        audio_settings = {"voice": self.voice, "format": self.audio_format}
        return {"modalities": ["text", "audio"], "audio": audio_settings}
Loading