Azure · rlundeen2 · Jan 10, 2026 · Jan 10, 2026 · Jan 10, 2026 · Jan 10, 2026
diff --git a/doc/api.rst b/doc/api.rst
@@ -502,6 +502,7 @@ API Reference
     HuggingFaceEndpointTarget
     limit_requests_per_minute
     OpenAICompletionTarget
+    OpenAICompletionsAudioConfig
     OpenAIImageTarget
     OpenAIChatTarget
     OpenAIResponseTarget

diff --git a/pyrit/prompt_target/__init__.py b/pyrit/prompt_target/__init__.py
@@ -23,6 +23,7 @@
 from pyrit.prompt_target.http_target.httpx_api_target import HTTPXAPITarget
 from pyrit.prompt_target.hugging_face.hugging_face_chat_target import HuggingFaceChatTarget
 from pyrit.prompt_target.hugging_face.hugging_face_endpoint_target import HuggingFaceEndpointTarget
+from pyrit.prompt_target.openai.completions_audio_config import OpenAICompletionsAudioConfig
 from pyrit.prompt_target.openai.openai_chat_target import OpenAIChatTarget
 from pyrit.prompt_target.openai.openai_completion_target import OpenAICompletionTarget
 from pyrit.prompt_target.openai.openai_image_target import OpenAIImageTarget
@@ -51,8 +52,9 @@
     "HuggingFaceEndpointTarget",
     "limit_requests_per_minute",
     "OpenAICompletionTarget",
-    "OpenAIImageTarget",
+    "OpenAICompletionsAudioConfig",
     "OpenAIChatTarget",
+    "OpenAIImageTarget",
     "OpenAIResponseTarget",
     "OpenAIVideoTarget",
     "OpenAITTSTarget",

diff --git a/pyrit/prompt_target/openai/completions_audio_config.py b/pyrit/prompt_target/openai/completions_audio_config.py
@@ -0,0 +1,48 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT license.
+
+from dataclasses import dataclass
+from typing import Literal
+
+# Voices and audio formats supported by OpenAI Chat Completions API audio output.
+# Voice list: https://platform.openai.com/docs/guides/text-to-speech#voice-options
+CompletionsAudioVoice = Literal["alloy", "ash", "ballad", "coral", "echo", "sage", "shimmer", "verse", "marin", "cedar"]
+CompletionsAudioFormat = Literal["wav", "mp3", "flac", "opus", "pcm16"]
+
+
+@dataclass
+class OpenAICompletionsAudioConfig:
+    """
+    Audio-output settings for the OpenAI Chat Completions API.
+
+    Passing an instance to OpenAIChatTarget turns on audio responses for models
+    that can produce them (e.g., gpt-4o-audio-preview).
+
+    Note: These settings apply to the Chat Completions API only. The Responses API has
+    no audio input or output support, and real-time audio belongs to RealtimeTarget.
+    """
+
+    # Voice used for the audio output. One of:
+    # "alloy", "ash", "ballad", "coral", "echo", "sage", "shimmer", "verse", "marin", "cedar".
+    voice: CompletionsAudioVoice
+
+    # Format of the audio in the response. One of:
+    # "wav", "mp3", "flac", "opus", "pcm16". Defaults to "wav".
+    audio_format: CompletionsAudioFormat = "wav"
+
+    # When True, earlier user messages carrying both audio and text are sent as text
+    # (transcript) only, saving bandwidth and tokens; the current (last) user message
+    # keeps its audio. Defaults to True.
+    prefer_transcript_for_history: bool = True
+
+    def to_extra_body_parameters(self) -> dict:
+        """
+        Build the extra_body_parameters payload that requests audio output.
+
+        Returns:
+            dict: Request-body entries enabling the text and audio modalities with
+                this config's voice and format.
+        """
+        audio_settings = {"voice": self.voice, "format": self.audio_format}
+        requested_modalities = ["text", "audio"]
+        return {"modalities": requested_modalities, "audio": audio_settings}