diff --git a/README.md b/README.md index 7f7d539..8f0ee86 100644 --- a/README.md +++ b/README.md @@ -107,7 +107,9 @@ An example `session.update` that configures several aspects of the session, incl "instructions": "Call provided tools if appropriate for the user's input.", "input_audio_format": "pcm16", "input_audio_transcription": { - "model": "whisper-1" + "model": "whisper-1", // can also be "gpt-4o-mini-transcribe" or "gpt-4o-transcribe" + "language": "en", // optional language (ISO 639-1 format) + "prompt": "words you can expect in the input" // optional }, "turn_detection": { "threshold": 0.4, diff --git a/javascript/standalone/src/models.ts b/javascript/standalone/src/models.ts index 27a1fc1..e8c3ad0 100644 --- a/javascript/standalone/src/models.ts +++ b/javascript/standalone/src/models.ts @@ -36,7 +36,9 @@ export type ToolChoice = "auto" | "none" | "required" | FunctionToolChoice; export type MessageRole = "system" | "assistant" | "user"; export interface InputAudioTranscription { - model: "whisper-1"; + model: "whisper-1" | "gpt-4o-mini-transcribe" | "gpt-4o-transcribe"; + language?: string; + prompt?: string; } export interface ClientMessageBase { diff --git a/python/rtclient/models.py b/python/rtclient/models.py index cdef1ce..4eb0b78 100644 --- a/python/rtclient/models.py +++ b/python/rtclient/models.py @@ -44,7 +44,9 @@ class FunctionToolChoice(ModelWithDefaults): class InputAudioTranscription(BaseModel): - model: Literal["whisper-1"] + model: Literal["whisper-1", "gpt-4o-mini-transcribe", "gpt-4o-transcribe"] + language: Optional[str] = None + prompt: Optional[str] = None class ClientMessageBase(ModelWithDefaults):