Skip to content

Commit c4e47e4

Browse files
committed
Update live_translate & live_transcribe docs, openapi spec & SWML schema
1 parent ad6dd24 commit c4e47e4

File tree

16 files changed

+1015
-95
lines changed

16 files changed

+1015
-95
lines changed

specs/signalwire-rest/calling-api/calls/models/examples.tsp

Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,3 +65,115 @@ const aiMessageExample = #{
6565
},
6666
},
6767
};
68+
69+
// Live Transcribe examples

// "start" action: begins live transcription with live events and an AI summary
// (including the model and prompt used to produce the summary) enabled.
const liveTranscribeStartExample = #{
  request: #{
    command: "calling.live_transcribe",
    id: callId,
    params: #{
      action: #{
        start: #{
          lang: "en-US",
          direction: #["local-caller", "remote-caller"],
          webhook: "https://example.com/transcription-events",
          live_events: true,
          ai_summary: true,
          ai_summary_prompt: "Summarize the key points of this conversation.",
          ai_model: "gpt-4.1-nano",
          speech_engine: "deepgram",
        },
      },
    },
  },
};

// "stop" action: note the action is the bare string "stop", not a nested object.
const liveTranscribeStopExample = #{
  request: #{
    command: "calling.live_transcribe",
    id: callId,
    params: #{
      action: "stop",
    },
  },
};

// "summarize" action: requests an on-demand AI summary, delivered to the webhook.
const liveTranscribeSummarizeExample = #{
  request: #{
    command: "calling.live_transcribe",
    id: callId,
    params: #{
      action: #{
        summarize: #{
          webhook: "https://example.com/summary",
          prompt: "Provide a bullet-point summary of the main topics discussed.",
        },
      },
    },
  },
};
115+
116+
// Live Translate examples

// "start" action: begins live translation between two languages, with per-leg
// TTS voices, a tone filter on the source direction, and AI summaries enabled.
const liveTranslateStartExample = #{
  request: #{
    command: "calling.live_translate",
    id: callId,
    params: #{
      action: #{
        start: #{
          from_lang: "en-US",
          to_lang: "es-ES",
          direction: #["local-caller", "remote-caller"],
          from_voice: "elevenlabs.josh",
          to_voice: "elevenlabs.josh",
          filter_from: "professional",
          webhook: "https://example.com/translation-events",
          live_events: true,
          ai_summary: true,
          ai_model: "gpt-4.1-nano",
          speech_engine: "deepgram",
        },
      },
    },
  },
};

// "stop" action: the bare string "stop" ends the translation session.
const liveTranslateStopExample = #{
  request: #{
    command: "calling.live_translate",
    id: callId,
    params: #{
      action: "stop",
    },
  },
};

// "summarize" action: requests an on-demand summary of the translated conversation.
const liveTranslateSummarizeExample = #{
  request: #{
    command: "calling.live_translate",
    id: callId,
    params: #{
      action: #{
        summarize: #{
          webhook: "https://example.com/summary",
          prompt: "Summarize the key agreements reached in both languages.",
        },
      },
    },
  },
};

// "inject" action: speaks a translated message to one leg of the call.
const liveTranslateInjectExample = #{
  request: #{
    command: "calling.live_translate",
    id: callId,
    params: #{
      action: #{
        inject: #{
          message: "Please hold while I transfer you to a specialist.",
          direction: "remote-caller",
        },
      },
    },
  },
};

specs/signalwire-rest/calling-api/calls/models/requests.tsp

Lines changed: 255 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,255 @@ model CallAIMessageRequest {
115115
};
116116
}
117117

118+
// ============================================
// Live Transcribe Commands
// ============================================

// Call legs that transcription/translation can apply to.
alias TranscribeDirection = "local-caller" | "remote-caller";
// Supported speech-to-text engines.
alias SpeechEngine = "deepgram" | "google";
// AI models available for conversation summaries.
alias SupportedAIModels = "gpt-4o-mini" | "gpt-4.1-mini" | "gpt-4.1-nano";
125+
126+
@summary("Start")
model LiveTranscribeStartAction {
  @doc("Starts live transcription of the call.")
  start: {
    @doc("The language to transcribe (e.g., 'en-US', 'es-ES').")
    @example("en-US")
    lang: string;

    @doc("The direction(s) of the call to transcribe.")
    @example(#["local-caller", "remote-caller"])
    direction: TranscribeDirection[];

    @doc("The webhook URL to receive transcription events.")
    @example("https://example.com/webhook")
    webhook?: string;

    @doc("Whether to send real-time utterance events as speech is recognized.")
    @example(true)
    live_events?: boolean;

    @doc("Whether to generate an AI summary when transcription ends.")
    @example(true)
    ai_summary?: boolean;

    @doc("The AI prompt that instructs how to summarize the conversation when `ai_summary` is enabled.")
    @example("Summarize the key points of this conversation.")
    ai_summary_prompt?: string;

    // Fix: the start example sends `ai_model` and the SupportedAIModels alias
    // was declared but unused — the field was missing from the schema.
    @doc("The AI model used to generate the summary when `ai_summary` is enabled.")
    @example("gpt-4.1-nano")
    ai_model?: SupportedAIModels;

    @doc("The speech recognition engine to use.")
    @example("deepgram")
    speech_engine?: SpeechEngine = "deepgram";

    @doc("Speech timeout in milliseconds.")
    @example(60000)
    speech_timeout?: int32 = 60000;

    @doc("Voice activity detection silence time in milliseconds. Default depends on speech engine: `300` for Deepgram, `500` for Google.")
    @example(300)
    vad_silence_ms?: int32;

    // Range constraints enforce the limits the description already documents.
    @doc("Voice activity detection threshold (0-1800).")
    @example(400)
    @minValue(0)
    @maxValue(1800)
    vad_thresh?: int32 = 400;

    @doc("Debug level for logging (0-2).")
    @example(0)
    @minValue(0)
    @maxValue(2)
    debug_level?: int32 = 0;
  };
}
175+
176+
// On-demand summary action for an active transcription session.
@summary("Summarize")
model LiveTranscribeSummarizeAction {
  @doc("Request an on-demand AI summary of the conversation.")
  summarize: {
    @doc("The webhook URL to receive the summary.")
    @example("https://example.com/webhook")
    webhook?: string;

    @doc("The AI prompt that instructs how to summarize the conversation.")
    @example("Provide a bullet-point summary of the main topics discussed.")
    prompt?: string;
  };
}

// The stop action is the bare string literal "stop" (see the stop example),
// hence an enum member rather than an object-shaped model.
@summary("Stop")
@doc("Stops the live transcription session.")
enum LiveTranscribeStopAction {
  stop,
}
195+
196+
// Discriminated by shape: an object keyed `start` or `summarize`, or the literal "stop".
alias LiveTranscribeAction = LiveTranscribeStartAction | LiveTranscribeSummarizeAction | LiveTranscribeStopAction;

@summary("Live Transcribe")
model CallLiveTranscribeRequest {
  @doc(uuidDescription)
  @example(CallIdExample)
  id: uuid;

  @doc("The `calling.live_transcribe` command is used to control live transcription on an active call.")
  @example("calling.live_transcribe")
  command: "calling.live_transcribe";

  @doc(paramsDescription)
  params: {
    @doc("The transcription action to perform: start, stop, or summarize.")
    action: LiveTranscribeAction;
  };
}
214+
215+
// ============================================
// Live Translate Commands
// ============================================

@summary("Filter Presets")
@doc("""
Preset translation filter values that adjust the tone or style of translated speech.

- `polite` - Translates to a polite version, removing anything insulting while maintaining sentiment
- `rude` - Translates to a rude and insulting version while maintaining sentiment
- `professional` - Translates to sound professional, removing slang or lingo
- `shakespeare` - Translates to sound like Shakespeare, speaking in iambic pentameter
- `gen-z` - Translates to use Gen-Z slang and expressions
""")
enum TranslationFilterPreset {
  polite,
  rude,
  professional,
  shakespeare,
  // Backticks escape the hyphen, which is not a valid bare identifier character.
  `gen-z`,
}

@summary("Custom Filter")
@doc("Custom translation filter with a prompt prefix. Use `prompt:` followed by your custom instructions (e.g., `prompt:Use formal business language`).")
@pattern("^prompt:.+$")
scalar CustomTranslationFilter extends string;

// A filter is either one of the preset names or a free-form `prompt:`-prefixed string.
alias TranslationFilter = TranslationFilterPreset | CustomTranslationFilter;
243+
244+
@summary("Start")
model LiveTranslateStartAction {
  @doc("Starts live translation of the call.")
  start: {
    @doc("The language to translate from (e.g., 'en-US').")
    @example("en-US")
    from_lang: string;

    @doc("The language to translate to (e.g., 'es-ES').")
    @example("es-ES")
    to_lang: string;

    @doc("The direction(s) of the call to translate.")
    @example(#["local-caller", "remote-caller"])
    direction: TranscribeDirection[];

    @doc("The TTS voice for the source language.")
    @example("elevenlabs.josh")
    from_voice?: string;

    @doc("The TTS voice for the target language.")
    @example("elevenlabs.josh")
    to_voice?: string;

    @doc("Translation filter for the source language direction.")
    @example("professional")
    filter_from?: TranslationFilter;

    @doc("Translation filter for the target language direction.")
    @example("professional")
    filter_to?: TranslationFilter;

    @doc("The webhook URL to receive translation events.")
    @example("https://example.com/webhook")
    webhook?: string;

    @doc("Whether to send real-time translation events.")
    @example(true)
    live_events?: boolean;

    @doc("Whether to generate AI summaries in both languages when translation ends.")
    @example(true)
    ai_summary?: boolean;

    @doc("The AI prompt that instructs how to summarize the conversation when `ai_summary` is enabled.")
    @example("Summarize this translated conversation.")
    ai_summary_prompt?: string;

    // Fix: the start example sends `ai_model`, but the field was missing from
    // the schema; mirrors the same field on LiveTranscribeStartAction.
    @doc("The AI model used to generate summaries when `ai_summary` is enabled.")
    @example("gpt-4.1-nano")
    ai_model?: SupportedAIModels;

    @doc("The speech recognition engine to use.")
    @example("deepgram")
    speech_engine?: SpeechEngine = "deepgram";

    @doc("Speech timeout in milliseconds.")
    @example(60000)
    speech_timeout?: int32 = 60000;

    @doc("Voice activity detection silence time in milliseconds. Default depends on speech engine: `300` for Deepgram, `500` for Google.")
    @example(300)
    vad_silence_ms?: int32;

    // Range constraints enforce the limits the description already documents.
    @doc("Voice activity detection threshold (0-1800).")
    @example(400)
    @minValue(0)
    @maxValue(1800)
    vad_thresh?: int32 = 400;

    @doc("Debug level for logging (0-2).")
    @example(0)
    @minValue(0)
    @maxValue(2)
    debug_level?: int32 = 0;
  };
}
313+
314+
// On-demand summary action for an active translation session.
@summary("Summarize")
model LiveTranslateSummarizeAction {
  @doc("Request an on-demand AI summary of the translated conversation.")
  summarize: {
    @doc("The webhook URL to receive the summary.")
    @example("https://example.com/webhook")
    webhook?: string;

    @doc("The AI prompt that instructs how to summarize the conversation.")
    @example("Summarize the key agreements reached in both languages.")
    prompt?: string;
  };
}

// Injects a text message that is translated and spoken to one leg of the call.
@summary("Inject")
model LiveTranslateInjectAction {
  @doc("Inject a message into the conversation to be translated and spoken.")
  inject: {
    @doc("The text message to inject and translate.")
    @example("Please hold while I transfer you to a specialist.")
    message: string;

    @doc("The direction to send the translated message.")
    @example("remote-caller")
    direction: TranscribeDirection;
  };
}

// The stop action is the bare string literal "stop" (see the stop example).
@summary("Stop")
@doc("Stops the live translation session.")
enum LiveTranslateStopAction {
  stop,
}

// Discriminated by shape: an object keyed `start`/`summarize`/`inject`, or the literal "stop".
alias LiveTranslateAction = LiveTranslateStartAction | LiveTranslateSummarizeAction | LiveTranslateInjectAction | LiveTranslateStopAction;
349+
350+
@summary("Live Translate")
model CallLiveTranslateRequest {
  @doc(uuidDescription)
  @example(CallIdExample)
  id: uuid;

  @doc("The `calling.live_translate` command is used to control live translation on an active call.")
  @example("calling.live_translate")
  command: "calling.live_translate";

  @doc(paramsDescription)
  params: {
    @doc("The translation action to perform: start, stop, summarize, or inject.")
    action: LiveTranslateAction;
  };
}
366+
118367
@summary("Create call")
119368
model CallCreateRequest {
120369
@doc("The `dial` command is used to create a new call.")
@@ -234,6 +483,10 @@ Call request union for JSON-RPC style method dispatch. Use the `command` field t
234483
- **`calling.ai_unhold`** - Resume an AI call from hold state. Reactivates the AI agent and continues the conversation from where it was paused.
235484
236485
- **`calling.ai_message`** - Inject a message into an active AI conversation. Allows you to dynamically add context, instructions, or system messages to guide the AI agent's behavior during the call.
486+
487+
- **`calling.live_transcribe`** - Control live transcription on an active call. Start real-time speech-to-text transcription, stop transcription, or request an on-demand AI summary of the conversation.
488+
489+
- **`calling.live_translate`** - Control live translation on an active call. Start real-time language translation between call participants, stop translation, request summaries, or inject messages to be translated and spoken.
237490
""")
238491
union CallRequest {
239492
dial: CallCreateRequest,
@@ -242,4 +495,6 @@ union CallRequest {
242495
`calling.ai_hold`: CallHoldRequest,
243496
`calling.ai_unhold`: CallUnholdRequest,
244497
`calling.ai_message`: CallAIMessageRequest,
498+
`calling.live_transcribe`: CallLiveTranscribeRequest,
499+
`calling.live_translate`: CallLiveTranslateRequest,
245500
}

0 commit comments

Comments
 (0)