@@ -6,8 +6,10 @@ use serde::Deserialize;
66use super :: AssemblyAIAdapter ;
77use crate :: adapter:: RealtimeSttAdapter ;
88
9+ // https://www.assemblyai.com/docs/api-reference/streaming-api/streaming-api.md
910impl RealtimeSttAdapter for AssemblyAIAdapter {
/// Always returns `false`: this adapter does not advertise native multichannel support.
fn supports_native_multichannel(&self) -> bool {
    // https://www.assemblyai.com/docs/universal-streaming/multichannel-streams.md
    false
}
1315
@@ -22,38 +24,25 @@ impl RealtimeSttAdapter for AssemblyAIAdapter {
2224 query_pairs. append_pair ( "encoding" , "pcm_s16le" ) ;
2325 query_pairs. append_pair ( "format_turns" , "true" ) ;
2426
25- // Compute final speech_model and language_detection values
2627 let model = params
2728 . model
2829 . as_deref ( )
2930 . unwrap_or ( "universal-streaming-english" ) ;
30- let mut speech_model_final = match model {
31- "multilingual" | "universal-streaming-multilingual" => {
32- "universal-streaming-multilingual"
33- }
34- _ => "universal-streaming-english" ,
35- } ;
36- let mut language_detection = false ;
37-
38- if !params. languages . is_empty ( ) {
39- if params. languages . len ( ) > 1
40- || speech_model_final == "universal-streaming-multilingual"
41- {
42- language_detection = true ;
43- } else if let Some ( lang) = params. languages . first ( ) {
44- let code = lang. iso639 ( ) . code ( ) ;
45- if code != "en" {
46- speech_model_final = "universal-streaming-multilingual" ;
47- language_detection = true ;
48- }
49- }
50- }
5131
52- query_pairs. append_pair ( "speech_model" , speech_model_final) ;
32+ let ( speech_model, language, language_detection) =
33+ Self :: resolve_language_config ( model, params) ;
34+
35+ query_pairs. append_pair ( "speech_model" , speech_model) ;
36+ query_pairs. append_pair ( "language" , language) ;
5337 if language_detection {
5438 query_pairs. append_pair ( "language_detection" , "true" ) ;
5539 }
5640
41+ if let Some ( redemption_time) = params. redemption_time_ms {
42+ let max_silence = redemption_time. to_string ( ) ;
43+ query_pairs. append_pair ( "max_turn_silence" , & max_silence) ;
44+ }
45+
5746 if !params. keywords . is_empty ( ) {
5847 let keyterms_json = serde_json:: to_string ( & params. keywords ) . unwrap_or_default ( ) ;
5948 query_pairs. append_pair ( "keyterms_prompt" , & keyterms_json) ;
@@ -64,8 +53,7 @@ impl RealtimeSttAdapter for AssemblyAIAdapter {
6453 }
6554
6655 fn build_auth_header ( & self , api_key : Option < & str > ) -> Option < ( & ' static str , String ) > {
67- // AssemblyAI accepts the API key directly in the Authorization header (no Bearer prefix)
68- api_key. map ( |key| ( "authorization" , key. to_string ( ) ) )
56+ api_key. map ( |key| ( "Authorization" , key. to_string ( ) ) )
6957 }
7058
7159 fn keep_alive_message ( & self ) -> Option < Message > {
@@ -86,8 +74,8 @@ impl RealtimeSttAdapter for AssemblyAIAdapter {
8674 } ;
8775
8876 match msg {
89- AssemblyAIMessage :: Begin { .. } => {
90- tracing:: debug!( "assemblyai_session_began" ) ;
77+ AssemblyAIMessage :: Begin { id , expires_at } => {
78+ tracing:: debug!( session_id = %id , expires_at = %expires_at , "assemblyai_session_began" ) ;
9179 vec ! [ ]
9280 }
9381 AssemblyAIMessage :: Turn ( turn) => Self :: parse_turn ( turn) ,
@@ -107,6 +95,10 @@ impl RealtimeSttAdapter for AssemblyAIAdapter {
10795 channels: 1 ,
10896 } ]
10997 }
98+ AssemblyAIMessage :: Error { error } => {
99+ tracing:: error!( error = %error, "assemblyai_error" ) ;
100+ vec ! [ ]
101+ }
110102 AssemblyAIMessage :: Unknown => {
111103 tracing:: debug!( raw = raw, "assemblyai_unknown_message" ) ;
112104 vec ! [ ]
@@ -119,16 +111,17 @@ impl RealtimeSttAdapter for AssemblyAIAdapter {
119111#[ serde( tag = "type" ) ]
120112enum AssemblyAIMessage {
121113 Begin {
122- #[ allow( dead_code) ]
123114 id : String ,
124- #[ allow( dead_code) ]
125115 expires_at : u64 ,
126116 } ,
127117 Turn ( TurnMessage ) ,
128118 Termination {
129119 audio_duration_seconds : u64 ,
130120 session_duration_seconds : u64 ,
131121 } ,
122+ Error {
123+ error : String ,
124+ } ,
132125 #[ serde( other) ]
133126 Unknown ,
134127}
@@ -172,6 +165,28 @@ struct AssemblyAIWord {
172165}
173166
174167impl AssemblyAIAdapter {
168+ fn resolve_language_config (
169+ model : & str ,
170+ params : & ListenParams ,
171+ ) -> ( & ' static str , & ' static str , bool ) {
172+ let is_multilingual_model =
173+ matches ! ( model, "multilingual" | "universal-streaming-multilingual" ) ;
174+
175+ let needs_multilingual = is_multilingual_model
176+ || params. languages . len ( ) > 1
177+ || params
178+ . languages
179+ . first ( )
180+ . map ( |l| l. iso639 ( ) . code ( ) != "en" )
181+ . unwrap_or ( false ) ;
182+
183+ if needs_multilingual {
184+ ( "universal-streaming-multilingual" , "multi" , true )
185+ } else {
186+ ( "universal-streaming-english" , "en" , false )
187+ }
188+ }
189+
175190 fn parse_turn ( turn : TurnMessage ) -> Vec < StreamResponse > {
176191 tracing:: debug!(
177192 transcript = %turn. transcript,
@@ -217,8 +232,26 @@ impl AssemblyAIAdapter {
217232
218233 let transcript = if turn. turn_is_formatted {
219234 turn. transcript . clone ( )
235+ } else if let Some ( ref utt) = turn. utterance {
236+ if !utt. is_empty ( ) {
237+ utt. clone ( )
238+ } else if !turn. transcript . is_empty ( ) {
239+ turn. transcript . clone ( )
240+ } else {
241+ words
242+ . iter ( )
243+ . map ( |w| w. word . as_str ( ) )
244+ . collect :: < Vec < _ > > ( )
245+ . join ( " " )
246+ }
247+ } else if !turn. transcript . is_empty ( ) {
248+ turn. transcript . clone ( )
220249 } else {
221- turn. utterance . clone ( ) . unwrap_or ( turn. transcript . clone ( ) )
250+ words
251+ . iter ( )
252+ . map ( |w| w. word . as_str ( ) )
253+ . collect :: < Vec < _ > > ( )
254+ . join ( " " )
222255 } ;
223256
224257 let channel = Channel {
@@ -249,11 +282,9 @@ mod tests {
249282 use hypr_audio_utils:: AudioFormatExt ;
250283
251284 use super :: AssemblyAIAdapter ;
252- use crate :: live:: { FinalizeHandle , ListenClientInput } ;
285+ use crate :: live:: ListenClientInput ;
253286 use crate :: ListenClient ;
254287
255- // Integration test that makes real network calls to AssemblyAI.
256- // Run explicitly with: cargo test -p owhisper-client test_client -- --ignored
257288 #[ tokio:: test]
258289 #[ ignore]
259290 async fn test_client ( ) {
@@ -283,42 +314,22 @@ mod tests {
283314 } )
284315 . build_single ( ) ;
285316
286- let ( stream, handle ) = client. from_realtime_audio ( input) . await . unwrap ( ) ;
317+ let ( stream, _handle ) = client. from_realtime_audio ( input) . await . unwrap ( ) ;
287318 futures_util:: pin_mut!( stream) ;
288319
289- let mut saw_transcript = false ;
290320 while let Some ( result) = stream. next ( ) . await {
291321 match result {
292322 Ok ( response) => match response {
293323 owhisper_interface:: stream:: StreamResponse :: TranscriptResponse {
294324 channel,
295- speech_final,
296325 ..
297326 } => {
298- let transcript = & channel. alternatives . first ( ) . unwrap ( ) . transcript ;
299- println ! (
300- "Transcript (speech_final={}): {:?}" ,
301- speech_final, transcript
302- ) ;
303- if !transcript. is_empty ( ) {
304- saw_transcript = true ;
305- break ;
306- }
327+ println ! ( "{:?}" , channel. alternatives. first( ) . unwrap( ) . transcript) ;
307328 }
308329 _ => { }
309330 } ,
310- Err ( e) => {
311- println ! ( "Error: {:?}" , e) ;
312- break ;
313- }
331+ _ => { }
314332 }
315333 }
316-
317- handle. finalize ( ) . await ;
318-
319- assert ! (
320- saw_transcript,
321- "expected at least one non-empty transcript from AssemblyAI"
322- ) ;
323334 }
324335}
0 commit comments