11use std:: path:: { Path , PathBuf } ;
22
3- use hypr_audio_utils:: { Source , f32_to_i16_bytes, resample_audio, source_from_path} ;
43use owhisper_interface:: ListenParams ;
54use owhisper_interface:: batch:: Response as BatchResponse ;
65
@@ -26,28 +25,38 @@ impl BatchSttAdapter for DeepgramAdapter {
2625 }
2726}
2827
/// Picks the `Content-Type` value to send for an audio file, based solely on
/// the file extension of `path`.
///
/// The extension is compared ASCII case-insensitively, so `REC.MP3` and
/// `rec.mp3` both map to `audio/mpeg`. Paths with no extension, a non-UTF-8
/// extension, or an extension that is not in the table fall back to
/// `application/octet-stream`.
fn mime_type_from_extension(path: &Path) -> &'static str {
    /// Returned whenever the extension is missing or unrecognised.
    const FALLBACK: &str = "application/octet-stream";

    /// Known audio extensions (lowercase) and their MIME types.
    const MIME_TYPES: &[(&str, &str)] = &[
        ("mp3", "audio/mpeg"),
        ("mp4", "audio/mp4"),
        ("m4a", "audio/mp4"),
        ("wav", "audio/wav"),
        ("webm", "audio/webm"),
        ("ogg", "audio/ogg"),
        ("flac", "audio/flac"),
    ];

    path.extension()
        .and_then(|ext| ext.to_str())
        .and_then(|ext| {
            // `eq_ignore_ascii_case` avoids allocating a lowercased copy.
            MIME_TYPES
                .iter()
                .find(|(known, _)| ext.eq_ignore_ascii_case(known))
        })
        .map_or(FALLBACK, |&(_, mime)| mime)
}
40+
2941async fn do_transcribe_file (
3042 client : & ClientWithMiddleware ,
3143 api_base : & str ,
3244 api_key : & str ,
3345 params : & ListenParams ,
3446 file_path : PathBuf ,
3547) -> Result < BatchResponse , Error > {
36- let ( audio_data, sample_rate) = decode_audio_to_linear16 ( file_path) . await ?;
48+ let audio_data = tokio:: fs:: read ( & file_path)
49+ . await
50+ . map_err ( |e| Error :: AudioProcessing ( format ! ( "failed to read file: {}" , e) ) ) ?;
3751
38- let url = {
39- let mut url = build_batch_url (
40- api_base,
41- params,
42- & DeepgramLanguageStrategy ,
43- & DeepgramKeywordStrategy ,
44- ) ;
45- url. query_pairs_mut ( )
46- . append_pair ( "sample_rate" , & sample_rate. to_string ( ) ) ;
47- url
48- } ;
52+ let content_type = mime_type_from_extension ( & file_path) ;
4953
50- let content_type = format ! ( "audio/raw;encoding=linear16;rate={}" , sample_rate) ;
54+ let url = build_batch_url (
55+ api_base,
56+ params,
57+ & DeepgramLanguageStrategy ,
58+ & DeepgramKeywordStrategy ,
59+ ) ;
5160
5261 let response = client
5362 . post ( url)
@@ -69,45 +78,6 @@ async fn do_transcribe_file(
6978 }
7079}
7180
72- async fn decode_audio_to_linear16 ( path : PathBuf ) -> Result < ( bytes:: Bytes , u32 ) , Error > {
73- tokio:: task:: spawn_blocking ( move || -> Result < ( bytes:: Bytes , u32 ) , Error > {
74- let decoder =
75- source_from_path ( & path) . map_err ( |err| Error :: AudioProcessing ( err. to_string ( ) ) ) ?;
76-
77- let channels = decoder. channels ( ) . max ( 1 ) ;
78- let sample_rate = decoder. sample_rate ( ) ;
79-
80- let samples = resample_audio ( decoder, sample_rate)
81- . map_err ( |err| Error :: AudioProcessing ( err. to_string ( ) ) ) ?;
82-
83- let samples = if channels == 1 {
84- samples
85- } else {
86- let channels_usize = channels as usize ;
87- let mut mono = Vec :: with_capacity ( samples. len ( ) / channels_usize) ;
88- for frame in samples. chunks ( channels_usize) {
89- if frame. is_empty ( ) {
90- continue ;
91- }
92- let sum: f32 = frame. iter ( ) . copied ( ) . sum ( ) ;
93- mono. push ( sum / frame. len ( ) as f32 ) ;
94- }
95- mono
96- } ;
97-
98- if samples. is_empty ( ) {
99- return Err ( Error :: AudioProcessing (
100- "audio file contains no samples" . to_string ( ) ,
101- ) ) ;
102- }
103-
104- let bytes = f32_to_i16_bytes ( samples. into_iter ( ) ) ;
105-
106- Ok ( ( bytes, sample_rate) )
107- } )
108- . await ?
109- }
110-
11181#[ cfg( test) ]
11282mod tests {
11383 use super :: * ;
0 commit comments