@@ -16,11 +16,27 @@ pub(crate) struct TextToSpeechArgs {
1616 /// Output file (optional)
1717 #[ arg( long, short = 'o' ) ]
1818 output : Option < String > ,
19+
20+ /// Language code (optional)
21+ #[ arg( long) ]
22+ language_code : Option < String > ,
23+
24+ /// Output format (optional)
25+ #[ arg( long) ]
26+ output_format : Option < String > ,
27+ }
28+
29+ fn default_output_format ( provider : & Provider ) -> Option < String > {
30+ match provider {
31+ Provider :: DeepInfra => Some ( "mp3" . to_string ( ) ) ,
32+ _ => None ,
33+ }
1934}
2035
2136fn default_voice ( provider : & Provider ) -> Option < String > {
2237 match provider {
2338 Provider :: OpenAI => Some ( "alloy" . to_string ( ) ) ,
39+ Provider :: Google => Some ( "en-US-Studio-Q" . to_string ( ) ) ,
2440 _ => None ,
2541 }
2642}
@@ -34,13 +50,27 @@ fn default_model(provider: &Provider, task: &Task) -> Option<String> {
3450 }
3551}
3652
53+ fn default_language_code ( provider : & Provider ) -> Option < String > {
54+ match provider {
55+ Provider :: Google => Some ( "en-US" . to_string ( ) ) ,
56+ _ => None ,
57+ }
58+ }
3759pub ( crate ) async fn tts ( args : & TextToSpeechArgs , key : & transformrs:: Key , input : & str ) {
3860 let provider = key. provider . clone ( ) ;
3961 let config = transformrs:: text_to_speech:: TTSConfig {
4062 voice : args. voice . clone ( ) . or_else ( || default_voice ( & provider) ) ,
41- output_format : Some ( "mp3" . to_string ( ) ) ,
63+ output_format : args
64+ . output_format
65+ . clone ( )
66+ . or_else ( || default_output_format ( & provider) ) ,
67+ language_code : args
68+ . language_code
69+ . clone ( )
70+ . or_else ( || default_language_code ( & provider) ) ,
4271 ..Default :: default ( )
4372 } ;
73+ println ! ( "Config: {:?}" , config) ;
4474 let model = args
4575 . model
4676 . clone ( )
0 commit comments