@@ -502,6 +502,7 @@ static int tts_max_len = 4096;
// File-scope state shared by the TTS load and generate routines (partial view of the file).

// Reset to false by ttstype_load_model() just before it checks the detected
// architecture against SUPPORTED_ARCHITECTURES.
// NOTE(review): presumably set to true inside that check - the assignment is not visible here; confirm.
502502static bool is_ttscpp_file = false ;
// Starts out null; where it is assigned and released is not visible in this view.
503503static generation_configuration * ttscpp_config = nullptr ;
// Starts out null; where it is assigned and released is not visible in this view.
504504static struct tts_runner * ttscpp_runner = nullptr ;
// Architecture string of the loaded model. ttstype_load_model() assigns it from
// gguf_get_model_arch(modelfile_ttc); ttstype_generate_ttscpp() compares it
// against the "kokoro" and "dia" literals to choose its voice tables. Kept at
// file scope (rather than local to the loader) so the generate routine can read it.
505+ static std::string detectedarch = " " ;
505506
// Initialised to 0 and, unlike its neighbours, not declared static.
// NOTE(review): presumably a running count of generations - no update is visible here; confirm.
506507int total_tts_gens = 0 ;
// NOTE(review): no reads or writes are visible in this view; purpose inferred from the name only - confirm.
507508static std::string tts_executable_path = " " ;
@@ -540,7 +541,7 @@ bool ttstype_load_model(const tts_load_model_inputs inputs)
540541
541542 std::string modelfile_ttc = inputs.ttc_model_filename ;
542543 std::string modelfile_cts = inputs.cts_model_filename ;
543- std::string detectedarch = gguf_get_model_arch (modelfile_ttc);
544+ detectedarch = gguf_get_model_arch (modelfile_ttc);
544545
545546 is_ttscpp_file = false ;
546547 if (detectedarch!=" " && SUPPORTED_ARCHITECTURES.find (detectedarch) != SUPPORTED_ARCHITECTURES.end ()) {
@@ -663,24 +664,34 @@ static tts_generation_outputs ttstype_generate_ttscpp(const tts_generation_input
663664 std::string prompt = inputs.prompt ;
664665 double ttstime = 0 ;
665666 timer_start ();
666- switch (speaker_seed)
667+
668+ std::vector<std::string> vmapper = {};
669+ std::vector<std::string> vpermitted = {};
670+
671+ if (detectedarch==" kokoro" )
667672 {
668- case 1 :
669- voiceused = " am_echo" ;
670- break ;
671- case 2 :
672- voiceused = " af_alloy" ;
673- break ;
674- case 3 :
675- voiceused = " af_jessica" ;
676- break ;
677- case 4 :
678- voiceused = " bm_daniel" ;
679- break ;
680- case 5 :
681- voiceused = " bf_isabella" ;
682- break ;
673+ vmapper = {" am_echo" ," af_heart" ," af_alloy" ," bm_daniel" ," bf_isabella" };
674+ vpermitted = {" af_alloy" , " af_aoede" , " af_bella" , " af_heart" , " af_jessica" , " af_kore" , " af_nicole" , " af_nova" , " af_river" , " af_sarah" , " af_sky" , " am_adam" , " am_echo" , " am_eric" , " am_fenrir" , " am_liam" , " am_michael" , " am_onyx" , " am_puck" , " am_santa" , " bf_alice" , " bf_emma" , " bf_isabella" , " bf_lily" , " bm_daniel" , " bm_fable" , " bm_george" , " bm_lewis" };
675+ }
676+ else if (detectedarch==" dia" )
677+ {
678+ vmapper = {" zoe" , " zac" , " jess" , " leo" , " mia" };
679+ vpermitted = {" zoe" , " zac" ," jess" , " leo" , " mia" , " julia" , " leah" };
680+ }
681+
682+ if (speaker_seed>=1 && speaker_seed<=5 && vmapper.size ()>=5 )
683+ {
684+ voiceused = vmapper[speaker_seed-1 ];
683685 }
686+ else if (vpermitted.size ()>0 )
687+ {
688+ // if we can match the voice, use it
689+ const std::string cspeaker = inputs.custom_speaker_voice ;
690+ if (std::find (vpermitted.begin (), vpermitted.end (), cspeaker) != vpermitted.end ()) {
691+ voiceused = cspeaker;
692+ }
693+ }
694+
684695 if (ttsdebugmode==1 && !tts_is_quiet)
685696 {
686697 printf (" \n Using Speaker ID: %d, Voice: %s" , speaker_seed, voiceused.c_str ());
0 commit comments