Skip to content

Commit 2abe110

Browse files
committed
custom voice handling
1 parent 67ef5e6 commit 2abe110

File tree

3 files changed

+31
-17
lines changed

3 files changed

+31
-17
lines changed

expose.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -247,6 +247,7 @@ struct tts_generation_inputs
247247
const char * prompt = nullptr;
248248
const int speaker_seed = 0;
249249
const int audio_seed = 0;
250+
const char * custom_speaker_voice = "";
250251
const char * custom_speaker_text = "";
251252
const char * custom_speaker_data = "";
252253
};

koboldcpp.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -352,6 +352,7 @@ class tts_generation_inputs(ctypes.Structure):
352352
_fields_ = [("prompt", ctypes.c_char_p),
353353
("speaker_seed", ctypes.c_int),
354354
("audio_seed", ctypes.c_int),
355+
("custom_speaker_voice", ctypes.c_char_p),
355356
("custom_speaker_text", ctypes.c_char_p),
356357
("custom_speaker_data", ctypes.c_char_p)]
357358

@@ -1880,6 +1881,7 @@ def tts_generate(genparams):
18801881
else:
18811882
voice = simple_lcg_hash(voicestr.strip()) if voicestr else 1
18821883
inputs = tts_generation_inputs()
1884+
inputs.custom_speaker_voice = normalized_voice.encode("UTF-8")
18831885
inputs.prompt = prompt.encode("UTF-8")
18841886
inputs.speaker_seed = voice
18851887
aseed = -1

otherarch/tts_adapter.cpp

Lines changed: 28 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -502,6 +502,7 @@ static int tts_max_len = 4096;
502502
static bool is_ttscpp_file = false;
503503
static generation_configuration * ttscpp_config = nullptr;
504504
static struct tts_runner * ttscpp_runner = nullptr;
505+
static std::string detectedarch = "";
505506

506507
int total_tts_gens = 0;
507508
static std::string tts_executable_path = "";
@@ -540,7 +541,7 @@ bool ttstype_load_model(const tts_load_model_inputs inputs)
540541

541542
std::string modelfile_ttc = inputs.ttc_model_filename;
542543
std::string modelfile_cts = inputs.cts_model_filename;
543-
std::string detectedarch = gguf_get_model_arch(modelfile_ttc);
544+
detectedarch = gguf_get_model_arch(modelfile_ttc);
544545

545546
is_ttscpp_file = false;
546547
if (detectedarch!="" && SUPPORTED_ARCHITECTURES.find(detectedarch) != SUPPORTED_ARCHITECTURES.end()) {
@@ -663,24 +664,34 @@ static tts_generation_outputs ttstype_generate_ttscpp(const tts_generation_input
663664
std::string prompt = inputs.prompt;
664665
double ttstime = 0;
665666
timer_start();
666-
switch(speaker_seed)
667+
668+
std::vector<std::string> vmapper = {};
669+
std::vector<std::string> vpermitted = {};
670+
671+
if(detectedarch=="kokoro")
667672
{
668-
case 1:
669-
voiceused = "am_echo";
670-
break;
671-
case 2:
672-
voiceused = "af_alloy";
673-
break;
674-
case 3:
675-
voiceused = "af_jessica";
676-
break;
677-
case 4:
678-
voiceused = "bm_daniel";
679-
break;
680-
case 5:
681-
voiceused = "bf_isabella";
682-
break;
673+
vmapper = {"am_echo","af_heart","af_alloy","bm_daniel","bf_isabella"};
674+
vpermitted = {"af_alloy", "af_aoede", "af_bella", "af_heart", "af_jessica", "af_kore", "af_nicole", "af_nova", "af_river", "af_sarah", "af_sky", "am_adam", "am_echo", "am_eric", "am_fenrir", "am_liam", "am_michael", "am_onyx", "am_puck", "am_santa", "bf_alice", "bf_emma", "bf_isabella", "bf_lily", "bm_daniel", "bm_fable", "bm_george", "bm_lewis"};
675+
}
676+
else if(detectedarch=="dia")
677+
{
678+
vmapper = {"zoe", "zac", "jess", "leo", "mia"};
679+
vpermitted = {"zoe", "zac","jess", "leo", "mia", "julia", "leah"};
680+
}
681+
682+
if(speaker_seed>=1 && speaker_seed<=5 && vmapper.size()>=5)
683+
{
684+
voiceused = vmapper[speaker_seed-1];
683685
}
686+
else if(vpermitted.size()>0)
687+
{
688+
//if we can match the voice, use it
689+
const std::string cspeaker = inputs.custom_speaker_voice;
690+
if (std::find(vpermitted.begin(), vpermitted.end(), cspeaker) != vpermitted.end()) {
691+
voiceused = cspeaker;
692+
}
693+
}
694+
684695
if(ttsdebugmode==1 && !tts_is_quiet)
685696
{
686697
printf("\nUsing Speaker ID: %d, Voice: %s", speaker_seed, voiceused.c_str());

0 commit comments

Comments
 (0)