Skip to content

Commit 75c919c

Browse files
committed
can't resolve the clicking
1 parent ff1d179 commit 75c919c

File tree

5 files changed

+99
-14
lines changed

5 files changed

+99
-14
lines changed

kokoro_ipa.embd

Lines changed: 11 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -19951,6 +19951,7 @@ excursions,ɪkskˈɜɹʒənz
1995119951
excusable,ɪkskjˈuzəbᵊl
1995219952
excused,ɪkskjˈuzd
1995319953
excusing,ɪkskjˈuzɪŋ
19954+
excuses,ɪkskjˈuzᵻz
1995419955
exec,ɛɡzˈɛk
1995519956
execrable,ˈɛksəkɹəbᵊl
1995619957
execration,ˌɛksəkɹˈAʃən
@@ -39067,16 +39068,16 @@ organelles,ˌɔɹɡənˈɛlz
3906739068
organically,ɔɹɡˈænəkᵊli
3906839069
organic,ɔɹɡˈænɪk
3906939070
organics,ɔɹɡˈænɪks
39070-
organisationally,ˌɔɹɡənəzˈAʃənᵊli
39071-
organisational,ˌɔɹɡənəzˈAʃənᵊl
39072-
organisation,ˌɔɹɡənəzˈAʃən
39073-
organisations,ˌɔɹɡənəzˈAʃənz
39074-
organised,ˈɔɹɡənˌIzd
39075-
organise,ˈɔɹɡənˌIz
39076-
organiser,ˈɔɹɡənˌIzəɹ
39077-
organisers,ˈɔɹɡənˌIzəɹz
39078-
organises,ˈɔɹɡənˌIzᵻz
39079-
organising,ˈɔɹɡənˌIzɪŋ
39071+
organizationally,ˌɔɹɡənəzˈAʃənᵊli
39072+
organizational,ˌɔɹɡənəzˈAʃənᵊl
39073+
organization,ˌɔɹɡənəzˈAʃən
39074+
organizations,ˌɔɹɡənəzˈAʃənz
39075+
organized,ˈɔɹɡənˌIzd
39076+
organize,ˈɔɹɡənˌIz
39077+
organizer,ˈɔɹɡənˌIzəɹ
39078+
organizers,ˈɔɹɡənˌIzəɹz
39079+
organizes,ˈɔɹɡənˌIzᵻz
39080+
organizing,ˈɔɹɡənˌIzɪŋ
3908039081
organism,ˈɔɹɡənˌɪzəm
3908139082
organisms,ˈɔɹɡənˌɪzəmz
3908239083
organist,ˈɔɹɡənɪst

otherarch/tts_adapter.cpp

Lines changed: 80 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -64,6 +64,85 @@ struct wav_header {
6464
uint32_t data_size;
6565
};
6666

67+
// #include <vector>
68+
// #include <cstdio>
69+
// #include <cmath>
70+
71+
// static void audio_post_clean(std::vector<float>& data) { // detect clicks
72+
// const float silenceThreshold = 1e-5f;
73+
// const float noiseThreshold = 1e-3f;
74+
// const size_t minSilence = 100; // samples
75+
// const size_t noiseSpan = 150; // samples
76+
// const size_t minSilence2 = 100; // samples
77+
78+
// size_t len = data.size();
79+
80+
// int silencecounterA = 0;
81+
// int noisecounterA = 0;
82+
// int silencecounterB = 0;
83+
// int state = 0; // 0 = finding first silence, 1 = measuring noise, 2 = finding second silence
84+
85+
// size_t noiseStart = 0;
86+
87+
// for (size_t i = 0; i < len; ++i) {
88+
// float sample = std::fabs(data[i]);
89+
90+
// if (state == 0) { // finding first silence
91+
// if (sample < silenceThreshold) {
92+
// silencecounterA++;
93+
// } else {
94+
// if (silencecounterA >= minSilence) {
95+
// state = 1;
96+
// noisecounterA = 1;
97+
// noiseStart = i;
98+
// } else {
99+
// silencecounterA = 0;
100+
// noisecounterA = 0;
101+
// silencecounterB = 0;
102+
// }
103+
// }
104+
// }
105+
// if (state == 1) { // measuring noise span
106+
// noisecounterA++;
107+
// if(sample>noiseThreshold)
108+
// {
109+
// state = 0;
110+
// silencecounterA = 0;
111+
// noisecounterA = 0;
112+
// silencecounterB = 0;
113+
// }
114+
// else if(noisecounterA>noiseSpan)
115+
// {
116+
// state = 2;
117+
// }
118+
// }
119+
// if (state == 2) { // finding second silence
120+
// if (sample < silenceThreshold) {
121+
// silencecounterB++;
122+
// if (silencecounterB >= minSilence2) {
123+
// // full click detected
124+
// size_t noiseend = noiseStart + noisecounterA - 1;
125+
// //printf("Click detected from %zu to %zu\n", noiseStart, noiseend);
126+
// for(size_t j=noiseStart;j<noiseend;++j)
127+
// {
128+
// data[j] *= 0.01f; //greatly suppress noise
129+
// }
130+
// // reset to search again
131+
// state = 0;
132+
// silencecounterA = 0;
133+
// noisecounterA = 0;
134+
// silencecounterB = 0;
135+
// }
136+
// } else {
137+
// state = 0;
138+
// silencecounterA = 0;
139+
// noisecounterA = 0;
140+
// silencecounterB = 0;
141+
// }
142+
// }
143+
// }
144+
// }
145+
67146
static std::string save_wav16_base64(const std::vector<float> &data, int sample_rate) {
68147
std::ostringstream oss;
69148
wav_header header;
@@ -740,6 +819,7 @@ static tts_generation_outputs ttstype_generate_ttscpp(const tts_generation_input
740819
ttstime = timer_check();
741820
printf("\nTTS Generated audio in %.2fs.\n",ttstime);
742821
std::vector<float> wavdat = std::vector(response_data.data, response_data.data + response_data.n_outputs);
822+
//audio_post_clean(wavdat);
743823
last_generated_audio = save_wav16_base64(wavdat, ttscpp_runner->sampling_rate);
744824
output.data = last_generated_audio.c_str();
745825
output.status = 1;

otherarch/ttscpp/include/phonemizer.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -128,7 +128,7 @@ static const std::map<const char, std::string> LETTER_PHONEMES = {
128128
{'d', "dˈiː"},
129129
{'e', "ˈiː"},
130130
{'f', "ˈɛf"},
131-
{'j', "dʒˈeɪ"},
131+
{'g', "dʒˈi"},
132132
{'h', "ˈeɪtʃ"},
133133
{'i', "ˈaɪ"},
134134
{'j', "dʒˈeɪ"},

otherarch/ttscpp/src/kokoro_model.cpp

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1426,11 +1426,13 @@ int kokoro_runner::generate(std::string prompt, struct tts_response * response,
14261426
prompt = replace_any(prompt, ";:", "--");
14271427
prompt = replace_any(prompt, "\n", "--");
14281428
kokoro_str_replace_all(prompt,"","'");
1429+
kokoro_str_replace_all(prompt,"Mr. ","Mister ");
14291430
prompt = std::regex_replace(prompt, std::regex("(\\w)([.!?]) "), "$1$2, ");
14301431
kokoro_str_replace_all(prompt," - "," -- ");
14311432
kokoro_str_replace_all(prompt,"he's ","he is ");
14321433
kokoro_str_replace_all(prompt,"'s ","s ");
14331434
kokoro_str_replace_all(prompt,"n't ","nt ");
1435+
kokoro_str_replace_all(prompt,"*"," ");
14341436
std::string phonemized_prompt = phmzr->text_to_phonemes(prompt);
14351437
// printf("\nRESULT: %s\n",phonemized_prompt.c_str());
14361438

otherarch/ttscpp/src/phonemizer.cpp

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -893,9 +893,11 @@ bool phonemizer::process_word(corpus* text, std::string* output, std::string wor
893893
text->size_pop(word.size()+unaccented_size_difference);
894894
return true;
895895
}
896-
} else if (can_be_roman_numeral(word) && is_all_upper(word) && small_english_words.find(to_lower(word)) == small_english_words.end() && handle_roman_numeral(text, output, flags)) {
897-
return true;
898-
} else if (is_acronym_like(text, word, flags)) {
896+
}
897+
// else if (can_be_roman_numeral(word) && is_all_upper(word) && small_english_words.find(to_lower(word)) == small_english_words.end() && handle_roman_numeral(text, output, flags)) {
898+
// return true;
899+
// }
900+
else if (is_acronym_like(text, word, flags)) {
899901
return handle_acronym(text, word, output, flags);
900902
} else if (word.find(".") < word.length()) {
901903
bool part_has_accent = false;

0 commit comments

Comments
 (0)