Skip to content

Commit 9fb0611

Browse files
committed
handle contractions correctly, bump defaults
1 parent 2abe110 commit 9fb0611

File tree

4 files changed

+74
-15
lines changed

4 files changed

+74
-15
lines changed

klite.embd

Lines changed: 61 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ Current version indicated by LITEVER below.
1212
-->
1313

1414
<script id="init-config">
15-
const LITEVER = 273;
15+
const LITEVER = 274;
1616
const urlParams = new URLSearchParams(window.location.search);
1717
var localflag = urlParams.get('local'); //this will be replaced automatically in embedded kcpp
1818
const STORAGE_PREFIX = (localflag?"e_":"")+"kaihordewebui_";
@@ -3348,6 +3348,7 @@ Current version indicated by LITEVER below.
33483348
notify_on: false,
33493349
narrate_both_sides: false,
33503350
narrate_only_dialog: false,
3351+
embed_narrations: false,
33513352
voice_end_delay: 300,
33523353
voice_suppress_nonspeech: false,
33533354
voice_langcode: "auto",
@@ -3431,7 +3432,7 @@ Current version indicated by LITEVER below.
34313432
second_ep_url:"",
34323433

34333434
max_context_length: (localflag?4096:3072),
3434-
max_length: (localflag?512:256),
3435+
max_length: (localflag?640:400),
34353436
last_maxctx: 0,
34363437
auto_ctxlen: true,
34373438
auto_genamt: true,
@@ -12941,6 +12942,7 @@ Current version indicated by LITEVER below.
1294112942
document.getElementById("no_escape_html").checked = no_escape_html;
1294212943
document.getElementById("narrate_both_sides").checked = localsettings.narrate_both_sides;
1294312944
document.getElementById("narrate_only_dialog").checked = localsettings.narrate_only_dialog;
12945+
document.getElementById("embed_narrations").checked = localsettings.embed_narrations;
1294412946
document.getElementById("tts_speed").value = localsettings.tts_speed;
1294512947
document.getElementById("voice_end_delay").value = localsettings.voice_end_delay;
1294612948
document.getElementById("voice_suppress_nonspeech").checked = localsettings.voice_suppress_nonspeech;
@@ -13473,6 +13475,7 @@ Current version indicated by LITEVER below.
1347313475
no_escape_html = (document.getElementById("no_escape_html").checked?true:false);
1347413476
localsettings.narrate_both_sides = (document.getElementById("narrate_both_sides").checked?true:false);
1347513477
localsettings.narrate_only_dialog = (document.getElementById("narrate_only_dialog").checked?true:false);
13478+
localsettings.embed_narrations = (document.getElementById("embed_narrations").checked?true:false);
1347613479
localsettings.tts_speed = document.getElementById("tts_speed").value;
1347713480
localsettings.voice_end_delay = document.getElementById("voice_end_delay").value;
1347813481
localsettings.voice_suppress_nonspeech = (document.getElementById("voice_suppress_nonspeech").checked?true:false);
@@ -15448,6 +15451,16 @@ Current version indicated by LITEVER below.
1544815451
a.download = "audio.wav";
1544915452
setTimeout(function(){a.click()},20);
1545015453
}
15454+
// Converts raw TTS audio data into a data URL and forwards it to self_upload_audio.
// arrayBufferData: the audio payload; it is wrapped unchanged as the only part of
// a Blob tagged with MIME type 'audio/wav'.
// Returns nothing; the hand-off happens asynchronously in the FileReader onload callback.
// NOTE(review): self_upload_audio is defined elsewhere — presumably it attaches the
// audio to the story (see the "Save Narrations" setting); confirm against its definition.
function tts_embed_audio(arrayBufferData)
15455+
{
15456+
var file = new Blob([arrayBufferData], { type: 'audio/wav' });
15457+
var reader = new FileReader();
15458+
// The handler is a regular function, so `this` is the FileReader when it fires.
reader.onload = function() {
15459+
// After readAsDataURL completes, result holds a "data:" URL string with the
// file contents base64-encoded.
var base64Audio = this.result;
15460+
self_upload_audio(base64Audio, "tts_audio");
15461+
};
15462+
// Starts the asynchronous read; onload above runs once it finishes.
reader.readAsDataURL(file);
15463+
}
1545115464

1545215465
function tts_speak(text, speech_synth_override=null, do_download=false)
1545315466
{
@@ -15486,6 +15499,12 @@ Current version indicated by LITEVER below.
1548615499
}
1548715500
}
1548815501

15502+
let do_embed_tts = false;
15503+
if(!do_download && speech_synth_override==null) //dont use it for tests
15504+
{
15505+
do_embed_tts = localsettings.embed_narrations;
15506+
}
15507+
1548915508
if(ssval==XTTS_ID || ssval==ALLTALK_ID || ssval==OAI_TTS_ID || ssval==KCPP_TTS_ID || ssval==POLLINATIONS_TTS_ID) //xtts api server
1549015509
{
1549115510
let is_xtts = (ssval==XTTS_ID);
@@ -15544,6 +15563,10 @@ Current version indicated by LITEVER below.
1554415563
{
1554515564
tts_download(audiofile_ref);
1554615565
}
15566+
if(do_embed_tts)
15567+
{
15568+
tts_embed_audio(audiofile_ref);
15569+
}
1554715570
const playSound = audioContext.createBufferSource();
1554815571
playSound.buffer = decodedData;
1554915572
playSound.connect(audioContext.destination);
@@ -15586,6 +15609,10 @@ Current version indicated by LITEVER below.
1558615609
{
1558715610
tts_download(audiofile_ref);
1558815611
}
15612+
if(do_embed_tts)
15613+
{
15614+
tts_embed_audio(audiofile_ref);
15615+
}
1558915616
const playSound = audioContext.createBufferSource();
1559015617
playSound.buffer = decodedData;
1559115618
playSound.connect(audioContext.destination);
@@ -15629,6 +15656,10 @@ Current version indicated by LITEVER below.
1562915656
{
1563015657
tts_download(audiofile_ref);
1563115658
}
15659+
if(do_embed_tts)
15660+
{
15661+
tts_embed_audio(audiofile_ref);
15662+
}
1563215663
const playSound = audioContext.createBufferSource();
1563315664
playSound.buffer = decodedData;
1563415665
playSound.connect(audioContext.destination);
@@ -15692,6 +15723,10 @@ Current version indicated by LITEVER below.
1569215723
{
1569315724
tts_download(audiofile_ref);
1569415725
}
15726+
if(do_embed_tts)
15727+
{
15728+
tts_embed_audio(audiofile_ref);
15729+
}
1569515730
playDecodedAllTalkData(decodedData);
1569615731
})
1569715732
.catch((error) => {
@@ -15743,6 +15778,10 @@ Current version indicated by LITEVER below.
1574315778
{
1574415779
tts_download(audiofile_ref);
1574515780
}
15781+
if(do_embed_tts)
15782+
{
15783+
tts_embed_audio(audiofile_ref);
15784+
}
1574615785
playDecodedAllTalkData(decodedData);
1574715786
})
1574815787
.catch((error) => {
@@ -15773,6 +15812,10 @@ Current version indicated by LITEVER below.
1577315812
{
1577415813
tts_download(audiofile_ref);
1577515814
}
15815+
if(do_embed_tts)
15816+
{
15817+
tts_embed_audio(audiofile_ref);
15818+
}
1577615819
playDecodedAllTalkData(decodedData);
1577715820
}).catch((error) => {
1577815821
console.log("AllTalk v1 Speak Error: " + error);
@@ -20297,13 +20340,20 @@ Current version indicated by LITEVER below.
2029720340

2029820341
if(elements && elements.length>0)
2029920342
{
20343+
//check if we are on an instruct boundary
20344+
let onboundary = false;
20345+
if(gametext_arr.length>0 && gametext_arr[gametext_arr.length-1].trim().endsWith("{{[OUTPUT]}}"))
20346+
{
20347+
onboundary = true;
20348+
}
2030020349
elements.forEach(function (element) {
2030120350
let temp_stream = synchro_pending_stream;
20302-
// let codeblockcount = (temp_stream.match(/```/g) || []).length;
20303-
// if(codeblockcount>0 && codeblockcount%2!=0)
20304-
// {
20305-
// temp_stream += "```"; //force end code block
20306-
// }
20351+
if (onboundary) {
20352+
let codeblockcount = (temp_stream.match(/```/g) || []).length;
20353+
if (codeblockcount > 0 && codeblockcount % 2 != 0) {
20354+
temp_stream += "```"; //force end code block
20355+
}
20356+
}
2030720357
let pend = escape_html(pending_context_preinjection) + format_streaming_text(escape_html(temp_stream));
2030820358
element.innerHTML = pend;
2030920359
});
@@ -25331,6 +25381,10 @@ Current version indicated by LITEVER below.
2533125381
<div class="justifyleft settingsmall" title="If unchecked, only speak AI replies, not other text.">Narrate Only Dialog </div>
2533225382
<input title="Narrate Only Dialog" type="checkbox" id="narrate_only_dialog" style="margin:0px 0px 0px auto;">
2533325383
</div>
25384+
<div class="settinglabel">
25385+
<div class="justifyleft settingsmall" title="Try to save and embed TTS narration files into story if possible. Does not work for internal browser TTS. Not recommended due to large size.">Save Narrations </div>
25386+
<input title="Save Narrations" type="checkbox" id="embed_narrations" style="margin:0px 0px 0px auto;">
25387+
</div>
2533425388
<div class="inlinelabel" style="font-size: 11px;">
2533525389
<div class="justifyleft">Browser TTS Speed: </div>
2533625390
<input title="Browser Narration Speed" type="text" inputmode="decimal" value="1" id="tts_speed" style="width:40px">

koboldcpp.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4524,7 +4524,7 @@ def hide_tooltip(event):
45244524
chatcompletionsadapter_var = ctk.StringVar(value="AutoGuess")
45254525
moeexperts_var = ctk.StringVar(value=str(-1))
45264526
moecpu_var = ctk.StringVar(value=str(0))
4527-
defaultgenamt_var = ctk.StringVar(value=str(512))
4527+
defaultgenamt_var = ctk.StringVar(value=str(640))
45284528
nobostoken_var = ctk.IntVar(value=0)
45294529
override_kv_var = ctk.StringVar(value="")
45304530
override_tensors_var = ctk.StringVar(value="")
@@ -5512,7 +5512,7 @@ def export_vars():
55125512
args.overridenativecontext = 0
55135513
args.moeexperts = int(moeexperts_var.get()) if moeexperts_var.get()!="" else -1
55145514
args.moecpu = int(moecpu_var.get()) if moecpu_var.get()!="" else 0
5515-
args.defaultgenamt = int(defaultgenamt_var.get()) if defaultgenamt_var.get()!="" else 512
5515+
args.defaultgenamt = int(defaultgenamt_var.get()) if defaultgenamt_var.get()!="" else 640
55165516
args.nobostoken = (nobostoken_var.get()==1)
55175517
args.enableguidance = (enableguidance_var.get()==1)
55185518
args.overridekv = None if override_kv_var.get() == "" else override_kv_var.get()
@@ -7573,7 +7573,7 @@ def range_checker(arg: str):
75737573
advparser.add_argument("--nomodel", help="Allows you to launch the GUI alone, without selecting any model.", action='store_true')
75747574
advparser.add_argument("--moeexperts", metavar=('[num of experts]'), help="How many experts to use for MoE models (default=follow gguf)", type=int, default=-1)
75757575
advparser.add_argument("--moecpu", metavar=('[layers affected]'), help="Keep the Mixture of Experts (MoE) weights of the first N layers in the CPU. If no value is provided, applies to all layers.", nargs='?', const=999, type=int, default=0)
7576-
advparser.add_argument("--defaultgenamt", help="How many tokens to generate by default, if not specified. Must be smaller than context size. Usually, your frontend GUI will override this.", type=check_range(int,64,8192), default=512)
7576+
advparser.add_argument("--defaultgenamt", help="How many tokens to generate by default, if not specified. Must be smaller than context size. Usually, your frontend GUI will override this.", type=check_range(int,64,8192), default=640)
75777577
advparser.add_argument("--nobostoken", help="Prevents BOS token from being added at the start of any prompt. Usually NOT recommended for most models.", action='store_true')
75787578
advparser.add_argument("--enableguidance", help="Enables the use of Classifier-Free-Guidance, which allows the use of negative prompts. Has performance and memory impact.", action='store_true')
75797579
advparser.add_argument("--maxrequestsize", metavar=('[size in MB]'), help="Specify a max request payload size. Any requests to the server larger than this size will be dropped. Do not change if unsure.", type=int, default=32)

otherarch/ttscpp/src/kokoro_model.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1425,8 +1425,9 @@ int kokoro_runner::generate(std::string prompt, struct tts_response * response,
14251425
prompt = replace_any(prompt, ",;:", "--");
14261426
prompt = replace_any(prompt, "\n", " ");
14271427
kokoro_str_replace_all(prompt," - "," -- ");
1428+
kokoro_str_replace_all(prompt,"he's ","he is ");
14281429
kokoro_str_replace_all(prompt,"'s ","s ");
1429-
kokoro_str_replace_all(prompt,"wasn't ","wasnt ");
1430+
kokoro_str_replace_all(prompt,"n't ","nt ");
14301431
std::string phonemized_prompt = phmzr->text_to_phonemes(prompt);
14311432
// printf("\nRESULT: %s\n",phonemized_prompt.c_str());
14321433

otherarch/ttscpp/src/phonemizer.cpp

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -851,11 +851,15 @@ bool phonemizer::process_word(corpus* text, std::string* output, std::string wor
851851
size_t unaccented_size_difference = 0;
852852

853853
std::string foundstr = found_word_to_ipa(word);
854-
if(foundstr!="")
854+
if(foundstr!="") //do not use if its part of a contracted word
855855
{
856-
output->append(foundstr);
857-
text->size_pop(word.size());
858-
return true;
856+
std::string detected = text->next(word.size()+1);
857+
if(detected.back()!='\'')
858+
{
859+
output->append(foundstr);
860+
text->size_pop(word.size());
861+
return true;
862+
}
859863
}
860864

861865
if (has_accent) {

0 commit comments

Comments
 (0)