Skip to content

Commit c2802af

Browse files
committed
fix qwen3, fixed sd, fixed glm4
1 parent 4d8a7a6 commit c2802af

File tree

7 files changed

+99
-24
lines changed

7 files changed

+99
-24
lines changed

gpttype_adapter.cpp

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1915,6 +1915,7 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
19151915
kcpp_data->n_ctx = clamped_max_context_length;
19161916
max_context_limit_at_load = clamped_max_context_length;
19171917
add_bos_token = !inputs.no_bos_token;
1918+
19181919
if(!add_bos_token)
19191920
{
19201921
printf("\n======\nBOS token prefix was disabled! Your output may be degraded unless model was designed for it!\n======\n");
@@ -2368,6 +2369,14 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
23682369
}
23692370
}
23702371

2372+
//we cannot really trust the add bos in vocab. old models don't set it.
2373+
// instead, we EXPLICITLY need to find the add_bos_token key==false to automatically set it off.
2374+
if(!llamamodel->vocab.get_add_bos() && add_bos_token && file_format_meta.explicitly_no_bos)
2375+
{
2376+
printf("\nThis architecture has explicitly disabled the BOS token - if you need it, you must add it manually.\n");
2377+
add_bos_token = false;
2378+
}
2379+
23712380
//warmup at least 33 tokens to trigger batch
23722381
std::vector<int> tmp;
23732382
for (int i = 1; i <= 33; ++i) {
@@ -3180,6 +3189,30 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
31803189
}
31813190
}
31823191

3192+
//need to add a cursed hack to get coherency for GLM4, by ensuring injection for both sop and gmask
3193+
if (file_format == FileFormat::GGUF_GENERIC && file_format_meta.model_architecture == GGUFArch::ARCH_GLM4) {
3194+
std::string temp = gpttype_get_chat_template();
3195+
if (temp.find("[gMASK]<sop>") != std::string::npos) {
3196+
if (addedmemory == "") {
3197+
if (kcpp_data->prompt.rfind("[gMASK]", 0) == 0) { //check startswith
3198+
kcpp_data->prompt.erase(0, 7);
3199+
}
3200+
if (kcpp_data->prompt.rfind("<sop>", 0) == 0) { //check startswith
3201+
kcpp_data->prompt.erase(0, 5);
3202+
}
3203+
addedmemory = "<sop>";
3204+
} else {
3205+
if (addedmemory.rfind("[gMASK]", 0) == 0) { //check startswith
3206+
addedmemory.erase(0, 7);
3207+
}
3208+
if (addedmemory.rfind("<sop>", 0) == 0) { //check startswith
3209+
addedmemory.erase(0, 5);
3210+
}
3211+
addedmemory = "<sop>" + addedmemory;
3212+
}
3213+
}
3214+
}
3215+
31833216
bool stream_sse = inputs.stream_sse;
31843217
bool allow_regular_prints = (!is_quiet && debugmode!=-1);
31853218

kcpp_adapters/ChatML-NoThink.json

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
{
2+
"system_start": "<|im_start|>system\n",
3+
"system_end": "<|im_end|>\n",
4+
"user_start": "<|im_start|>user\n",
5+
"user_end": "<|im_end|>\n",
6+
"assistant_start": "<|im_start|>assistant\n",
7+
"assistant_end": "<|im_end|>\n<think>\n\n</think>\n"
8+
}

klite.embd

Lines changed: 43 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ Current version indicated by LITEVER below.
1212
-->
1313

1414
<script>
15-
const LITEVER = 233;
15+
const LITEVER = 234;
1616
const urlParams = new URLSearchParams(window.location.search);
1717
var localflag = urlParams.get('local'); //this will be replaced automatically in embedded kcpp
1818
const STORAGE_PREFIX = (localflag?"e_":"")+"kaihordewebui_";
@@ -3171,6 +3171,7 @@ Current version indicated by LITEVER below.
31713171
instruct_systag_end: "",
31723172
instruct_sysprompt: "",
31733173
instruct_has_markdown: true,
3174+
instruct_has_latex: true,
31743175
placeholder_tags: true,
31753176
render_special_tags: false,
31763177
request_logprobs: false,
@@ -3330,6 +3331,16 @@ Current version indicated by LITEVER below.
33303331
},
33313332
{
33323333
"id":3,
3334+
"name":"ChatML (No Think)",
3335+
"user":"<|im_start|>user\\n",
3336+
"user_end":"<|im_end|>\\n",
3337+
"assistant":"<|im_start|>assistant\\n<think>\\n\\n</think>\\n",
3338+
"assistant_end":"<|im_end|>\\n",
3339+
"system":"<|im_start|>system\\n",
3340+
"system_end":"<|im_end|>\\n",
3341+
},
3342+
{
3343+
"id":4,
33333344
"name":"CommandR",
33343345
"user":"<|START_OF_TURN_TOKEN|><|USER_TOKEN|>",
33353346
"user_end":"<|END_OF_TURN_TOKEN|>",
@@ -3339,7 +3350,7 @@ Current version indicated by LITEVER below.
33393350
"system_end":"<|END_OF_TURN_TOKEN|>",
33403351
},
33413352
{
3342-
"id":4,
3353+
"id":5,
33433354
"name":"Gemma 2 & 3",
33443355
"user":"<start_of_turn>user\\n",
33453356
"user_end":"<end_of_turn>\\n",
@@ -3349,7 +3360,7 @@ Current version indicated by LITEVER below.
33493360
"system_end":"<end_of_turn>\\n",
33503361
},
33513362
{
3352-
"id":5,
3363+
"id":6,
33533364
"name":"Llama 2 Chat",
33543365
"user":"[INST] ",
33553366
"user_end":"",
@@ -3359,7 +3370,7 @@ Current version indicated by LITEVER below.
33593370
"system_end":"",
33603371
},
33613372
{
3362-
"id":6,
3373+
"id":7,
33633374
"name":"Llama 3 Chat",
33643375
"user":"<|start_header_id|>user<|end_header_id|>\\n\\n",
33653376
"user_end":"<|eot_id|>",
@@ -3369,7 +3380,7 @@ Current version indicated by LITEVER below.
33693380
"system_end":"<|eot_id|>",
33703381
},
33713382
{
3372-
"id":7,
3383+
"id":8,
33733384
"name":"Llama 4 Chat",
33743385
"user":"<|header_start|>user<|header_end|>\\n\\n",
33753386
"user_end":"<|eot|>",
@@ -3379,7 +3390,7 @@ Current version indicated by LITEVER below.
33793390
"system_end":"<|eot|>",
33803391
},
33813392
{
3382-
"id":8,
3393+
"id":9,
33833394
"name":"Metharme",
33843395
"user":"<|user|>",
33853396
"user_end":"",
@@ -3389,7 +3400,7 @@ Current version indicated by LITEVER below.
33893400
"system_end":"",
33903401
},
33913402
{
3392-
"id":9,
3403+
"id":10,
33933404
"name":"Mistral V1",
33943405
"user":" [INST] ",
33953406
"user_end":"",
@@ -3399,7 +3410,7 @@ Current version indicated by LITEVER below.
33993410
"system_end":"",
34003411
},
34013412
{
3402-
"id":10,
3413+
"id":11,
34033414
"name":"Mistral V2 & V3",
34043415
"user":"[INST] ",
34053416
"user_end":"",
@@ -3409,7 +3420,7 @@ Current version indicated by LITEVER below.
34093420
"system_end":"",
34103421
},
34113422
{
3412-
"id":11,
3423+
"id":12,
34133424
"name":"Mistral V7 & V3-Tekken",
34143425
"user":"[INST]",
34153426
"user_end":"",
@@ -3419,7 +3430,7 @@ Current version indicated by LITEVER below.
34193430
"system_end":"[/SYSTEM_PROMPT]",
34203431
},
34213432
{
3422-
"id":12,
3433+
"id":13,
34233434
"name":"Phi-3 Mini",
34243435
"user":"<|user|>\\n",
34253436
"user_end":"<|end|>\\n",
@@ -3429,7 +3440,7 @@ Current version indicated by LITEVER below.
34293440
"system_end":"<|end|>\\n",
34303441
},
34313442
{
3432-
"id":13,
3443+
"id":14,
34333444
"name":"Vicuna",
34343445
"user":"\\nUSER: ",
34353446
"user_end":"",
@@ -3439,7 +3450,7 @@ Current version indicated by LITEVER below.
34393450
"system_end":"",
34403451
},
34413452
{
3442-
"id":14,
3453+
"id":15,
34433454
"name":"Deepseek V2.5",
34443455
"user":"<|User|>",
34453456
"user_end":"<|end▁of▁sentence|>",
@@ -3449,7 +3460,7 @@ Current version indicated by LITEVER below.
34493460
"system_end":"",
34503461
},
34513462
{
3452-
"id":15,
3463+
"id":16,
34533464
"name":"GLM-4",
34543465
"user":"<|user|>\\n",
34553466
"user_end":"",
@@ -5378,7 +5389,7 @@ Current version indicated by LITEVER below.
53785389
navigator.clipboard.writeText(innercode);
53795390
}
53805391

5381-
function simpleMarkdown(text) {
5392+
function simpleMarkdown(text, renderLatex) {
53825393
const escapeHTML = (str) => str.replace(/</g, "&lt;").replace(/>/g, "&gt;");
53835394
const highlightCode = (code) => {
53845395
let cpybtn = `<button class="unselectable" onclick="return copyMarkdownCode(this)" style="float:right;">Copy</button>`;
@@ -5501,7 +5512,10 @@ Current version indicated by LITEVER below.
55015512
.replace(/ \n/g, "\n<br/>");
55025513
md = replaceTabbedCodeblocks(md);
55035514
md = md.replace(/<\/code\><\/pre\>\n<pre\><code\>/g, "\n");
5504-
md = replaceLatex(md);
5515+
if(renderLatex)
5516+
{
5517+
md = replaceLatex(md);
5518+
}
55055519
md = md.replace(/<\/ul>\n/gm, "</ul>").replace(/<\/ol>\n/gm, "</ol>");
55065520
md = md.replace(/\\([`_~\*\+\-\.\^\\\<\>\(\)\[\]])/gm, "$1");
55075521
return md;
@@ -11149,6 +11163,7 @@ Current version indicated by LITEVER below.
1114911163
document.getElementById("adventure_context_mod").checked = localsettings.adventure_context_mod;
1115011164
document.getElementById("chat_context_mod").checked = localsettings.chat_context_mod;
1115111165
document.getElementById("instruct_has_markdown").checked = localsettings.instruct_has_markdown;
11166+
document.getElementById("instruct_has_latex").checked = localsettings.instruct_has_latex;
1115211167
document.getElementById("placeholder_tags").checked = localsettings.placeholder_tags;
1115311168
document.getElementById("run_in_background").checked = run_in_background;
1115411169
document.getElementById("auto_ctxlen").checked = localsettings.auto_ctxlen;
@@ -11601,6 +11616,7 @@ Current version indicated by LITEVER below.
1160111616
localsettings.adventure_context_mod = (document.getElementById("adventure_context_mod").checked ? true : false);
1160211617
localsettings.chat_context_mod = (document.getElementById("chat_context_mod").checked ? true : false);
1160311618
localsettings.instruct_has_markdown = (document.getElementById("instruct_has_markdown").checked ? true : false);
11619+
localsettings.instruct_has_latex = (document.getElementById("instruct_has_latex").checked ? true : false);
1160411620
localsettings.placeholder_tags = (document.getElementById("placeholder_tags").checked ? true : false);
1160511621
run_in_background = (document.getElementById("run_in_background").checked ? true : false);
1160611622
background_audio_loop(run_in_background);
@@ -17833,7 +17849,7 @@ Current version indicated by LITEVER below.
1783317849
{
1783417850
fulltxt += "```"; //force end code block
1783517851
}
17836-
fulltxt = simpleMarkdown(fulltxt);
17852+
fulltxt = simpleMarkdown(fulltxt,localsettings.instruct_has_latex);
1783717853
}
1783817854

1783917855
let instruct_turns = repack_instruct_turns(fulltxt, `%SpcStg%`,`%SpcEtg%`, true);
@@ -18598,7 +18614,10 @@ Current version indicated by LITEVER below.
1859818614
{
1859918615
processed_msg += "```"; //force end code block
1860018616
}
18601-
processed_msg = simpleMarkdown(processed_msg);
18617+
if(localsettings.instruct_has_markdown)
18618+
{
18619+
processed_msg = simpleMarkdown(processed_msg,localsettings.instruct_has_latex);
18620+
}
1860218621

1860318622
//convert the msg into images
1860418623
processed_msg = processed_msg.replace(/\[<\|p\|.+?\|p\|>\]/g, function (m) {
@@ -20357,7 +20376,7 @@ Current version indicated by LITEVER below.
2035720376
replacedText = replacedText.replace(/&quot;(.*?)&quot;/g, wrapperSpan(styleRole, 'speech')); // Apply the speech style to "speech".
2035820377
if(localsettings.instruct_has_markdown)
2035920378
{
20360-
replacedText = simpleMarkdown(replacedText);
20379+
replacedText = simpleMarkdown(replacedText,localsettings.instruct_has_latex);
2036120380
}
2036220381
return `<span>${replacedText}</span>`;
2036320382
});
@@ -21270,9 +21289,14 @@ Current version indicated by LITEVER below.
2127021289
<div class="settingitem">
2127121290
<div class="settinglabel">
2127221291
<div class="justifyleft settingsmall">Enable Markdown <span class="helpicon">?<span
21273-
class="helptext">Allows the UI to use markdown formatting such as quotes, LaTeX, and code blocks.</span></span></div>
21292+
class="helptext">Allows the UI to use markdown formatting such as quotes and code blocks.</span></span></div>
2127421293
<input type="checkbox" title="Enabled Markdown" id="instruct_has_markdown" style="margin:0px 0px 0px auto;">
2127521294
</div>
21295+
<div class="settinglabel">
21296+
<div class="justifyleft settingsmall">Enable LaTeX <span class="helpicon">?<span
21297+
class="helptext">Allows the UI to render LaTeX within markdown formatting (Needs Markdown).</span></span></div>
21298+
<input type="checkbox" title="Enable LaTeX (Needs Markdown)" id="instruct_has_latex" style="margin:0px 0px 0px auto;">
21299+
</div>
2127621300
<div class="settinglabel">
2127721301
<div class="justifyleft settingsmall">Trim Sentences <span class="helpicon">?<span
2127821302
class="helptext">Trims incomplete sentences in AI output.</span></span></div>

koboldcpp.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -860,7 +860,7 @@ def read_data(datatype):
860860
if dt_translated=="arr":
861861
print(f"{dt_translated}: {curr_key} = [{len(curr_val)}]")
862862
elif dt_translated=="str":
863-
print(f"{dt_translated}: {curr_key} = {curr_val[:100]}")
863+
print(f"{dt_translated}: {curr_key} = {curr_val[:256]}")
864864
else:
865865
print(f"{dt_translated}: {curr_key} = {curr_val}")
866866
print("\n*** GGUF TENSOR INFO ***")

model_adapter.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -291,6 +291,15 @@ void print_tok_vec(std::vector<float> &embd)
291291
if (keyidx != -1) {
292292
freq_base_train = gguf_get_val_f32(ctx, keyidx);
293293
}
294+
fkey = "tokenizer.ggml.add_bos_token";
295+
keyidx = gguf_find_key(ctx, fkey.c_str());
296+
if (keyidx != -1) {
297+
bool result = gguf_get_val_bool(ctx, keyidx);
298+
if(result==false)
299+
{
300+
fileformatmeta->explicitly_no_bos = true;
301+
}
302+
}
294303

295304
int filever = gguf_get_version(ctx);
296305

model_adapter.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@ struct FileFormatExtraMeta
7171
GGUFArch model_architecture = GGUFArch::ARCH_DEFAULT;
7272
int n_expert_count = 0;
7373
std::string model_architecture_str = "";
74+
bool explicitly_no_bos = false; //only true if key exists AND is false
7475
};
7576

7677
struct TopPicksData

otherarch/sdcpp/stable-diffusion.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1639,15 +1639,15 @@ sd_image_t* txt2img(sd_ctx_t* sd_ctx,
16391639
}
16401640

16411641
struct ggml_init_params params;
1642-
params.mem_size = static_cast<size_t>(10 * 1024 * 1024); // 10 MB
1642+
params.mem_size = static_cast<size_t>(20 * 1024 * 1024); // 20 MB increased by kcpp
16431643
if (sd_version_is_sd3(sd_ctx->sd->version)) {
1644-
params.mem_size *= 3;
1644+
params.mem_size *= 2; //readjust by kcpp as above changed
16451645
}
16461646
if (sd_version_is_flux(sd_ctx->sd->version)) {
1647-
params.mem_size *= 4;
1647+
params.mem_size *= 2; //readjust by kcpp as above changed
16481648
}
16491649
if (sd_ctx->sd->stacked_id) {
1650-
params.mem_size += static_cast<size_t>(10 * 1024 * 1024); // 10 MB
1650+
params.mem_size += static_cast<size_t>(15 * 1024 * 1024); // 15 MB
16511651
}
16521652
params.mem_size += width * height * 3 * sizeof(float);
16531653
params.mem_size *= batch_count;

0 commit comments

Comments
 (0)