Commit 8b6dfbd

disabling the gMask prefix for glm-4 completions
1 parent 4930594 commit 8b6dfbd

3 files changed: +3 additions, -26 deletions

gpttype_adapter.cpp

Lines changed: 1 addition & 25 deletions
@@ -2439,7 +2439,7 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
     if (file_format == FileFormat::GGUF_GENERIC && file_format_meta.model_architecture == GGUFArch::ARCH_GLM4) {
         std::string temp = gpttype_get_chat_template();
         if (temp.find("[gMASK]<sop>") != std::string::npos) {
-            printf("GLM-4 special BOS handling used.\n");
+            printf("GLM-4 will have no automatic BOS token.\n");
             add_bos_token = false;
         }
     }
@@ -3262,30 +3262,6 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
         }
     }

-    //need to add a cursed hack to get coherency for GLM4, by ensuring injection for both sop and gmask
-    // if (file_format == FileFormat::GGUF_GENERIC && file_format_meta.model_architecture == GGUFArch::ARCH_GLM4) {
-    //     std::string temp = gpttype_get_chat_template();
-    //     if (temp.find("[gMASK]<sop>") != std::string::npos) {
-    //         if (addedmemory == "") {
-    //             if (kcpp_data->prompt.rfind("[gMASK]", 0) == 0) { //check startswith
-    //                 kcpp_data->prompt.erase(0, 7);
-    //             }
-    //             if (kcpp_data->prompt.rfind("<sop>", 0) == 0) { //check startswith
-    //                 kcpp_data->prompt.erase(0, 5);
-    //             }
-    //             addedmemory = "[gMASK]<sop>";
-    //         } else {
-    //             if (addedmemory.rfind("[gMASK]", 0) == 0) { //check startswith
-    //                 addedmemory.erase(0, 7);
-    //             }
-    //             if (addedmemory.rfind("<sop>", 0) == 0) { //check startswith
-    //                 addedmemory.erase(0, 5);
-    //             }
-    //             addedmemory = "[gMASK]<sop>" + addedmemory;
-    //         }
-    //     }
-    // }
-
     bool stream_sse = inputs.stream_sse;
     bool allow_regular_prints = (!is_quiet && debugmode!=-1);
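
For context, the deleted block hand-rolled the GLM-4 prefix injection: strip any leading [gMASK] or <sop> from the prompt (or from addedmemory, if one was set), then re-prepend "[gMASK]<sop>" exactly once. An illustrative Python equivalent of that logic follows; the inject_glm4_prefix helper is hypothetical and merely mirrors the C++ fields kcpp_data->prompt and addedmemory:

# Illustrative Python equivalent of the removed C++ hack (not code from this repo).
def inject_glm4_prefix(prompt: str, addedmemory: str) -> tuple:
    def strip_prefix(s: str) -> str:
        if s.startswith("[gMASK]"):
            s = s[len("[gMASK]"):]   # matches erase(0, 7)
        if s.startswith("<sop>"):
            s = s[len("<sop>"):]     # matches erase(0, 5)
        return s

    if addedmemory == "":
        prompt = strip_prefix(prompt)
        addedmemory = "[gMASK]<sop>"
    else:
        addedmemory = "[gMASK]<sop>" + strip_prefix(addedmemory)
    return prompt, addedmemory

That responsibility now moves to the adapter's declarative chat_start field, added below.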

kcpp_adapters/AutoGuess.json

Lines changed: 1 addition & 0 deletions
@@ -115,6 +115,7 @@
         "search": ["[gMASK]<sop>"],
         "name": "GLM-4",
         "adapter": {
+            "chat_start": "[gMASK]<sop>",
             "system_start": "<|system|>\n",
             "system_end": "",
             "user_start": "<|user|>\n",

koboldcpp.py

Lines changed: 1 addition & 1 deletion
@@ -2079,7 +2079,7 @@ def transform_genparams(genparams, api_format):
     if api_format==4 or api_format==7: #handle ollama chat here too
         # translate openai chat completion messages format into one big string.
         messages_array = genparams.get('messages', [])
-        messages_string = "" #chat start no longer needed, handled internally
+        messages_string = adapter_obj.get("chat_start", "")
         system_message_start = adapter_obj.get("system_start", "\n### Instruction:\n")
         system_message_end = adapter_obj.get("system_end", "")
         user_message_start = adapter_obj.get("user_start", "\n### Instruction:\n")
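
With chat_start restored as the seed of messages_string, every flattened OpenAI-style chat now begins with the adapter prefix. A condensed, illustrative sketch of that flow; flatten_messages is hypothetical (not the actual transform_genparams body), and the assistant_start/assistant_end keys and their defaults are assumed by analogy with the fields shown above:

# Condensed sketch of the message flattening around the changed line.
def flatten_messages(messages_array: list, adapter_obj: dict) -> str:
    # seed with the adapter's chat prefix, e.g. "[gMASK]<sop>" for GLM-4
    messages_string = adapter_obj.get("chat_start", "")
    starts = {
        "system": adapter_obj.get("system_start", "\n### Instruction:\n"),
        "user": adapter_obj.get("user_start", "\n### Instruction:\n"),
        "assistant": adapter_obj.get("assistant_start", "\n### Response:\n"),
    }
    ends = {
        "system": adapter_obj.get("system_end", ""),
        "user": adapter_obj.get("user_end", ""),
        "assistant": adapter_obj.get("assistant_end", ""),
    }
    for message in messages_array:
        role = message.get("role", "user")
        messages_string += starts.get(role, "") + str(message.get("content", "")) + ends.get(role, "")
    return messages_string + starts["assistant"]  # cue the model's reply

# Example: with the GLM-4 adapter above, a single user turn "Hi" flattens to
#   "[gMASK]<sop><|user|>\nHi<|assistant|>\n" (assuming assistant_start is "<|assistant|>\n").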
