@@ -2439,7 +2439,7 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
     if (file_format == FileFormat::GGUF_GENERIC && file_format_meta.model_architecture == GGUFArch::ARCH_GLM4) {
         std::string temp = gpttype_get_chat_template();
         if (temp.find("[gMASK]<sop>") != std::string::npos) {
-            printf("GLM-4 special BOS handling used.\n");
+            printf("GLM-4 will have no automatic BOS token.\n");
             add_bos_token = false;
         }
     }
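For context, the behaviour this hunk selects can be sketched in isolation: when the GGUF chat template already bakes in the "[gMASK]<sop>" prefix, the loader stops adding an automatic BOS token so the special prefix is not doubled up. The helper name below is hypothetical and not part of the patch:

    #include <string>

    // Hypothetical standalone sketch of the check above: suppress the automatic
    // BOS token whenever the chat template already injects GLM-4's own prefix.
    static bool should_add_bos_token(const std::string &chat_template)
    {
        return chat_template.find("[gMASK]<sop>") == std::string::npos;
    }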
@@ -3262,30 +3262,6 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
         }
     }
 
-    // need to add a cursed hack to get coherency for GLM4, by ensuring injection for both sop and gmask
-    // if (file_format == FileFormat::GGUF_GENERIC && file_format_meta.model_architecture == GGUFArch::ARCH_GLM4) {
-    //     std::string temp = gpttype_get_chat_template();
-    //     if (temp.find("[gMASK]<sop>") != std::string::npos) {
-    //         if (addedmemory == "") {
-    //             if (kcpp_data->prompt.rfind("[gMASK]", 0) == 0) { //check startswith
-    //                 kcpp_data->prompt.erase(0, 7);
-    //             }
-    //             if (kcpp_data->prompt.rfind("<sop>", 0) == 0) { //check startswith
-    //                 kcpp_data->prompt.erase(0, 5);
-    //             }
-    //             addedmemory = "[gMASK]<sop>";
-    //         } else {
-    //             if (addedmemory.rfind("[gMASK]", 0) == 0) { //check startswith
-    //                 addedmemory.erase(0, 7);
-    //             }
-    //             if (addedmemory.rfind("<sop>", 0) == 0) { //check startswith
-    //                 addedmemory.erase(0, 5);
-    //             }
-    //             addedmemory = "[gMASK]<sop>" + addedmemory;
-    //         }
-    //     }
-    // }
-
     bool stream_sse = inputs.stream_sse;
     bool allow_regular_prints = (!is_quiet && debugmode!=-1);
 
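The deleted hack leaned on the usual std::string "startswith" idiom: rfind(prefix, 0) == 0 can only match at position 0, and erase(0, n) then strips the prefix so "[gMASK]<sop>" could be re-injected exactly once at the front of the context. A minimal standalone sketch of that pattern (strip_prefix is a hypothetical helper, not code from this repository):

    #include <string>

    // Removes the prefix and returns true when s starts with it;
    // otherwise leaves s untouched and returns false.
    static bool strip_prefix(std::string &s, const std::string &prefix)
    {
        if (s.rfind(prefix, 0) == 0) { // rfind anchored at 0 == "startswith"
            s.erase(0, prefix.size());
            return true;
        }
        return false;
    }

    // e.g. strip_prefix(prompt, "[gMASK]"); strip_prefix(prompt, "<sop>");
    // then prepend "[gMASK]<sop>" once to the memory/context string.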