@@ -2439,7 +2439,7 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
     if (file_format == FileFormat::GGUF_GENERIC && file_format_meta.model_architecture == GGUFArch::ARCH_GLM4) {
         std::string temp = gpttype_get_chat_template();
         if (temp.find("[gMASK]<sop>") != std::string::npos) {
-            printf("GLM-4 special BOS handling used.\n");
+            printf("GLM-4 will have no automatic BOS token.\n");
             add_bos_token = false;
         }
     }
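For context, the behaviour this hunk selects can be sketched in isolation: when the GGUF chat template already bakes in the "[gMASK]<sop>" prefix, the loader stops adding an automatic BOS token so the special prefix is not doubled up. The helper name below is hypothetical and not part of the patch:

    #include <string>

    // Hypothetical standalone sketch of the check above: suppress the automatic
    // BOS token whenever the chat template already injects GLM-4's own prefix.
    static bool should_add_bos_token(const std::string &chat_template)
    {
        return chat_template.find("[gMASK]<sop>") == std::string::npos;
    }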
@@ -3262,30 +3262,6 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
         }
     }
 
-    // need to add a cursed hack to get coherency for GLM4, by ensuring injection for both sop and gmask
-    // if (file_format == FileFormat::GGUF_GENERIC && file_format_meta.model_architecture == GGUFArch::ARCH_GLM4) {
-    //     std::string temp = gpttype_get_chat_template();
-    //     if (temp.find("[gMASK]<sop>") != std::string::npos) {
-    //         if (addedmemory == "") {
-    //             if (kcpp_data->prompt.rfind("[gMASK]", 0) == 0) { //check startswith
-    //                 kcpp_data->prompt.erase(0, 7);
-    //             }
-    //             if (kcpp_data->prompt.rfind("<sop>", 0) == 0) { //check startswith
-    //                 kcpp_data->prompt.erase(0, 5);
-    //             }
-    //             addedmemory = "[gMASK]<sop>";
-    //         } else {
-    //             if (addedmemory.rfind("[gMASK]", 0) == 0) { //check startswith
-    //                 addedmemory.erase(0, 7);
-    //             }
-    //             if (addedmemory.rfind("<sop>", 0) == 0) { //check startswith
-    //                 addedmemory.erase(0, 5);
-    //             }
-    //             addedmemory = "[gMASK]<sop>" + addedmemory;
-    //         }
-    //     }
-    // }
-
     bool stream_sse = inputs.stream_sse;
     bool allow_regular_prints = (!is_quiet && debugmode!=-1);
 
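The deleted hack leaned on the usual std::string "startswith" idiom: rfind(prefix, 0) == 0 can only match at position 0, and erase(0, n) then strips the prefix so "[gMASK]<sop>" could be re-injected exactly once at the front of the context. A minimal standalone sketch of that pattern (strip_prefix is a hypothetical helper, not code from this repository):

    #include <string>

    // Removes the prefix and returns true when s starts with it;
    // otherwise leaves s untouched and returns false.
    static bool strip_prefix(std::string &s, const std::string &prefix)
    {
        if (s.rfind(prefix, 0) == 0) { // rfind anchored at 0 == "startswith"
            s.erase(0, prefix.size());
            return true;
        }
        return false;
    }

    // e.g. strip_prefix(prompt, "[gMASK]"); strip_prefix(prompt, "<sop>");
    // then prepend "[gMASK]<sop>" once to the memory/context string.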