@@ -3256,6 +3256,37 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
32563256 }
32573257 }
32583258
3259+ // need to add a cursed hack to improve coherency for GLM4, by ensuring injection for gmask, sop and an extra space
3260+ // any complaints please direct them to henky
3261+ if (file_format == FileFormat::GGUF_GENERIC && file_format_meta.model_architecture == GGUFArch::ARCH_GLM4) {
3262+ std::string temp = gpttype_get_chat_template ();
3263+ if (temp.find (" [gMASK]<sop>" ) != std::string::npos) {
3264+ if (addedmemory == " " ) {
3265+ if (!kcpp_data->prompt .empty () && kcpp_data->prompt .rfind (" [gMASK]" , 0 ) == 0 ) { // check startswith
3266+ kcpp_data->prompt .erase (0 , 7 );
3267+ }
3268+ if (!kcpp_data->prompt .empty () && kcpp_data->prompt .rfind (" <sop>" , 0 ) == 0 ) { // check startswith
3269+ kcpp_data->prompt .erase (0 , 5 );
3270+ }
3271+ if (!kcpp_data->prompt .empty () && kcpp_data->prompt [0 ] == ' ' ) { // check for leading space
3272+ kcpp_data->prompt .erase (0 , 1 );
3273+ }
3274+ addedmemory = " [gMASK]<sop> " ;
3275+ } else {
3276+ if (!addedmemory.empty () && addedmemory.rfind (" [gMASK]" , 0 ) == 0 ) { // check startswith
3277+ addedmemory.erase (0 , 7 );
3278+ }
3279+ if (!addedmemory.empty () && addedmemory.rfind (" <sop>" , 0 ) == 0 ) { // check startswith
3280+ addedmemory.erase (0 , 5 );
3281+ }
3282+ if (!addedmemory.empty () && addedmemory[0 ] == ' ' ) { // check for leading space
3283+ addedmemory.erase (0 , 1 );
3284+ }
3285+ addedmemory = " [gMASK]<sop> " + addedmemory;
3286+ }
3287+ }
3288+ }
3289+
32593290 bool stream_sse = inputs.stream_sse ;
32603291 bool allow_regular_prints = (!is_quiet && debugmode!=-1 );
32613292
0 commit comments