
Commit 5d38297

glm4 unclamp for all except vulkan
1 parent 9fdec02 commit 5d38297

2 files changed: +8 −6 lines changed

gpttype_adapter.cpp

Lines changed: 7 additions & 5 deletions
@@ -1907,16 +1907,18 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
         printf("Warning: Only GGUF models can use max context above 16k. Max context lowered to 16k.\n");
         clamped_max_context_length = 16384;
     }
-    if (isGguf && file_format_meta.model_architecture == GGUFArch::ARCH_GLM4 && kcpp_data->n_batch > 16) {
+
+    #if defined(GGML_USE_VULKAN)
+    if (isGguf && file_format_meta.model_architecture == GGUFArch::ARCH_GLM4 && kcpp_data->n_ubatch > 16) {
         if(debugmode==1)
         {
-            printf("GLM-4 is broken on larger batch sizes. Clamp ignored in debug.\n");
+            printf("GLM-4 is broken on larger batch sizes in Vulkan. Clamp ignored in debug.\n");
         } else {
-            printf("GLM-4 is broken on larger batch sizes. Clamping batch size to 16.\n");
-            kcpp_data->n_batch = kcpp_data->n_ubatch = 16;
+            printf("GLM-4 is broken on larger batch sizes in Vulkan. Clamping ubatch size to 16.\n");
+            kcpp_data->n_ubatch = 16;
         }
-
     }
+    #endif
 
     kcpp_data->n_ctx = clamped_max_context_length;
     max_context_limit_at_load = clamped_max_context_length;
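The effect of this hunk is that the GLM-4 micro-batch clamp now applies only in Vulkan builds; on other backends the user's configured batch sizes are left alone. A minimal standalone sketch of that pattern is below, assuming a C++11 compiler and using simplified stand-in types (KcppParams, Arch, apply_glm4_clamp are illustrative names, not the actual koboldcpp structures); compile with -DGGML_USE_VULKAN to exercise the clamp path.

#include <cstdio>

// Simplified stand-ins for the real koboldcpp data (assumption for illustration only).
struct KcppParams {
    int n_batch  = 512;
    int n_ubatch = 512;
};

enum class Arch { GLM4, OTHER };

// Clamp the micro-batch size for GLM-4 only when building against Vulkan,
// mirroring the #if defined(GGML_USE_VULKAN) guard added in this commit.
static void apply_glm4_clamp(KcppParams &p, Arch arch, bool is_gguf, int debugmode) {
#if defined(GGML_USE_VULKAN)
    if (is_gguf && arch == Arch::GLM4 && p.n_ubatch > 16) {
        if (debugmode == 1) {
            printf("GLM-4 is broken on larger batch sizes in Vulkan. Clamp ignored in debug.\n");
        } else {
            printf("GLM-4 is broken on larger batch sizes in Vulkan. Clamping ubatch size to 16.\n");
            p.n_ubatch = 16;
        }
    }
#else
    // Non-Vulkan backends: no clamp, parameters are untouched.
    (void)p; (void)arch; (void)is_gguf; (void)debugmode;
#endif
}

int main() {
    KcppParams params;
    apply_glm4_clamp(params, Arch::GLM4, /*is_gguf=*/true, /*debugmode=*/0);
    printf("n_batch=%d n_ubatch=%d\n", params.n_batch, params.n_ubatch);
    return 0;
}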

klite.embd

Lines changed: 1 addition & 1 deletion
@@ -3440,7 +3440,7 @@ Current version indicated by LITEVER below.
     "name":"GLM-4",
     "user":"<|user|>\\n",
     "user_end":"",
-    "assistant":"<|assistant|>",
+    "assistant":"<|assistant|>\\n",
     "assistant_end":"",
     "system":"<|system|>\\n",
     "system_end":"",

0 commit comments
