Commit 43a154d

Authored by ikawrakow (Iwan Kawrakow)
Handle incompatible DeepSeek GGUFs (#394)
Co-authored-by: Iwan Kawrakow <[email protected]>
1 parent 967a2e1 commit 43a154d
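
What the change does, in brief: the loader now recognizes DeepSeek GGUFs that were converted without the per-layer attn_kv_b tensor and with MQA-style hyperparameters (a single KV head, n_embd_head_k = 576, n_embd_head_v = 512, n_rot = 64). For such files it rewrites the head parameters, loads the split attn_k_b / attn_v_b tensors instead, and forces mla_attn = 1 so the model still runs, at the cost of prompt processing speed.

Below is a hypothetical standalone checker, not part of this commit, for telling the two layouts apart before loading. It assumes the conventional GGUF names ("deepseek2.block_count", "blk.%u.attn_kv_b.weight") and the gguf C API shipped with ggml; adjust both if your tree differs.

// Sketch: report whether a DeepSeek GGUF carries the attn_kv_b tensors that
// ik_llama.cpp prefers, or only the split attn_k_b / attn_v_b layout handled by
// the fallback path in this commit. Names and API usage are assumptions.
#include <cstdio>
#include <cstdint>
#include "ggml.h"   // the gguf_* API lives here (or in gguf.h in newer ggml trees)

int main(int argc, char ** argv) {
    if (argc < 2) { fprintf(stderr, "usage: %s model.gguf\n", argv[0]); return 1; }

    gguf_init_params params = { /*.no_alloc =*/ true, /*.ctx =*/ nullptr };
    gguf_context * gctx = gguf_init_from_file(argv[1], params);
    if (!gctx) { fprintf(stderr, "failed to open %s\n", argv[1]); return 1; }

    const int key = gguf_find_key(gctx, "deepseek2.block_count");   // assumed key name
    const uint32_t n_layer = key >= 0 ? gguf_get_val_u32(gctx, key) : 0;

    uint32_t missing = 0;
    for (uint32_t i = 0; i < n_layer; ++i) {
        char name[128];
        snprintf(name, sizeof(name), "blk.%u.attn_kv_b.weight", i);   // assumed tensor name
        if (gguf_find_tensor(gctx, name) < 0) ++missing;
    }
    printf("%u layers, %u without attn_kv_b => %s\n", n_layer, missing,
        missing == 0 ? "native ik_llama.cpp layout" : "mainline-style layout (loader falls back to mla_attn = 1)");

    gguf_free(gctx);
    return 0;
}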

File tree

1 file changed: +57 -6 lines

src/llama.cpp

Lines changed: 57 additions & 6 deletions
@@ -3468,6 +3468,30 @@ static bool llama_kv_cache_init(
         cache.ctxs.push_back(ctx);
     }
 
+    if (model.arch == LLM_ARCH_DEEPSEEK2) {
+        bool have_wkv_b = true;
+        for (auto& l : model.layers) {
+            if (!l.wkv_b) {
+                have_wkv_b = false;
+                break;
+            }
+        }
+        if (!have_wkv_b) {
+            if (cparams.mla_attn != 1) {
+                LLAMA_LOG_WARN("=========================================================\n");
+                LLAMA_LOG_WARN("%s: missing wkv_b tensor(s)\n", __func__);
+                LLAMA_LOG_WARN("%s: changing MLA from %d to 1\n", __func__, cparams.mla_attn);
+                if (cparams.mla_attn > 1) {
+                    LLAMA_LOG_WARN("%s: ** Prompt processing performance will be crippled **\n", __func__);
+                }
+                LLAMA_LOG_WARN("=========================================================\n");
+                // Sorry for the hack.
+                auto& non_cparams = const_cast<llama_cparams&>(cparams);
+                non_cparams.mla_attn = 1;
+            }
+        }
+    }
+
     if (model.arch == LLM_ARCH_DEEPSEEK2 && cparams.mla_attn) {
         // DeepSeek MLA
         cache.kv_l.reserve(n_layer);
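
Editorial note on the hunk above: cparams is const in llama_kv_cache_init, hence the const_cast that the code comment itself apologizes for; the alternative would be to downgrade mla_attn earlier, where the context parameters are still mutable. The presence check is simply "every layer has wkv_b"; a minimal equivalent with <algorithm> is sketched below. It assumes the llama_model / llama_layer types from this file and is illustrative only, not what the commit ships.

// Same check as the loop above, expressed with std::all_of. Illustrative only.
#include <algorithm>

static bool deepseek_has_wkv_b(const llama_model & model) {
    return std::all_of(model.layers.begin(), model.layers.end(),
                       [](const llama_layer & l) { return l.wkv_b != nullptr; });
}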
@@ -3497,7 +3521,7 @@ static bool llama_kv_cache_init(
             const uint32_t n_embd_head_qk_rope = hparams.n_rot;
             const uint32_t n_embd_head_qk_nope = hparams.n_embd_head_k - hparams.n_rot;
             const uint32_t kv_lora_rank = hparams.n_lora_kv;
-            LLAMA_LOG_INFO("%s: layer %d: n_embd_head_qk_rope = %d, kv_lora_rank = %d\n", __func__, i, n_embd_head_qk_rope, kv_lora_rank);
+            //LLAMA_LOG_INFO("%s: layer %d: n_embd_head_qk_rope = %d, kv_lora_rank = %d\n", __func__, i, n_embd_head_qk_rope, kv_lora_rank);
             if (cparams.flash_attn) {
                 ggml_tensor * kv = ggml_new_tensor_2d(ctx, cache.type_k, kv_lora_rank + n_embd_head_qk_rope, kv_size);
                 ggml_format_name(kv, "cache_kv_l%d", i);
@@ -5847,6 +5871,25 @@ static void llm_load_hparams(
             } break;
         case LLM_ARCH_DEEPSEEK2:
             {
+                if (hparams.n_head_kv() == 1) {
+                    printf("==========================================================================\n");
+                    printf("Detected incompatible DeepSeek model.\n");
+                    printf("Will try to fix, but there are no guarantees\n\n");
+                    printf("*** Your prompt processing speed will be crippled ***\n\n");
+                    printf("Consider making your own ik_llama.cpp compatible model or\n");
+                    printf("ask the model provider to make one for you.\n");
+                    int n_head_kv = hparams.n_gqa();
+                    if (n_head_kv%16 != 0 || hparams.n_embd_head_k != 576 || hparams.n_embd_head_v != 512 ||
+                        hparams.n_rot != 64) {
+                        printf("Sorry, unknown model => cannot fix it => bailing out\n");
+                        GGML_ABORT("Fatal error");
+                    }
+                    for (auto& item : hparams.n_head_kv_arr) item = n_head_kv;
+                    hparams.n_embd_head_k = 192;
+                    hparams.n_embd_head_v = 128;
+                    printf("==========================================================================\n");
+                    //GGML_ABORT("Fatal error");
+                }
                 bool is_lite = (hparams.n_layer == 27);
                 ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps);
                 ml.get_key(LLM_KV_LEADING_DENSE_BLOCK_COUNT, hparams.n_layer_dense_lead);
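
Editorial note on the numbers in the hunk above: 576, 512, and 64 match DeepSeek-V3/R1 style MLA dimensions, where the compressed KV "head" is kv_lora_rank (512) plus the RoPE part (n_rot = 64), while a real attention head is 128 (no-RoPE part) + 64 (RoPE part) = 192 for K and 128 for V. The fix-up therefore restores n_embd_head_k = 192 and n_embd_head_v = 128 and sets every entry of n_head_kv_arr to n_gqa(). A small worked-numbers sketch follows, assuming those published DeepSeek dimensions (they are not stated in this diff).

// Arithmetic behind the 576/512 -> 192/128 fix-up. The constants are the usual
// DeepSeek-V3/R1 values (kv_lora_rank = 512, rope dim = 64, nope dim = 128,
// value head dim = 128), assumed here rather than read from the model.
#include <cassert>

int main() {
    const int kv_lora_rank        = 512;
    const int n_embd_head_qk_rope = 64;    // hparams.n_rot
    const int n_embd_head_qk_nope = 128;
    const int n_embd_head_v       = 128;

    // What an incompatible GGUF reports: one latent KV "head".
    assert(kv_lora_rank + n_embd_head_qk_rope == 576);   // reported n_embd_head_k
    assert(kv_lora_rank                       == 512);   // reported n_embd_head_v

    // What the loader restores per real attention head.
    assert(n_embd_head_qk_nope + n_embd_head_qk_rope == 192);   // fixed n_embd_head_k
    assert(n_embd_head_v                             == 128);   // fixed n_embd_head_v
    return 0;
}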
@@ -5859,7 +5902,7 @@ static void llm_load_hparams(
             ml.get_key(LLM_KV_EXPERT_WEIGHTS_SCALE, hparams.expert_weights_scale);
             ml.get_key(LLM_KV_EXPERT_WEIGHTS_NORM, hparams.expert_weights_norm, false);
             ml.get_key(LLM_KV_EXPERT_GATING_FUNC, hparams.expert_gating_func, false);
-            if (hparams.expert_gating_func == 0) {
+            if (hparams.expert_gating_func == 0) {
                 // for compatibility with existing DeepSeek V2 and V2.5 GGUFs
                 // that have no expert_gating_func model parameter set
                 hparams.expert_gating_func = LLM_EXPERT_GATING_FUNC_SOFTMAX;
@@ -8419,10 +8462,18 @@ static bool llm_load_tensors(
                     layer.wq = create_tensor(ctx_split, tn(LLM_TENSOR_ATTN_Q, "weight", i), {n_embd, n_embd_k_gqa});
                 }
 
-                layer.wkv_a_mqa = create_tensor(ctx_split, tn(LLM_TENSOR_ATTN_KV_A_MQA, "weight", i), {n_embd, kv_lora_rank + (n_embd_head_qk_rope)});
-                layer.wkv_b = create_tensor(ctx_split, tn(LLM_TENSOR_ATTN_KV_B, "weight", i), {kv_lora_rank, n_head * (n_embd_head_qk_nope + n_embd_head_v)});
-                layer.wk_b = create_tensor(ctx_split, tn(LLM_TENSOR_ATTN_K_B, "weight", i), {n_embd_head_qk_nope, n_head * kv_lora_rank}, 1);
-                layer.wv_b = create_tensor(ctx_split, tn(LLM_TENSOR_ATTN_V_B, "weight", i), {kv_lora_rank, n_head * n_embd_head_v}, 1);
+                layer.wkv_a_mqa = create_tensor(ctx_split, tn(LLM_TENSOR_ATTN_KV_A_MQA, "weight", i),{n_embd, kv_lora_rank + (n_embd_head_qk_rope)});
+                layer.wkv_b = create_tensor(ctx_split, tn(LLM_TENSOR_ATTN_KV_B, "weight", i),
+                        {kv_lora_rank, n_head * (n_embd_head_qk_nope + n_embd_head_v)}, llama_model_loader::TENSOR_NOT_REQUIRED);
+                if (!layer.wkv_b) {
+                    // Incompatible mainline model. Let's see if we can still load it
+                    layer.wk_b = create_tensor(ctx_split, tn(LLM_TENSOR_ATTN_K_B, "weight", i), {n_embd_head_qk_nope, kv_lora_rank, n_head}, 0);
+                    layer.wv_b = create_tensor(ctx_split, tn(LLM_TENSOR_ATTN_V_B, "weight", i), {kv_lora_rank, n_embd_head_v, n_head}, 0);
+
+                } else {
+                    layer.wk_b = create_tensor(ctx_split, tn(LLM_TENSOR_ATTN_K_B, "weight", i), {n_embd_head_qk_nope, n_head * kv_lora_rank}, 1);
+                    layer.wv_b = create_tensor(ctx_split, tn(LLM_TENSOR_ATTN_V_B, "weight", i), {kv_lora_rank, n_head * n_embd_head_v}, 1);
+                }
                 layer.wo = create_tensor(ctx_split, tn(LLM_TENSOR_ATTN_OUT, "weight", i), { n_head * ( n_embd_head_v), n_embd});
 
                 layer.ffn_norm = create_tensor(ctx_layer, tn(LLM_TENSOR_FFN_NORM, "weight", i), {n_embd});
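
Editorial note on the tensor shapes above: when attn_kv_b is present (the native ik_llama.cpp layout), attn_k_b and attn_v_b are declared as 2D tensors, {n_embd_head_qk_nope, n_head * kv_lora_rank} and {kv_lora_rank, n_head * n_embd_head_v}; when it is missing, the loader accepts them as 3D per-head tensors instead, {n_embd_head_qk_nope, kv_lora_rank, n_head} and {kv_lora_rank, n_embd_head_v, n_head}. The element counts are identical, only the declared layout differs. The sketch below shows only the ggml mechanics of viewing a contiguous 3D tensor as 2D; whether a plain reshape reproduces the exact row ordering the 2D path expects depends on how the converter wrote the data, so do not read it as the loader's actual fix-up.

// ggml mechanics: a contiguous 3D tensor and a 2D view with the same element count.
// Toy head count to keep the example small; other dimensions as in the diff.
#include "ggml.h"

int main() {
    const int64_t n_embd_head_qk_nope = 128, kv_lora_rank = 512, n_head = 16;

    ggml_init_params ip = { /*.mem_size =*/ 512u*1024*1024, /*.mem_buffer =*/ nullptr, /*.no_alloc =*/ false };
    ggml_context * ctx = ggml_init(ip);

    // Mainline-style declaration: one {nope x lora} slice per head.
    ggml_tensor * wk_b_3d = ggml_new_tensor_3d(ctx, GGML_TYPE_F32, n_embd_head_qk_nope, kv_lora_rank, n_head);

    // Same data viewed with the 2D extents the native GGUFs declare.
    ggml_tensor * wk_b_2d = ggml_reshape_2d(ctx, wk_b_3d, n_embd_head_qk_nope, n_head * kv_lora_rank);

    GGML_ASSERT(ggml_nelements(wk_b_2d) == ggml_nelements(wk_b_3d));

    ggml_free(ctx);
    return 0;
}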
