@@ -9511,10 +9511,11 @@ struct llm_build_deepseek2 : public llm_graph_context {
95119511 const float kq_scale = 1.0f*mscale*mscale/sqrtf(float(hparams.n_embd_head_k));
95129512 const float attn_factor_scaled = 1.0f / (1.0f + 0.1f * logf(1.0f / freq_scale));
95139513
9514+ const uint32_t n_embd_head_k = hparams.n_embd_head_k;
9515+ const uint32_t n_embd_head_v = hparams.n_embd_head_v;
95149516 const uint32_t n_embd_head_qk_rope = hparams.n_rot;
95159517 const uint32_t n_embd_head_qk_nope = hparams.n_embd_head_k - hparams.n_rot;
95169518 const uint32_t kv_lora_rank = hparams.n_lora_kv;
9517- const uint32_t n_embd_head_v = hparams.n_embd_head_v;
95189519
95199520 ggml_tensor * cur;
95209521 ggml_tensor * inpL;
@@ -9558,16 +9559,16 @@ struct llm_build_deepseek2 : public llm_graph_context {
95589559 // split into {n_head * n_embd_head_qk_nope, n_tokens}
95599560 ggml_tensor * q_nope = ggml_view_3d(ctx0, q,
95609561 n_embd_head_qk_nope, n_head, n_tokens,
9561- ggml_row_size(q->type, hparams. n_embd_head_k),
9562- ggml_row_size(q->type, hparams. n_embd_head_k * n_head),
9562+ ggml_row_size(q->type, n_embd_head_k),
9563+ ggml_row_size(q->type, n_embd_head_k * n_head),
95639564 0);
95649565 cb(q_nope, "q_nope", il);
95659566
95669567 // and {n_head * n_embd_head_qk_rope, n_tokens}
95679568 ggml_tensor * q_pe = ggml_view_3d(ctx0, q,
95689569 n_embd_head_qk_rope, n_head, n_tokens,
9569- ggml_row_size(q->type, hparams. n_embd_head_k),
9570- ggml_row_size(q->type, hparams. n_embd_head_k * n_head),
9570+ ggml_row_size(q->type, n_embd_head_k),
9571+ ggml_row_size(q->type, n_embd_head_k * n_head),
95719572 ggml_row_size(q->type, n_embd_head_qk_nope));
95729573 cb(q_pe, "q_pe", il);
95739574
0 commit comments