@@ -9571,21 +9571,21 @@ struct llm_build_deepseek2 : public llm_graph_context {
                 ggml_row_size(q->type, n_embd_head_qk_nope));
         cb(q_pe, "q_pe", il);
 
-        ggml_tensor * kv_pe_cmprresseed = ggml_mul_mat(ctx0, model.layers[il].wkv_a_mqa, cur);
-        cb(kv_pe_cmprresseed, "kv_pe_cmprresseed", il);
+        ggml_tensor * kv_cmpr_pe = ggml_mul_mat(ctx0, model.layers[il].wkv_a_mqa, cur);
+        cb(kv_cmpr_pe, "kv_cmpr_pe", il);
 
         // split into {kv_lora_rank, n_tokens}
-        ggml_tensor * kv_cmpr = ggml_view_2d(ctx0, kv_pe_cmprresseed, kv_lora_rank, n_tokens,
-                kv_pe_cmprresseed->nb[1],
+        ggml_tensor * kv_cmpr = ggml_view_2d(ctx0, kv_cmpr_pe, kv_lora_rank, n_tokens,
+                kv_cmpr_pe->nb[1],
                 0);
         cb(kv_cmpr, "kv_cmpr", il);
 
         // and {n_embd_head_qk_rope, n_tokens}
-        ggml_tensor * k_pe = ggml_view_3d(ctx0, kv_pe_cmprresseed,
+        ggml_tensor * k_pe = ggml_view_3d(ctx0, kv_cmpr_pe,
                 n_embd_head_qk_rope, 1, n_tokens,
-                kv_pe_cmprresseed->nb[1],
-                kv_pe_cmprresseed->nb[1],
-                ggml_row_size(kv_pe_cmprresseed->type, kv_lora_rank));
+                kv_cmpr_pe->nb[1],
+                kv_cmpr_pe->nb[1],
+                ggml_row_size(kv_cmpr_pe->type, kv_lora_rank));
         cb(k_pe, "k_pe", il);
 
         // TODO: the CUDA backend used to not support non-cont. RoPE, investigate removing this
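For context on what these views do: `wkv_a_mqa` produces, per token, one row holding the compressed KV latent (`kv_lora_rank` values) followed by the shared RoPE key part (`n_embd_head_qk_rope` values), and the two views in the hunk slice that row apart in place, without copying. Below is a minimal standalone sketch of the same no-copy split using the public ggml API; the dimensions (512, 64, 4) are illustrative placeholders, not DeepSeek2's actual hyperparameters.

```c
#include <stdio.h>
#include "ggml.h"

int main(void) {
    // placeholder sizes, not the real DeepSeek2 hyperparameters
    const int64_t kv_lora_rank        = 512;
    const int64_t n_embd_head_qk_rope = 64;
    const int64_t n_tokens            = 4;

    struct ggml_init_params params = {
        /*.mem_size   =*/ 16*1024*1024,
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ false,
    };
    struct ggml_context * ctx = ggml_init(params);

    // one row per token: [ compressed KV latent | RoPE key part ]
    struct ggml_tensor * kv_cmpr_pe = ggml_new_tensor_2d(ctx, GGML_TYPE_F32,
            kv_lora_rank + n_embd_head_qk_rope, n_tokens);

    // first kv_lora_rank elements of each row -> {kv_lora_rank, n_tokens};
    // reusing the parent's row stride nb[1] keeps the view aligned per token
    struct ggml_tensor * kv_cmpr = ggml_view_2d(ctx, kv_cmpr_pe,
            kv_lora_rank, n_tokens,
            kv_cmpr_pe->nb[1],
            0);

    // remaining elements -> {n_embd_head_qk_rope, 1, n_tokens}; the byte
    // offset skips past the latent at the start of each row
    struct ggml_tensor * k_pe = ggml_view_3d(ctx, kv_cmpr_pe,
            n_embd_head_qk_rope, 1, n_tokens,
            kv_cmpr_pe->nb[1],
            kv_cmpr_pe->nb[1],
            ggml_row_size(kv_cmpr_pe->type, kv_lora_rank));

    printf("kv_cmpr: %lld x %lld\n",
            (long long) kv_cmpr->ne[0], (long long) kv_cmpr->ne[1]);
    printf("k_pe:    %lld x %lld x %lld\n",
            (long long) k_pe->ne[0], (long long) k_pe->ne[1], (long long) k_pe->ne[2]);

    ggml_free(ctx);
    return 0;
}
```

Note that `k_pe` is a strided (non-contiguous) view, since each token's RoPE slice sits `kv_lora_rank` elements into its row; that is the kind of non-contiguous RoPE input the TODO comment above refers to.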