@@ -10125,24 +10125,18 @@ struct llm_build_deepseek2 : public llm_graph_context {
                 ggml_row_size(kv_cmpr_pe->type, kv_lora_rank));
         cb(k_pe, "k_pe", il);
 
-        // TODO: the CUDA backend used to not support non-cont. RoPE, investigate removing this
-        q_pe = ggml_cont(ctx0, q_pe);
         q_pe = ggml_rope_ext(ctx0, q_pe, inp_pos, nullptr,
                 n_rot, rope_type, n_ctx_orig, freq_base, freq_scale,
                 ext_factor, attn_factor_scaled, beta_fast, beta_slow
         );
         cb(q_pe, "q_pe", il);
 
-        // TODO: the CUDA backend used to not support non-cont. RoPE, investigate removing this
-        k_pe = ggml_cont(ctx0, k_pe);
         k_pe = ggml_rope_ext(ctx0, k_pe, inp_pos, nullptr,
                 n_rot, rope_type, n_ctx_orig, freq_base, freq_scale,
                 ext_factor, attn_factor_scaled, beta_fast, beta_slow
         );
         cb(k_pe, "k_pe", il);
 
-        // TODO: the CUDA backend used to not support non-cont. (RMS) norm, investigate removing ggml_cont
-        kv_cmpr = ggml_cont(ctx0, kv_cmpr);
         kv_cmpr = build_norm(kv_cmpr,
                 model.layers[il].attn_kv_a_norm, nullptr,
                 LLM_NORM_RMS, il);
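
For context (not part of the diff): ggml_cont copies a tensor into a contiguous memory layout, so each removed call added an extra copy node to the compute graph before the RoPE / RMS-norm ops. The removed TODO comments say the calls only existed because the CUDA backend used to reject non-contiguous inputs for these ops; dropping them implies that limitation no longer applies. Below is a minimal standalone sketch of the contiguity behavior, assuming the standard ggml C API (ggml_view_2d, ggml_cont, ggml_is_contiguous); the 8x4 tensor and sizes are illustrative only.

    /* Minimal sketch, assuming the standard ggml C API: a strided view is
     * non-contiguous; ggml_cont materializes a contiguous copy, i.e. the
     * kind of extra graph node this diff removes. */
    #include "ggml.h"
    #include <stdio.h>

    int main(void) {
        struct ggml_init_params params = {
            /*.mem_size   =*/ 16*1024*1024,
            /*.mem_buffer =*/ NULL,
            /*.no_alloc   =*/ false,
        };
        struct ggml_context * ctx = ggml_init(params);

        // 8x4 source tensor
        struct ggml_tensor * a = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 8, 4);

        // view of the first 4 elements of each row: it keeps the row stride
        // of `a` (a->nb[1]), so its data is not contiguous in memory
        struct ggml_tensor * v = ggml_view_2d(ctx, a, 4, 4, a->nb[1], 0);

        // ggml_cont adds a node that copies the view into a contiguous tensor
        struct ggml_tensor * c = ggml_cont(ctx, v);

        printf("view contiguous: %d\n", ggml_is_contiguous(v)); // 0
        printf("cont contiguous: %d\n", ggml_is_contiguous(c)); // 1

        ggml_free(ctx);
        return 0;
    }

When a backend can operate on strided tensors directly, skipping ggml_cont saves both the copy's memory traffic and a node in the graph, which is the point of this change.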