@@ -4642,7 +4642,7 @@ struct llm_build_context {
46424642 0 );
46434643 cb (v_states, " v_states" , il);
46444644
4645- q_pe = ggml_cont (ctx0, q_pe); // TODO: the CUDA backend does not support non-contiguous RoPE
4645+ q_pe = ggml_cont (ctx0, q_pe); // TODO: the CUDA backend used to not support non-cont. RoPE, investigate removing this
46464646 q_pe = ggml_rope_ext (
46474647 ctx0, q_pe, inp_pos, rope_factors,
46484648 n_rot, rope_type, n_ctx_orig, freq_base, freq_scale,
@@ -4651,7 +4651,7 @@ struct llm_build_context {
46514651 cb (q_pe, " q_pe" , il);
46524652
46534653 // shared RoPE key
4654- k_pe = ggml_cont (ctx0, k_pe); // TODO: the CUDA backend does not support non-contiguous RoPE
4654+ k_pe = ggml_cont (ctx0, k_pe); // TODO: the CUDA backend used to not support non-cont. RoPE, investigate removing this
46554655 k_pe = ggml_rope_ext (
46564656 ctx0, k_pe, inp_pos, rope_factors,
46574657 n_rot, rope_type, n_ctx_orig, freq_base, freq_scale,
@@ -6496,7 +6496,7 @@ struct llm_build_context {
64966496 0 );
64976497 cb (v_states, " v_states" , il);
64986498
6499- q_pe = ggml_cont (ctx0, q_pe); // TODO: the CUDA backend does not support non-contiguous RoPE
6499+ q_pe = ggml_cont (ctx0, q_pe); // TODO: the CUDA backend used to not support non-cont. RoPE, investigate removing this
65006500 q_pe = ggml_rope_ext (
65016501 ctx0, q_pe, inp_pos, nullptr ,
65026502 n_rot, rope_type, n_ctx_orig, freq_base, freq_scale,
@@ -6505,7 +6505,7 @@ struct llm_build_context {
65056505 cb (q_pe, " q_pe" , il);
65066506
65076507 // shared RoPE key
6508- k_pe = ggml_cont (ctx0, k_pe); // TODO: the CUDA backend does not support non-contiguous RoPE
6508+ k_pe = ggml_cont (ctx0, k_pe); // TODO: the CUDA backend used to not support non-cont. RoPE, investigate removing this
65096509 k_pe = ggml_rope_ext (
65106510 ctx0, k_pe, inp_pos, nullptr ,
65116511 n_rot, rope_type, n_ctx_orig, freq_base, freq_scale,
0 commit comments