@@ -1104,6 +1104,33 @@ ggml_tensor * llama_kv_cache::cpy_v(ggml_context * ctx, ggml_tensor * v_cur, ggm
     return ggml_set_rows(ctx, v_view, v_cur, v_idxs);
 }
 
+ggml_tensor * llama_kv_cache::get_k_lora(ggml_context * ctx, ggml_tensor * k_cur, int32_t il, uint32_t n_kv, const slot_info & sinfo) const {
+    if (sinfo.s0 == 0) {
+        return k_cur;
+    }
+
+    slot_info past_sinfo = sinfo;
+    past_sinfo.s0 = 0;
+    past_sinfo.s1 = sinfo.s0 - 1;
+
+    ggml_tensor * k_past = get_k(ctx, il, n_kv, past_sinfo);
+
+    return ggml_concat(ctx, k_past, k_cur, 2);
+}
+
+ggml_tensor * llama_kv_cache::get_v_lora(ggml_context * ctx, ggml_tensor * v_cur, int32_t il, uint32_t n_kv, const slot_info & sinfo) const {
+    if (sinfo.s0 == 0) {
+        return v_cur;
+    }
+
+    slot_info past_sinfo = sinfo;
+    past_sinfo.s0 = 0;
+    past_sinfo.s1 = sinfo.s0 - 1;
+    ggml_tensor * v_past = get_v(ctx, il, n_kv, past_sinfo);
+
+    return ggml_concat(ctx, v_past, v_cur, 2);
+}
+
 ggml_tensor * llama_kv_cache::build_input_k_idxs(ggml_context * ctx, const llama_ubatch & ubatch) const {
     const uint32_t n_tokens = ubatch.n_tokens;
 
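Both helpers return the current-token tensor unchanged when sinfo.s0 == 0, since there are no earlier cache cells to prepend; otherwise they view cells [0, sinfo.s0 - 1] through a copied slot_info and concatenate past and current along dim 2, the cell axis, so the result spans the full context. For reference, a sketch of the declarations these definitions imply in llama-kv-cache.h (inferred from the signatures above, not part of this hunk):

    // full-context K/V for LoRA-style paths: the current tensor alone when
    // there is no cached history, otherwise past cache + current tokens
    // concatenated along the cell axis (dim 2)
    ggml_tensor * get_k_lora(ggml_context * ctx, ggml_tensor * k_cur, int32_t il, uint32_t n_kv, const slot_info & sinfo) const;
    ggml_tensor * get_v_lora(ggml_context * ctx, ggml_tensor * v_cur, int32_t il, uint32_t n_kv, const slot_info & sinfo) const;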
@@ -1978,6 +2005,14 @@ ggml_tensor * llama_kv_cache_context::cpy_v(ggml_context * ctx, ggml_tensor * v_
     return kv->cpy_v(ctx, v_cur, v_idxs, il, sinfos[i_cur]);
 }
 
+ggml_tensor * llama_kv_cache_context::get_k_lora(ggml_context * ctx, ggml_tensor * k_cur, int32_t il) const {
+    return kv->get_k_lora(ctx, k_cur, il, n_kv, sinfos[i_cur]);
+}
+
+ggml_tensor * llama_kv_cache_context::get_v_lora(ggml_context * ctx, ggml_tensor * v_cur, int32_t il) const {
+    return kv->get_v_lora(ctx, v_cur, il, n_kv, sinfos[i_cur]);
+}
+
 ggml_tensor * llama_kv_cache_context::build_input_k_idxs(ggml_context * ctx, const llama_ubatch & ubatch) const {
     return kv->build_input_k_idxs(ctx, ubatch);
 }
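The context-level wrappers supply the cache's n_kv and the current ubatch's slot info, so graph-building code only passes the per-layer tensors. A hypothetical call site during graph build (the names ctx0, k_cur, v_cur, il, and mctx are illustrative assumptions, not part of this commit):

    // obtain full-context K/V (cached history + current tokens) for a LoRA
    // branch; at the start of a sequence this is just k_cur/v_cur
    ggml_tensor * k_full = mctx->get_k_lora(ctx0, k_cur, il);
    ggml_tensor * v_full = mctx->get_v_lora(ctx0, v_cur, il);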