1 file changed: +0 additions, −3 deletions

Diff hunk (original file line numbers shown): @@ -14613,23 +14613,20 @@ static int llama_decode_internal(
1461314613 const struct llama_hparams & hparams = model.hparams;
1461414614 const int64_t n_layer = hparams.n_layer;
1461514615 const int64_t kv_head = kv_self.head;
14616- std::vector<void *> kv_cache_ptrs;
1461714616 std::vector<void *> k_cache_ptrs;
1461814617 std::vector<void *> v_cache_ptrs;
1461914618 for (int il = 0; il < n_layer; ++il) {
1462014619 const int64_t n_embd_k_gqa = hparams.n_embd_k_gqa();
1462114620 const int64_t n_embd_v_gqa = hparams.n_embd_v_gqa();
1462214621 ggml_tensor * tmp_tensor = kv_self.k_l[il];
1462314622 size_t tmp_offset = (ggml_row_size(kv_self.k_l[il]->type, n_embd_k_gqa))*kv_head;
14624- kv_cache_ptrs.push_back(static_cast<char*>(tmp_tensor->data) + tmp_offset);
1462514623 k_cache_ptrs.push_back(static_cast<char*>(tmp_tensor->data) + tmp_offset);
1462614624 tmp_tensor = kv_self.v_l[il];
1462714625 if (cparams.flash_attn) {
1462814626 tmp_offset = (kv_head)*ggml_row_size(kv_self.v_l[il]->type, n_embd_v_gqa);
1462914627 } else {
1463014628 tmp_offset = (kv_head)*ggml_element_size(kv_self.v_l[il]);
1463114629 }
14632- kv_cache_ptrs.push_back(static_cast<char*>(tmp_tensor->data) + tmp_offset);
1463314630 v_cache_ptrs.push_back(static_cast<char*>(tmp_tensor->data) + tmp_offset);
1463414631 }
1463514632
You can’t perform that action at this time.
0 commit comments