@@ -69,8 +69,8 @@ llama_kv_cache_recurrent::llama_kv_cache_recurrent(
6969 continue ;
7070 }
7171
72- const uint32_t n_embd_k_gqa = hparams.n_embd_k_gqa (i) + hparams.n_embd_k_s (i );
73- const uint32_t n_embd_v_gqa = hparams.n_embd_v_gqa (i) + hparams.n_embd_v_s (i );
72+ const uint32_t n_embd_k_gqa = hparams.n_embd_k_gqa (i) + hparams.n_embd_k_s ();
73+ const uint32_t n_embd_v_gqa = hparams.n_embd_v_gqa (i) + hparams.n_embd_v_s ();
7474
7575 const char * dev_name = " CPU" ;
7676
@@ -754,7 +754,7 @@ void llama_kv_cache_recurrent::state_write_data(llama_io_write_i & io, const std
754754 // Iterate and write all the keys first, each row is a cell
755755 // Get whole range at a time
756756 for (uint32_t il = 0 ; il < n_layer; ++il) {
757- const uint32_t n_embd_k_gqa = hparams.n_embd_k_gqa (il) + hparams.n_embd_k_s (il );
757+ const uint32_t n_embd_k_gqa = hparams.n_embd_k_gqa (il) + hparams.n_embd_k_s ();
758758
759759 // Write key type
760760 const int32_t k_type_i = (int32_t )k_l[il]->type ;
@@ -774,7 +774,7 @@ void llama_kv_cache_recurrent::state_write_data(llama_io_write_i & io, const std
774774
775775 if (!v_trans) {
776776 for (uint32_t il = 0 ; il < n_layer; ++il) {
777- const uint32_t n_embd_v_gqa = hparams.n_embd_v_gqa (il) + hparams.n_embd_v_s (il );
777+ const uint32_t n_embd_v_gqa = hparams.n_embd_v_gqa (il) + hparams.n_embd_v_s ();
778778
779779 // Write value type
780780 const int32_t v_type_i = (int32_t )v_l[il]->type ;
@@ -795,7 +795,7 @@ void llama_kv_cache_recurrent::state_write_data(llama_io_write_i & io, const std
795795 // When v is transposed, we also need the element size and get the element ranges from each row
796796 const uint32_t kv_size = size;
797797 for (uint32_t il = 0 ; il < n_layer; ++il) {
798- const uint32_t n_embd_v_gqa = hparams.n_embd_v_gqa (il) + hparams.n_embd_v_s (il );
798+ const uint32_t n_embd_v_gqa = hparams.n_embd_v_gqa (il) + hparams.n_embd_v_s ();
799799
800800 // Write value type
801801 const int32_t v_type_i = (int32_t )v_l[il]->type ;
@@ -942,7 +942,7 @@ bool llama_kv_cache_recurrent::state_read_data(llama_io_read_i & io, uint32_t ce
942942
943943 // For each layer, read the keys for each cell, one row is one cell, read as one contiguous block
944944 for (uint32_t il = 0 ; il < n_layer; ++il) {
945- const uint32_t n_embd_k_gqa = hparams.n_embd_k_gqa (il) + hparams.n_embd_k_s (il );
945+ const uint32_t n_embd_k_gqa = hparams.n_embd_k_gqa (il) + hparams.n_embd_k_s ();
946946
947947 // Read type of key
948948 int32_t k_type_i_ref;
@@ -970,7 +970,7 @@ bool llama_kv_cache_recurrent::state_read_data(llama_io_read_i & io, uint32_t ce
970970
971971 if (!v_trans) {
972972 for (uint32_t il = 0 ; il < n_layer; ++il) {
973- const uint32_t n_embd_v_gqa = hparams.n_embd_v_gqa (il) + hparams.n_embd_v_s (il );
973+ const uint32_t n_embd_v_gqa = hparams.n_embd_v_gqa (il) + hparams.n_embd_v_s ();
974974
975975 // Read type of value
976976 int32_t v_type_i_ref;
@@ -998,7 +998,7 @@ bool llama_kv_cache_recurrent::state_read_data(llama_io_read_i & io, uint32_t ce
998998 } else {
999999 // For each layer, read the values for each cell (transposed)
10001000 for (uint32_t il = 0 ; il < n_layer; ++il) {
1001- const uint32_t n_embd_v_gqa = hparams.n_embd_v_gqa (il) + hparams.n_embd_v_s (il );
1001+ const uint32_t n_embd_v_gqa = hparams.n_embd_v_gqa (il) + hparams.n_embd_v_s ();
10021002
10031003 // Read type of value
10041004 int32_t v_type_i_ref;
0 commit comments