@@ -25,9 +25,6 @@ llama_memory_recurrent::llama_memory_recurrent(
                  uint32_t   n_seq_max) : hparams(model.hparams), n_seq_max(n_seq_max) {
     const int32_t n_layer = hparams.n_layer;
 
-    LLAMA_LOG_INFO("%s: mem_size = %u, n_seq_max = %u, type_r = '%s', type_s = '%s', n_layer = %d\n",
-            __func__, mem_size, n_seq_max, ggml_type_name(type_r), ggml_type_name(type_s), n_layer);
-
     head = 0;
     size = mem_size;
     used = 0;
@@ -84,7 +81,7 @@ llama_memory_recurrent::llama_memory_recurrent(
 
         ggml_context * ctx = ctx_for_buft(buft);
         if (!ctx) {
-            throw std::runtime_error("failed to create ggml context for kv cache");
+            throw std::runtime_error("failed to create ggml context for rs cache");
         }
 
         ggml_tensor * r = ggml_new_tensor_1d(ctx, type_r, hparams.n_embd_r()*mem_size);
@@ -102,19 +99,19 @@ llama_memory_recurrent::llama_memory_recurrent(
 
         ggml_backend_buffer_t buf = ggml_backend_alloc_ctx_tensors_from_buft(ctx, buft);
         if (!buf) {
-            throw std::runtime_error("failed to allocate buffer for kv cache");
+            throw std::runtime_error("failed to allocate buffer for rs cache");
         }
         ggml_backend_buffer_clear(buf, 0);
-        LLAMA_LOG_INFO("%s: %10s KV buffer size = %8.2f MiB\n", __func__, ggml_backend_buffer_name(buf), ggml_backend_buffer_get_size(buf)/1024.0/1024.0);
+        LLAMA_LOG_INFO("%s: %10s RS buffer size = %8.2f MiB\n", __func__, ggml_backend_buffer_name(buf), ggml_backend_buffer_get_size(buf)/1024.0/1024.0);
         bufs.emplace_back(buf);
     }
 
     {
         const size_t memory_size_r = size_r_bytes();
         const size_t memory_size_s = size_s_bytes();
 
-        LLAMA_LOG_INFO("%s: KV self size = %7.2f MiB, R (%s): %7.2f MiB, S (%s): %7.2f MiB\n", __func__,
-                (float)(memory_size_r + memory_size_s) / (1024.0f * 1024.0f),
+        LLAMA_LOG_INFO("%s: size = %7.2f MiB (%6u cells, %3d layers, %2u seqs), R (%s): %7.2f MiB, S (%s): %7.2f MiB\n", __func__,
+                (float)(memory_size_r + memory_size_s) / (1024.0f * 1024.0f), mem_size, n_layer, n_seq_max,
                 ggml_type_name(type_r), (float)memory_size_r / (1024.0f * 1024.0f),
                 ggml_type_name(type_s), (float)memory_size_s / (1024.0f * 1024.0f));
     }
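
For reference, a minimal standalone sketch of what the new consolidated log line would print. This is an illustration only: the mem_size, n_layer, n_seq_max, type names, and MiB values below are hypothetical, as is the hard-coded function name; in the diff above the real values come from the model's hparams and the allocated buffers.

    // Minimal sketch (hypothetical values) of the consolidated log line
    // introduced in this diff; compiles standalone with any C++ compiler.
    #include <cstdio>

    int main() {
        const unsigned mem_size  = 4096;   // hypothetical number of cells
        const int      n_layer   = 32;     // hypothetical layer count
        const unsigned n_seq_max = 4;      // hypothetical max parallel sequences
        const float    size_r    = 0.62f;  // hypothetical R size in MiB
        const float    size_s    = 15.00f; // hypothetical S size in MiB

        // same format string as the new LLAMA_LOG_INFO call above
        std::printf("%s: size = %7.2f MiB (%6u cells, %3d layers, %2u seqs), R (%s): %7.2f MiB, S (%s): %7.2f MiB\n",
                "llama_memory_recurrent", size_r + size_s, mem_size, n_layer, n_seq_max,
                "f32", size_r, "f32", size_s);
        return 0;
    }

With these values the sketch prints a single line of the form `llama_memory_recurrent: size =   15.62 MiB (  4096 cells,  32 layers,  4 seqs), R (f32):    0.62 MiB, S (f32):   15.00 MiB`, which is the information previously split between the removed constructor-entry log and the old "KV self size" line.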