Skip to content

Commit 2c2fbbd

Browse files
committed
cont : update comments [no ci]
1 parent d6d5e95 commit 2c2fbbd

File tree

2 files changed

+3
-3
lines changed

2 files changed

+3
-3
lines changed

src/llama-hparams.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ struct llama_hparams {
4141
uint32_t n_embd;
4242
uint32_t n_embd_features = 0;
4343
uint32_t n_layer;
44-
int32_t n_layer_kv_from_start = -1;
44+
int32_t n_layer_kv_from_start = -1; // if non-negative, the first n_layer_kv_from_start layers have KV cache
4545
uint32_t n_rot;
4646
uint32_t n_embd_head_k; // dimension of keys (d_k). d_q is assumed to be the same, but there are n_head q heads, and only n_head_kv k-v heads
4747
uint32_t n_embd_head_v; // dimension of values (d_v) aka n_embd_head

src/llama-memory.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -68,8 +68,8 @@ struct llama_memory_i {
6868
// this callback is used to filter out layers that should not be included in the cache
6969
using layer_filter_cb = std::function<bool(int32_t il)>;
7070

71-
// this callback is used to specify which layers should reuse KV cache from other layers
72-
// return negative value to indicate that the layer il should not reuse KV cache
71+
// this callback is used to specify which layers should reuse memory from other layers
72+
// return negative value to indicate that the layer il should not reuse memory
7373
using layer_reuse_cb = std::function<int32_t(int32_t il)>;
7474

7575
virtual ~llama_memory_i() = default;

0 commit comments

Comments (0)