1 parent 7cf5fdb commit 771ffeb
lightllm/models/qwen3next/mem_manager.py
@@ -104,6 +104,12 @@ def free_all(self):
         self.ssm_state_mem_manager.free_all()
         return
 
+    @override
+    def get_cell_size(self):
+        # Only full attention layers and MTP layers have KV cache
+        kv_cache_layer_num = self.full_attn_layer_num + self.mtp_layer_num
+        return 2 * self.head_num * self.head_dim * kv_cache_layer_num * torch._utils._element_size(self.dtype)
+
     @override
     def get_buffer(self, layer_index) -> Tuple[torch.Tensor, torch.Tensor]:
         assert layer_index < self.layer_num, "layer_index is out of range"
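For context, here is a minimal standalone sketch (not part of the commit) of the arithmetic `get_cell_size` performs: bytes of KV cache per token slot. The factor of 2 covers the separate K and V tensors, and only layers that actually hold KV cache (full attention + MTP) are counted, which is the point of this change for the Qwen3-Next hybrid architecture. The concrete values below (8 KV heads, head_dim 128, 12 full-attention layers, 1 MTP layer, bfloat16) are illustrative assumptions, not taken from the model config.

```python
import torch

# Assumed example values -- not from the repo, chosen only to make the math concrete.
head_num, head_dim = 8, 128              # KV heads and per-head dimension
full_attn_layer_num, mtp_layer_num = 12, 1
dtype = torch.bfloat16

# Linear-attention layers keep SSM state instead of KV cache, so they are excluded.
kv_cache_layer_num = full_attn_layer_num + mtp_layer_num
elem_size = torch._utils._element_size(dtype)  # 2 bytes for bfloat16

# 2x for the K and V tensors stored per token per layer
cell_size = 2 * head_num * head_dim * kv_cache_layer_num * elem_size
print(cell_size)  # 53248 bytes per token slot
```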