We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 7010751, commit 90e72ae (Copy full SHA for 90e72ae)
xllm/core/runtime/llm_engine.cpp
100755
100644
@@ -263,7 +263,8 @@ Engine::KVCacheCapacity LLMEngine::estimate_kv_cache_capacity() {
263
slot_size =
264
dtype_size *
265
((args_.kv_lora_rank() + NZ_ALIGNMENT - 1) / NZ_ALIGNMENT +
266
- (args_.qk_rope_head_dim() + NZ_ALIGNMENT - 1) / NZ_ALIGNMENT);
+ (args_.qk_rope_head_dim() + NZ_ALIGNMENT - 1) / NZ_ALIGNMENT) *
267
+ NZ_ALIGNMENT;
268
} else {
269
270
dtype_size * (args_.kv_lora_rank() + args_.qk_rope_head_dim());
0 commit comments