
Commit 81fd468

[None][fix] Correct KV cache percentage report out. (NVIDIA#7102)
Signed-off-by: Frank Di Natale <3429989+FrankD412@users.noreply.github.com>
1 parent b36460d commit 81fd468

File tree

1 file changed (+18 −12)

tensorrt_llm/bench/dataclasses/reporting.py

Lines changed: 18 additions & 12 deletions
@@ -273,6 +273,22 @@ def get_statistics_dict(self) -> Dict[str, Any]:
             },
         }
 
+        # Retrieve KV cache information.
+        kv_cache_config = self.kwargs.get("kv_cache_config", KvCacheConfig())
+        if isinstance(kv_cache_config, KvCacheConfig):
+            kv_cache_dtype = kv_cache_config.dtype
+            kv_cache_mem_percent = kv_cache_config.free_gpu_memory_fraction
+        elif isinstance(kv_cache_config, dict):
+            kv_cache_dtype = kv_cache_config.get("dtype", "auto")
+            kv_cache_mem_percent = kv_cache_config.get(
+                "free_gpu_memory_fraction")
+        else:
+            raise ValueError(
+                f"Invalid kv_cache_config type: {type(kv_cache_config)}.")
+
+        kv_cache_mem_percent = f"{kv_cache_mem_percent * 100.0:.2f}%" \
+            if kv_cache_mem_percent is not None else "None"
+
         # Engine/Backend details
         if self.rt_cfg.backend not in ('pytorch', '_autodeploy'):
             config_path = self.rt_cfg.engine_dir / "config.json"
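The block added above is the core of the fix: it hoists the `kv_cache_config` parsing out of the backend-specific branch removed in the next hunk, so it runs for every backend, and it normalizes the memory fraction into a display string up front. A minimal, self-contained sketch of the same normalization; the `KvCacheConfig` dataclass here is a hypothetical stand-in for TensorRT-LLM's real class, modeling only the two fields this code reads:

from dataclasses import dataclass
from typing import Optional, Union


# Hypothetical stand-in for TensorRT-LLM's KvCacheConfig; it models only
# the two fields the reporting code reads.
@dataclass
class KvCacheConfig:
    dtype: str = "auto"
    free_gpu_memory_fraction: Optional[float] = None


def format_kv_cache_info(kv_cache_config: Union[KvCacheConfig, dict]):
    # Accept either the config object or a plain dict, as the diff does.
    if isinstance(kv_cache_config, KvCacheConfig):
        kv_cache_dtype = kv_cache_config.dtype
        kv_cache_mem_percent = kv_cache_config.free_gpu_memory_fraction
    elif isinstance(kv_cache_config, dict):
        kv_cache_dtype = kv_cache_config.get("dtype", "auto")
        kv_cache_mem_percent = kv_cache_config.get("free_gpu_memory_fraction")
    else:
        raise ValueError(f"Invalid kv_cache_config type: {type(kv_cache_config)}.")
    # An unset fraction is reported as the literal string "None".
    kv_cache_mem_percent = (f"{kv_cache_mem_percent * 100.0:.2f}%"
                            if kv_cache_mem_percent is not None else "None")
    return kv_cache_dtype, kv_cache_mem_percent


print(format_kv_cache_info({"free_gpu_memory_fraction": 0.9}))  # ('auto', '90.00%')
print(format_kv_cache_info(KvCacheConfig()))                    # ('auto', 'None')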
@@ -302,15 +318,6 @@ def get_statistics_dict(self) -> Dict[str, Any]:
             model = self.rt_cfg.model_path or self.rt_cfg.model
             model_config = ModelConfig.from_pretrained(model,
                                                        trust_remote_code=True)
-            kv_cache_config = self.kwargs.get("kv_cache_config",
-                                              KvCacheConfig())
-            if isinstance(kv_cache_config, KvCacheConfig):
-                kv_cache_dtype = kv_cache_config.dtype
-            elif isinstance(kv_cache_config, dict):
-                kv_cache_dtype = kv_cache_config.get("dtype", "auto")
-            else:
-                raise ValueError(
-                    f"Invalid kv_cache_config type: {type(kv_cache_config)}.")
 
             validate_and_set_kv_cache_quant(model_config, kv_cache_dtype)
 
@@ -336,8 +343,7 @@ def get_statistics_dict(self) -> Dict[str, Any]:
             "max_batch_size": self.rt_cfg.settings_config.max_batch_size,
             "max_num_tokens": self.rt_cfg.settings_config.max_num_tokens,
             "scheduling_policy": self.rt_cfg.settings_config.scheduler_policy,
-            "kv_cache_percentage":
-            self.rt_cfg.settings_config.kv_cache_percent * 100.0,
+            "kv_cache_percentage": kv_cache_mem_percent,
             "issue_rate": self.convert_rate_to_s(self.statistics.issue_rate_ns)
         }
 
@@ -526,7 +532,7 @@ def report_statistics(self) -> None:
             f"Max Runtime Batch Size: {world_info['max_batch_size']}\n"
             f"Max Runtime Tokens: {world_info['max_num_tokens']}\n"
             f"Scheduling Policy: {world_info['scheduling_policy']}\n"
-            f"KV Memory Percentage: {world_info['kv_cache_percentage']:.2f}%\n"
+            f"KV Memory Percentage: {world_info['kv_cache_percentage']}\n"
             f"Issue Rate (req/sec): {world_info['issue_rate']:.4E}\n"
             f"\n")
 
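This last hunk follows from the first one: `world_info['kv_cache_percentage']` is now a pre-formatted string such as "90.00%" or "None", so keeping the old `:.2f` float format spec would raise at print time. A quick illustration (the values are assumed examples):

pct = "90.00%"  # shape of the value after this commit
print(f"KV Memory Percentage: {pct}")  # -> KV Memory Percentage: 90.00%
# f"{pct:.2f}" would raise ValueError: Unknown format code 'f' for object of type 'str'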