Skip to content

Commit 255e34c

Browse files
authored
[Stability fix] turn off HMA allocator when connector is set (#27592)
Signed-off-by: KuntaiDu <[email protected]> Signed-off-by: Kuntai Du <[email protected]>
1 parent a8d2e32 commit 255e34c

File tree

1 file changed

+14
-0
lines changed

1 file changed

+14
-0
lines changed

vllm/config/vllm.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -597,6 +597,20 @@ def __post_init__(self):
597597
if not current_platform.support_hybrid_kv_cache():
598598
# Hybrid KV cache manager is not supported on non-GPU platforms.
599599
self.scheduler_config.disable_hybrid_kv_cache_manager = True
600+
if self.kv_transfer_config is not None:
601+
# NOTE(Kuntai): turn HMA off for connector for now.
602+
# TODO(Kuntai): have a more elegant solution to check and
603+
# turn off HMA for connector that does not support HMA.
604+
logger.warning(
605+
"Turning off hybrid kv cache manager because "
606+
"`--kv-transfer-config` is set. This will reduce the "
607+
"performance of vLLM on LLMs with sliding window attention "
608+
"or Mamba attention. If you are a developer of kv connector"
609+
", please consider supporting hybrid kv cache manager for "
610+
"your connector by making sure your connector is a subclass"
611+
" of `SupportsHMA` defined in kv_connector/v1/base.py."
612+
)
613+
self.scheduler_config.disable_hybrid_kv_cache_manager = True
600614
if self.kv_events_config is not None:
601615
# Hybrid KV cache manager is not compatible with KV events.
602616
self.scheduler_config.disable_hybrid_kv_cache_manager = True

0 commit comments

Comments
 (0)