File tree Expand file tree Collapse file tree 1 file changed +14
-0
lines changed Expand file tree Collapse file tree 1 file changed +14
-0
lines changed Original file line number Diff line number Diff line change @@ -597,6 +597,20 @@ def __post_init__(self):
597597 if not current_platform .support_hybrid_kv_cache ():
598598 # Hybrid KV cache manager is not supported on non-GPU platforms.
599599 self .scheduler_config .disable_hybrid_kv_cache_manager = True
600+ if self .kv_transfer_config is not None :
601+ # NOTE(Kuntai): turn HMA off for connector for now.
602+ # TODO(Kuntai): have a more elegant solution to check and
603+ # turn off HMA for connectors that do not support HMA.
604+ logger .warning (
605+ "Turning off hybrid kv cache manager because "
606+ "`--kv-transfer-config` is set. This will reduce the "
607+ "performance of vLLM on LLMs with sliding window attention "
608+ "or Mamba attention. If you are a developer of kv connector"
609+ ", please consider supporting hybrid kv cache manager for "
610+ "your connector by making sure your connector is a subclass"
611+ " of `SupportsHMA` defined in kv_connector/v1/base.py."
612+ )
613+ self .scheduler_config .disable_hybrid_kv_cache_manager = True
600614 if self .kv_events_config is not None :
601615 # Hybrid KV cache manager is not compatible with KV events.
602616 self .scheduler_config .disable_hybrid_kv_cache_manager = True
You can’t perform that action at this time.
0 commit comments