Allow fast prefill for hybrid memory allocator

sarckk · sarckk · commit 5e5bbbe015de · 2025-08-27T14:15:29.000-07:00
Signed-off-by: Yong Hoon Shin <yhshin@meta.com>
diff --git a/vllm/config/__init__.py b/vllm/config/__init__.py
@@ -3741,12 +3741,6 @@ def __post_init__(self):
                     # local attention.
                     self.scheduler_config.disable_hybrid_kv_cache_manager = True
 
-            if self.cache_config.kv_sharing_fast_prefill:
-                # There is an IMA issue currently when using fast prefill with
-                # hybrid kv cache manager (e.g. interleaved sliding window)
-                # TODO(sarckk): investigate and fix
-                self.scheduler_config.disable_hybrid_kv_cache_manager = True
-
     def update_sizes_for_sequence_parallelism(self,
                                               possible_sizes: list) -> list:
         # remove the sizes that not multiple of tp_size when