Skip to content

Commit 5e5bbbe

Browse files
committed
Allow fast prefill for hybrid memory allocator
Signed-off-by: Yong Hoon Shin <[email protected]>
1 parent 6de0d1e commit 5e5bbbe

File tree

1 file changed

+0
-6
lines changed

1 file changed

+0
-6
lines changed

vllm/config/__init__.py

Lines changed: 0 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -3741,12 +3741,6 @@ def __post_init__(self):
3741 3741
# local attention.
3742 3742
self.scheduler_config.disable_hybrid_kv_cache_manager = True
3743 3743

3744-
if self.cache_config.kv_sharing_fast_prefill:
3745-
# There is an IMA issue currently when using fast prefill with
3746-
# hybrid kv cache manager (e.g. interleaved sliding window)
3747-
# TODO(sarckk): investigate and fix
3748-
self.scheduler_config.disable_hybrid_kv_cache_manager = True
3749-
3750 3744
def update_sizes_for_sequence_parallelism(self,
3751 3745
possible_sizes: list) -> list:
3752 3746
# remove the sizes that not multiple of tp_size when

0 commit comments

Comments
 (0)