We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 6de0d1e commit 5e5bbbeCopy full SHA for 5e5bbbe
vllm/config/__init__.py
@@ -3741,12 +3741,6 @@ def __post_init__(self):
3741
# local attention.
3742
self.scheduler_config.disable_hybrid_kv_cache_manager = True
3743
3744
- if self.cache_config.kv_sharing_fast_prefill:
3745
- # There is an IMA issue currently when using fast prefill with
3746
- # hybrid kv cache manager (e.g. interleaved sliding window)
3747
- # TODO(sarckk): investigate and fix
3748
- self.scheduler_config.disable_hybrid_kv_cache_manager = True
3749
-
3750
def update_sizes_for_sequence_parallelism(self,
3751
possible_sizes: list) -> list:
3752
# remove the sizes that not multiple of tp_size when
0 commit comments