We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 1e36983 commit 4f4d427Copy full SHA for 4f4d427
vllm/config.py
@@ -3252,6 +3252,16 @@ def __post_init__(self):

        current_platform.check_and_update_config(self)

+        # If MLA is enabled, force disable chunked prefill and prefix caching
+        if self.model_config and self.model_config.use_mla:
+            logger.info("MLA is enabled; forcing chunked prefill and prefix "
+                        "caching to be disabled.")
+            self.scheduler_config.enable_chunked_prefill = False
+            self.scheduler_config.chunked_prefill_enabled = False
+
+            if self.cache_config is not None:
+                self.cache_config.enable_prefix_caching = False

        if not self.instance_id:
            self.instance_id = random_uuid()[:5]
0 commit comments