We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 38e734e, commit d6bf6de (Copy full SHA for d6bf6de)
fastdeploy/engine/common_engine.py
@@ -527,8 +527,14 @@ def _fetch_request():
527
self.cfg.max_prefill_batch,
528
)
529
530
+ if self.cfg.model_config.enable_mm:
531
+ self.resource_manager.check_and_free_block_tables()
532
+ available_blocks = self.resource_manager.available_block_num()
533
+ else:
534
+ available_blocks = self.cfg.cache_config.max_block_num_per_seq
535
+
536
tasks = self.scheduler.get_requests(
- available_blocks=self.cfg.cache_config.max_block_num_per_seq,
537
+ available_blocks=available_blocks,
538
block_size=self.cfg.cache_config.block_size,
539
reserved_output_blocks=self.cfg.cache_config.enc_dec_block_num,
540
max_num_batched_tokens=self.cfg.max_model_len,
0 commit comments