Skip to content

Commit c765625

Browse files
committed
Remove the kvcomp config option max_batch_size and use vLLM's scheduler_config.max_num_seqs directly
1 parent 7d7b2a2 commit c765625

File tree

1 file changed

+1
-8
lines changed

1 file changed

+1
-8
lines changed

ucm/sparse/kvcomp/kvcomp_hbm.py

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -197,14 +197,7 @@ def __init__(self, vllm_config: VllmConfig, role: UcmSparseRole):
197197
if not self.is_cuda: # NPU only variables
198198
self.decode_mask_npu = None
199199
self.is_tensor_computed = False
200-
self.max_batch_size = self.kvcompOnDevice_cfg["max_batch_size"]
201-
if self.max_batch_size is None:
202-
self.max_batch_size = vllm_config.scheduler_config.max_num_seqs
203-
else:
204-
self.max_batch_size = min(
205-
self.max_batch_size,
206-
vllm_config.scheduler_config.max_num_seqs,
207-
)
200+
self.max_batch_size = vllm_config.scheduler_config.max_num_seqs
208201

209202
self.hamming_keep_chunks_head = 1
210203
self.hamming_keep_chunks_tail = 4

0 commit comments

Comments
 (0)