Skip to content

Commit 41bf1d9

Browse files
committed
bug fix for updated seq_lens
1 parent bd58ac7 commit 41bf1d9

File tree

1 file changed

+3
-1
lines changed

1 file changed

+3
-1
lines changed

ucm/sparse/kvcomp/kvcomp_hbm.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -419,7 +419,9 @@ def attention_begin(
419 419
topk = self.hamming_output.shape[1]
420 420
attn_metadata.block_table[decode_req_ids,:topk] = self.hamming_output[:len(decode_req_ids)]
421 421
attn_metadata.block_table[decode_req_ids,topk:] = 0
422-
attn_metadata.seq_lens[self.decode_mask] = self.seq_lens_for_hamming
422+
423+
# we have already computed the topk_seq_lens_qwen in `build_decode_attention_meta_npu()`
424+
attn_metadata.seq_lens[self.decode_mask] = self.topk_seq_lens_qwen
423 425

424 426
# topk for skip layer
425 427
self.topk_block_table = attn_metadata.block_table

0 commit comments

Comments
 (0)