We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent bd58ac7 commit 41bf1d9 — Copy full SHA for 41bf1d9
ucm/sparse/kvcomp/kvcomp_hbm.py
@@ -419,7 +419,9 @@ def attention_begin(
419
topk = self.hamming_output.shape[1]
420
attn_metadata.block_table[decode_req_ids,:topk] = self.hamming_output[:len(decode_req_ids)]
421
attn_metadata.block_table[decode_req_ids,topk:] = 0
422
- attn_metadata.seq_lens[self.decode_mask] = self.seq_lens_for_hamming
+
423
+ # topk_seq_lens_qwen has already been computed in `build_decode_attention_meta_npu()`
424
+ attn_metadata.seq_lens[self.decode_mask] = self.topk_seq_lens_qwen
425
426
# topk for skip layer
427
self.topk_block_table = attn_metadata.block_table
0 commit comments