1 file changed: +22 -22 lines changed
Original file line number | Diff line number | Diff line change
@@ -442,28 +442,28 @@ def attention_begin(
442 442
443 443 topk_token = self .hash_topk_tokens
444 444
445- block_table_decode = attn_metadata .block_table .index_select (
446- 0 , decode_req_ids
447- )
448- seq_len_decode = self .ori_seq_lens_decode .index_select (
449- 0 , decode_req_ids
450- )
451- block_table_decode = cuda_hamming_topk (
452- q_hash .unsqueeze (1 ),
453- k_hash ,
454- block_table_decode ,
455- seq_len_decode ,
456- topk_token = topk_token ,
457- sink_token = 64 ,
458- recent_token = 512 ,
459- is_mla = self .is_mla ,
460- )
461- # update topk_block_table
462- topk = block_table_decode .shape [1 ]
463- attn_metadata .block_table [decode_req_ids , :topk ] = (
464- block_table_decode
465- )
466- attn_metadata .block_table [decode_req_ids , topk :] = 0
445+ block_table_decode = attn_metadata .block_table .index_select (
446+ 0 , decode_req_ids
447+ )
448+ seq_len_decode = self .ori_seq_lens_decode .index_select (
449+ 0 , decode_req_ids
450+ )
451+ block_table_decode = cuda_hamming_topk (
452+ q_hash .unsqueeze (1 ),
453+ k_hash ,
454+ block_table_decode ,
455+ seq_len_decode ,
456+ topk_token = topk_token ,
457+ sink_token = 64 ,
458+ recent_token = 512 ,
459+ is_mla = self .is_mla ,
460+ )
461+ # update topk_block_table
462+ topk = block_table_decode .shape [1 ]
463+ attn_metadata .block_table [decode_req_ids , :topk ] = (
464+ block_table_decode
465+ )
466+ attn_metadata .block_table [decode_req_ids , topk :] = 0
467 467
468 468 attn_metadata .seq_lens [self .decode_mask ] = (
469 469 self .topk_seq_lens_qwen
You can’t perform that action at this time.
0 commit comments