Skip to content

Commit 90d00f0

Browse files
committed
Fixes to the benchmark attention wrapper
1 parent df306f6 commit 90d00f0

File tree

1 file changed

+9
-0
lines changed

1 file changed

+9
-0
lines changed

benchmarks/routines/attention.py

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -798,6 +798,15 @@ def testBatchPrefillWithPagedKVCacheWrapper(args):
798 798
.int()
799 799
.to(device)
800 800
)
801+
qo_indptr_cudnn = torch.cat(
802+
[
803+
torch.tensor([0], device=device),
804+
torch.cumsum(actual_seq_lens_q_device.view(-1), dim=0)
805+
* head_dim_qk
806+
* num_qo_heads,
807+
]
808+
).int()
809+
801 810
# Because actual_seq_lens_kv is the same as actual_seq_lens_q, kv_indptr will become the same as qo_indptr
802 811
kv_indptr = (
803 812
torch.cat(

0 commit comments

Comments
 (0)