Fixes to the benchmark attention wrapper

Anerudhan · Anerudhan · commit 90d00f0b6783 · 2025-08-11T09:47:24.000-07:00
diff --git a/benchmarks/routines/attention.py b/benchmarks/routines/attention.py
@@ -798,6 +798,15 @@ def testBatchPrefillWithPagedKVCacheWrapper(args):
         .int()
         .to(device)
     )
+    qo_indptr_cudnn = torch.cat(
+        [
+            torch.tensor([0], device=device),
+            torch.cumsum(actual_seq_lens_q_device.view(-1), dim=0)
+            * head_dim_qk
+            * num_qo_heads,
+        ]
+    ).int()
+
     # Because actual_seq_lens_kv is the same as actual_seq_lens_q, kv_indptr will become the same as qo_indptr
     kv_indptr = (
         torch.cat(