We read every piece of feedback and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 27940f1, commit ca7646e. Copy full SHA for ca7646e
lightllm/models/qwen2_vl/triton_kernel/rotary_pos_emb.py
@@ -29,7 +29,7 @@ def rotary_kernel(
29
offs_d = tl.arange(0, BLOCK_D)
30
d = pid_blk * BLOCK_D + offs_d
31
mask = d < D
32
- for pid_l in tl.range(pid_l_start, total_len, step=tl.num_programs(axis=1)):
+ for pid_l in tl.range(pid_l_start, total_len, step=tl.num_programs(axis=1), num_stages=3):
33
34
base = pid_l * stride_l + pid_h * stride_h
35
0 commit comments