We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent af91bf1, commit 01e0440
lightllm/models/deepseek2/triton_kernel/rotary_emb.py
@@ -81,7 +81,7 @@ def rotary_emb_fwd(q, k, cos, sin):
81
BLOCK_SEQ = 16
82
83
num_warps = 1
84
- num_stages = 5
+ num_stages = 3
85
86
grid = (triton.cdiv(total_len, BLOCK_SEQ),)
87
_rotary_kernel[grid](
0 commit comments