Skip to content

Commit 33faa6e

Browse files
authored
[BENCH] multiply gammas after fused act (#7003)
1 parent c109dc7 commit 33faa6e

File tree

1 file changed

+2
-1
lines changed

1 file changed

+2
-1
lines changed

python/triton_kernels/triton_kernels/matmul_ogs_details/_p_matmul_ogs.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -514,7 +514,6 @@ def _p_matmul_ogs(
514 514   if SWAP_XW:
515 515   acc_tile = acc_tile.T
516 516   acc_tile = acc_tile + biases[a_i][None, :] * betas[:, None]
517     - acc_tile *= gammas[:, None]
518 517   if out_alpha is not None:
519 518   acc_tile *= out_alpha
520 519

@@ -525,6 +524,8 @@ def _p_matmul_ogs(
525 524   tl.static_assert(ACTIVATION_REDUCTION_N == 1, "Activation reduction must be 1 if no activation fn is provided")
526 525   out = acc_tile
527 526
    527 + out *= gammas[:, None]
    528 +
528 529   if MASK_ACC:
529 530   out = tl.where(mask_m[:, None], out, 0.0)
530 531   # Flexpoint

0 commit comments

Comments
 (0)