Skip to content

Commit a683145

Browse files
wjj19950828 and aiyi.wjj authored
[HGEMM] fix naive hgemm f16x8 LDST error (#157)
Co-authored-by: aiyi.wjj <[email protected]>
1 parent 972fd42 commit a683145

File tree

1 file changed

+1
-1
lines changed

1 file changed

+1
-1
lines changed

kernels/hgemm/naive/hgemm.cu

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -735,7 +735,7 @@ __global__ void hgemm_t_8x8_sliced_k_f16x8_pack_bcf_dbuf_kernel(
735735
s_a[smem_sel_next][load_a_smem_k + 1][load_a_smem_m] = r_load_a[1];
736736
s_a[smem_sel_next][load_a_smem_k + 2][load_a_smem_m] = r_load_a[2];
737737
s_a[smem_sel_next][load_a_smem_k + 3][load_a_smem_m] = r_load_a[3];
738-
LDST128BITS(s_b[smem_sel_next][load_b_smem_k][load_b_smem_n]) = LDST128BITS(r_load_b[0]);
738+
LDST64BITS(s_b[smem_sel_next][load_b_smem_k][load_b_smem_n]) = LDST64BITS(r_load_b[0]);
739739

740740
__syncthreads();
741741
}

0 commit comments

Comments
 (0)