We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 972fd42, commit a683145 (Copy full SHA for a683145)
kernels/hgemm/naive/hgemm.cu
@@ -735,7 +735,7 @@ __global__ void hgemm_t_8x8_sliced_k_f16x8_pack_bcf_dbuf_kernel(
735
s_a[smem_sel_next][load_a_smem_k + 1][load_a_smem_m] = r_load_a[1];
736
s_a[smem_sel_next][load_a_smem_k + 2][load_a_smem_m] = r_load_a[2];
737
s_a[smem_sel_next][load_a_smem_k + 3][load_a_smem_m] = r_load_a[3];
738
- LDST128BITS(s_b[smem_sel_next][load_b_smem_k][load_b_smem_n]) = LDST128BITS(r_load_b[0]);
+ LDST64BITS(s_b[smem_sel_next][load_b_smem_k][load_b_smem_n]) = LDST64BITS(r_load_b[0]);
739
740
__syncthreads();
741
}
0 commit comments