Skip to content

Commit 4423421

Browse files
committed
Fix: Invoke f16f32 in WGMMA
1 parent ea4a3e0 commit 4423421

File tree

1 file changed

+1
-1
lines changed

1 file changed

+1
-1
lines changed

less_slow.cu

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -603,7 +603,7 @@ __global__ void tops_f16f32_sm90wgmma_64x256x16_loop128_cuda_kernel() {
     std::uint64_t b_descriptor = wgmma_descriptor((std::uint64_t)b_shared, 128 * 256 / 8, 128, 0, 0);
     wgmma_fence();
     for (int i = 0; i != 128; ++i) {
-        wgmma_bf16f32_64x256x16(c_registers, a_descriptor, b_descriptor);
+        wgmma_f16f32_64x256x16(c_registers, a_descriptor, b_descriptor);
         wgmma_commit_group();
     }
     wgmma_sync_group();

0 commit comments

Comments
 (0)