Skip to content

Commit 7f63ef2

Browse files
committed
Improve: Drop small WGMMA for conciseness
1 parent 4423421 commit 7f63ef2

File tree

1 file changed

+3
-9
lines changed

1 file changed

+3
-9
lines changed

less_slow.cpp

Lines changed: 3 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -2394,16 +2394,10 @@ BENCHMARK_CAPTURE( //
2394 2394
16, 16, 8, 80, 128, tensor_core_scale_t::warp_k)
2395 2395
->MinTime(10);
2396 2396

2397-
BENCHMARK_CAPTURE( //
2398-
theoretic_tops_ptx, tf32f32_sm90wgmma_smallest, //
2399-
"less_slow_sm90a.ptx", "tops_tf32f32_sm90tc_m64n16k8_loop128_ptx_kernel", //
2400-
64, 16, 8, 90, 128, tensor_core_scale_t::warpgroup_k)
2401-
->MinTime(10);
2402-
2403 2397
BENCHMARK_CAPTURE( //
2404-
theoretic_tops_ptx, tf32f32_sm90wgmma, //
2405-
"less_slow_sm90a.ptx", "tops_tf32f32_sm90tc_m64n256k8_loop128_ptx_kernel", //
2406-
64, 256, 8, 90, 128)
2398+
theoretic_tops_ptx, f16f32_sm90wgmma, //
2399+
"less_slow_sm90a.ptx", "tops_f16f32_sm90tc_m64n256k16_loop128_ptx_kernel", //
2400+
64, 256, 16, 90, 128, tensor_core_scale_t::warpgroup_k)
2407 2401
->MinTime(10);
2408 2402

2409 2403
BENCHMARK_CAPTURE( //

0 commit comments

Comments
 (0)