@@ -2137,6 +2137,8 @@ extern __global__ void tops_f32f32_sm60fma_16x16x16_loop128_cuda_kernel();
// Forward declarations of parameterless CUDA kernels (`extern __global__ void`).
// Their bodies are not part of this section, so they are presumably defined in a
// separate translation unit - confirm against the build.
// NOTE(review): the name segments appear to encode the data types involved
// (`f64f64`, `i32i32`, `u8u32`, ...), the target architecture and instruction
// family (`sm60fma`), a tile shape (`16x16x16`, `16x16x64`), and a loop count
// (`loop128`). This is inferred from naming only; verify against the kernel
// definitions.
21372137extern __global__ void tops_f64f64_sm60fma_16x16x16_loop128_cuda_kernel ();
21382138extern __global__ void tops_i32i32_sm60fma_16x16x16_loop128_cuda_kernel ();
21392139extern __global__ void tops_i64i64_sm60fma_16x16x16_loop128_cuda_kernel ();
2140+ extern __global__ void tops_u8u32_sm60fma_16x16x64_loop128_cuda_kernel ();
2141+ extern __global__ void tops_u24u32_sm60fma_16x16x16_loop128_cuda_kernel ();
21402142
21412143BENCHMARK_CAPTURE ( //
21422144 theoretic_tops_cuda, f32f32_sm60fma, tops_f32f32_sm60fma_16x16x16_loop128_cuda_kernel, //
@@ -2154,6 +2156,14 @@ BENCHMARK_CAPTURE(
21542156 theoretic_tops_cuda, i64i64_sm60fma, tops_i64i64_sm60fma_16x16x16_loop128_cuda_kernel, //
21552157 16 , 16 , 16 , 60 , 128 , tensor_core_scale_t ::single_k)
21562158 ->MinTime(10 );
// Benchmark case `u8u32_sm60fma` for `theoretic_tops_cuda`: captures the
// `tops_u8u32_sm60fma_16x16x64_loop128_cuda_kernel` kernel plus six further
// arguments. The numeric arguments (16, 16, 64, 60, 128) match the `16x16x64`,
// `sm60`, and `loop128` segments of the kernel name; how `theoretic_tops_cuda`
// interprets them is not visible here - confirm against its signature.
// NOTE(review): unlike the neighbouring cases, the third number is 64 rather
// than 16, consistent with the `16x16x64` in this kernel's name.
// NOTE(review): presumably this is Google Benchmark's `BENCHMARK_CAPTURE`, in
// which case `MinTime(10)` requests at least 10 seconds of measurement - verify.
2159+ BENCHMARK_CAPTURE ( //
2160+ theoretic_tops_cuda, u8u32_sm60fma, tops_u8u32_sm60fma_16x16x64_loop128_cuda_kernel, //
2161+ 16 , 16 , 64 , 60 , 128 , tensor_core_scale_t ::single_k)
2162+ ->MinTime(10 );
// Benchmark case `u24u32_sm60fma` for `theoretic_tops_cuda`: captures the
// `tops_u24u32_sm60fma_16x16x16_loop128_cuda_kernel` kernel plus six further
// arguments. The numeric arguments (16, 16, 16, 60, 128) match the `16x16x16`,
// `sm60`, and `loop128` segments of the kernel name; how `theoretic_tops_cuda`
// interprets them is not visible here - confirm against its signature.
// NOTE(review): presumably this is Google Benchmark's `BENCHMARK_CAPTURE`, in
// which case `MinTime(10)` requests at least 10 seconds of measurement - verify.
2163+ BENCHMARK_CAPTURE ( //
2164+ theoretic_tops_cuda, u24u32_sm60fma, tops_u24u32_sm60fma_16x16x16_loop128_cuda_kernel, //
2165+ 16 , 16 , 16 , 60 , 128 , tensor_core_scale_t ::single_k)
2166+ ->MinTime(10 );
21572167
// Forward declarations of parameterless CUDA kernels (`extern __global__ void`);
// only the declarations are visible in this section.
// NOTE(review): the two names differ only in the `fma` / `wmma` segment -
// presumably a scalar fused-multiply-add variant versus a warp-level matrix
// (WMMA) variant of the same f16 workload; verify against the kernel definitions.
21582168extern __global__ void tops_f16f16_sm70fma_16x16x16_loop128_cuda_kernel ();
21592169extern __global__ void tops_f16f16_sm70wmma_16x16x16_loop128_cuda_kernel ();
0 commit comments