From 5bfe5cb1515e3c961bcdf74cf67c3ad68d06fc48 Mon Sep 17 00:00:00 2001 From: "Si, Yudong" Date: Thu, 31 Oct 2024 05:18:41 +0000 Subject: [PATCH 1/2] Add new gemm tuned cfg --- benchmarks/triton_kernels_benchmark/gemm_benchmark.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/benchmarks/triton_kernels_benchmark/gemm_benchmark.py b/benchmarks/triton_kernels_benchmark/gemm_benchmark.py index 6aef756dcb..b550eb21a1 100644 --- a/benchmarks/triton_kernels_benchmark/gemm_benchmark.py +++ b/benchmarks/triton_kernels_benchmark/gemm_benchmark.py @@ -98,10 +98,18 @@ def matmul_kernel_with_block_pointers( triton.Config( {'BLOCK_SIZE_M': 256, 'BLOCK_SIZE_N': 128, 'BLOCK_SIZE_K': 32, 'GROUP_SIZE_M': 4, 'grf_mode': 'large'}, num_stages=s, num_warps=32) for s in [2] + ] + [ + triton.Config( + {'BLOCK_SIZE_M': 128, 'BLOCK_SIZE_N': 1024, 'BLOCK_SIZE_K': 16, 'GROUP_SIZE_M': 1, 'grf_mode': 'large'}, + num_stages=s, num_warps=32) for s in [2] ] + [ triton.Config( {'BLOCK_SIZE_M': 64, 'BLOCK_SIZE_N': 128, 'BLOCK_SIZE_K': 32, 'GROUP_SIZE_M': 4, 'grf_mode': 'large'}, num_stages=s, num_warps=32) for s in [2] + ] + [ + triton.Config( + {'BLOCK_SIZE_M': 16, 'BLOCK_SIZE_N': 1024, 'BLOCK_SIZE_K': 64, 'GROUP_SIZE_M': 32, 'grf_mode': 'large'}, + num_stages=s, num_warps=32) for s in [2, 3] ] + [ triton.Config( {'BLOCK_SIZE_M': 8, 'BLOCK_SIZE_N': 512, 'BLOCK_SIZE_K': 64, 'GROUP_SIZE_M': 1, 'grf_mode': 'large'}, From 196c0ed12f168497615f1f23182e66ad4f642746 Mon Sep 17 00:00:00 2001 From: ESI-SYD Date: Thu, 31 Oct 2024 09:11:34 +0000 Subject: [PATCH 2/2] update --- benchmarks/triton_kernels_benchmark/gemm_benchmark.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/benchmarks/triton_kernels_benchmark/gemm_benchmark.py b/benchmarks/triton_kernels_benchmark/gemm_benchmark.py index b550eb21a1..da41f1e447 100644 --- a/benchmarks/triton_kernels_benchmark/gemm_benchmark.py +++ b/benchmarks/triton_kernels_benchmark/gemm_benchmark.py @@ -100,16 +100,12 @@ def matmul_kernel_with_block_pointers( num_stages=s, num_warps=32) for s in [2] ] + [ triton.Config( - {'BLOCK_SIZE_M': 128, 'BLOCK_SIZE_N': 1024, 'BLOCK_SIZE_K': 16, 'GROUP_SIZE_M': 1, 'grf_mode': 'large'}, - num_stages=s, num_warps=32) for s in [2] + {'BLOCK_SIZE_M': 128, 'BLOCK_SIZE_N': 1024, 'BLOCK_SIZE_K': 16, 'GROUP_SIZE_M': 4, 'grf_mode': 'large'}, + num_stages=s, num_warps=32) for s in [2, 3] ] + [ triton.Config( {'BLOCK_SIZE_M': 64, 'BLOCK_SIZE_N': 128, 'BLOCK_SIZE_K': 32, 'GROUP_SIZE_M': 4, 'grf_mode': 'large'}, num_stages=s, num_warps=32) for s in [2] - ] + [ - triton.Config( - {'BLOCK_SIZE_M': 16, 'BLOCK_SIZE_N': 1024, 'BLOCK_SIZE_K': 64, 'GROUP_SIZE_M': 32, 'grf_mode': 'large'}, - num_stages=s, num_warps=32) for s in [2, 3] ] + [ triton.Config( {'BLOCK_SIZE_M': 8, 'BLOCK_SIZE_N': 512, 'BLOCK_SIZE_K': 64, 'GROUP_SIZE_M': 1, 'grf_mode': 'large'},