[GEMM] Clean up advanced path code (#3947)

whitneywhtsang · web-flow · commit 50df3927ef2a · 2025-04-16T21:05:04.000Z
Starting with #3724, GEMM is no longer run on the advanced path. This PR cleans up the code that was added for the advanced path. Signed-off-by: Whitney Tsang <whitney.tsang@intel.com>
diff --git a/benchmarks/triton_kernels_benchmark/gemm_benchmark.py b/benchmarks/triton_kernels_benchmark/gemm_benchmark.py
@@ -16,8 +16,6 @@
 import triton_kernels_benchmark as benchmark_suite
 from triton_kernels_benchmark import xetla_kernel
 
-SMALL_GRF = os.getenv('TRITON_INTEL_ADVANCED_PATH', '0') == '0'
-
 
 @triton.autotune(
     configs=[
@@ -26,18 +24,14 @@
             num_stages=s, num_warps=32) for s in [1, 2, 3]
     ] + [
         triton.Config({'BLOCK_SIZE_M': 256, 'BLOCK_SIZE_N': 128, 'BLOCK_SIZE_K': 32, 'GROUP_SIZE_M': 4, 'grf_mode': m},
-                      num_stages=s, num_warps=w)
-        for s in [2, 3, 4]
-        for (m, w) in ([('large', 32), ('small', 64)] if SMALL_GRF else [('large', 32)])
+                      num_stages=s, num_warps=w) for s in [2, 3, 4] for (m, w) in ([('large', 32), ('small', 64)])
     ] + [
         triton.Config(
             {'BLOCK_SIZE_M': 64, 'BLOCK_SIZE_N': 128, 'BLOCK_SIZE_K': 32, 'GROUP_SIZE_M': 4, 'grf_mode': 'large'},
             num_stages=s, num_warps=32) for s in [2]
     ] + [
         triton.Config({'BLOCK_SIZE_M': 8, 'BLOCK_SIZE_N': 512, 'BLOCK_SIZE_K': 64, 'GROUP_SIZE_M': 1, 'grf_mode': m},
-                      num_stages=s, num_warps=w)
-        for s in [2, 3]
-        for (m, w) in ([('large', 32), ('small', 64)] if SMALL_GRF else [('large', 32)])
+                      num_stages=s, num_warps=w) for s in [2, 3] for (m, w) in ([('large', 32), ('small', 64)])
     ],
     key=['M', 'N', 'K'],
 )
@@ -93,9 +87,7 @@ def matmul_kernel_with_block_pointers(
             num_stages=s, num_warps=32) for s in [2, 3]
     ] + [
         triton.Config({'BLOCK_SIZE_M': 256, 'BLOCK_SIZE_N': 128, 'BLOCK_SIZE_K': 32, 'GROUP_SIZE_M': 4, 'grf_mode': m},
-                      num_stages=s, num_warps=w)
-        for s in [2]
-        for (m, w) in ([('large', 32), ('small', 64)] if SMALL_GRF else [('large', 32)])
+                      num_stages=s, num_warps=w) for s in [2] for (m, w) in ([('large', 32), ('small', 64)])
     ] + [
         triton.Config(
             {'BLOCK_SIZE_M': 128, 'BLOCK_SIZE_N': 1024, 'BLOCK_SIZE_K': 16, 'GROUP_SIZE_M': 4, 'grf_mode': 'large'},
diff --git a/benchmarks/triton_kernels_benchmark/gemm_tensor_of_ptr_benchmark.py b/benchmarks/triton_kernels_benchmark/gemm_tensor_of_ptr_benchmark.py
@@ -18,7 +18,6 @@
 TRANSPOSE_A = os.getenv('TRANSPOSE_A', '0') == '1'
 TRANSPOSE_B = os.getenv('TRANSPOSE_B', '0') == '1'
 use_xetla = not (TRANSPOSE_A or TRANSPOSE_B)
-SMALL_GRF = os.getenv('TRITON_INTEL_ADVANCED_PATH', '0') == '0'
 
 
 @triton.autotune(
@@ -28,18 +27,14 @@
             num_stages=s, num_warps=32) for s in [1, 2, 3]
     ] + [
         triton.Config({'BLOCK_SIZE_M': 256, 'BLOCK_SIZE_N': 128, 'BLOCK_SIZE_K': 32, 'GROUP_SIZE_M': 4, 'grf_mode': m},
-                      num_stages=s, num_warps=w)
-        for s in [2, 3, 4]
-        for (m, w) in ([('large', 32), ('small', 64)] if SMALL_GRF else [('large', 32)])
+                      num_stages=s, num_warps=w) for s in [2, 3, 4] for (m, w) in ([('large', 32), ('small', 64)])
     ] + [
         triton.Config(
             {'BLOCK_SIZE_M': 64, 'BLOCK_SIZE_N': 128, 'BLOCK_SIZE_K': 32, 'GROUP_SIZE_M': 4, 'grf_mode': 'large'},
             num_stages=s, num_warps=32) for s in [2]
     ] + [
         triton.Config({'BLOCK_SIZE_M': 8, 'BLOCK_SIZE_N': 512, 'BLOCK_SIZE_K': 64, 'GROUP_SIZE_M': 1, 'grf_mode': m},
-                      num_stages=s, num_warps=w)
-        for s in [2, 3]
-        for (m, w) in ([('large', 32), ('small', 64)] if SMALL_GRF else [('large', 32)])
+                      num_stages=s, num_warps=w) for s in [2, 3] for (m, w) in ([('large', 32), ('small', 64)])
     ],
     key=['M', 'N', 'K'],
 )
@@ -97,9 +92,7 @@ def matmul_kernel(
             num_stages=s, num_warps=32) for s in [2, 3]
     ] + [
         triton.Config({'BLOCK_SIZE_M': 256, 'BLOCK_SIZE_N': 128, 'BLOCK_SIZE_K': 32, 'GROUP_SIZE_M': 4, 'grf_mode': m},
-                      num_stages=s, num_warps=w)
-        for s in [2]
-        for (m, w) in ([('large', 32), ('small', 64)] if SMALL_GRF else [('large', 32)])
+                      num_stages=s, num_warps=w) for s in [2] for (m, w) in ([('large', 32), ('small', 64)])
     ] + [
         triton.Config(
             {'BLOCK_SIZE_M': 128, 'BLOCK_SIZE_N': 1024, 'BLOCK_SIZE_K': 16, 'GROUP_SIZE_M': 4, 'grf_mode': 'large'},