@@ -1,8 +1,9 @@
 # RUN: env SUPPORT_LIB=%mlir_cuda_runtime \
-# RUN: env MLIR_RUN_CUDA_SM90_TESTS=%mlir_run_cuda_sm90_tests \
-# RUN: sh -c 'if [[ "$MLIR_RUN_CUDA_SM90_TESTS" == "1" ]]; \
+# RUN: sh -c 'if [[ "%mlir_run_cuda_sm90_tests" == "1" ]]; \
 # RUN: then %PYTHON %s | FileCheck %s; \
-# RUN: else %PYTHON %s | FileCheck %s --check-prefix=DUMPIR; fi'
+# RUN: else export MLIR_NVDSL_PRINT_IR=1; \
+# RUN: %PYTHON %s | FileCheck %s --check-prefix=DUMPIR; fi'
+
 
 # ===----------------------------------------------------------------------===//
 # Chapter 3 : GEMM 128x128x64 with Tensor Core
@@ -24,8 +25,6 @@
 from mlir.extras import types as T
 import numpy as np
 
-dump_only = os.getenv("MLIR_RUN_CUDA_SM90_TESTS") != "1"
-
 def tma_load(
     mbar_group: Mbarriers,
     a_tma: TMA,
@@ -61,7 +60,7 @@ def tma_load( |
     b_tma.load(b2, mbar_group[0], coords=[64, 0], predicate=p)
 
 
-@NVDSL.mlir_func(dump_only)
+@NVDSL.mlir_func
 def gemm_128_128_64(a, b, d):
     token_ty = gpu.AsyncTokenType.get()
     t1 = gpu.wait(token_ty, [])
@@ -127,7 +126,7 @@ def gemm_tma_kernel(): |
 d = np.zeros((M, N), np.float32)
 gemm_128_128_64(a, b, d)
 
-if not dump_only:
+if os.getenv("MLIR_NVDSL_PRINT_IR") != "1":
     # Verify MLIR program with reference computation in python
     ref_d = a.astype(np.float16) @ b.astype(np.float16)
     np.testing.assert_allclose(d, ref_d, rtol=5e-03, atol=1e-01)
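
Note: the sketch below is not part of the commit; it is a minimal standalone
illustration of the gating pattern the change adopts. Under lit,
%mlir_run_cuda_sm90_tests is a substitution expanded before the shell command
runs, so the RUN line can branch on it directly instead of threading an
environment variable through `env`; when SM90 tests are disabled,
MLIR_NVDSL_PRINT_IR=1 switches the script to IR-dump mode and the Python-side
numeric check is skipped. run_gemm is a hypothetical NumPy stand-in for the
NVDSL-compiled gemm_128_128_64, so the sketch runs without a GPU.

# Standalone sketch (assumption: not part of this commit).
import os
import numpy as np

def run_gemm(a, b, d):
    # Hypothetical stand-in for gemm_128_128_64: the real NVDSL function
    # JIT-compiles and launches the kernel on SM90 hardware, or only prints
    # IR when MLIR_NVDSL_PRINT_IR=1. Here it is plain NumPy.
    d[...] = (a @ b).astype(np.float32)

M, N, K = 128, 128, 64
a = np.random.randn(M, K).astype(np.float16)
b = np.random.randn(K, N).astype(np.float16)
d = np.zeros((M, N), np.float32)
run_gemm(a, b, d)

if os.getenv("MLIR_NVDSL_PRINT_IR") != "1":
    # Numeric verification only makes sense when the kernel actually ran;
    # in IR-dump mode the real test leaves d untouched (all zeros).
    ref_d = a.astype(np.float16) @ b.astype(np.float16)
    np.testing.assert_allclose(d, ref_d, rtol=5e-03, atol=1e-01)
    print("PASS")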
|