@@ -1,8 +1,9 @@
 # RUN: env SUPPORT_LIB=%mlir_cuda_runtime \
-# RUN: env MLIR_RUN_CUDA_SM90_TESTS=%mlir_run_cuda_sm90_tests \
-# RUN: sh -c 'if [[ "$MLIR_RUN_CUDA_SM90_TESTS" == "1" ]]; \
+# RUN: sh -c 'if [[ "%mlir_run_cuda_sm90_tests" == "1" ]]; \
 # RUN: then %PYTHON %s | FileCheck %s; \
-# RUN: else %PYTHON %s | FileCheck %s --check-prefix=DUMPIR; fi'
+# RUN: else export MLIR_NVDSL_PRINT_IR=1; \
+# RUN: %PYTHON %s | FileCheck %s --check-prefix=DUMPIR; fi'
+
 
 # ===----------------------------------------------------------------------===//
 # Chapter 3 : GEMM 128x128x64 with Tensor Core
@@ -24,8 +25,6 @@
 from mlir.extras import types as T
 import numpy as np
 
-dump_only = os.getenv("MLIR_RUN_CUDA_SM90_TESTS") != "1"
-
 def tma_load(
     mbar_group: Mbarriers,
     a_tma: TMA,
@@ -61,7 +60,7 @@ def tma_load( |
     b_tma.load(b2, mbar_group[0], coords=[64, 0], predicate=p)
 
 
-@NVDSL.mlir_func(dump_only)
+@NVDSL.mlir_func
 def gemm_128_128_64(a, b, d):
     token_ty = gpu.AsyncTokenType.get()
     t1 = gpu.wait(token_ty, [])
@@ -127,7 +126,7 @@ def gemm_tma_kernel(): |
 d = np.zeros((M, N), np.float32)
 gemm_128_128_64(a, b, d)
 
-if not dump_only:
+if os.getenv("MLIR_NVDSL_PRINT_IR") != "1":
     # Verify MLIR program with reference computation in python
     ref_d = a.astype(np.float16) @ b.astype(np.float16)
     np.testing.assert_allclose(d, ref_d, rtol=5e-03, atol=1e-01)
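
Note: the sketch below is not part of the commit; it is a minimal standalone
illustration of the gating pattern the change adopts. Under lit,
%mlir_run_cuda_sm90_tests is a substitution expanded before the shell command
runs, so the RUN line can branch on it directly instead of threading an
environment variable through `env`; when SM90 tests are disabled,
MLIR_NVDSL_PRINT_IR=1 switches the script to IR-dump mode and the Python-side
numeric check is skipped. run_gemm is a hypothetical NumPy stand-in for the
NVDSL-compiled gemm_128_128_64, so the sketch runs without a GPU.

# Standalone sketch (assumption: not part of this commit).
import os
import numpy as np

def run_gemm(a, b, d):
    # Hypothetical stand-in for gemm_128_128_64: the real NVDSL function
    # JIT-compiles and launches the kernel on SM90 hardware, or only prints
    # IR when MLIR_NVDSL_PRINT_IR=1. Here it is plain NumPy.
    d[...] = (a @ b).astype(np.float32)

M, N, K = 128, 128, 64
a = np.random.randn(M, K).astype(np.float16)
b = np.random.randn(K, N).astype(np.float16)
d = np.zeros((M, N), np.float32)
run_gemm(a, b, d)

if os.getenv("MLIR_NVDSL_PRINT_IR") != "1":
    # Numeric verification only makes sense when the kernel actually ran;
    # in IR-dump mode the real test leaves d untouched (all zeros).
    ref_d = a.astype(np.float16) @ b.astype(np.float16)
    np.testing.assert_allclose(d, ref_d, rtol=5e-03, atol=1e-01)
    print("PASS")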
|