[MLIR][GPU] Fix gpu.printf #121940

Conversation
@llvm/pr-subscribers-mlir-spirv @llvm/pr-subscribers-mlir

Author: Guray Ozen (grypp)

Changes

A `gpu.printf` with no arguments, followed by a statement that begins with an SSA value, fails with `error: expected ':'`. It looks like a parser error. This PR fixes that by changing the assembly format of `gpu.printf` so that an explicit `,` separates the format string from its arguments (see the reproducer in the description further below).

Full diff: https://github.com/llvm/llvm-project/pull/121940.diff

17 Files Affected:
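To make the change concrete before the diff: under the new format, a `gpu.printf` with no operands is just the format string, and operands are introduced by an explicit comma. A minimal sketch (the module, kernel, and argument names are illustrative, not from the patch):

```mlir
gpu.module @printf_demo {
  gpu.func @demo(%i: i32, %f: f32) kernel {
    // No operands: the format string stands alone, and the parser
    // does not look past it for arguments.
    gpu.printf "Hello\n"
    // With operands: a comma introduces the operand list, followed
    // by `:` and the operand types.
    gpu.printf "Hello: %d %f\n", %i, %f : i32, f32
    gpu.return
  }
}
```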
diff --git a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
index 42a017db300af6..3adfd5f4f2c436 100644
--- a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
+++ b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
@@ -1055,7 +1055,7 @@ def GPU_PrintfOp : GPU_Op<"printf", [MemoryEffects<[MemWrite]>]>,
imposed by one's target platform.
}];
let assemblyFormat = [{
- $format attr-dict ($args^ `:` type($args))?
+ $format attr-dict (`,` $args^ `:` type($args))?
}];
}
diff --git a/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir b/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir
index 318f0f78efa5b7..f52dd6c0d0ce30 100644
--- a/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir
+++ b/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir
@@ -633,7 +633,7 @@ gpu.module @test_module_29 {
// CHECK-NEXT: %[[EL1:.*]] = llvm.getelementptr %[[ALLOC]][0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(i32, f64)>
// CHECK-NEXT: llvm.store %[[EXT]], %[[EL1]] : f64, !llvm.ptr
// CHECK-NEXT: llvm.call @vprintf(%[[FORMATSTART]], %[[ALLOC]]) : (!llvm.ptr, !llvm.ptr) -> i32
- gpu.printf "Hello: %d\n" %arg0, %arg1 : i32, f32
+ gpu.printf "Hello: %d\n", %arg0, %arg1 : i32, f32
gpu.return
}
}
diff --git a/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl-hip.mlir b/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl-hip.mlir
index 1b904fa142bad3..2dc6a5ab2a86ce 100644
--- a/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl-hip.mlir
+++ b/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl-hip.mlir
@@ -36,7 +36,7 @@ gpu.module @test_module {
// CHECK-NEXT: %[[NARGS1:.*]] = llvm.mlir.constant(1 : i32) : i32
// CHECK-NEXT: %[[ARG0_64:.*]] = llvm.zext %[[ARG0]] : i32 to i64
// CHECK-NEXT: %{{.*}} = llvm.call @__ockl_printf_append_args(%[[DESC1]], %[[NARGS1]], %[[ARG0_64]], %[[CST0]], %[[CST0]], %[[CST0]], %[[CST0]], %[[CST0]], %[[CST0]], %[[ISLAST]]) : (i64, i32, i64, i64, i64, i64, i64, i64, i64, i32) -> i64
- gpu.printf "Hello: %d\n" %arg0 : i32
+ gpu.printf "Hello: %d\n", %arg0 : i32
gpu.return
}
}
diff --git a/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl-opencl.mlir b/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl-opencl.mlir
index 870f5c5016ecef..00d1d7d8526809 100644
--- a/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl-opencl.mlir
+++ b/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl-opencl.mlir
@@ -9,7 +9,7 @@ gpu.module @test_module {
// CHECK: %[[IMM0:.*]] = llvm.mlir.addressof @[[$PRINT_GLOBAL]] : !llvm.ptr<4>
// CHECK-NEXT: %[[IMM2:.*]] = llvm.getelementptr %[[IMM0]][0, 0] : (!llvm.ptr<4>) -> !llvm.ptr<4>, !llvm.array<11 x i8>
// CHECK-NEXT: %{{.*}} = llvm.call @printf(%[[IMM2]], %[[ARG0]]) vararg(!llvm.func<i32 (ptr<4>, ...)>) : (!llvm.ptr<4>, i32) -> i32
- gpu.printf "Hello: %d\n" %arg0 : i32
+ gpu.printf "Hello: %d\n", %arg0 : i32
gpu.return
}
}
diff --git a/mlir/test/Conversion/GPUToSPIRV/printf.mlir b/mlir/test/Conversion/GPUToSPIRV/printf.mlir
index bc091124ea4c6f..7fe9752b088dba 100644
--- a/mlir/test/Conversion/GPUToSPIRV/printf.mlir
+++ b/mlir/test/Conversion/GPUToSPIRV/printf.mlir
@@ -62,7 +62,7 @@ module attributes {
// CHECK: [[FMTSTR_ADDR:%.*]] = spirv.mlir.addressof [[PRINTMSG]] : !spirv.ptr<!spirv.array<[[ARRAYSIZE]] x i8>, UniformConstant>
// CHECK-NEXT: [[FMTSTR_PTR1:%.*]] = spirv.Bitcast [[FMTSTR_ADDR]] : !spirv.ptr<!spirv.array<[[ARRAYSIZE]] x i8>, UniformConstant> to !spirv.ptr<i8, UniformConstant>
// CHECK-NEXT: {{%.*}} = spirv.CL.printf [[FMTSTR_PTR1]] {{%.*}}, {{%.*}}, {{%.*}} : !spirv.ptr<i8, UniformConstant>, i32, f32, i32 -> i32
- gpu.printf "\nHello, world : %d %f \n Thread id: %d\n" %arg0, %arg1, %2: i32, f32, index
+ gpu.printf "\nHello, world : %d %f \n Thread id: %d\n", %arg0, %arg1, %2: i32, f32, index
// CHECK: spirv.Return
gpu.return
diff --git a/mlir/test/Dialect/GPU/indirect-device-func-call.mlir b/mlir/test/Dialect/GPU/indirect-device-func-call.mlir
index 91d7f1cd6c67d9..85805da3ac10e1 100644
--- a/mlir/test/Dialect/GPU/indirect-device-func-call.mlir
+++ b/mlir/test/Dialect/GPU/indirect-device-func-call.mlir
@@ -6,7 +6,7 @@ gpu.module @kernels {
func.func @hello(%arg0 : f32) {
%tid_x = gpu.thread_id x
%csti8 = arith.constant 2 : i8
- gpu.printf "Hello from %lld, %d, %f\n" %tid_x, %csti8, %arg0 : index, i8, f32
+ gpu.printf "Hello from %lld, %d, %f\n", %tid_x, %csti8, %arg0 : index, i8, f32
return
}
// CHECK-LABEL: @hello_indirect
diff --git a/mlir/test/Dialect/GPU/ops.mlir b/mlir/test/Dialect/GPU/ops.mlir
index c0ff2044b76c40..99915c493ea465 100644
--- a/mlir/test/Dialect/GPU/ops.mlir
+++ b/mlir/test/Dialect/GPU/ops.mlir
@@ -229,9 +229,22 @@ module attributes {gpu.container_module} {
// CHECK-LABEL: gpu.func @printf_test
// CHECK: (%[[ARG0:.*]]: i32)
- // CHECK: gpu.printf "Value: %d" %[[ARG0]] : i32
+ // CHECK: gpu.printf "Value: %d", %[[ARG0]] : i32
gpu.func @printf_test(%arg0 : i32) {
- gpu.printf "Value: %d" %arg0 : i32
+ gpu.printf "Value: %d", %arg0 : i32
+ gpu.return
+ }
+
+ // CHECK-LABEL: gpu.func @printf_empty
+ // CHECK: gpu.printf "]"
+ // CHECK: scf.if
+ // CHECK: gpu.printf ", "
+ gpu.func @printf_empty(%arg0 : i32) {
+ gpu.printf "]"
+ %1 = arith.cmpi slt, %arg0, %arg0 : i32
+ scf.if %1 {
+ gpu.printf ", "
+ }
gpu.return
}
diff --git a/mlir/test/Dialect/GPU/test-nvvm-pipeline.mlir b/mlir/test/Dialect/GPU/test-nvvm-pipeline.mlir
index 732f40c4333df2..f02b26dba97d59 100644
--- a/mlir/test/Dialect/GPU/test-nvvm-pipeline.mlir
+++ b/mlir/test/Dialect/GPU/test-nvvm-pipeline.mlir
@@ -23,7 +23,7 @@ func.func @test_math(%arg0 : f32) {
threads(%6, %7, %8) in (%9 = %c2, %10 = %c1, %11 = %c1) {
// CHECK-NVVM: __nv_expf
%s1 = math.exp %arg0 : f32
- gpu.printf "%f" %s1 : f32
+ gpu.printf "%f", %s1 : f32
gpu.terminator
}
return
diff --git a/mlir/test/Dialect/GPU/test_printf.mlir b/mlir/test/Dialect/GPU/test_printf.mlir
new file mode 100644
index 00000000000000..2a332530355d48
--- /dev/null
+++ b/mlir/test/Dialect/GPU/test_printf.mlir
@@ -0,0 +1,12 @@
+func.func @gemm_no_scf_sm100_1cta(%133 : i32, %arg1: i32, %c127_i32:i32) {
+ %134 = llvm.bitcast %133 : i32 to f32
+ gpu.printf "]"
+ %135 = arith.cmpi slt, %arg1, %c127_i32 : i32
+ scf.if %135 {
+ gpu.printf ", "
+ }
+
+ %0 = gpu.thread_id x
+ gpu.printf "Hello from %d\n", %0 : index
+ func.return
+}
diff --git a/mlir/test/Integration/GPU/CUDA/assert.mlir b/mlir/test/Integration/GPU/CUDA/assert.mlir
index 06a9c1ca0d114b..3d6527fe59b2c1 100644
--- a/mlir/test/Integration/GPU/CUDA/assert.mlir
+++ b/mlir/test/Integration/GPU/CUDA/assert.mlir
@@ -16,10 +16,10 @@ gpu.module @kernels {
gpu.func @test_assert(%c0: i1, %c1: i1) kernel {
%0 = gpu.thread_id x
cf.assert %c1, "passing assertion"
- gpu.printf "thread %lld: print after passing assertion\n" %0 : index
+ gpu.printf "thread %lld: print after passing assertion\n", %0 : index
// Test callsite(callsite(name)) location.
cf.assert %c0, "failing assertion" loc(callsite(callsite("callee_func_name"("callee_file.cc":7:9) at "caller_file.cc":10:8) at "caller2_file.cc":11:12))
- gpu.printf "thread %lld: print after failing assertion\n" %0 : index
+ gpu.printf "thread %lld: print after failing assertion\n", %0 : index
gpu.return
}
}
diff --git a/mlir/test/Integration/GPU/CUDA/printf.mlir b/mlir/test/Integration/GPU/CUDA/printf.mlir
index 99ea1208e9c5e7..15b0bf02d911a5 100644
--- a/mlir/test/Integration/GPU/CUDA/printf.mlir
+++ b/mlir/test/Integration/GPU/CUDA/printf.mlir
@@ -14,7 +14,7 @@ module attributes {gpu.container_module} {
%0 = gpu.thread_id x
%csti8 = arith.constant 2 : i8
%cstf32 = arith.constant 3.0 : f32
- gpu.printf "Hello from %lld, %d, %f\n" %0, %csti8, %cstf32 : index, i8, f32
+ gpu.printf "Hello from %lld, %d, %f\n", %0, %csti8, %cstf32 : index, i8, f32
gpu.return
}
}
diff --git a/mlir/test/Integration/GPU/CUDA/sm90/cga_cluster.mlir b/mlir/test/Integration/GPU/CUDA/sm90/cga_cluster.mlir
index c70c940564a264..a22a34b9393a3f 100644
--- a/mlir/test/Integration/GPU/CUDA/sm90/cga_cluster.mlir
+++ b/mlir/test/Integration/GPU/CUDA/sm90/cga_cluster.mlir
@@ -43,7 +43,7 @@ module attributes {gpu.container_module} {
%cnd2 = arith.cmpi eq, %bidY, %c3 : index
scf.if %cnd1 {
scf.if %cnd2 {
- gpu.printf "clusterIdx: (%d, %d, %d) in Cluster Dimension: (%d, %d, %d) blockIdx: (%d, %d, %d) \n"
+ gpu.printf "clusterIdx: (%d, %d, %d) in Cluster Dimension: (%d, %d, %d) blockIdx: (%d, %d, %d) \n",
%cidX_i32,
%cidY_i32,
%cidZ_i32,
diff --git a/mlir/test/Integration/GPU/CUDA/sm90/tma_load_128x64_swizzle128b.mlir b/mlir/test/Integration/GPU/CUDA/sm90/tma_load_128x64_swizzle128b.mlir
index b50772f8249fb7..95bde40deb48ee 100644
--- a/mlir/test/Integration/GPU/CUDA/sm90/tma_load_128x64_swizzle128b.mlir
+++ b/mlir/test/Integration/GPU/CUDA/sm90/tma_load_128x64_swizzle128b.mlir
@@ -85,7 +85,7 @@ module @mymod {
// Step 7. First thread does TMA load
scf.if %10 {
- gpu.printf "[GPU] TMA SIZE %d\0A" %c8192 : index
+ gpu.printf "[GPU] TMA SIZE %d\0A", %c8192 : index
nvgpu.tma.async.load %3[%c0, %c0], %9[%c0] to %7 : !lhsTensorMap, !barrierType -> !shmemlhs
nvgpu.mbarrier.arrive.expect_tx %9[%c0], %c8192 : !barrierType
} else {
@@ -98,16 +98,16 @@ module @mymod {
// Step 9. Print loaded data in 128b swizzled
scf.if %10 {
- gpu.printf "===--- Matrix A ---=== %d \0A" %c-1_i32 : i32
+ gpu.printf "===--- Matrix A ---=== %d \0A", %c-1_i32 : i32
scf.for %arg12 = %c0 to %c128 step %c1 {
scf.for %arg13 = %c0 to %c64 step %c1 {
%15 = memref.load %7[%arg12, %arg13] : !shmemlhs
%16 = arith.extf %15 : f16 to f32
- gpu.printf "%.0f, " %16 : f32
+ gpu.printf "%.0f, ", %16 : f32
}
- gpu.printf "%d\0A" %c-1_i32 : i32
+ gpu.printf "%d\0A", %c-1_i32 : i32
}
- gpu.printf "===----------------=== %d \0A" %c-1_i32 : i32
+ gpu.printf "===----------------=== %d \0A", %c-1_i32 : i32
}
gpu.terminator
}
diff --git a/mlir/test/Integration/GPU/CUDA/sm90/tma_load_64x64_swizzle128b.mlir b/mlir/test/Integration/GPU/CUDA/sm90/tma_load_64x64_swizzle128b.mlir
index 65e5fc0aff6aa3..fce16f3df23686 100644
--- a/mlir/test/Integration/GPU/CUDA/sm90/tma_load_64x64_swizzle128b.mlir
+++ b/mlir/test/Integration/GPU/CUDA/sm90/tma_load_64x64_swizzle128b.mlir
@@ -109,7 +109,7 @@ module @mymod {
// Step 6. First thread does TMA load
scf.if %10 {
- gpu.printf "[GPU] TMA SIZE %d\0A" %c32768 : index
+ gpu.printf "[GPU] TMA SIZE %d\0A", %c32768 : index
nvgpu.tma.async.load %d_lhsTensorMap[%c0, %c0], %9[%c0] to %lhsShmem : !lhsTensorMap, !barrierType -> !shmemlhs
nvgpu.tma.async.load %d_rhsTensorMap[%c0, %c0], %9[%c0] to %rhsShmem1 : !rhsTensorMap, !barrierType -> memref<64x64xf16, strided<[128, 1]>, 3>
nvgpu.tma.async.load %d_rhsTensorMap[%c64, %c0], %9[%c0] to %rhsShmem2 : !rhsTensorMap, !barrierType -> memref<64x64xf16, strided<[128, 1], offset: 4096>, 3>
@@ -124,7 +124,7 @@ module @mymod {
// Step 8. Print loaded data in 128b swizzled
scf.if %10 {
- gpu.printf "===--- Matrix B ---=== %d \n" %c-1_i32 : i32
+ gpu.printf "===--- Matrix B ---=== %d \n", %c-1_i32 : i32
scf.for %ii = %c0 to %c64 step %c1 {
scf.for %j = %c0 to %c128 step %c1 {
%lhs0 = memref.load %rhsShmem[%ii, %j] : !shmemrhs
@@ -133,7 +133,7 @@ module @mymod {
}
gpu.printf "%d\n" %c-1_i32 : i32
}
- gpu.printf "===----------------=== %d \n" %c-1_i32 : i32
+ gpu.printf "===----------------=== %d \n", %c-1_i32 : i32
}
gpu.barrier
gpu.terminator
diff --git a/mlir/test/Integration/GPU/CUDA/sm90/tma_load_64x8_8x128_noswizzle.mlir b/mlir/test/Integration/GPU/CUDA/sm90/tma_load_64x8_8x128_noswizzle.mlir
index 391fda82e1e197..acca9811f5702e 100644
--- a/mlir/test/Integration/GPU/CUDA/sm90/tma_load_64x8_8x128_noswizzle.mlir
+++ b/mlir/test/Integration/GPU/CUDA/sm90/tma_load_64x8_8x128_noswizzle.mlir
@@ -80,8 +80,8 @@ module @mymod {
nvgpu.mbarrier.arrive.expect_tx %9[%c0], %c6144 : <memorySpace = #gpu.address_space<workgroup>>
%11 = memref.load %7[%c0, %c0] : memref<64x8xf32, 3>
%12 = memref.load %8[%c0, %c0] : memref<8x128xf32, 3>
- gpu.printf "[GPU] TMA BEFORE lhs[45][7] %f\0A" %11 : f32
- gpu.printf "[GPU] TMA BEFORE rhs[7][0] %f\0A" %12 : f32
+ gpu.printf "[GPU] TMA BEFORE lhs[45][7] %f\0A", %11 : f32
+ gpu.printf "[GPU] TMA BEFORE rhs[7][0] %f\0A", %12 : f32
nvgpu.tma.async.load %3[%c0, %c0], %9[%c0] to %7 : <tensor = memref<64x8xf32, 3>, swizzle = none, l2promo = none, oob = zero, interleave = none>, <memorySpace = #gpu.address_space<workgroup>> -> memref<64x8xf32, 3>
nvgpu.tma.async.load %4[%c0, %c0], %9[%c0] to %8 : <tensor = memref<8x128xf32, 3>, swizzle = none, l2promo = none, oob = zero, interleave = none>, <memorySpace = #gpu.address_space<workgroup>> -> memref<8x128xf32, 3>
} else {
@@ -92,8 +92,8 @@ module @mymod {
scf.if %10 {
%11 = memref.load %7[%c45, %c7] : memref<64x8xf32, 3>
%12 = memref.load %8[%c7, %c0] : memref<8x128xf32, 3>
- gpu.printf "[GPU] TMA LOADED lhs[45][7] %f\0A" %11 : f32
- gpu.printf "[GPU] TMA LOADED rhs[7][0] %f\0A" %12 : f32
+ gpu.printf "[GPU] TMA LOADED lhs[45][7] %f\0A", %11 : f32
+ gpu.printf "[GPU] TMA LOADED rhs[7][0] %f\0A", %12 : f32
}
gpu.terminator
}
diff --git a/mlir/test/Integration/GPU/CUDA/sm90/transform-dialect/tma_load_64x8_8x128_noswizzle-transform.mlir b/mlir/test/Integration/GPU/CUDA/sm90/transform-dialect/tma_load_64x8_8x128_noswizzle-transform.mlir
index f83f65bb2963ca..fe6c645357ecb3 100644
--- a/mlir/test/Integration/GPU/CUDA/sm90/transform-dialect/tma_load_64x8_8x128_noswizzle-transform.mlir
+++ b/mlir/test/Integration/GPU/CUDA/sm90/transform-dialect/tma_load_64x8_8x128_noswizzle-transform.mlir
@@ -96,8 +96,8 @@ func.func @main() {
scf.if %10 {
%11 = memref.load %out[%c45, %c7] : memref<64x8xf32, 3>
%12 = memref.load %out_1[%c7, %c0] : memref<8x128xf32, 3>
- gpu.printf "[GPU] TMA LOADED lhs[45][7] %f\0A" %11 : f32
- gpu.printf "[GPU] TMA LOADED rhs[7][0] %f\0A" %12 : f32
+ gpu.printf "[GPU] TMA LOADED lhs[45][7] %f\0A", %11 : f32
+ gpu.printf "[GPU] TMA LOADED rhs[7][0] %f\0A", %12 : f32
}
gpu.terminator
}
diff --git a/mlir/test/Integration/GPU/ROCM/printf.mlir b/mlir/test/Integration/GPU/ROCM/printf.mlir
index d5e6e3757540b2..4a0e4d34bfab5e 100644
--- a/mlir/test/Integration/GPU/ROCM/printf.mlir
+++ b/mlir/test/Integration/GPU/ROCM/printf.mlir
@@ -13,7 +13,7 @@ module attributes {gpu.container_module} {
gpu.module @kernels {
gpu.func @hello() kernel {
%0 = gpu.thread_id x
- gpu.printf "Hello from %d\n" %0 : index
+ gpu.printf "Hello from %d\n", %0 : index
gpu.return
}
}
kuhar left a comment:

Seems reasonable
The following code fails with the error below. It looks like a parser error. This PR attempts to fix that.

```
test_printf.mlir:5:9: error: expected ':'
        %135 = arith.cmpi slt, %arg1, %c127_i32 : i32
```

```mlir
func.func @test(%133 : i32, %arg1: i32, %c127_i32: i32) {
  %134 = llvm.bitcast %133 : i32 to f32
  gpu.printf "]"
  %135 = arith.cmpi slt, %arg1, %c127_i32 : i32
  scf.if %135 {
    gpu.printf ", "
  }
  ....
}
```
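The root cause is the op's declarative assembly format: the optional `$args` group had no leading literal, so after a no-argument `gpu.printf` the parser speculatively consumed `%135` from the next statement as a printf operand and then demanded the `:` of a type list. Requiring a literal `,` before the operands removes the ambiguity, and the reproducer above parses cleanly under the new format. A sketch of the fixed code (function and value names are illustrative):

```mlir
func.func @fixed(%arg0: i32, %arg1: i32) {
  // Unambiguous now: with no comma after the format string, the
  // parser stops here instead of consuming `%0` from the next line.
  gpu.printf "]"
  %0 = arith.cmpi slt, %arg0, %arg1 : i32
  scf.if %0 {
    // Operands require an explicit comma under the new format.
    gpu.printf "lhs: %d\n", %arg0 : i32
  }
  func.return
}
```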
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/140/builds/14235. Here is the relevant piece of the build log for reference.