[MLIR][GPU] Fix gpu.printf #121940

Conversation
@llvm/pr-subscribers-mlir-spirv @llvm/pr-subscribers-mlir

Author: Guray Ozen (grypp)

Changes

A `gpu.printf` with no arguments, followed by a statement that begins with an SSA value, fails with `error: expected ':'`. It looks like a parser error. This PR fixes that by changing the assembly format of `gpu.printf` so that an explicit `,` separates the format string from its arguments (see the reproducer in the description further below).

Full diff: https://github.com/llvm/llvm-project/pull/121940.diff

17 Files Affected:
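To make the change concrete before the diff: under the new format, a `gpu.printf` with no operands is just the format string, and operands are introduced by an explicit comma. A minimal sketch (the module, kernel, and argument names are illustrative, not from the patch):

```mlir
gpu.module @printf_demo {
  gpu.func @demo(%i: i32, %f: f32) kernel {
    // No operands: the format string stands alone, and the parser
    // does not look past it for arguments.
    gpu.printf "Hello\n"
    // With operands: a comma introduces the operand list, followed
    // by `:` and the operand types.
    gpu.printf "Hello: %d %f\n", %i, %f : i32, f32
    gpu.return
  }
}
```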
diff --git a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
index 42a017db300af6..3adfd5f4f2c436 100644
--- a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
+++ b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
@@ -1055,7 +1055,7 @@ def GPU_PrintfOp : GPU_Op<"printf", [MemoryEffects<[MemWrite]>]>,
imposed by one's target platform.
}];
let assemblyFormat = [{
- $format attr-dict ($args^ `:` type($args))?
+ $format attr-dict (`,` $args^ `:` type($args))?
}];
}
diff --git a/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir b/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir
index 318f0f78efa5b7..f52dd6c0d0ce30 100644
--- a/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir
+++ b/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir
@@ -633,7 +633,7 @@ gpu.module @test_module_29 {
// CHECK-NEXT: %[[EL1:.*]] = llvm.getelementptr %[[ALLOC]][0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(i32, f64)>
// CHECK-NEXT: llvm.store %[[EXT]], %[[EL1]] : f64, !llvm.ptr
// CHECK-NEXT: llvm.call @vprintf(%[[FORMATSTART]], %[[ALLOC]]) : (!llvm.ptr, !llvm.ptr) -> i32
- gpu.printf "Hello: %d\n" %arg0, %arg1 : i32, f32
+ gpu.printf "Hello: %d\n", %arg0, %arg1 : i32, f32
gpu.return
}
}
diff --git a/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl-hip.mlir b/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl-hip.mlir
index 1b904fa142bad3..2dc6a5ab2a86ce 100644
--- a/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl-hip.mlir
+++ b/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl-hip.mlir
@@ -36,7 +36,7 @@ gpu.module @test_module {
// CHECK-NEXT: %[[NARGS1:.*]] = llvm.mlir.constant(1 : i32) : i32
// CHECK-NEXT: %[[ARG0_64:.*]] = llvm.zext %[[ARG0]] : i32 to i64
// CHECK-NEXT: %{{.*}} = llvm.call @__ockl_printf_append_args(%[[DESC1]], %[[NARGS1]], %[[ARG0_64]], %[[CST0]], %[[CST0]], %[[CST0]], %[[CST0]], %[[CST0]], %[[CST0]], %[[ISLAST]]) : (i64, i32, i64, i64, i64, i64, i64, i64, i64, i32) -> i64
- gpu.printf "Hello: %d\n" %arg0 : i32
+ gpu.printf "Hello: %d\n", %arg0 : i32
gpu.return
}
}
diff --git a/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl-opencl.mlir b/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl-opencl.mlir
index 870f5c5016ecef..00d1d7d8526809 100644
--- a/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl-opencl.mlir
+++ b/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl-opencl.mlir
@@ -9,7 +9,7 @@ gpu.module @test_module {
// CHECK: %[[IMM0:.*]] = llvm.mlir.addressof @[[$PRINT_GLOBAL]] : !llvm.ptr<4>
// CHECK-NEXT: %[[IMM2:.*]] = llvm.getelementptr %[[IMM0]][0, 0] : (!llvm.ptr<4>) -> !llvm.ptr<4>, !llvm.array<11 x i8>
// CHECK-NEXT: %{{.*}} = llvm.call @printf(%[[IMM2]], %[[ARG0]]) vararg(!llvm.func<i32 (ptr<4>, ...)>) : (!llvm.ptr<4>, i32) -> i32
- gpu.printf "Hello: %d\n" %arg0 : i32
+ gpu.printf "Hello: %d\n", %arg0 : i32
gpu.return
}
}
diff --git a/mlir/test/Conversion/GPUToSPIRV/printf.mlir b/mlir/test/Conversion/GPUToSPIRV/printf.mlir
index bc091124ea4c6f..7fe9752b088dba 100644
--- a/mlir/test/Conversion/GPUToSPIRV/printf.mlir
+++ b/mlir/test/Conversion/GPUToSPIRV/printf.mlir
@@ -62,7 +62,7 @@ module attributes {
// CHECK: [[FMTSTR_ADDR:%.*]] = spirv.mlir.addressof [[PRINTMSG]] : !spirv.ptr<!spirv.array<[[ARRAYSIZE]] x i8>, UniformConstant>
// CHECK-NEXT: [[FMTSTR_PTR1:%.*]] = spirv.Bitcast [[FMTSTR_ADDR]] : !spirv.ptr<!spirv.array<[[ARRAYSIZE]] x i8>, UniformConstant> to !spirv.ptr<i8, UniformConstant>
// CHECK-NEXT: {{%.*}} = spirv.CL.printf [[FMTSTR_PTR1]] {{%.*}}, {{%.*}}, {{%.*}} : !spirv.ptr<i8, UniformConstant>, i32, f32, i32 -> i32
- gpu.printf "\nHello, world : %d %f \n Thread id: %d\n" %arg0, %arg1, %2: i32, f32, index
+ gpu.printf "\nHello, world : %d %f \n Thread id: %d\n", %arg0, %arg1, %2: i32, f32, index
// CHECK: spirv.Return
gpu.return
diff --git a/mlir/test/Dialect/GPU/indirect-device-func-call.mlir b/mlir/test/Dialect/GPU/indirect-device-func-call.mlir
index 91d7f1cd6c67d9..85805da3ac10e1 100644
--- a/mlir/test/Dialect/GPU/indirect-device-func-call.mlir
+++ b/mlir/test/Dialect/GPU/indirect-device-func-call.mlir
@@ -6,7 +6,7 @@ gpu.module @kernels {
func.func @hello(%arg0 : f32) {
%tid_x = gpu.thread_id x
%csti8 = arith.constant 2 : i8
- gpu.printf "Hello from %lld, %d, %f\n" %tid_x, %csti8, %arg0 : index, i8, f32
+ gpu.printf "Hello from %lld, %d, %f\n", %tid_x, %csti8, %arg0 : index, i8, f32
return
}
// CHECK-LABEL: @hello_indirect
diff --git a/mlir/test/Dialect/GPU/ops.mlir b/mlir/test/Dialect/GPU/ops.mlir
index c0ff2044b76c40..99915c493ea465 100644
--- a/mlir/test/Dialect/GPU/ops.mlir
+++ b/mlir/test/Dialect/GPU/ops.mlir
@@ -229,9 +229,22 @@ module attributes {gpu.container_module} {
// CHECK-LABEL: gpu.func @printf_test
// CHECK: (%[[ARG0:.*]]: i32)
- // CHECK: gpu.printf "Value: %d" %[[ARG0]] : i32
+ // CHECK: gpu.printf "Value: %d", %[[ARG0]] : i32
gpu.func @printf_test(%arg0 : i32) {
- gpu.printf "Value: %d" %arg0 : i32
+ gpu.printf "Value: %d", %arg0 : i32
+ gpu.return
+ }
+
+ // CHECK-LABEL: gpu.func @printf_empty
+ // CHECK: gpu.printf "]"
+ // CHECK: scf.if
+ // CHECK: gpu.printf ", "
+ gpu.func @printf_empty(%arg0 : i32) {
+ gpu.printf "]"
+ %1 = arith.cmpi slt, %arg0, %arg0 : i32
+ scf.if %1 {
+ gpu.printf ", "
+ }
gpu.return
}
diff --git a/mlir/test/Dialect/GPU/test-nvvm-pipeline.mlir b/mlir/test/Dialect/GPU/test-nvvm-pipeline.mlir
index 732f40c4333df2..f02b26dba97d59 100644
--- a/mlir/test/Dialect/GPU/test-nvvm-pipeline.mlir
+++ b/mlir/test/Dialect/GPU/test-nvvm-pipeline.mlir
@@ -23,7 +23,7 @@ func.func @test_math(%arg0 : f32) {
threads(%6, %7, %8) in (%9 = %c2, %10 = %c1, %11 = %c1) {
// CHECK-NVVM: __nv_expf
%s1 = math.exp %arg0 : f32
- gpu.printf "%f" %s1 : f32
+ gpu.printf "%f", %s1 : f32
gpu.terminator
}
return
diff --git a/mlir/test/Dialect/GPU/test_printf.mlir b/mlir/test/Dialect/GPU/test_printf.mlir
new file mode 100644
index 00000000000000..2a332530355d48
--- /dev/null
+++ b/mlir/test/Dialect/GPU/test_printf.mlir
@@ -0,0 +1,12 @@
+func.func @gemm_no_scf_sm100_1cta(%133 : i32, %arg1: i32, %c127_i32:i32) {
+ %134 = llvm.bitcast %133 : i32 to f32
+ gpu.printf "]"
+ %135 = arith.cmpi slt, %arg1, %c127_i32 : i32
+ scf.if %135 {
+ gpu.printf ", "
+ }
+
+ %0 = gpu.thread_id x
+ gpu.printf "Hello from %d\n", %0 : index
+ func.return
+}
diff --git a/mlir/test/Integration/GPU/CUDA/assert.mlir b/mlir/test/Integration/GPU/CUDA/assert.mlir
index 06a9c1ca0d114b..3d6527fe59b2c1 100644
--- a/mlir/test/Integration/GPU/CUDA/assert.mlir
+++ b/mlir/test/Integration/GPU/CUDA/assert.mlir
@@ -16,10 +16,10 @@ gpu.module @kernels {
gpu.func @test_assert(%c0: i1, %c1: i1) kernel {
%0 = gpu.thread_id x
cf.assert %c1, "passing assertion"
- gpu.printf "thread %lld: print after passing assertion\n" %0 : index
+ gpu.printf "thread %lld: print after passing assertion\n", %0 : index
// Test callsite(callsite(name)) location.
cf.assert %c0, "failing assertion" loc(callsite(callsite("callee_func_name"("callee_file.cc":7:9) at "caller_file.cc":10:8) at "caller2_file.cc":11:12))
- gpu.printf "thread %lld: print after failing assertion\n" %0 : index
+ gpu.printf "thread %lld: print after failing assertion\n", %0 : index
gpu.return
}
}
diff --git a/mlir/test/Integration/GPU/CUDA/printf.mlir b/mlir/test/Integration/GPU/CUDA/printf.mlir
index 99ea1208e9c5e7..15b0bf02d911a5 100644
--- a/mlir/test/Integration/GPU/CUDA/printf.mlir
+++ b/mlir/test/Integration/GPU/CUDA/printf.mlir
@@ -14,7 +14,7 @@ module attributes {gpu.container_module} {
%0 = gpu.thread_id x
%csti8 = arith.constant 2 : i8
%cstf32 = arith.constant 3.0 : f32
- gpu.printf "Hello from %lld, %d, %f\n" %0, %csti8, %cstf32 : index, i8, f32
+ gpu.printf "Hello from %lld, %d, %f\n", %0, %csti8, %cstf32 : index, i8, f32
gpu.return
}
}
diff --git a/mlir/test/Integration/GPU/CUDA/sm90/cga_cluster.mlir b/mlir/test/Integration/GPU/CUDA/sm90/cga_cluster.mlir
index c70c940564a264..a22a34b9393a3f 100644
--- a/mlir/test/Integration/GPU/CUDA/sm90/cga_cluster.mlir
+++ b/mlir/test/Integration/GPU/CUDA/sm90/cga_cluster.mlir
@@ -43,7 +43,7 @@ module attributes {gpu.container_module} {
%cnd2 = arith.cmpi eq, %bidY, %c3 : index
scf.if %cnd1 {
scf.if %cnd2 {
- gpu.printf "clusterIdx: (%d, %d, %d) in Cluster Dimension: (%d, %d, %d) blockIdx: (%d, %d, %d) \n"
+ gpu.printf "clusterIdx: (%d, %d, %d) in Cluster Dimension: (%d, %d, %d) blockIdx: (%d, %d, %d) \n",
%cidX_i32,
%cidY_i32,
%cidZ_i32,
diff --git a/mlir/test/Integration/GPU/CUDA/sm90/tma_load_128x64_swizzle128b.mlir b/mlir/test/Integration/GPU/CUDA/sm90/tma_load_128x64_swizzle128b.mlir
index b50772f8249fb7..95bde40deb48ee 100644
--- a/mlir/test/Integration/GPU/CUDA/sm90/tma_load_128x64_swizzle128b.mlir
+++ b/mlir/test/Integration/GPU/CUDA/sm90/tma_load_128x64_swizzle128b.mlir
@@ -85,7 +85,7 @@ module @mymod {
// Step 7. First thread does TMA load
scf.if %10 {
- gpu.printf "[GPU] TMA SIZE %d\0A" %c8192 : index
+ gpu.printf "[GPU] TMA SIZE %d\0A", %c8192 : index
nvgpu.tma.async.load %3[%c0, %c0], %9[%c0] to %7 : !lhsTensorMap, !barrierType -> !shmemlhs
nvgpu.mbarrier.arrive.expect_tx %9[%c0], %c8192 : !barrierType
} else {
@@ -98,16 +98,16 @@ module @mymod {
// Step 9. Print loaded data in 128b swizzled
scf.if %10 {
- gpu.printf "===--- Matrix A ---=== %d \0A" %c-1_i32 : i32
+ gpu.printf "===--- Matrix A ---=== %d \0A", %c-1_i32 : i32
scf.for %arg12 = %c0 to %c128 step %c1 {
scf.for %arg13 = %c0 to %c64 step %c1 {
%15 = memref.load %7[%arg12, %arg13] : !shmemlhs
%16 = arith.extf %15 : f16 to f32
- gpu.printf "%.0f, " %16 : f32
+ gpu.printf "%.0f, ", %16 : f32
}
- gpu.printf "%d\0A" %c-1_i32 : i32
+ gpu.printf "%d\0A", %c-1_i32 : i32
}
- gpu.printf "===----------------=== %d \0A" %c-1_i32 : i32
+ gpu.printf "===----------------=== %d \0A", %c-1_i32 : i32
}
gpu.terminator
}
diff --git a/mlir/test/Integration/GPU/CUDA/sm90/tma_load_64x64_swizzle128b.mlir b/mlir/test/Integration/GPU/CUDA/sm90/tma_load_64x64_swizzle128b.mlir
index 65e5fc0aff6aa3..fce16f3df23686 100644
--- a/mlir/test/Integration/GPU/CUDA/sm90/tma_load_64x64_swizzle128b.mlir
+++ b/mlir/test/Integration/GPU/CUDA/sm90/tma_load_64x64_swizzle128b.mlir
@@ -109,7 +109,7 @@ module @mymod {
// Step 6. First thread does TMA load
scf.if %10 {
- gpu.printf "[GPU] TMA SIZE %d\0A" %c32768 : index
+ gpu.printf "[GPU] TMA SIZE %d\0A", %c32768 : index
nvgpu.tma.async.load %d_lhsTensorMap[%c0, %c0], %9[%c0] to %lhsShmem : !lhsTensorMap, !barrierType -> !shmemlhs
nvgpu.tma.async.load %d_rhsTensorMap[%c0, %c0], %9[%c0] to %rhsShmem1 : !rhsTensorMap, !barrierType -> memref<64x64xf16, strided<[128, 1]>, 3>
nvgpu.tma.async.load %d_rhsTensorMap[%c64, %c0], %9[%c0] to %rhsShmem2 : !rhsTensorMap, !barrierType -> memref<64x64xf16, strided<[128, 1], offset: 4096>, 3>
@@ -124,7 +124,7 @@ module @mymod {
// Step 8. Print loaded data in 128b swizzled
scf.if %10 {
- gpu.printf "===--- Matrix B ---=== %d \n" %c-1_i32 : i32
+ gpu.printf "===--- Matrix B ---=== %d \n", %c-1_i32 : i32
scf.for %ii = %c0 to %c64 step %c1 {
scf.for %j = %c0 to %c128 step %c1 {
%lhs0 = memref.load %rhsShmem[%ii, %j] : !shmemrhs
@@ -133,7 +133,7 @@ module @mymod {
}
gpu.printf "%d\n" %c-1_i32 : i32
}
- gpu.printf "===----------------=== %d \n" %c-1_i32 : i32
+ gpu.printf "===----------------=== %d \n", %c-1_i32 : i32
}
gpu.barrier
gpu.terminator
diff --git a/mlir/test/Integration/GPU/CUDA/sm90/tma_load_64x8_8x128_noswizzle.mlir b/mlir/test/Integration/GPU/CUDA/sm90/tma_load_64x8_8x128_noswizzle.mlir
index 391fda82e1e197..acca9811f5702e 100644
--- a/mlir/test/Integration/GPU/CUDA/sm90/tma_load_64x8_8x128_noswizzle.mlir
+++ b/mlir/test/Integration/GPU/CUDA/sm90/tma_load_64x8_8x128_noswizzle.mlir
@@ -80,8 +80,8 @@ module @mymod {
nvgpu.mbarrier.arrive.expect_tx %9[%c0], %c6144 : <memorySpace = #gpu.address_space<workgroup>>
%11 = memref.load %7[%c0, %c0] : memref<64x8xf32, 3>
%12 = memref.load %8[%c0, %c0] : memref<8x128xf32, 3>
- gpu.printf "[GPU] TMA BEFORE lhs[45][7] %f\0A" %11 : f32
- gpu.printf "[GPU] TMA BEFORE rhs[7][0] %f\0A" %12 : f32
+ gpu.printf "[GPU] TMA BEFORE lhs[45][7] %f\0A", %11 : f32
+ gpu.printf "[GPU] TMA BEFORE rhs[7][0] %f\0A", %12 : f32
nvgpu.tma.async.load %3[%c0, %c0], %9[%c0] to %7 : <tensor = memref<64x8xf32, 3>, swizzle = none, l2promo = none, oob = zero, interleave = none>, <memorySpace = #gpu.address_space<workgroup>> -> memref<64x8xf32, 3>
nvgpu.tma.async.load %4[%c0, %c0], %9[%c0] to %8 : <tensor = memref<8x128xf32, 3>, swizzle = none, l2promo = none, oob = zero, interleave = none>, <memorySpace = #gpu.address_space<workgroup>> -> memref<8x128xf32, 3>
} else {
@@ -92,8 +92,8 @@ module @mymod {
scf.if %10 {
%11 = memref.load %7[%c45, %c7] : memref<64x8xf32, 3>
%12 = memref.load %8[%c7, %c0] : memref<8x128xf32, 3>
- gpu.printf "[GPU] TMA LOADED lhs[45][7] %f\0A" %11 : f32
- gpu.printf "[GPU] TMA LOADED rhs[7][0] %f\0A" %12 : f32
+ gpu.printf "[GPU] TMA LOADED lhs[45][7] %f\0A", %11 : f32
+ gpu.printf "[GPU] TMA LOADED rhs[7][0] %f\0A", %12 : f32
}
gpu.terminator
}
diff --git a/mlir/test/Integration/GPU/CUDA/sm90/transform-dialect/tma_load_64x8_8x128_noswizzle-transform.mlir b/mlir/test/Integration/GPU/CUDA/sm90/transform-dialect/tma_load_64x8_8x128_noswizzle-transform.mlir
index f83f65bb2963ca..fe6c645357ecb3 100644
--- a/mlir/test/Integration/GPU/CUDA/sm90/transform-dialect/tma_load_64x8_8x128_noswizzle-transform.mlir
+++ b/mlir/test/Integration/GPU/CUDA/sm90/transform-dialect/tma_load_64x8_8x128_noswizzle-transform.mlir
@@ -96,8 +96,8 @@ func.func @main() {
scf.if %10 {
%11 = memref.load %out[%c45, %c7] : memref<64x8xf32, 3>
%12 = memref.load %out_1[%c7, %c0] : memref<8x128xf32, 3>
- gpu.printf "[GPU] TMA LOADED lhs[45][7] %f\0A" %11 : f32
- gpu.printf "[GPU] TMA LOADED rhs[7][0] %f\0A" %12 : f32
+ gpu.printf "[GPU] TMA LOADED lhs[45][7] %f\0A", %11 : f32
+ gpu.printf "[GPU] TMA LOADED rhs[7][0] %f\0A", %12 : f32
}
gpu.terminator
}
diff --git a/mlir/test/Integration/GPU/ROCM/printf.mlir b/mlir/test/Integration/GPU/ROCM/printf.mlir
index d5e6e3757540b2..4a0e4d34bfab5e 100644
--- a/mlir/test/Integration/GPU/ROCM/printf.mlir
+++ b/mlir/test/Integration/GPU/ROCM/printf.mlir
@@ -13,7 +13,7 @@ module attributes {gpu.container_module} {
gpu.module @kernels {
gpu.func @hello() kernel {
%0 = gpu.thread_id x
- gpu.printf "Hello from %d\n" %0 : index
+ gpu.printf "Hello from %d\n", %0 : index
gpu.return
}
}
kuhar left a comment:

Seems reasonable
The following code fails with the error below. It looks like a parser error. This PR attempts to fix that.

```
test_printf.mlir:5:9: error: expected ':'
        %135 = arith.cmpi slt, %arg1, %c127_i32 : i32
```

```mlir
func.func @test(%133 : i32, %arg1: i32, %c127_i32: i32) {
  %134 = llvm.bitcast %133 : i32 to f32
  gpu.printf "]"
  %135 = arith.cmpi slt, %arg1, %c127_i32 : i32
  scf.if %135 {
    gpu.printf ", "
  }
  ....
}
```
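The root cause is the op's declarative assembly format: the optional `$args` group had no leading literal, so after a no-argument `gpu.printf` the parser speculatively consumed `%135` from the next statement as a printf operand and then demanded the `:` of a type list. Requiring a literal `,` before the operands removes the ambiguity, and the reproducer above parses cleanly under the new format. A sketch of the fixed code (function and value names are illustrative):

```mlir
func.func @fixed(%arg0: i32, %arg1: i32) {
  // Unambiguous now: with no comma after the format string, the
  // parser stops here instead of consuming `%0` from the next line.
  gpu.printf "]"
  %0 = arith.cmpi slt, %arg0, %arg1 : i32
  scf.if %0 {
    // Operands require an explicit comma under the new format.
    gpu.printf "lhs: %d\n", %arg0 : i32
  }
  func.return
}
```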
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/140/builds/14235. Here is the relevant piece of the build log for reference.