Conversation

@grypp (Member) commented Jan 7, 2025

The following code fails with the error below. It looks like a parser error:

```
test_printf.mlir:5:9: error: expected ':'
%135 = arith.cmpi slt, %arg1, %c127_i32 : i32
```

```
func.func @test(%133 : i32, %arg1: i32, %c127_i32:i32) {
    %134 = llvm.bitcast %133 : i32 to f32
    gpu.printf "]"
    %135 = arith.cmpi slt, %arg1, %c127_i32 : i32
    scf.if %135 {
        gpu.printf ", "
    }
    ....
}
```

This PR fixes the parse by changing the assembly format of `gpu.printf`. The optional argument list had no leading delimiter, so after an argument-less `gpu.printf` the parser consumed the `%135` that begins the next operation as a printf argument and then failed expecting `:`. Requiring a comma between the format string and the arguments removes the ambiguity.
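Under the old grammar, the optional argument clause began directly with an SSA value, so any argument-less `gpu.printf` followed by an operation that defines a result reproduces the failure. A minimal sketch of the misparse (the operand names are illustrative):

```
// Old format: $format attr-dict ($args^ `:` type($args))?
// After reading "]", the parser speculatively takes `%1` from the next
// operation as a printf argument, then reports `expected ':'` when it
// sees `=` instead of the type clause.
gpu.printf "]"
%1 = arith.cmpi slt, %arg0, %arg0 : i32
```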

@grypp grypp requested review from matthias-springer and removed request for antiagainst and kuhar January 7, 2025 14:20
@grypp grypp requested review from Groverkss and kuhar January 7, 2025 14:21
@llvmbot (Member) commented Jan 7, 2025

@llvm/pr-subscribers-mlir-spirv
@llvm/pr-subscribers-mlir-gpu

@llvm/pr-subscribers-mlir

Author: Guray Ozen (grypp)


Full diff: https://github.com/llvm/llvm-project/pull/121940.diff

17 Files Affected:

  • (modified) mlir/include/mlir/Dialect/GPU/IR/GPUOps.td (+1-1)
  • (modified) mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir (+1-1)
  • (modified) mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl-hip.mlir (+1-1)
  • (modified) mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl-opencl.mlir (+1-1)
  • (modified) mlir/test/Conversion/GPUToSPIRV/printf.mlir (+1-1)
  • (modified) mlir/test/Dialect/GPU/indirect-device-func-call.mlir (+1-1)
  • (modified) mlir/test/Dialect/GPU/ops.mlir (+15-2)
  • (modified) mlir/test/Dialect/GPU/test-nvvm-pipeline.mlir (+1-1)
  • (added) mlir/test/Dialect/GPU/test_printf.mlir (+12)
  • (modified) mlir/test/Integration/GPU/CUDA/assert.mlir (+2-2)
  • (modified) mlir/test/Integration/GPU/CUDA/printf.mlir (+1-1)
  • (modified) mlir/test/Integration/GPU/CUDA/sm90/cga_cluster.mlir (+1-1)
  • (modified) mlir/test/Integration/GPU/CUDA/sm90/tma_load_128x64_swizzle128b.mlir (+5-5)
  • (modified) mlir/test/Integration/GPU/CUDA/sm90/tma_load_64x64_swizzle128b.mlir (+3-3)
  • (modified) mlir/test/Integration/GPU/CUDA/sm90/tma_load_64x8_8x128_noswizzle.mlir (+4-4)
  • (modified) mlir/test/Integration/GPU/CUDA/sm90/transform-dialect/tma_load_64x8_8x128_noswizzle-transform.mlir (+2-2)
  • (modified) mlir/test/Integration/GPU/ROCM/printf.mlir (+1-1)
diff --git a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
index 42a017db300af6..3adfd5f4f2c436 100644
--- a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
+++ b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
@@ -1055,7 +1055,7 @@ def GPU_PrintfOp : GPU_Op<"printf", [MemoryEffects<[MemWrite]>]>,
     imposed by one's target platform.
   }];
   let assemblyFormat = [{
-    $format attr-dict ($args^ `:` type($args))?
+    $format attr-dict (`,` $args^ `:` type($args))?
   }];
 }
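
With the comma in place, the parser only enters the argument clause after an explicit `,` token, so an argument-less `gpu.printf` and the following operation parse independently. A minimal usage sketch under the new format (the wrapper function is illustrative):

```
func.func @printf_fixed(%arg0 : i32) {
  // No arguments: the op ends at the format string.
  gpu.printf "]"
  // With arguments: a comma now separates the format string from the values.
  gpu.printf "Value: %d", %arg0 : i32
  func.return
}
```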
 
diff --git a/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir b/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir
index 318f0f78efa5b7..f52dd6c0d0ce30 100644
--- a/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir
+++ b/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir
@@ -633,7 +633,7 @@ gpu.module @test_module_29 {
     // CHECK-NEXT: %[[EL1:.*]] = llvm.getelementptr %[[ALLOC]][0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(i32, f64)>
     // CHECK-NEXT: llvm.store %[[EXT]], %[[EL1]] : f64, !llvm.ptr
     // CHECK-NEXT: llvm.call @vprintf(%[[FORMATSTART]], %[[ALLOC]]) : (!llvm.ptr, !llvm.ptr) -> i32
-    gpu.printf "Hello: %d\n" %arg0, %arg1 : i32, f32
+    gpu.printf "Hello: %d\n", %arg0, %arg1 : i32, f32
     gpu.return
   }
 }
diff --git a/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl-hip.mlir b/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl-hip.mlir
index 1b904fa142bad3..2dc6a5ab2a86ce 100644
--- a/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl-hip.mlir
+++ b/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl-hip.mlir
@@ -36,7 +36,7 @@ gpu.module @test_module {
     // CHECK-NEXT: %[[NARGS1:.*]] = llvm.mlir.constant(1 : i32) : i32
     // CHECK-NEXT: %[[ARG0_64:.*]] = llvm.zext %[[ARG0]] : i32 to i64
     // CHECK-NEXT: %{{.*}} = llvm.call @__ockl_printf_append_args(%[[DESC1]], %[[NARGS1]], %[[ARG0_64]], %[[CST0]], %[[CST0]], %[[CST0]], %[[CST0]], %[[CST0]], %[[CST0]], %[[ISLAST]]) : (i64, i32, i64, i64, i64, i64, i64, i64, i64, i32) -> i64
-    gpu.printf "Hello: %d\n" %arg0 : i32
+    gpu.printf "Hello: %d\n", %arg0 : i32
     gpu.return
   }
 }
diff --git a/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl-opencl.mlir b/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl-opencl.mlir
index 870f5c5016ecef..00d1d7d8526809 100644
--- a/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl-opencl.mlir
+++ b/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl-opencl.mlir
@@ -9,7 +9,7 @@ gpu.module @test_module {
     // CHECK: %[[IMM0:.*]] = llvm.mlir.addressof @[[$PRINT_GLOBAL]] : !llvm.ptr<4>
     // CHECK-NEXT: %[[IMM2:.*]] = llvm.getelementptr %[[IMM0]][0, 0] : (!llvm.ptr<4>) -> !llvm.ptr<4>, !llvm.array<11 x i8>
     // CHECK-NEXT: %{{.*}} = llvm.call @printf(%[[IMM2]], %[[ARG0]]) vararg(!llvm.func<i32 (ptr<4>, ...)>) : (!llvm.ptr<4>, i32) -> i32
-    gpu.printf "Hello: %d\n" %arg0 : i32
+    gpu.printf "Hello: %d\n", %arg0 : i32
     gpu.return
   }
 }
diff --git a/mlir/test/Conversion/GPUToSPIRV/printf.mlir b/mlir/test/Conversion/GPUToSPIRV/printf.mlir
index bc091124ea4c6f..7fe9752b088dba 100644
--- a/mlir/test/Conversion/GPUToSPIRV/printf.mlir
+++ b/mlir/test/Conversion/GPUToSPIRV/printf.mlir
@@ -62,7 +62,7 @@ module attributes {
         // CHECK: [[FMTSTR_ADDR:%.*]] = spirv.mlir.addressof [[PRINTMSG]] : !spirv.ptr<!spirv.array<[[ARRAYSIZE]] x i8>, UniformConstant>
         // CHECK-NEXT: [[FMTSTR_PTR1:%.*]] = spirv.Bitcast [[FMTSTR_ADDR]] : !spirv.ptr<!spirv.array<[[ARRAYSIZE]] x i8>, UniformConstant> to !spirv.ptr<i8, UniformConstant>
         // CHECK-NEXT:  {{%.*}} = spirv.CL.printf [[FMTSTR_PTR1]] {{%.*}}, {{%.*}}, {{%.*}} : !spirv.ptr<i8, UniformConstant>, i32, f32, i32 -> i32
-        gpu.printf "\nHello, world : %d %f \n Thread id: %d\n" %arg0, %arg1, %2: i32, f32, index
+        gpu.printf "\nHello, world : %d %f \n Thread id: %d\n", %arg0, %arg1, %2: i32, f32, index
 
         // CHECK: spirv.Return
         gpu.return
diff --git a/mlir/test/Dialect/GPU/indirect-device-func-call.mlir b/mlir/test/Dialect/GPU/indirect-device-func-call.mlir
index 91d7f1cd6c67d9..85805da3ac10e1 100644
--- a/mlir/test/Dialect/GPU/indirect-device-func-call.mlir
+++ b/mlir/test/Dialect/GPU/indirect-device-func-call.mlir
@@ -6,7 +6,7 @@ gpu.module @kernels {
     func.func @hello(%arg0 : f32) {
         %tid_x = gpu.thread_id x
         %csti8 = arith.constant 2 : i8
-        gpu.printf "Hello from %lld, %d, %f\n" %tid_x, %csti8, %arg0  : index, i8, f32
+        gpu.printf "Hello from %lld, %d, %f\n", %tid_x, %csti8, %arg0  : index, i8, f32
         return
     }
     // CHECK-LABEL: @hello_indirect
diff --git a/mlir/test/Dialect/GPU/ops.mlir b/mlir/test/Dialect/GPU/ops.mlir
index c0ff2044b76c40..99915c493ea465 100644
--- a/mlir/test/Dialect/GPU/ops.mlir
+++ b/mlir/test/Dialect/GPU/ops.mlir
@@ -229,9 +229,22 @@ module attributes {gpu.container_module} {
 
     // CHECK-LABEL: gpu.func @printf_test
     // CHECK: (%[[ARG0:.*]]: i32)
-    // CHECK: gpu.printf "Value: %d" %[[ARG0]] : i32
+    // CHECK: gpu.printf "Value: %d", %[[ARG0]] : i32
     gpu.func @printf_test(%arg0 : i32) {
-      gpu.printf "Value: %d" %arg0 : i32
+      gpu.printf "Value: %d", %arg0 : i32
+      gpu.return
+    }
+
+    // CHECK-LABEL: gpu.func @printf_empty
+    // CHECK: gpu.printf  "]"
+    // CHECK: scf.if
+    // CHECK: gpu.printf ", "
+    gpu.func @printf_empty(%arg0 : i32) {
+      gpu.printf "]"
+      %1 = arith.cmpi slt, %arg0, %arg0 : i32
+      scf.if %1 {
+        gpu.printf ", "
+      } 
       gpu.return
     }
 
diff --git a/mlir/test/Dialect/GPU/test-nvvm-pipeline.mlir b/mlir/test/Dialect/GPU/test-nvvm-pipeline.mlir
index 732f40c4333df2..f02b26dba97d59 100644
--- a/mlir/test/Dialect/GPU/test-nvvm-pipeline.mlir
+++ b/mlir/test/Dialect/GPU/test-nvvm-pipeline.mlir
@@ -23,7 +23,7 @@ func.func @test_math(%arg0 : f32) {
         threads(%6, %7, %8) in (%9 = %c2, %10 = %c1, %11 = %c1) { 
         // CHECK-NVVM: __nv_expf 
         %s1 = math.exp %arg0 : f32
-        gpu.printf "%f" %s1 : f32
+        gpu.printf "%f", %s1 : f32
         gpu.terminator
     }
     return
diff --git a/mlir/test/Dialect/GPU/test_printf.mlir b/mlir/test/Dialect/GPU/test_printf.mlir
new file mode 100644
index 00000000000000..2a332530355d48
--- /dev/null
+++ b/mlir/test/Dialect/GPU/test_printf.mlir
@@ -0,0 +1,12 @@
+func.func @gemm_no_scf_sm100_1cta(%133 : i32, %arg1: i32, %c127_i32:i32) {
+    %134 = llvm.bitcast %133 : i32 to f32        
+    gpu.printf "]"
+    %135 = arith.cmpi slt, %arg1, %c127_i32 : i32
+    scf.if %135 {
+        gpu.printf ", "
+    } 
+
+    %0 = gpu.thread_id x
+    gpu.printf "Hello from %d\n", %0 : index
+    func.return
+}
diff --git a/mlir/test/Integration/GPU/CUDA/assert.mlir b/mlir/test/Integration/GPU/CUDA/assert.mlir
index 06a9c1ca0d114b..3d6527fe59b2c1 100644
--- a/mlir/test/Integration/GPU/CUDA/assert.mlir
+++ b/mlir/test/Integration/GPU/CUDA/assert.mlir
@@ -16,10 +16,10 @@ gpu.module @kernels {
 gpu.func @test_assert(%c0: i1, %c1: i1) kernel {
   %0 = gpu.thread_id x
   cf.assert %c1, "passing assertion"
-  gpu.printf "thread %lld: print after passing assertion\n" %0 : index
+  gpu.printf "thread %lld: print after passing assertion\n", %0 : index
   // Test callsite(callsite(name)) location.
   cf.assert %c0, "failing assertion" loc(callsite(callsite("callee_func_name"("callee_file.cc":7:9) at "caller_file.cc":10:8) at "caller2_file.cc":11:12))
-  gpu.printf "thread %lld: print after failing assertion\n" %0 : index
+  gpu.printf "thread %lld: print after failing assertion\n", %0 : index
   gpu.return
 }
 }
diff --git a/mlir/test/Integration/GPU/CUDA/printf.mlir b/mlir/test/Integration/GPU/CUDA/printf.mlir
index 99ea1208e9c5e7..15b0bf02d911a5 100644
--- a/mlir/test/Integration/GPU/CUDA/printf.mlir
+++ b/mlir/test/Integration/GPU/CUDA/printf.mlir
@@ -14,7 +14,7 @@ module attributes {gpu.container_module} {
             %0 = gpu.thread_id x
             %csti8 = arith.constant 2 : i8
             %cstf32 = arith.constant 3.0 : f32
-            gpu.printf "Hello from %lld, %d, %f\n" %0, %csti8, %cstf32  : index, i8, f32
+            gpu.printf "Hello from %lld, %d, %f\n", %0, %csti8, %cstf32  : index, i8, f32
             gpu.return
         }
     }
diff --git a/mlir/test/Integration/GPU/CUDA/sm90/cga_cluster.mlir b/mlir/test/Integration/GPU/CUDA/sm90/cga_cluster.mlir
index c70c940564a264..a22a34b9393a3f 100644
--- a/mlir/test/Integration/GPU/CUDA/sm90/cga_cluster.mlir
+++ b/mlir/test/Integration/GPU/CUDA/sm90/cga_cluster.mlir
@@ -43,7 +43,7 @@ module attributes {gpu.container_module} {
       %cnd2 =  arith.cmpi eq, %bidY, %c3 : index
       scf.if %cnd1 {
         scf.if %cnd2 {
-          gpu.printf "clusterIdx: (%d, %d, %d) in Cluster Dimension: (%d, %d, %d) blockIdx: (%d, %d, %d) \n" 
+          gpu.printf "clusterIdx: (%d, %d, %d) in Cluster Dimension: (%d, %d, %d) blockIdx: (%d, %d, %d) \n",
             %cidX_i32,
             %cidY_i32,
             %cidZ_i32,
diff --git a/mlir/test/Integration/GPU/CUDA/sm90/tma_load_128x64_swizzle128b.mlir b/mlir/test/Integration/GPU/CUDA/sm90/tma_load_128x64_swizzle128b.mlir
index b50772f8249fb7..95bde40deb48ee 100644
--- a/mlir/test/Integration/GPU/CUDA/sm90/tma_load_128x64_swizzle128b.mlir
+++ b/mlir/test/Integration/GPU/CUDA/sm90/tma_load_128x64_swizzle128b.mlir
@@ -85,7 +85,7 @@ module @mymod {
       
       // Step 7. First thread does TMA load
       scf.if %10 {
-        gpu.printf "[GPU] TMA SIZE %d\0A" %c8192 : index
+        gpu.printf "[GPU] TMA SIZE %d\0A", %c8192 : index
         nvgpu.tma.async.load %3[%c0, %c0], %9[%c0] to %7 : !lhsTensorMap, !barrierType -> !shmemlhs
         nvgpu.mbarrier.arrive.expect_tx %9[%c0], %c8192 : !barrierType
       } else {
@@ -98,16 +98,16 @@ module @mymod {
 
       // Step 9. Print loaded data in 128b swizzled
       scf.if %10 {        
-        gpu.printf "===--- Matrix A ---=== %d \0A" %c-1_i32 : i32
+        gpu.printf "===--- Matrix A ---=== %d \0A", %c-1_i32 : i32
         scf.for %arg12 = %c0 to %c128 step %c1 {
           scf.for %arg13 = %c0 to %c64 step %c1 {
             %15 = memref.load %7[%arg12, %arg13] : !shmemlhs
             %16 = arith.extf %15 : f16 to f32
-            gpu.printf "%.0f,   " %16 : f32
+            gpu.printf "%.0f,   ", %16 : f32
           }
-          gpu.printf "%d\0A" %c-1_i32 : i32
+          gpu.printf "%d\0A", %c-1_i32 : i32
         }
-        gpu.printf "===----------------=== %d \0A" %c-1_i32 : i32
+        gpu.printf "===----------------=== %d \0A", %c-1_i32 : i32
       }
       gpu.terminator
     }
diff --git a/mlir/test/Integration/GPU/CUDA/sm90/tma_load_64x64_swizzle128b.mlir b/mlir/test/Integration/GPU/CUDA/sm90/tma_load_64x64_swizzle128b.mlir
index 65e5fc0aff6aa3..fce16f3df23686 100644
--- a/mlir/test/Integration/GPU/CUDA/sm90/tma_load_64x64_swizzle128b.mlir
+++ b/mlir/test/Integration/GPU/CUDA/sm90/tma_load_64x64_swizzle128b.mlir
@@ -109,7 +109,7 @@ module @mymod {
       
       // Step 6. First thread does TMA load
       scf.if %10 {
-        gpu.printf "[GPU] TMA SIZE %d\0A" %c32768 : index
+        gpu.printf "[GPU] TMA SIZE %d\0A", %c32768 : index
         nvgpu.tma.async.load %d_lhsTensorMap[%c0, %c0], %9[%c0] to %lhsShmem : !lhsTensorMap, !barrierType -> !shmemlhs
         nvgpu.tma.async.load %d_rhsTensorMap[%c0, %c0], %9[%c0] to %rhsShmem1 : !rhsTensorMap, !barrierType -> memref<64x64xf16, strided<[128, 1]>, 3>
         nvgpu.tma.async.load %d_rhsTensorMap[%c64, %c0], %9[%c0] to %rhsShmem2 : !rhsTensorMap, !barrierType -> memref<64x64xf16, strided<[128, 1], offset: 4096>, 3>
@@ -124,7 +124,7 @@ module @mymod {
 
       // Step 8. Print loaded data in 128b swizzled
       scf.if %10 {        
-        gpu.printf "===--- Matrix B ---=== %d \n" %c-1_i32 : i32
+        gpu.printf "===--- Matrix B ---=== %d \n", %c-1_i32 : i32
         scf.for %ii = %c0 to %c64 step %c1 {
           scf.for %j = %c0 to %c128 step %c1 {
             %lhs0 = memref.load %rhsShmem[%ii, %j] : !shmemrhs
@@ -133,7 +133,7 @@ module @mymod {
           }
           gpu.printf "%d\n" %c-1_i32 : i32
         }
-        gpu.printf "===----------------=== %d \n" %c-1_i32 : i32
+        gpu.printf "===----------------=== %d \n", %c-1_i32 : i32
       }
       gpu.barrier
       gpu.terminator
diff --git a/mlir/test/Integration/GPU/CUDA/sm90/tma_load_64x8_8x128_noswizzle.mlir b/mlir/test/Integration/GPU/CUDA/sm90/tma_load_64x8_8x128_noswizzle.mlir
index 391fda82e1e197..acca9811f5702e 100644
--- a/mlir/test/Integration/GPU/CUDA/sm90/tma_load_64x8_8x128_noswizzle.mlir
+++ b/mlir/test/Integration/GPU/CUDA/sm90/tma_load_64x8_8x128_noswizzle.mlir
@@ -80,8 +80,8 @@ module @mymod {
         nvgpu.mbarrier.arrive.expect_tx %9[%c0], %c6144 : <memorySpace = #gpu.address_space<workgroup>>
         %11 = memref.load %7[%c0, %c0] : memref<64x8xf32, 3>
         %12 = memref.load %8[%c0, %c0] : memref<8x128xf32, 3>
-        gpu.printf "[GPU] TMA BEFORE lhs[45][7] %f\0A" %11 : f32
-        gpu.printf "[GPU] TMA BEFORE rhs[7][0] %f\0A" %12 : f32
+        gpu.printf "[GPU] TMA BEFORE lhs[45][7] %f\0A", %11 : f32
+        gpu.printf "[GPU] TMA BEFORE rhs[7][0] %f\0A", %12 : f32
         nvgpu.tma.async.load %3[%c0, %c0], %9[%c0] to %7 : <tensor = memref<64x8xf32, 3>, swizzle = none, l2promo = none, oob = zero, interleave = none>, <memorySpace = #gpu.address_space<workgroup>> -> memref<64x8xf32, 3>
         nvgpu.tma.async.load %4[%c0, %c0], %9[%c0] to %8 : <tensor = memref<8x128xf32, 3>, swizzle = none, l2promo = none, oob = zero, interleave = none>, <memorySpace = #gpu.address_space<workgroup>> -> memref<8x128xf32, 3>
       } else {
@@ -92,8 +92,8 @@ module @mymod {
       scf.if %10 {
         %11 = memref.load %7[%c45, %c7] : memref<64x8xf32, 3>
         %12 = memref.load %8[%c7, %c0] : memref<8x128xf32, 3>
-        gpu.printf "[GPU] TMA LOADED lhs[45][7] %f\0A" %11 : f32
-        gpu.printf "[GPU] TMA LOADED rhs[7][0] %f\0A" %12 : f32
+        gpu.printf "[GPU] TMA LOADED lhs[45][7] %f\0A", %11 : f32
+        gpu.printf "[GPU] TMA LOADED rhs[7][0] %f\0A", %12 : f32
       }
       gpu.terminator
     }
diff --git a/mlir/test/Integration/GPU/CUDA/sm90/transform-dialect/tma_load_64x8_8x128_noswizzle-transform.mlir b/mlir/test/Integration/GPU/CUDA/sm90/transform-dialect/tma_load_64x8_8x128_noswizzle-transform.mlir
index f83f65bb2963ca..fe6c645357ecb3 100644
--- a/mlir/test/Integration/GPU/CUDA/sm90/transform-dialect/tma_load_64x8_8x128_noswizzle-transform.mlir
+++ b/mlir/test/Integration/GPU/CUDA/sm90/transform-dialect/tma_load_64x8_8x128_noswizzle-transform.mlir
@@ -96,8 +96,8 @@ func.func @main() {
     scf.if %10 {
       %11 = memref.load %out[%c45, %c7] : memref<64x8xf32, 3>
       %12 = memref.load %out_1[%c7, %c0] : memref<8x128xf32, 3>
-      gpu.printf "[GPU] TMA LOADED lhs[45][7] %f\0A" %11 : f32
-      gpu.printf "[GPU] TMA LOADED rhs[7][0] %f\0A" %12 : f32
+      gpu.printf "[GPU] TMA LOADED lhs[45][7] %f\0A", %11 : f32
+      gpu.printf "[GPU] TMA LOADED rhs[7][0] %f\0A", %12 : f32
     }
     gpu.terminator
   }
diff --git a/mlir/test/Integration/GPU/ROCM/printf.mlir b/mlir/test/Integration/GPU/ROCM/printf.mlir
index d5e6e3757540b2..4a0e4d34bfab5e 100644
--- a/mlir/test/Integration/GPU/ROCM/printf.mlir
+++ b/mlir/test/Integration/GPU/ROCM/printf.mlir
@@ -13,7 +13,7 @@ module attributes {gpu.container_module} {
     gpu.module @kernels {
         gpu.func @hello() kernel {
             %0 = gpu.thread_id x
-            gpu.printf "Hello from %d\n" %0 : index
+            gpu.printf "Hello from %d\n", %0 : index
             gpu.return
         }
     }

@kuhar (Member) left a comment

Seems reasonable

The following code fails with the error below; it looks like a parser error. This PR attempts to fix that.
```
test_printf.mlir:5:9: error: expected ':'
%135 = arith.cmpi slt, %arg1, %c127_i32 : i32
```

```
func.func @test(%133 : i32, %arg1: i32, %c127_i32:i32) {
    %134 = llvm.bitcast %133 : i32 to f32
    gpu.printf "]"
    %135 = arith.cmpi slt, %arg1, %c127_i32 : i32
    scf.if %135 {
        gpu.printf ", "
    }
    ....
}
```
@grypp grypp merged commit f50f969 into llvm:main Jan 8, 2025
8 checks passed
@llvm-ci (Collaborator) commented Jan 8, 2025

LLVM Buildbot has detected a new failure on builder openmp-offload-sles-build-only running on rocm-worker-hw-04-sles while building mlir at step 8 "Add check check-llvm".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/140/builds/14235

Here is the relevant piece of the build log for reference:
Step 8 (Add check check-llvm) failure: test (failure)
******************** TEST 'LLVM :: ExecutionEngine/JITLink/x86-64/ELF_R_X86_64_16.s' FAILED ********************
Exit Code: 134

Command Output (stderr):
--
RUN: at line 1: /home/botworker/bbot/builds/openmp-offload-sles-build/llvm.build/bin/llvm-mc -triple=x86_64-unknown-linux -position-independent      -filetype=obj -o /home/botworker/bbot/builds/openmp-offload-sles-build/llvm.build/test/ExecutionEngine/JITLink/x86-64/Output/ELF_R_X86_64_16.s.tmp.o /home/botworker/bbot/builds/openmp-offload-sles-build/llvm.src/llvm/test/ExecutionEngine/JITLink/x86-64/ELF_R_X86_64_16.s
+ /home/botworker/bbot/builds/openmp-offload-sles-build/llvm.build/bin/llvm-mc -triple=x86_64-unknown-linux -position-independent -filetype=obj -o /home/botworker/bbot/builds/openmp-offload-sles-build/llvm.build/test/ExecutionEngine/JITLink/x86-64/Output/ELF_R_X86_64_16.s.tmp.o /home/botworker/bbot/builds/openmp-offload-sles-build/llvm.src/llvm/test/ExecutionEngine/JITLink/x86-64/ELF_R_X86_64_16.s
RUN: at line 3: /home/botworker/bbot/builds/openmp-offload-sles-build/llvm.build/bin/llvm-jitlink -noexec -abs X=0x1234 -check=/home/botworker/bbot/builds/openmp-offload-sles-build/llvm.src/llvm/test/ExecutionEngine/JITLink/x86-64/ELF_R_X86_64_16.s /home/botworker/bbot/builds/openmp-offload-sles-build/llvm.build/test/ExecutionEngine/JITLink/x86-64/Output/ELF_R_X86_64_16.s.tmp.o
+ /home/botworker/bbot/builds/openmp-offload-sles-build/llvm.build/bin/llvm-jitlink -noexec -abs X=0x1234 -check=/home/botworker/bbot/builds/openmp-offload-sles-build/llvm.src/llvm/test/ExecutionEngine/JITLink/x86-64/ELF_R_X86_64_16.s /home/botworker/bbot/builds/openmp-offload-sles-build/llvm.build/test/ExecutionEngine/JITLink/x86-64/Output/ELF_R_X86_64_16.s.tmp.o
llvm-jitlink error: Resource tracker 0x484aee0 became defunct
llvm-jitlink: /home/botworker/bbot/builds/openmp-offload-sles-build/llvm.src/llvm/include/llvm/ExecutionEngine/Orc/SymbolStringPool.h:285: llvm::orc::SymbolStringPool::~SymbolStringPool(): Assertion `Pool.empty() && "Dangling references at pool destruction time"' failed.
PLEASE submit a bug report to https://github.com/llvm/llvm-project/issues/ and include the crash backtrace.
Stack dump:
0.	Program arguments: /home/botworker/bbot/builds/openmp-offload-sles-build/llvm.build/bin/llvm-jitlink -noexec -abs X=0x1234 -check=/home/botworker/bbot/builds/openmp-offload-sles-build/llvm.src/llvm/test/ExecutionEngine/JITLink/x86-64/ELF_R_X86_64_16.s /home/botworker/bbot/builds/openmp-offload-sles-build/llvm.build/test/ExecutionEngine/JITLink/x86-64/Output/ELF_R_X86_64_16.s.tmp.o
 #0 0x0000000000d18ea8 llvm::sys::PrintStackTrace(llvm::raw_ostream&, int) (/home/botworker/bbot/builds/openmp-offload-sles-build/llvm.build/bin/llvm-jitlink+0xd18ea8)
 #1 0x0000000000d163fc SignalHandler(int) Signals.cpp:0:0
 #2 0x00007f07ea82d910 __restore_rt (/lib64/libpthread.so.0+0x16910)
 #3 0x00007f07ea15bd2b raise (/lib64/libc.so.6+0x4ad2b)
 #4 0x00007f07ea15d3e5 abort (/lib64/libc.so.6+0x4c3e5)
 #5 0x00007f07ea153c6a __assert_fail_base (/lib64/libc.so.6+0x42c6a)
 #6 0x00007f07ea153cf2 (/lib64/libc.so.6+0x42cf2)
 #7 0x000000000066b25b (/home/botworker/bbot/builds/openmp-offload-sles-build/llvm.build/bin/llvm-jitlink+0x66b25b)
 #8 0x0000000000be4681 llvm::orc::ExecutorProcessControl::~ExecutorProcessControl() (/home/botworker/bbot/builds/openmp-offload-sles-build/llvm.build/bin/llvm-jitlink+0xbe4681)
 #9 0x0000000000be49a3 llvm::orc::SelfExecutorProcessControl::~SelfExecutorProcessControl() (/home/botworker/bbot/builds/openmp-offload-sles-build/llvm.build/bin/llvm-jitlink+0xbe49a3)
#10 0x0000000000b0e6e9 llvm::orc::ExecutionSession::~ExecutionSession() (/home/botworker/bbot/builds/openmp-offload-sles-build/llvm.build/bin/llvm-jitlink+0xb0e6e9)
#11 0x0000000000667cf3 llvm::Session::~Session() (/home/botworker/bbot/builds/openmp-offload-sles-build/llvm.build/bin/llvm-jitlink+0x667cf3)
#12 0x000000000062e729 main (/home/botworker/bbot/builds/openmp-offload-sles-build/llvm.build/bin/llvm-jitlink+0x62e729)
#13 0x00007f07ea14624d __libc_start_main (/lib64/libc.so.6+0x3524d)
#14 0x000000000065ceea _start /home/abuild/rpmbuild/BUILD/glibc-2.31/csu/../sysdeps/x86_64/start.S:122:0
/home/botworker/bbot/builds/openmp-offload-sles-build/llvm.build/test/ExecutionEngine/JITLink/x86-64/Output/ELF_R_X86_64_16.s.script: line 3: 2781911 Aborted                 (core dumped) /home/botworker/bbot/builds/openmp-offload-sles-build/llvm.build/bin/llvm-jitlink -noexec -abs X=0x1234 -check=/home/botworker/bbot/builds/openmp-offload-sles-build/llvm.src/llvm/test/ExecutionEngine/JITLink/x86-64/ELF_R_X86_64_16.s /home/botworker/bbot/builds/openmp-offload-sles-build/llvm.build/test/ExecutionEngine/JITLink/x86-64/Output/ELF_R_X86_64_16.s.tmp.o

--

********************

