Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
Original file line number Diff line number Diff line change
Expand Up @@ -1055,7 +1055,7 @@ def GPU_PrintfOp : GPU_Op<"printf", [MemoryEffects<[MemWrite]>]>,
imposed by one's target platform.
}];
let assemblyFormat = [{
$format attr-dict ($args^ `:` type($args))?
$format attr-dict (`,` $args^ `:` type($args))?
}];
}

Expand Down
2 changes: 1 addition & 1 deletion mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -633,7 +633,7 @@ gpu.module @test_module_29 {
// CHECK-NEXT: %[[EL1:.*]] = llvm.getelementptr %[[ALLOC]][0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(i32, f64)>
// CHECK-NEXT: llvm.store %[[EXT]], %[[EL1]] : f64, !llvm.ptr
// CHECK-NEXT: llvm.call @vprintf(%[[FORMATSTART]], %[[ALLOC]]) : (!llvm.ptr, !llvm.ptr) -> i32
gpu.printf "Hello: %d\n" %arg0, %arg1 : i32, f32
gpu.printf "Hello: %d\n", %arg0, %arg1 : i32, f32
gpu.return
}
}
Expand Down
2 changes: 1 addition & 1 deletion mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl-hip.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ gpu.module @test_module {
// CHECK-NEXT: %[[NARGS1:.*]] = llvm.mlir.constant(1 : i32) : i32
// CHECK-NEXT: %[[ARG0_64:.*]] = llvm.zext %[[ARG0]] : i32 to i64
// CHECK-NEXT: %{{.*}} = llvm.call @__ockl_printf_append_args(%[[DESC1]], %[[NARGS1]], %[[ARG0_64]], %[[CST0]], %[[CST0]], %[[CST0]], %[[CST0]], %[[CST0]], %[[CST0]], %[[ISLAST]]) : (i64, i32, i64, i64, i64, i64, i64, i64, i64, i32) -> i64
gpu.printf "Hello: %d\n" %arg0 : i32
gpu.printf "Hello: %d\n", %arg0 : i32
gpu.return
}
}
2 changes: 1 addition & 1 deletion mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl-opencl.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ gpu.module @test_module {
// CHECK: %[[IMM0:.*]] = llvm.mlir.addressof @[[$PRINT_GLOBAL]] : !llvm.ptr<4>
// CHECK-NEXT: %[[IMM2:.*]] = llvm.getelementptr %[[IMM0]][0, 0] : (!llvm.ptr<4>) -> !llvm.ptr<4>, !llvm.array<11 x i8>
// CHECK-NEXT: %{{.*}} = llvm.call @printf(%[[IMM2]], %[[ARG0]]) vararg(!llvm.func<i32 (ptr<4>, ...)>) : (!llvm.ptr<4>, i32) -> i32
gpu.printf "Hello: %d\n" %arg0 : i32
gpu.printf "Hello: %d\n", %arg0 : i32
gpu.return
}
}
2 changes: 1 addition & 1 deletion mlir/test/Conversion/GPUToSPIRV/printf.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ module attributes {
// CHECK: [[FMTSTR_ADDR:%.*]] = spirv.mlir.addressof [[PRINTMSG]] : !spirv.ptr<!spirv.array<[[ARRAYSIZE]] x i8>, UniformConstant>
// CHECK-NEXT: [[FMTSTR_PTR1:%.*]] = spirv.Bitcast [[FMTSTR_ADDR]] : !spirv.ptr<!spirv.array<[[ARRAYSIZE]] x i8>, UniformConstant> to !spirv.ptr<i8, UniformConstant>
// CHECK-NEXT: {{%.*}} = spirv.CL.printf [[FMTSTR_PTR1]] {{%.*}}, {{%.*}}, {{%.*}} : !spirv.ptr<i8, UniformConstant>, i32, f32, i32 -> i32
gpu.printf "\nHello, world : %d %f \n Thread id: %d\n" %arg0, %arg1, %2: i32, f32, index
gpu.printf "\nHello, world : %d %f \n Thread id: %d\n", %arg0, %arg1, %2: i32, f32, index

// CHECK: spirv.Return
gpu.return
Expand Down
2 changes: 1 addition & 1 deletion mlir/test/Dialect/GPU/indirect-device-func-call.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ gpu.module @kernels {
func.func @hello(%arg0 : f32) {
%tid_x = gpu.thread_id x
%csti8 = arith.constant 2 : i8
gpu.printf "Hello from %lld, %d, %f\n" %tid_x, %csti8, %arg0 : index, i8, f32
gpu.printf "Hello from %lld, %d, %f\n", %tid_x, %csti8, %arg0 : index, i8, f32
return
}
// CHECK-LABEL: @hello_indirect
Expand Down
17 changes: 15 additions & 2 deletions mlir/test/Dialect/GPU/ops.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -229,9 +229,22 @@ module attributes {gpu.container_module} {

// CHECK-LABEL: gpu.func @printf_test
// CHECK: (%[[ARG0:.*]]: i32)
// CHECK: gpu.printf "Value: %d" %[[ARG0]] : i32
// CHECK: gpu.printf "Value: %d", %[[ARG0]] : i32
gpu.func @printf_test(%arg0 : i32) {
gpu.printf "Value: %d" %arg0 : i32
gpu.printf "Value: %d", %arg0 : i32
gpu.return
}

// Round-trip test for gpu.printf with a format string and no operands.
// Both printf ops below omit the optional `, operands : types` group; the
// second one is the last op inside an scf.if region, so it is immediately
// followed by the region's closing brace.
// NOTE(review): presumably this guards against the parser treating whatever
// follows an operand-less printf as its operand list -- confirm against the
// PR description.
// CHECK-LABEL: gpu.func @printf_empty
// CHECK: gpu.printf "]"
// CHECK: scf.if
// CHECK: gpu.printf ", "
gpu.func @printf_empty(%arg0 : i32) {
gpu.printf "]"
// %arg0 is compared with itself; the result is only used as the i1
// condition of the scf.if below.
%1 = arith.cmpi slt, %arg0, %arg0 : i32
scf.if %1 {
gpu.printf ", "
}
gpu.return
}

Expand Down
2 changes: 1 addition & 1 deletion mlir/test/Dialect/GPU/test-nvvm-pipeline.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ func.func @test_math(%arg0 : f32) {
threads(%6, %7, %8) in (%9 = %c2, %10 = %c1, %11 = %c1) {
// CHECK-NVVM: __nv_expf
%s1 = math.exp %arg0 : f32
gpu.printf "%f" %s1 : f32
gpu.printf "%f", %s1 : f32
gpu.terminator
}
return
Expand Down
4 changes: 2 additions & 2 deletions mlir/test/Integration/GPU/CUDA/assert.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,10 @@ gpu.module @kernels {
gpu.func @test_assert(%c0: i1, %c1: i1) kernel {
%0 = gpu.thread_id x
cf.assert %c1, "passing assertion"
gpu.printf "thread %lld: print after passing assertion\n" %0 : index
gpu.printf "thread %lld: print after passing assertion\n", %0 : index
// Test callsite(callsite(name)) location.
cf.assert %c0, "failing assertion" loc(callsite(callsite("callee_func_name"("callee_file.cc":7:9) at "caller_file.cc":10:8) at "caller2_file.cc":11:12))
gpu.printf "thread %lld: print after failing assertion\n" %0 : index
gpu.printf "thread %lld: print after failing assertion\n", %0 : index
gpu.return
}
}
Expand Down
2 changes: 1 addition & 1 deletion mlir/test/Integration/GPU/CUDA/printf.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ module attributes {gpu.container_module} {
%0 = gpu.thread_id x
%csti8 = arith.constant 2 : i8
%cstf32 = arith.constant 3.0 : f32
gpu.printf "Hello from %lld, %d, %f\n" %0, %csti8, %cstf32 : index, i8, f32
gpu.printf "Hello from %lld, %d, %f\n", %0, %csti8, %cstf32 : index, i8, f32
gpu.return
}
}
Expand Down
2 changes: 1 addition & 1 deletion mlir/test/Integration/GPU/CUDA/sm90/cga_cluster.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ module attributes {gpu.container_module} {
%cnd2 = arith.cmpi eq, %bidY, %c3 : index
scf.if %cnd1 {
scf.if %cnd2 {
gpu.printf "clusterIdx: (%d, %d, %d) in Cluster Dimension: (%d, %d, %d) blockIdx: (%d, %d, %d) \n"
gpu.printf "clusterIdx: (%d, %d, %d) in Cluster Dimension: (%d, %d, %d) blockIdx: (%d, %d, %d) \n",
%cidX_i32,
%cidY_i32,
%cidZ_i32,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ module @mymod {

// Step 7. First thread does TMA load
scf.if %10 {
gpu.printf "[GPU] TMA SIZE %d\0A" %c8192 : index
gpu.printf "[GPU] TMA SIZE %d\0A", %c8192 : index
nvgpu.tma.async.load %3[%c0, %c0], %9[%c0] to %7 : !lhsTensorMap, !barrierType -> !shmemlhs
nvgpu.mbarrier.arrive.expect_tx %9[%c0], %c8192 : !barrierType
} else {
Expand All @@ -98,16 +98,16 @@ module @mymod {

// Step 9. Print loaded data in 128b swizzled
scf.if %10 {
gpu.printf "===--- Matrix A ---=== %d \0A" %c-1_i32 : i32
gpu.printf "===--- Matrix A ---=== %d \0A", %c-1_i32 : i32
scf.for %arg12 = %c0 to %c128 step %c1 {
scf.for %arg13 = %c0 to %c64 step %c1 {
%15 = memref.load %7[%arg12, %arg13] : !shmemlhs
%16 = arith.extf %15 : f16 to f32
gpu.printf "%.0f, " %16 : f32
gpu.printf "%.0f, ", %16 : f32
}
gpu.printf "%d\0A" %c-1_i32 : i32
gpu.printf "%d\0A", %c-1_i32 : i32
}
gpu.printf "===----------------=== %d \0A" %c-1_i32 : i32
gpu.printf "===----------------=== %d \0A", %c-1_i32 : i32
}
gpu.terminator
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ module @mymod {

// Step 6. First thread does TMA load
scf.if %10 {
gpu.printf "[GPU] TMA SIZE %d\0A" %c32768 : index
gpu.printf "[GPU] TMA SIZE %d\0A", %c32768 : index
nvgpu.tma.async.load %d_lhsTensorMap[%c0, %c0], %9[%c0] to %lhsShmem : !lhsTensorMap, !barrierType -> !shmemlhs
nvgpu.tma.async.load %d_rhsTensorMap[%c0, %c0], %9[%c0] to %rhsShmem1 : !rhsTensorMap, !barrierType -> memref<64x64xf16, strided<[128, 1]>, 3>
nvgpu.tma.async.load %d_rhsTensorMap[%c64, %c0], %9[%c0] to %rhsShmem2 : !rhsTensorMap, !barrierType -> memref<64x64xf16, strided<[128, 1], offset: 4096>, 3>
Expand All @@ -124,7 +124,7 @@ module @mymod {

// Step 8. Print loaded data in 128b swizzled
scf.if %10 {
gpu.printf "===--- Matrix B ---=== %d \n" %c-1_i32 : i32
gpu.printf "===--- Matrix B ---=== %d \n", %c-1_i32 : i32
scf.for %ii = %c0 to %c64 step %c1 {
scf.for %j = %c0 to %c128 step %c1 {
%lhs0 = memref.load %rhsShmem[%ii, %j] : !shmemrhs
Expand All @@ -133,7 +133,7 @@ module @mymod {
}
// Operands must follow the format string after a comma (gpu.printf assembly format).
gpu.printf "%d\n", %c-1_i32 : i32
}
gpu.printf "===----------------=== %d \n" %c-1_i32 : i32
gpu.printf "===----------------=== %d \n", %c-1_i32 : i32
}
gpu.barrier
gpu.terminator
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -80,8 +80,8 @@ module @mymod {
nvgpu.mbarrier.arrive.expect_tx %9[%c0], %c6144 : <memorySpace = #gpu.address_space<workgroup>>
%11 = memref.load %7[%c0, %c0] : memref<64x8xf32, 3>
%12 = memref.load %8[%c0, %c0] : memref<8x128xf32, 3>
gpu.printf "[GPU] TMA BEFORE lhs[45][7] %f\0A" %11 : f32
gpu.printf "[GPU] TMA BEFORE rhs[7][0] %f\0A" %12 : f32
gpu.printf "[GPU] TMA BEFORE lhs[45][7] %f\0A", %11 : f32
gpu.printf "[GPU] TMA BEFORE rhs[7][0] %f\0A", %12 : f32
nvgpu.tma.async.load %3[%c0, %c0], %9[%c0] to %7 : <tensor = memref<64x8xf32, 3>, swizzle = none, l2promo = none, oob = zero, interleave = none>, <memorySpace = #gpu.address_space<workgroup>> -> memref<64x8xf32, 3>
nvgpu.tma.async.load %4[%c0, %c0], %9[%c0] to %8 : <tensor = memref<8x128xf32, 3>, swizzle = none, l2promo = none, oob = zero, interleave = none>, <memorySpace = #gpu.address_space<workgroup>> -> memref<8x128xf32, 3>
} else {
Expand All @@ -92,8 +92,8 @@ module @mymod {
scf.if %10 {
%11 = memref.load %7[%c45, %c7] : memref<64x8xf32, 3>
%12 = memref.load %8[%c7, %c0] : memref<8x128xf32, 3>
gpu.printf "[GPU] TMA LOADED lhs[45][7] %f\0A" %11 : f32
gpu.printf "[GPU] TMA LOADED rhs[7][0] %f\0A" %12 : f32
gpu.printf "[GPU] TMA LOADED lhs[45][7] %f\0A", %11 : f32
gpu.printf "[GPU] TMA LOADED rhs[7][0] %f\0A", %12 : f32
}
gpu.terminator
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -96,8 +96,8 @@ func.func @main() {
scf.if %10 {
%11 = memref.load %out[%c45, %c7] : memref<64x8xf32, 3>
%12 = memref.load %out_1[%c7, %c0] : memref<8x128xf32, 3>
gpu.printf "[GPU] TMA LOADED lhs[45][7] %f\0A" %11 : f32
gpu.printf "[GPU] TMA LOADED rhs[7][0] %f\0A" %12 : f32
gpu.printf "[GPU] TMA LOADED lhs[45][7] %f\0A", %11 : f32
gpu.printf "[GPU] TMA LOADED rhs[7][0] %f\0A", %12 : f32
}
gpu.terminator
}
Expand Down
2 changes: 1 addition & 1 deletion mlir/test/Integration/GPU/ROCM/printf.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ module attributes {gpu.container_module} {
gpu.module @kernels {
gpu.func @hello() kernel {
%0 = gpu.thread_id x
gpu.printf "Hello from %d\n" %0 : index
gpu.printf "Hello from %d\n", %0 : index
gpu.return
}
}
Expand Down
Loading