Skip to content

Commit 3df10d0

Browse files
silee2aokblast
authored and committed
[MLIR][GPU] Generalize gpu.printf op lowering to LLVM call pattern. (llvm#164297)
The existing pattern for lowering the gpu.printf op to an LLVM call uses a fixed function name and calling convention. These two should be exposed as pass options to allow supporting the Intel Compute Runtime for GPU. This commit also adds the gpu.printf op pattern to the GPU to LLVMSPV pass. It may appear out of place, but the integration test is added to the XeVM integration tests, as that is currently the best folder for testing with the Intel Compute Runtime. The test should be moved in the future if a better test folder is added.
1 parent 46a5cd7 commit 3df10d0

File tree

5 files changed

+71
-7
lines changed

5 files changed

+71
-7
lines changed

mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -507,7 +507,8 @@ LogicalResult GPUPrintfOpToLLVMCallLowering::matchAndRewrite(
507507
LLVM::LLVMFunctionType::get(rewriter.getI32Type(), {ptrType},
508508
/*isVarArg=*/true);
509509
LLVM::LLVMFuncOp printfDecl =
510-
getOrDefineFunction(moduleOp, loc, rewriter, "printf", printfType);
510+
getOrDefineFunction(moduleOp, loc, rewriter, funcName, printfType);
511+
printfDecl.setCConv(callingConvention);
511512

512513
// Create the global op or find an existing one.
513514
LLVM::GlobalOp global = getOrCreateStringConstant(
@@ -530,7 +531,8 @@ LogicalResult GPUPrintfOpToLLVMCallLowering::matchAndRewrite(
530531
printfArgs.push_back(stringStart);
531532
printfArgs.append(argsRange.begin(), argsRange.end());
532533

533-
LLVM::CallOp::create(rewriter, loc, printfDecl, printfArgs);
534+
auto call = LLVM::CallOp::create(rewriter, loc, printfDecl, printfArgs);
535+
call.setCConv(callingConvention);
534536
rewriter.eraseOp(gpuPrintfOp);
535537
return success();
536538
}

mlir/lib/Conversion/GPUCommon/GPUOpsLowering.h

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010

1111
#include "mlir/Conversion/LLVMCommon/Pattern.h"
1212
#include "mlir/Dialect/GPU/IR/GPUDialect.h"
13+
#include "mlir/Dialect/LLVMIR/LLVMAttrs.h"
1314
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
1415

1516
namespace mlir {
@@ -142,20 +143,32 @@ struct GPUPrintfOpToHIPLowering : public ConvertOpToLLVMPattern<gpu::PrintfOp> {
142143
/// This pass will add a declaration of printf() to the GPUModule if needed
143144
/// and separate out the format strings into global constants. For some
144145
/// runtimes, such as OpenCL on AMD, this is sufficient setup, as the compiler
145-
/// will lower printf calls to appropriate device-side code
146+
/// will lower printf calls to appropriate device-side code.
147+
/// However, not all backends use the same calling convention and function
148+
/// naming.
149+
/// For example, the LLVM SPIRV backend requires calling convention
150+
/// LLVM::cconv::CConv::SPIR_FUNC and function name needs to be
151+
/// mangled as "_Z6printfPU3AS2Kcz".
152+
/// Default callingConvention is LLVM::cconv::CConv::C and
153+
/// funcName is "printf" but they can be customized as needed.
146154
struct GPUPrintfOpToLLVMCallLowering
147155
: public ConvertOpToLLVMPattern<gpu::PrintfOp> {
148-
GPUPrintfOpToLLVMCallLowering(const LLVMTypeConverter &converter,
149-
int addressSpace = 0)
156+
GPUPrintfOpToLLVMCallLowering(
157+
const LLVMTypeConverter &converter, int addressSpace = 0,
158+
LLVM::cconv::CConv callingConvention = LLVM::cconv::CConv::C,
159+
StringRef funcName = "printf")
150160
: ConvertOpToLLVMPattern<gpu::PrintfOp>(converter),
151-
addressSpace(addressSpace) {}
161+
addressSpace(addressSpace), callingConvention(callingConvention),
162+
funcName(funcName) {}
152163

153164
LogicalResult
154165
matchAndRewrite(gpu::PrintfOp gpuPrintfOp, gpu::PrintfOpAdaptor adaptor,
155166
ConversionPatternRewriter &rewriter) const override;
156167

157168
private:
158169
int addressSpace;
170+
LLVM::cconv::CConv callingConvention;
171+
StringRef funcName;
159172
};
160173

161174
/// Lowering of gpu.printf to a vprintf standard library.

mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -470,10 +470,13 @@ struct GPUToLLVMSPVConversionPass final
470470
gpu::GPUFuncOp, gpu::GlobalIdOp, gpu::GridDimOp,
471471
gpu::LaneIdOp, gpu::NumSubgroupsOp, gpu::ReturnOp,
472472
gpu::ShuffleOp, gpu::SubgroupIdOp, gpu::SubgroupSizeOp,
473-
gpu::ThreadIdOp>();
473+
gpu::ThreadIdOp, gpu::PrintfOp>();
474474

475475
populateGpuToLLVMSPVConversionPatterns(converter, patterns);
476476
populateGpuMemorySpaceAttributeConversions(converter);
477+
patterns.add<GPUPrintfOpToLLVMCallLowering>(converter, /*addressSpace=*/2,
478+
LLVM::cconv::CConv::SPIR_FUNC,
479+
"_Z6printfPU3AS2Kcz");
477480

478481
if (failed(applyPartialConversion(getOperation(), target,
479482
std::move(patterns))))
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
// RUN: mlir-opt %s -convert-gpu-to-llvm-spv | FileCheck %s
2+
3+
gpu.module @test_module {
4+
// CHECK: llvm.mlir.global internal constant @[[$PRINT_GLOBAL:[A-Za-z0-9_]+]]("Hello: %d\0A\00") {addr_space = 2 : i32}
5+
// CHECK: llvm.func spir_funccc @_Z6printfPU3AS2Kcz(!llvm.ptr<2>, ...) -> i32
6+
// CHECK-LABEL: llvm.func spir_funccc @test_printf
7+
// CHECK: (%[[ARG0:.*]]: i32)
8+
gpu.func @test_printf(%arg0: i32) {
9+
// CHECK: %[[IMM0:.*]] = llvm.mlir.addressof @[[$PRINT_GLOBAL]] : !llvm.ptr<2>
10+
// CHECK-NEXT: %[[IMM2:.*]] = llvm.getelementptr %[[IMM0]][0, 0] : (!llvm.ptr<2>) -> !llvm.ptr<2>, !llvm.array<11 x i8>
11+
// CHECK-NEXT: %{{.*}} = llvm.call spir_funccc @_Z6printfPU3AS2Kcz(%[[IMM2]], %[[ARG0]]) vararg(!llvm.func<i32 (ptr<2>, ...)>) : (!llvm.ptr<2>, i32) -> i32
12+
gpu.printf "Hello: %d\n", %arg0 : i32
13+
gpu.return
14+
}
15+
}
16+
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
// RUN: mlir-opt %s \
2+
// RUN: | mlir-opt -pass-pipeline='builtin.module(cse,func.func(gpu-async-region),xevm-attach-target,gpu.module(convert-gpu-to-llvm-spv{use-64bit-index=true},convert-xevm-to-llvm,cse))' \
3+
// RUN: | mlir-opt -convert-scf-to-cf -convert-cf-to-llvm -convert-vector-to-llvm -convert-arith-to-llvm \
4+
// RUN: | mlir-opt -gpu-to-llvm -reconcile-unrealized-casts -cse -gpu-module-to-binary \
5+
// RUN: | mlir-runner \
6+
// RUN: --shared-libs=%mlir_sycl_runtime \
7+
// RUN: --shared-libs=%mlir_runner_utils \
8+
// RUN: --shared-libs=%mlir_c_runner_utils \
9+
// RUN: --entry-point-result=void \
10+
// RUN: | FileCheck %s
11+
12+
module @test attributes {gpu.container_module} {
13+
gpu.module @test_module {
14+
gpu.func @test_printf(%arg0: i32, %arg1: f32) kernel {
15+
gpu.printf "Hello: %d\n", %arg0 : i32
16+
gpu.printf "Hello: %f\n", %arg1 : f32
17+
gpu.return
18+
}
19+
}
20+
21+
func.func @main() attributes {llvm.emit_c_interface} {
22+
%c1 = arith.constant 1 : index
23+
%c11 = arith.constant 11 : i32
24+
%c4 = arith.constant 4.0 : f32
25+
// CHECK: Hello: 11
26+
// CHECK: Hello: 4.000000
27+
gpu.launch_func @test_module::@test_printf blocks in (%c1, %c1, %c1) threads in (%c1, %c1, %c1) args(%c11 : i32, %c4 : f32)
28+
return
29+
}
30+
}

0 commit comments

Comments
 (0)