From 21a61f493cc8124f8bfa1f3ef47cdcbc3579ef7c Mon Sep 17 00:00:00 2001 From: Lei Zhang Date: Thu, 3 Oct 2024 18:34:21 +0000 Subject: [PATCH 1/6] Update llvm --- .../TritonAMDGPUToLLVM/BuiltinFuncToLLVM.cpp | 4 +++- .../lib/TritonAMDGPUToLLVM/DotOpToLLVM/WMMA.cpp | 3 ++- .../TritonAMDGPUToLLVM/SchedInstructions.cpp | 17 +++++++++++------ .../amd/lib/TritonAMDGPUToLLVM/TargetInfo.cpp | 10 ++++++---- 4 files changed, 22 insertions(+), 12 deletions(-) diff --git a/third_party/amd/lib/TritonAMDGPUToLLVM/BuiltinFuncToLLVM.cpp b/third_party/amd/lib/TritonAMDGPUToLLVM/BuiltinFuncToLLVM.cpp index edf74cea4259..3d9e14eb78d3 100644 --- a/third_party/amd/lib/TritonAMDGPUToLLVM/BuiltinFuncToLLVM.cpp +++ b/third_party/amd/lib/TritonAMDGPUToLLVM/BuiltinFuncToLLVM.cpp @@ -190,7 +190,9 @@ class CallOpConversion : public mlir::RewritePattern { auto name = StringAttr::get(callOp.getContext(), "llvm.amdgcn.rcp.f32"); LLVM::FastmathFlagsAttr defaultFlags{}; auto rcpOp = rewriter.create( - loc, returnType, name, operands[1], defaultFlags); + loc, returnType, name, operands[1], defaultFlags, + /*op_bundle_operands=*/ArrayRef{}, + /*op_bundle_tags=*/ArrayRef{}); replacementOp = rewriter.create( loc, returnType, operands[0], rcpOp->getResult(0), defaultFlags); diff --git a/third_party/amd/lib/TritonAMDGPUToLLVM/DotOpToLLVM/WMMA.cpp b/third_party/amd/lib/TritonAMDGPUToLLVM/DotOpToLLVM/WMMA.cpp index 9368443255f0..3dcd3ac0ac83 100644 --- a/third_party/amd/lib/TritonAMDGPUToLLVM/DotOpToLLVM/WMMA.cpp +++ b/third_party/amd/lib/TritonAMDGPUToLLVM/DotOpToLLVM/WMMA.cpp @@ -221,7 +221,8 @@ Value generateWMMAIntrinsic(ConversionPatternRewriter &rewriter, Location loc, } auto wmmaIntrinsic = rewriter.create( loc, TypeRange{valC.getType()}, StringAttr::get(loc.getContext(), name), - operands, defaultFlags); + operands, defaultFlags, /*op_bundle_operands=*/ArrayRef{}, + /*op_bundle_tags=*/ArrayRef{}); return wmmaIntrinsic.getResult(0); } diff --git a/third_party/amd/lib/TritonAMDGPUToLLVM/SchedInstructions.cpp b/third_party/amd/lib/TritonAMDGPUToLLVM/SchedInstructions.cpp index c9413a52f515..acf5dacfb068 100644 --- a/third_party/amd/lib/TritonAMDGPUToLLVM/SchedInstructions.cpp +++ b/third_party/amd/lib/TritonAMDGPUToLLVM/SchedInstructions.cpp @@ -48,9 +48,10 @@ void createSchedGroupBarrier(PatternRewriter &rewriter, Location loc, static_cast(groupIdValue)); LLVM::FastmathFlagsAttr defaultFlags{}; - rewriter.create(loc, TypeRange{}, intrinsicName, - ValueRange{mask, size, groupId}, - defaultFlags); + rewriter.create( + loc, TypeRange{}, intrinsicName, ValueRange{mask, size, groupId}, + defaultFlags, /*op_bundle_operands=*/ArrayRef{}, + /*op_bundle_tags=*/ArrayRef{}); } // Insert intrinsic that controls the types of instructions that may be @@ -63,8 +64,10 @@ Operation *createSchedBarrier(PatternRewriter &rewriter, Location loc, Value mask = LLVM::createConstantI32(loc, rewriter, static_cast(maskValue)); - return rewriter.create(loc, TypeRange{}, intrinsicName, - ValueRange{mask}, defaultFlags); + return rewriter.create( + loc, TypeRange{}, intrinsicName, ValueRange{mask}, defaultFlags, + /*op_bundle_operands=*/ArrayRef{}, + /*op_bundle_tags=*/ArrayRef{}); } // Insert an experimental intrinsic for instruction group level parallelism. @@ -76,7 +79,9 @@ Operation *createIglpOpt(PatternRewriter &rewriter, Location loc, int value) { Value iglpValue = LLVM::createConstantI32(loc, rewriter, static_cast(value)); return rewriter.create( - loc, TypeRange{}, intrinsicName, ValueRange{iglpValue}, defaultFlags); + loc, TypeRange{}, intrinsicName, ValueRange{iglpValue}, defaultFlags, + /*op_bundle_operands=*/ArrayRef{}, + /*op_bundle_tags=*/ArrayRef{}); } struct InstructionSchedHintsRewriter diff --git a/third_party/amd/lib/TritonAMDGPUToLLVM/TargetInfo.cpp b/third_party/amd/lib/TritonAMDGPUToLLVM/TargetInfo.cpp index 8462eb5fc9b8..5128300a4a85 100644 --- a/third_party/amd/lib/TritonAMDGPUToLLVM/TargetInfo.cpp +++ b/third_party/amd/lib/TritonAMDGPUToLLVM/TargetInfo.cpp @@ -71,10 +71,12 @@ Value TargetInfo::ballot(RewriterBase &rewriter, Location loc, Type type, Value cmp) const { auto stringAttr = rewriter.getStringAttr("llvm.amdgcn.ballot"); SmallVector operands = {cmp}; - Value asmResult = - rewriter.create(loc, type, stringAttr, operands) - ->getResult(0); - return asmResult; + LLVM::FastmathFlagsAttr defaultFlags{}; + auto callOp = rewriter.create( + loc, type, stringAttr, operands, defaultFlags, + /*op_bundle_operands=*/ArrayRef{}, + /*op_bundle_tags=*/ArrayRef{}); + return callOp->getResult(0); } void TargetInfo::storeDShared(RewriterBase &rewriter, Location loc, Value ptr, From d494346b92b5efde1098cca2529ce598837a6f57 Mon Sep 17 00:00:00 2001 From: Lei Zhang Date: Thu, 3 Oct 2024 18:36:30 +0000 Subject: [PATCH 2/6] Update llvm/llvm-project@61f8a7f61890 --- cmake/llvm-hash.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/llvm-hash.txt b/cmake/llvm-hash.txt index 36344442bd3a..454a94e1f4cb 100644 --- a/cmake/llvm-hash.txt +++ b/cmake/llvm-hash.txt @@ -1 +1 @@ -df0864e761107b07e38f5503e0cbee0cebb4c5e8 +61f8a7f618901797ee8663389a29722f29216a96 From 688c495a4a1171aa5ac6a76e8ab12a43d702db21 Mon Sep 17 00:00:00 2001 From: Lei Zhang Date: Thu, 3 Oct 2024 14:40:52 -0700 Subject: [PATCH 3/6] Trigger build From 305bacef7c1d5a37a21317f4700ffbf12921b1ab Mon Sep 17 00:00:00 2001 From: Lei Zhang Date: Thu, 3 Oct 2024 23:40:27 +0000 Subject: [PATCH 4/6] Fix builds --- .../Conversion/TritonGPUToLLVM/Utility.h | 4 +++ lib/Conversion/TritonGPUToLLVM/Utility.cpp | 16 ++++++++++++ .../TritonAMDGPUToLLVM/BuiltinFuncToLLVM.cpp | 11 ++++---- .../TritonAMDGPUToLLVM/DotOpToLLVM/WMMA.cpp | 8 +++--- .../TritonAMDGPUToLLVM/SchedInstructions.cpp | 25 +++++++------------ .../amd/lib/TritonAMDGPUToLLVM/TargetInfo.cpp | 11 +++----- 6 files changed, 40 insertions(+), 35 deletions(-) diff --git a/include/triton/Conversion/TritonGPUToLLVM/Utility.h b/include/triton/Conversion/TritonGPUToLLVM/Utility.h index 1862c03b9c7e..fa81ba6562fd 100644 --- a/include/triton/Conversion/TritonGPUToLLVM/Utility.h +++ b/include/triton/Conversion/TritonGPUToLLVM/Utility.h @@ -228,6 +228,10 @@ Value createIndexConstant(OpBuilder &builder, Location loc, Value createLLVMIntegerConstant(OpBuilder &builder, Location loc, short width, int64_t value); +LLVM::CallIntrinsicOp createLLVMIntrinsicCall(OpBuilder &builder, Location loc, + StringRef intrinsic, + TypeRange types, ValueRange args); + // Is v an integer or floating-point scalar constant equal to 0? bool isConstantZero(Value v); diff --git a/lib/Conversion/TritonGPUToLLVM/Utility.cpp b/lib/Conversion/TritonGPUToLLVM/Utility.cpp index 9de4434528fa..22ff0c1757d8 100644 --- a/lib/Conversion/TritonGPUToLLVM/Utility.cpp +++ b/lib/Conversion/TritonGPUToLLVM/Utility.cpp @@ -518,6 +518,22 @@ Value createLLVMIntegerConstant(OpBuilder &builder, Location loc, short width, builder.getIntegerAttr(ty, value)); } +LLVM::CallIntrinsicOp createLLVMIntrinsicCall(OpBuilder &builder, Location loc, + StringRef intrinsic, + TypeRange types, + ValueRange args) { + llvm::SmallVector attrs; + attrs.push_back( + builder.getNamedAttr("intrin", builder.getStringAttr(intrinsic))); + attrs.push_back(builder.getNamedAttr("op_bundle_sizes", + builder.getDenseI32ArrayAttr({}))); + attrs.push_back(builder.getNamedAttr( + "operandSegmentSizes", + builder.getDenseI32ArrayAttr({static_cast(args.size()), 0}))); + + return builder.create(loc, types, args, attrs); +} + bool isConstantZero(Value v) { if (auto constantOp = v.getDefiningOp()) { if (auto attr = dyn_cast(constantOp.getValue())) { diff --git a/third_party/amd/lib/TritonAMDGPUToLLVM/BuiltinFuncToLLVM.cpp b/third_party/amd/lib/TritonAMDGPUToLLVM/BuiltinFuncToLLVM.cpp index 3d9e14eb78d3..23debfe0f495 100644 --- a/third_party/amd/lib/TritonAMDGPUToLLVM/BuiltinFuncToLLVM.cpp +++ b/third_party/amd/lib/TritonAMDGPUToLLVM/BuiltinFuncToLLVM.cpp @@ -4,6 +4,7 @@ #include "mlir/Dialect/LLVMIR/LLVMDialect.h" #include "mlir/Pass/Pass.h" #include "mlir/Transforms/GreedyPatternRewriteDriver.h" +#include "triton/Conversion/TritonGPUToLLVM/Utility.h" namespace mlir { namespace triton { @@ -187,13 +188,11 @@ class CallOpConversion : public mlir::RewritePattern { rewriter.create(loc, returnType, op->getResult(0)); } else if (calleeName == "__triton_hip_fast_fdividef") { assert(operands.size() == 2); - auto name = StringAttr::get(callOp.getContext(), "llvm.amdgcn.rcp.f32"); - LLVM::FastmathFlagsAttr defaultFlags{}; - auto rcpOp = rewriter.create( - loc, returnType, name, operands[1], defaultFlags, - /*op_bundle_operands=*/ArrayRef{}, - /*op_bundle_tags=*/ArrayRef{}); + const char *intrinsic = "llvm.amdgcn.rcp.f32"; + auto rcpOp = LLVM::createLLVMIntrinsicCall(rewriter, loc, intrinsic, + returnType, operands[1]); + LLVM::FastmathFlagsAttr defaultFlags{}; replacementOp = rewriter.create( loc, returnType, operands[0], rcpOp->getResult(0), defaultFlags); } diff --git a/third_party/amd/lib/TritonAMDGPUToLLVM/DotOpToLLVM/WMMA.cpp b/third_party/amd/lib/TritonAMDGPUToLLVM/DotOpToLLVM/WMMA.cpp index 3dcd3ac0ac83..c8907f40a166 100644 --- a/third_party/amd/lib/TritonAMDGPUToLLVM/DotOpToLLVM/WMMA.cpp +++ b/third_party/amd/lib/TritonAMDGPUToLLVM/DotOpToLLVM/WMMA.cpp @@ -24,6 +24,7 @@ #include "../PatternTritonGPUOpToLLVM.h" #include "Utility.h" #include "mlir/Dialect/LLVMIR/ROCDLDialect.h" +#include "triton/Conversion/TritonGPUToLLVM/Utility.h" namespace mlir::triton::AMD { namespace { @@ -219,11 +220,8 @@ Value generateWMMAIntrinsic(ConversionPatternRewriter &rewriter, Location loc, if (32 / dElType.getIntOrFloatBitWidth() > 1 || dElType.isInteger(32)) { operands.push_back(int_val(1, false)); } - auto wmmaIntrinsic = rewriter.create( - loc, TypeRange{valC.getType()}, StringAttr::get(loc.getContext(), name), - operands, defaultFlags, /*op_bundle_operands=*/ArrayRef{}, - /*op_bundle_tags=*/ArrayRef{}); - + auto wmmaIntrinsic = LLVM::createLLVMIntrinsicCall(rewriter, loc, name, + valC.getType(), operands); return wmmaIntrinsic.getResult(0); } diff --git a/third_party/amd/lib/TritonAMDGPUToLLVM/SchedInstructions.cpp b/third_party/amd/lib/TritonAMDGPUToLLVM/SchedInstructions.cpp index acf5dacfb068..fa5de9c2985e 100644 --- a/third_party/amd/lib/TritonAMDGPUToLLVM/SchedInstructions.cpp +++ b/third_party/amd/lib/TritonAMDGPUToLLVM/SchedInstructions.cpp @@ -38,7 +38,7 @@ void createSchedGroupBarrier(PatternRewriter &rewriter, Location loc, InstructionKindMask maskValue, int sizeValue, int groupIdValue) { MLIRContext *ctx = rewriter.getContext(); - auto intrinsicName = str_attr("llvm.amdgcn.sched.group.barrier"); + const char *intrinsicName = "llvm.amdgcn.sched.group.barrier"; Value mask = LLVM::createConstantI32(loc, rewriter, static_cast(maskValue)); @@ -47,11 +47,8 @@ void createSchedGroupBarrier(PatternRewriter &rewriter, Location loc, Value groupId = LLVM::createConstantI32(loc, rewriter, static_cast(groupIdValue)); - LLVM::FastmathFlagsAttr defaultFlags{}; - rewriter.create( - loc, TypeRange{}, intrinsicName, ValueRange{mask, size, groupId}, - defaultFlags, /*op_bundle_operands=*/ArrayRef{}, - /*op_bundle_tags=*/ArrayRef{}); + LLVM::createLLVMIntrinsicCall(rewriter, loc, intrinsicName, TypeRange{}, + ValueRange{mask, size, groupId}); } // Insert intrinsic that controls the types of instructions that may be @@ -59,29 +56,25 @@ void createSchedGroupBarrier(PatternRewriter &rewriter, Location loc, Operation *createSchedBarrier(PatternRewriter &rewriter, Location loc, int64_t maskValue) { MLIRContext *ctx = rewriter.getContext(); - auto intrinsicName = str_attr("llvm.amdgcn.sched.barrier"); + const char *intrinsicName = "llvm.amdgcn.sched.barrier"; LLVM::FastmathFlagsAttr defaultFlags{}; Value mask = LLVM::createConstantI32(loc, rewriter, static_cast(maskValue)); - return rewriter.create( - loc, TypeRange{}, intrinsicName, ValueRange{mask}, defaultFlags, - /*op_bundle_operands=*/ArrayRef{}, - /*op_bundle_tags=*/ArrayRef{}); + return LLVM::createLLVMIntrinsicCall(rewriter, loc, intrinsicName, + TypeRange{}, ValueRange{mask}); } // Insert an experimental intrinsic for instruction group level parallelism. // The intrinsic takes a value that specifies the strategy. Operation *createIglpOpt(PatternRewriter &rewriter, Location loc, int value) { MLIRContext *ctx = rewriter.getContext(); - auto intrinsicName = str_attr("llvm.amdgcn.iglp.opt"); + const char *intrinsicName = "llvm.amdgcn.iglp.opt"; LLVM::FastmathFlagsAttr defaultFlags{}; Value iglpValue = LLVM::createConstantI32(loc, rewriter, static_cast(value)); - return rewriter.create( - loc, TypeRange{}, intrinsicName, ValueRange{iglpValue}, defaultFlags, - /*op_bundle_operands=*/ArrayRef{}, - /*op_bundle_tags=*/ArrayRef{}); + return LLVM::createLLVMIntrinsicCall(rewriter, loc, intrinsicName, + TypeRange{}, ValueRange{iglpValue}); } struct InstructionSchedHintsRewriter diff --git a/third_party/amd/lib/TritonAMDGPUToLLVM/TargetInfo.cpp b/third_party/amd/lib/TritonAMDGPUToLLVM/TargetInfo.cpp index 5128300a4a85..ac4622e85f78 100644 --- a/third_party/amd/lib/TritonAMDGPUToLLVM/TargetInfo.cpp +++ b/third_party/amd/lib/TritonAMDGPUToLLVM/TargetInfo.cpp @@ -69,14 +69,9 @@ Value TargetInfo::getClusterCTAId(RewriterBase &rewriter, Location loc) const { Value TargetInfo::ballot(RewriterBase &rewriter, Location loc, Type type, Value cmp) const { - auto stringAttr = rewriter.getStringAttr("llvm.amdgcn.ballot"); - SmallVector operands = {cmp}; - LLVM::FastmathFlagsAttr defaultFlags{}; - auto callOp = rewriter.create( - loc, type, stringAttr, operands, defaultFlags, - /*op_bundle_operands=*/ArrayRef{}, - /*op_bundle_tags=*/ArrayRef{}); - return callOp->getResult(0); + return LLVM::createLLVMIntrinsicCall(rewriter, loc, "llvm.amdgcn.ballot", + type, cmp) + ->getResult(0); } void TargetInfo::storeDShared(RewriterBase &rewriter, Location loc, Value ptr, From 630af0986197c43d8094b81d31f37ba2c338aab5 Mon Sep 17 00:00:00 2001 From: Lei Zhang Date: Fri, 4 Oct 2024 01:30:52 +0000 Subject: [PATCH 5/6] Fix more --- .../Conversion/TritonGPUToLLVM/Utility.h | 10 +++--- .../TritonGPUToLLVM/ControlFlowOpToLLVM.cpp | 4 +++ .../TritonGPUToLLVM/ElementwiseOpToLLVM.cpp | 4 +-- lib/Conversion/TritonGPUToLLVM/Utility.cpp | 33 ++++++++++--------- .../TritonAMDGPUToLLVM/BuiltinFuncToLLVM.cpp | 4 +-- .../TritonAMDGPUToLLVM/DotOpToLLVM/WMMA.cpp | 4 +-- .../ElementwiseOpToLLVM.cpp | 4 +-- .../TritonAMDGPUToLLVM/SchedInstructions.cpp | 12 +++---- .../amd/lib/TritonAMDGPUToLLVM/TargetInfo.cpp | 4 +-- .../amd/lib/TritonAMDGPUToLLVM/Utility.cpp | 10 +++--- .../ElementwiseOpToLLVM.cpp | 3 +- 11 files changed, 49 insertions(+), 43 deletions(-) diff --git a/include/triton/Conversion/TritonGPUToLLVM/Utility.h b/include/triton/Conversion/TritonGPUToLLVM/Utility.h index fa81ba6562fd..d9ebe7ccc1e8 100644 --- a/include/triton/Conversion/TritonGPUToLLVM/Utility.h +++ b/include/triton/Conversion/TritonGPUToLLVM/Utility.h @@ -101,7 +101,7 @@ using namespace mlir::triton; #define barrier() rewriter.create(loc) #define undef(...) rewriter.create(loc, __VA_ARGS__) #define null(...) rewriter.create(loc, __VA_ARGS__) -#define call(...) rewriter.create(loc, __VA_ARGS__) +#define call(...) LLVM::createLLVMCallOp(rewriter, loc, __VA_ARGS__) // Types #define int_ty(width) rewriter.getIntegerType(width) @@ -228,9 +228,11 @@ Value createIndexConstant(OpBuilder &builder, Location loc, Value createLLVMIntegerConstant(OpBuilder &builder, Location loc, short width, int64_t value); -LLVM::CallIntrinsicOp createLLVMIntrinsicCall(OpBuilder &builder, Location loc, - StringRef intrinsic, - TypeRange types, ValueRange args); +LLVM::CallOp createLLVMCallOp(OpBuilder &builder, Location loc, + LLVMFuncOp funcOp, ValueRange args); +LLVM::CallIntrinsicOp +createLLVMIntrinsicCallOp(OpBuilder &builder, Location loc, StringRef intrinsic, + TypeRange types, ValueRange args); // Is v an integer or floating-point scalar constant equal to 0? bool isConstantZero(Value v); diff --git a/lib/Conversion/TritonGPUToLLVM/ControlFlowOpToLLVM.cpp b/lib/Conversion/TritonGPUToLLVM/ControlFlowOpToLLVM.cpp index 4d42db426838..8d5a63eb1465 100644 --- a/lib/Conversion/TritonGPUToLLVM/ControlFlowOpToLLVM.cpp +++ b/lib/Conversion/TritonGPUToLLVM/ControlFlowOpToLLVM.cpp @@ -109,6 +109,10 @@ struct CallOpConversion : public ConvertOpToLLVMPattern { auto newCallOp = rewriter.create( callOp.getLoc(), packedResult ? TypeRange(packedResult) : TypeRange(), promotedOperands, callOp->getAttrs()); + newCallOp.getProperties().setOpBundleSizes( + rewriter.getDenseI32ArrayAttr({})); + newCallOp.getProperties().setOperandSegmentSizes( + {static_cast(promotedOperands.size()), 0}); return newCallOp; } diff --git a/lib/Conversion/TritonGPUToLLVM/ElementwiseOpToLLVM.cpp b/lib/Conversion/TritonGPUToLLVM/ElementwiseOpToLLVM.cpp index 787dee35fb25..8762942c311c 100644 --- a/lib/Conversion/TritonGPUToLLVM/ElementwiseOpToLLVM.cpp +++ b/lib/Conversion/TritonGPUToLLVM/ElementwiseOpToLLVM.cpp @@ -299,7 +299,7 @@ struct MulhiUIOpConversion LLVM::LLVMFuncOp funcOp = appendOrGetExternFuncOp(rewriter, op, funcName, funcType); return { - rewriter.create(loc, funcOp, operands[0]).getResult()}; + LLVM::createLLVMCallOp(rewriter, loc, funcOp, operands[0]).getResult()}; } protected: @@ -327,7 +327,7 @@ struct ExternElementwiseOpConversion LLVM::LLVMFuncOp funcOp = appendOrGetExternFuncOp( rewriter, op, funcName, funcType, op.getLibname(), op.getLibpath()); return { - rewriter.create(loc, funcOp, operands[0]).getResult()}; + LLVM::createLLVMCallOp(rewriter, loc, funcOp, operands[0]).getResult()}; } }; diff --git a/lib/Conversion/TritonGPUToLLVM/Utility.cpp b/lib/Conversion/TritonGPUToLLVM/Utility.cpp index 22ff0c1757d8..e857dd36f6cb 100644 --- a/lib/Conversion/TritonGPUToLLVM/Utility.cpp +++ b/lib/Conversion/TritonGPUToLLVM/Utility.cpp @@ -1,8 +1,7 @@ #include "triton/Conversion/TritonGPUToLLVM/Utility.h" #include "mlir/Dialect/LLVMIR/LLVMDialect.h" -#include "mlir/Dialect/LLVMIR/NVVMDialect.h" +#include "mlir/IR/Attributes.h" #include "triton/Conversion/TritonGPUToLLVM/TargetInfoBase.h" -#include "triton/Conversion/TritonGPUToLLVM/TypeConverter.h" #include "triton/Dialect/TritonGPU/IR/Attributes.h" #include "triton/Dialect/TritonGPU/IR/LinearLayoutConversions.h" #include "llvm/ADT/STLExtras.h" @@ -518,20 +517,22 @@ Value createLLVMIntegerConstant(OpBuilder &builder, Location loc, short width, builder.getIntegerAttr(ty, value)); } -LLVM::CallIntrinsicOp createLLVMIntrinsicCall(OpBuilder &builder, Location loc, - StringRef intrinsic, - TypeRange types, - ValueRange args) { - llvm::SmallVector attrs; - attrs.push_back( - builder.getNamedAttr("intrin", builder.getStringAttr(intrinsic))); - attrs.push_back(builder.getNamedAttr("op_bundle_sizes", - builder.getDenseI32ArrayAttr({}))); - attrs.push_back(builder.getNamedAttr( - "operandSegmentSizes", - builder.getDenseI32ArrayAttr({static_cast(args.size()), 0}))); - - return builder.create(loc, types, args, attrs); +LLVM::CallOp createLLVMCallOp(OpBuilder &builder, Location loc, + LLVMFuncOp funcOp, ValueRange args) { + auto op = builder.create(loc, funcOp, args); + op.getProperties().setOpBundleSizes(builder.getDenseI32ArrayAttr({})); + op.getProperties().setOperandSegmentSizes({static_cast(args.size()), 0}); + return op; +} + +LLVM::CallIntrinsicOp +createLLVMIntrinsicCallOp(OpBuilder &builder, Location loc, StringRef intrinsic, + TypeRange types, ValueRange args) { + auto op = builder.create(loc, types, args); + op.getProperties().setIntrin(builder.getStringAttr(intrinsic)); + op.getProperties().setOpBundleSizes(builder.getDenseI32ArrayAttr({})); + op.getProperties().setOperandSegmentSizes({static_cast(args.size()), 0}); + return op; } bool isConstantZero(Value v) { diff --git a/third_party/amd/lib/TritonAMDGPUToLLVM/BuiltinFuncToLLVM.cpp b/third_party/amd/lib/TritonAMDGPUToLLVM/BuiltinFuncToLLVM.cpp index 23debfe0f495..18364b67e1bd 100644 --- a/third_party/amd/lib/TritonAMDGPUToLLVM/BuiltinFuncToLLVM.cpp +++ b/third_party/amd/lib/TritonAMDGPUToLLVM/BuiltinFuncToLLVM.cpp @@ -189,8 +189,8 @@ class CallOpConversion : public mlir::RewritePattern { } else if (calleeName == "__triton_hip_fast_fdividef") { assert(operands.size() == 2); const char *intrinsic = "llvm.amdgcn.rcp.f32"; - auto rcpOp = LLVM::createLLVMIntrinsicCall(rewriter, loc, intrinsic, - returnType, operands[1]); + auto rcpOp = LLVM::createLLVMIntrinsicCallOp(rewriter, loc, intrinsic, + returnType, operands[1]); LLVM::FastmathFlagsAttr defaultFlags{}; replacementOp = rewriter.create( diff --git a/third_party/amd/lib/TritonAMDGPUToLLVM/DotOpToLLVM/WMMA.cpp b/third_party/amd/lib/TritonAMDGPUToLLVM/DotOpToLLVM/WMMA.cpp index c8907f40a166..9f575be082d9 100644 --- a/third_party/amd/lib/TritonAMDGPUToLLVM/DotOpToLLVM/WMMA.cpp +++ b/third_party/amd/lib/TritonAMDGPUToLLVM/DotOpToLLVM/WMMA.cpp @@ -220,8 +220,8 @@ Value generateWMMAIntrinsic(ConversionPatternRewriter &rewriter, Location loc, if (32 / dElType.getIntOrFloatBitWidth() > 1 || dElType.isInteger(32)) { operands.push_back(int_val(1, false)); } - auto wmmaIntrinsic = LLVM::createLLVMIntrinsicCall(rewriter, loc, name, - valC.getType(), operands); + auto wmmaIntrinsic = LLVM::createLLVMIntrinsicCallOp( + rewriter, loc, name, valC.getType(), operands); return wmmaIntrinsic.getResult(0); } diff --git a/third_party/amd/lib/TritonAMDGPUToLLVM/ElementwiseOpToLLVM.cpp b/third_party/amd/lib/TritonAMDGPUToLLVM/ElementwiseOpToLLVM.cpp index 3682f3d7dac3..47d5fbb3550d 100644 --- a/third_party/amd/lib/TritonAMDGPUToLLVM/ElementwiseOpToLLVM.cpp +++ b/third_party/amd/lib/TritonAMDGPUToLLVM/ElementwiseOpToLLVM.cpp @@ -1243,7 +1243,7 @@ struct ExpOpConversionApprox LLVM::LLVMFuncOp funcOp = appendOrGetExternFuncOp(rewriter, op, funcName, funcType); - return {rewriter.create(loc, funcOp, prod).getResult()}; + return {LLVM::createLLVMCallOp(rewriter, loc, funcOp, prod).getResult()}; } }; @@ -1276,7 +1276,7 @@ struct Exp2OpConversion appendOrGetExternFuncOp(rewriter, op, funcName, funcType); return { - rewriter.create(loc, funcOp, operands[0]).getResult()}; + LLVM::createLLVMCallOp(rewriter, loc, funcOp, operands[0]).getResult()}; } private: diff --git a/third_party/amd/lib/TritonAMDGPUToLLVM/SchedInstructions.cpp b/third_party/amd/lib/TritonAMDGPUToLLVM/SchedInstructions.cpp index fa5de9c2985e..9bed87961966 100644 --- a/third_party/amd/lib/TritonAMDGPUToLLVM/SchedInstructions.cpp +++ b/third_party/amd/lib/TritonAMDGPUToLLVM/SchedInstructions.cpp @@ -47,8 +47,8 @@ void createSchedGroupBarrier(PatternRewriter &rewriter, Location loc, Value groupId = LLVM::createConstantI32(loc, rewriter, static_cast(groupIdValue)); - LLVM::createLLVMIntrinsicCall(rewriter, loc, intrinsicName, TypeRange{}, - ValueRange{mask, size, groupId}); + LLVM::createLLVMIntrinsicCallOp(rewriter, loc, intrinsicName, TypeRange{}, + ValueRange{mask, size, groupId}); } // Insert intrinsic that controls the types of instructions that may be @@ -61,8 +61,8 @@ Operation *createSchedBarrier(PatternRewriter &rewriter, Location loc, Value mask = LLVM::createConstantI32(loc, rewriter, static_cast(maskValue)); - return LLVM::createLLVMIntrinsicCall(rewriter, loc, intrinsicName, - TypeRange{}, ValueRange{mask}); + return LLVM::createLLVMIntrinsicCallOp(rewriter, loc, intrinsicName, + TypeRange{}, ValueRange{mask}); } // Insert an experimental intrinsic for instruction group level parallelism. @@ -73,8 +73,8 @@ Operation *createIglpOpt(PatternRewriter &rewriter, Location loc, int value) { LLVM::FastmathFlagsAttr defaultFlags{}; Value iglpValue = LLVM::createConstantI32(loc, rewriter, static_cast(value)); - return LLVM::createLLVMIntrinsicCall(rewriter, loc, intrinsicName, - TypeRange{}, ValueRange{iglpValue}); + return LLVM::createLLVMIntrinsicCallOp(rewriter, loc, intrinsicName, + TypeRange{}, ValueRange{iglpValue}); } struct InstructionSchedHintsRewriter diff --git a/third_party/amd/lib/TritonAMDGPUToLLVM/TargetInfo.cpp b/third_party/amd/lib/TritonAMDGPUToLLVM/TargetInfo.cpp index ac4622e85f78..c96ddbbe8961 100644 --- a/third_party/amd/lib/TritonAMDGPUToLLVM/TargetInfo.cpp +++ b/third_party/amd/lib/TritonAMDGPUToLLVM/TargetInfo.cpp @@ -69,8 +69,8 @@ Value TargetInfo::getClusterCTAId(RewriterBase &rewriter, Location loc) const { Value TargetInfo::ballot(RewriterBase &rewriter, Location loc, Type type, Value cmp) const { - return LLVM::createLLVMIntrinsicCall(rewriter, loc, "llvm.amdgcn.ballot", - type, cmp) + return LLVM::createLLVMIntrinsicCallOp(rewriter, loc, "llvm.amdgcn.ballot", + type, cmp) ->getResult(0); } diff --git a/third_party/amd/lib/TritonAMDGPUToLLVM/Utility.cpp b/third_party/amd/lib/TritonAMDGPUToLLVM/Utility.cpp index 2e114c898ffe..e8ac2ea78a8f 100644 --- a/third_party/amd/lib/TritonAMDGPUToLLVM/Utility.cpp +++ b/third_party/amd/lib/TritonAMDGPUToLLVM/Utility.cpp @@ -231,11 +231,9 @@ Value llLoad(RewriterBase &rewriter, Location loc, Value ptr, Type elemTy, auto funcName = mangleFunc(getLoadNameRaw(cm), funcType); LLVM::LLVMFuncOp funcOp = appendOrGetExternFuncOp(rewriter, parent, funcName, funcType); - auto loadVal = - rewriter - .create(loc, funcOp, ValueRange({ptr, pred, falseVal})) - .getResult(); - return loadVal; + return LLVM::createLLVMCallOp(rewriter, loc, funcOp, + ValueRange({ptr, pred, falseVal})) + .getResult(); } void llStore(RewriterBase &rewriter, Location loc, Value ptr, Value val, @@ -276,7 +274,7 @@ void llStore(RewriterBase &rewriter, Location loc, Value ptr, Value val, auto funcName = mangleFunc(getStoreNameRaw(cm), funcType); LLVM::LLVMFuncOp funcOp = appendOrGetExternFuncOp(rewriter, parent, funcName, funcType); - rewriter.create(loc, funcOp, ValueRange({ptr, val, pred})); + LLVM::createLLVMCallOp(rewriter, loc, funcOp, ValueRange({ptr, pred, pred})); } } // namespace mlir::LLVM::AMD diff --git a/third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/ElementwiseOpToLLVM.cpp b/third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/ElementwiseOpToLLVM.cpp index 0b663a875422..ef69b96fce1e 100644 --- a/third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/ElementwiseOpToLLVM.cpp +++ b/third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/ElementwiseOpToLLVM.cpp @@ -5,6 +5,7 @@ #include "mlir/Support/LLVM.h" #include "triton/Conversion/TritonGPUToLLVM/ElementwiseOpToLLVMBase.h" #include "triton/Conversion/TritonGPUToLLVM/PatternTritonGPUOpToLLVM.h" +#include "triton/Conversion/TritonGPUToLLVM/Utility.h" using namespace mlir::triton::gpu; @@ -912,7 +913,7 @@ struct OpToExternCallConversion LLVM::LLVMFuncOp funcOp = appendOrGetExternFuncOp(rewriter, op, funcName, funcType); return { - rewriter.create(loc, funcOp, operands[0]).getResult()}; + LLVM::createLLVMCallOp(rewriter, loc, funcOp, operands[0]).getResult()}; } private: From de1e83e70c7a2195177b3d6023401c194e036ec8 Mon Sep 17 00:00:00 2001 From: Lei Zhang Date: Fri, 4 Oct 2024 02:08:19 +0000 Subject: [PATCH 6/6] Fix --- third_party/amd/lib/TritonAMDGPUToLLVM/Utility.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/third_party/amd/lib/TritonAMDGPUToLLVM/Utility.cpp b/third_party/amd/lib/TritonAMDGPUToLLVM/Utility.cpp index e8ac2ea78a8f..542b1ecbb7fb 100644 --- a/third_party/amd/lib/TritonAMDGPUToLLVM/Utility.cpp +++ b/third_party/amd/lib/TritonAMDGPUToLLVM/Utility.cpp @@ -274,7 +274,7 @@ void llStore(RewriterBase &rewriter, Location loc, Value ptr, Value val, auto funcName = mangleFunc(getStoreNameRaw(cm), funcType); LLVM::LLVMFuncOp funcOp = appendOrGetExternFuncOp(rewriter, parent, funcName, funcType); - LLVM::createLLVMCallOp(rewriter, loc, funcOp, ValueRange({ptr, pred, pred})); + LLVM::createLLVMCallOp(rewriter, loc, funcOp, ValueRange({ptr, val, pred})); } } // namespace mlir::LLVM::AMD