diff --git a/cmake/llvm-hash.txt b/cmake/llvm-hash.txt index 36344442bd3a..454a94e1f4cb 100644 --- a/cmake/llvm-hash.txt +++ b/cmake/llvm-hash.txt @@ -1 +1 @@ -df0864e761107b07e38f5503e0cbee0cebb4c5e8 +61f8a7f618901797ee8663389a29722f29216a96 diff --git a/include/triton/Conversion/TritonGPUToLLVM/Utility.h b/include/triton/Conversion/TritonGPUToLLVM/Utility.h index 1862c03b9c7e..d9ebe7ccc1e8 100644 --- a/include/triton/Conversion/TritonGPUToLLVM/Utility.h +++ b/include/triton/Conversion/TritonGPUToLLVM/Utility.h @@ -101,7 +101,7 @@ using namespace mlir::triton; #define barrier() rewriter.create(loc) #define undef(...) rewriter.create(loc, __VA_ARGS__) #define null(...) rewriter.create(loc, __VA_ARGS__) -#define call(...) rewriter.create(loc, __VA_ARGS__) +#define call(...) LLVM::createLLVMCallOp(rewriter, loc, __VA_ARGS__) // Types #define int_ty(width) rewriter.getIntegerType(width) @@ -228,6 +228,12 @@ Value createIndexConstant(OpBuilder &builder, Location loc, Value createLLVMIntegerConstant(OpBuilder &builder, Location loc, short width, int64_t value); +LLVM::CallOp createLLVMCallOp(OpBuilder &builder, Location loc, + LLVMFuncOp funcOp, ValueRange args); +LLVM::CallIntrinsicOp +createLLVMIntrinsicCallOp(OpBuilder &builder, Location loc, StringRef intrinsic, + TypeRange types, ValueRange args); + // Is v an integer or floating-point scalar constant equal to 0? bool isConstantZero(Value v); diff --git a/lib/Conversion/TritonGPUToLLVM/ControlFlowOpToLLVM.cpp b/lib/Conversion/TritonGPUToLLVM/ControlFlowOpToLLVM.cpp index 4d42db426838..8d5a63eb1465 100644 --- a/lib/Conversion/TritonGPUToLLVM/ControlFlowOpToLLVM.cpp +++ b/lib/Conversion/TritonGPUToLLVM/ControlFlowOpToLLVM.cpp @@ -109,6 +109,10 @@ struct CallOpConversion : public ConvertOpToLLVMPattern { auto newCallOp = rewriter.create( callOp.getLoc(), packedResult ? TypeRange(packedResult) : TypeRange(), promotedOperands, callOp->getAttrs()); + newCallOp.getProperties().setOpBundleSizes( + rewriter.getDenseI32ArrayAttr({})); + newCallOp.getProperties().setOperandSegmentSizes( + {static_cast(promotedOperands.size()), 0}); return newCallOp; } diff --git a/lib/Conversion/TritonGPUToLLVM/ElementwiseOpToLLVM.cpp b/lib/Conversion/TritonGPUToLLVM/ElementwiseOpToLLVM.cpp index 787dee35fb25..8762942c311c 100644 --- a/lib/Conversion/TritonGPUToLLVM/ElementwiseOpToLLVM.cpp +++ b/lib/Conversion/TritonGPUToLLVM/ElementwiseOpToLLVM.cpp @@ -299,7 +299,7 @@ struct MulhiUIOpConversion LLVM::LLVMFuncOp funcOp = appendOrGetExternFuncOp(rewriter, op, funcName, funcType); return { - rewriter.create(loc, funcOp, operands[0]).getResult()}; + LLVM::createLLVMCallOp(rewriter, loc, funcOp, operands[0]).getResult()}; } protected: @@ -327,7 +327,7 @@ struct ExternElementwiseOpConversion LLVM::LLVMFuncOp funcOp = appendOrGetExternFuncOp( rewriter, op, funcName, funcType, op.getLibname(), op.getLibpath()); return { - rewriter.create(loc, funcOp, operands[0]).getResult()}; + LLVM::createLLVMCallOp(rewriter, loc, funcOp, operands[0]).getResult()}; } }; diff --git a/lib/Conversion/TritonGPUToLLVM/Utility.cpp b/lib/Conversion/TritonGPUToLLVM/Utility.cpp index 9de4434528fa..e857dd36f6cb 100644 --- a/lib/Conversion/TritonGPUToLLVM/Utility.cpp +++ b/lib/Conversion/TritonGPUToLLVM/Utility.cpp @@ -1,8 +1,7 @@ #include "triton/Conversion/TritonGPUToLLVM/Utility.h" #include "mlir/Dialect/LLVMIR/LLVMDialect.h" -#include "mlir/Dialect/LLVMIR/NVVMDialect.h" +#include "mlir/IR/Attributes.h" #include "triton/Conversion/TritonGPUToLLVM/TargetInfoBase.h" -#include "triton/Conversion/TritonGPUToLLVM/TypeConverter.h" #include "triton/Dialect/TritonGPU/IR/Attributes.h" #include "triton/Dialect/TritonGPU/IR/LinearLayoutConversions.h" #include "llvm/ADT/STLExtras.h" @@ -518,6 +517,24 @@ Value createLLVMIntegerConstant(OpBuilder &builder, Location loc, short width, builder.getIntegerAttr(ty, value)); } +LLVM::CallOp createLLVMCallOp(OpBuilder &builder, Location loc, + LLVMFuncOp funcOp, ValueRange args) { + auto op = builder.create(loc, funcOp, args); + op.getProperties().setOpBundleSizes(builder.getDenseI32ArrayAttr({})); + op.getProperties().setOperandSegmentSizes({static_cast(args.size()), 0}); + return op; +} + +LLVM::CallIntrinsicOp +createLLVMIntrinsicCallOp(OpBuilder &builder, Location loc, StringRef intrinsic, + TypeRange types, ValueRange args) { + auto op = builder.create(loc, types, args); + op.getProperties().setIntrin(builder.getStringAttr(intrinsic)); + op.getProperties().setOpBundleSizes(builder.getDenseI32ArrayAttr({})); + op.getProperties().setOperandSegmentSizes({static_cast(args.size()), 0}); + return op; +} + bool isConstantZero(Value v) { if (auto constantOp = v.getDefiningOp()) { if (auto attr = dyn_cast(constantOp.getValue())) { diff --git a/third_party/amd/lib/TritonAMDGPUToLLVM/BuiltinFuncToLLVM.cpp b/third_party/amd/lib/TritonAMDGPUToLLVM/BuiltinFuncToLLVM.cpp index edf74cea4259..18364b67e1bd 100644 --- a/third_party/amd/lib/TritonAMDGPUToLLVM/BuiltinFuncToLLVM.cpp +++ b/third_party/amd/lib/TritonAMDGPUToLLVM/BuiltinFuncToLLVM.cpp @@ -4,6 +4,7 @@ #include "mlir/Dialect/LLVMIR/LLVMDialect.h" #include "mlir/Pass/Pass.h" #include "mlir/Transforms/GreedyPatternRewriteDriver.h" +#include "triton/Conversion/TritonGPUToLLVM/Utility.h" namespace mlir { namespace triton { @@ -187,11 +188,11 @@ class CallOpConversion : public mlir::RewritePattern { rewriter.create(loc, returnType, op->getResult(0)); } else if (calleeName == "__triton_hip_fast_fdividef") { assert(operands.size() == 2); - auto name = StringAttr::get(callOp.getContext(), "llvm.amdgcn.rcp.f32"); - LLVM::FastmathFlagsAttr defaultFlags{}; - auto rcpOp = rewriter.create( - loc, returnType, name, operands[1], defaultFlags); + const char *intrinsic = "llvm.amdgcn.rcp.f32"; + auto rcpOp = LLVM::createLLVMIntrinsicCallOp(rewriter, loc, intrinsic, + returnType, operands[1]); + LLVM::FastmathFlagsAttr defaultFlags{}; replacementOp = rewriter.create( loc, returnType, operands[0], rcpOp->getResult(0), defaultFlags); } diff --git a/third_party/amd/lib/TritonAMDGPUToLLVM/DotOpToLLVM/WMMA.cpp b/third_party/amd/lib/TritonAMDGPUToLLVM/DotOpToLLVM/WMMA.cpp index 9368443255f0..9f575be082d9 100644 --- a/third_party/amd/lib/TritonAMDGPUToLLVM/DotOpToLLVM/WMMA.cpp +++ b/third_party/amd/lib/TritonAMDGPUToLLVM/DotOpToLLVM/WMMA.cpp @@ -24,6 +24,7 @@ #include "../PatternTritonGPUOpToLLVM.h" #include "Utility.h" #include "mlir/Dialect/LLVMIR/ROCDLDialect.h" +#include "triton/Conversion/TritonGPUToLLVM/Utility.h" namespace mlir::triton::AMD { namespace { @@ -219,10 +220,8 @@ Value generateWMMAIntrinsic(ConversionPatternRewriter &rewriter, Location loc, if (32 / dElType.getIntOrFloatBitWidth() > 1 || dElType.isInteger(32)) { operands.push_back(int_val(1, false)); } - auto wmmaIntrinsic = rewriter.create( - loc, TypeRange{valC.getType()}, StringAttr::get(loc.getContext(), name), - operands, defaultFlags); - + auto wmmaIntrinsic = LLVM::createLLVMIntrinsicCallOp( + rewriter, loc, name, valC.getType(), operands); return wmmaIntrinsic.getResult(0); } diff --git a/third_party/amd/lib/TritonAMDGPUToLLVM/ElementwiseOpToLLVM.cpp b/third_party/amd/lib/TritonAMDGPUToLLVM/ElementwiseOpToLLVM.cpp index 3682f3d7dac3..47d5fbb3550d 100644 --- a/third_party/amd/lib/TritonAMDGPUToLLVM/ElementwiseOpToLLVM.cpp +++ b/third_party/amd/lib/TritonAMDGPUToLLVM/ElementwiseOpToLLVM.cpp @@ -1243,7 +1243,7 @@ struct ExpOpConversionApprox LLVM::LLVMFuncOp funcOp = appendOrGetExternFuncOp(rewriter, op, funcName, funcType); - return {rewriter.create(loc, funcOp, prod).getResult()}; + return {LLVM::createLLVMCallOp(rewriter, loc, funcOp, prod).getResult()}; } }; @@ -1276,7 +1276,7 @@ struct Exp2OpConversion appendOrGetExternFuncOp(rewriter, op, funcName, funcType); return { - rewriter.create(loc, funcOp, operands[0]).getResult()}; + LLVM::createLLVMCallOp(rewriter, loc, funcOp, operands[0]).getResult()}; } private: diff --git a/third_party/amd/lib/TritonAMDGPUToLLVM/SchedInstructions.cpp b/third_party/amd/lib/TritonAMDGPUToLLVM/SchedInstructions.cpp index c9413a52f515..9bed87961966 100644 --- a/third_party/amd/lib/TritonAMDGPUToLLVM/SchedInstructions.cpp +++ b/third_party/amd/lib/TritonAMDGPUToLLVM/SchedInstructions.cpp @@ -38,7 +38,7 @@ void createSchedGroupBarrier(PatternRewriter &rewriter, Location loc, InstructionKindMask maskValue, int sizeValue, int groupIdValue) { MLIRContext *ctx = rewriter.getContext(); - auto intrinsicName = str_attr("llvm.amdgcn.sched.group.barrier"); + const char *intrinsicName = "llvm.amdgcn.sched.group.barrier"; Value mask = LLVM::createConstantI32(loc, rewriter, static_cast(maskValue)); @@ -47,10 +47,8 @@ void createSchedGroupBarrier(PatternRewriter &rewriter, Location loc, Value groupId = LLVM::createConstantI32(loc, rewriter, static_cast(groupIdValue)); - LLVM::FastmathFlagsAttr defaultFlags{}; - rewriter.create(loc, TypeRange{}, intrinsicName, - ValueRange{mask, size, groupId}, - defaultFlags); + LLVM::createLLVMIntrinsicCallOp(rewriter, loc, intrinsicName, TypeRange{}, + ValueRange{mask, size, groupId}); } // Insert intrinsic that controls the types of instructions that may be @@ -58,25 +56,25 @@ void createSchedGroupBarrier(PatternRewriter &rewriter, Location loc, Operation *createSchedBarrier(PatternRewriter &rewriter, Location loc, int64_t maskValue) { MLIRContext *ctx = rewriter.getContext(); - auto intrinsicName = str_attr("llvm.amdgcn.sched.barrier"); + const char *intrinsicName = "llvm.amdgcn.sched.barrier"; LLVM::FastmathFlagsAttr defaultFlags{}; Value mask = LLVM::createConstantI32(loc, rewriter, static_cast(maskValue)); - return rewriter.create(loc, TypeRange{}, intrinsicName, - ValueRange{mask}, defaultFlags); + return LLVM::createLLVMIntrinsicCallOp(rewriter, loc, intrinsicName, + TypeRange{}, ValueRange{mask}); } // Insert an experimental intrinsic for instruction group level parallelism. // The intrinsic takes a value that specifies the strategy. Operation *createIglpOpt(PatternRewriter &rewriter, Location loc, int value) { MLIRContext *ctx = rewriter.getContext(); - auto intrinsicName = str_attr("llvm.amdgcn.iglp.opt"); + const char *intrinsicName = "llvm.amdgcn.iglp.opt"; LLVM::FastmathFlagsAttr defaultFlags{}; Value iglpValue = LLVM::createConstantI32(loc, rewriter, static_cast(value)); - return rewriter.create( - loc, TypeRange{}, intrinsicName, ValueRange{iglpValue}, defaultFlags); + return LLVM::createLLVMIntrinsicCallOp(rewriter, loc, intrinsicName, + TypeRange{}, ValueRange{iglpValue}); } struct InstructionSchedHintsRewriter diff --git a/third_party/amd/lib/TritonAMDGPUToLLVM/TargetInfo.cpp b/third_party/amd/lib/TritonAMDGPUToLLVM/TargetInfo.cpp index 8462eb5fc9b8..c96ddbbe8961 100644 --- a/third_party/amd/lib/TritonAMDGPUToLLVM/TargetInfo.cpp +++ b/third_party/amd/lib/TritonAMDGPUToLLVM/TargetInfo.cpp @@ -69,12 +69,9 @@ Value TargetInfo::getClusterCTAId(RewriterBase &rewriter, Location loc) const { Value TargetInfo::ballot(RewriterBase &rewriter, Location loc, Type type, Value cmp) const { - auto stringAttr = rewriter.getStringAttr("llvm.amdgcn.ballot"); - SmallVector operands = {cmp}; - Value asmResult = - rewriter.create(loc, type, stringAttr, operands) - ->getResult(0); - return asmResult; + return LLVM::createLLVMIntrinsicCallOp(rewriter, loc, "llvm.amdgcn.ballot", + type, cmp) + ->getResult(0); } void TargetInfo::storeDShared(RewriterBase &rewriter, Location loc, Value ptr, diff --git a/third_party/amd/lib/TritonAMDGPUToLLVM/Utility.cpp b/third_party/amd/lib/TritonAMDGPUToLLVM/Utility.cpp index 2e114c898ffe..542b1ecbb7fb 100644 --- a/third_party/amd/lib/TritonAMDGPUToLLVM/Utility.cpp +++ b/third_party/amd/lib/TritonAMDGPUToLLVM/Utility.cpp @@ -231,11 +231,9 @@ Value llLoad(RewriterBase &rewriter, Location loc, Value ptr, Type elemTy, auto funcName = mangleFunc(getLoadNameRaw(cm), funcType); LLVM::LLVMFuncOp funcOp = appendOrGetExternFuncOp(rewriter, parent, funcName, funcType); - auto loadVal = - rewriter - .create(loc, funcOp, ValueRange({ptr, pred, falseVal})) - .getResult(); - return loadVal; + return LLVM::createLLVMCallOp(rewriter, loc, funcOp, + ValueRange({ptr, pred, falseVal})) + .getResult(); } void llStore(RewriterBase &rewriter, Location loc, Value ptr, Value val, @@ -276,7 +274,7 @@ void llStore(RewriterBase &rewriter, Location loc, Value ptr, Value val, auto funcName = mangleFunc(getStoreNameRaw(cm), funcType); LLVM::LLVMFuncOp funcOp = appendOrGetExternFuncOp(rewriter, parent, funcName, funcType); - rewriter.create(loc, funcOp, ValueRange({ptr, val, pred})); + LLVM::createLLVMCallOp(rewriter, loc, funcOp, ValueRange({ptr, val, pred})); } } // namespace mlir::LLVM::AMD diff --git a/third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/ElementwiseOpToLLVM.cpp b/third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/ElementwiseOpToLLVM.cpp index 0b663a875422..ef69b96fce1e 100644 --- a/third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/ElementwiseOpToLLVM.cpp +++ b/third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/ElementwiseOpToLLVM.cpp @@ -5,6 +5,7 @@ #include "mlir/Support/LLVM.h" #include "triton/Conversion/TritonGPUToLLVM/ElementwiseOpToLLVMBase.h" #include "triton/Conversion/TritonGPUToLLVM/PatternTritonGPUOpToLLVM.h" +#include "triton/Conversion/TritonGPUToLLVM/Utility.h" using namespace mlir::triton::gpu; @@ -912,7 +913,7 @@ struct OpToExternCallConversion LLVM::LLVMFuncOp funcOp = appendOrGetExternFuncOp(rewriter, op, funcName, funcType); return { - rewriter.create(loc, funcOp, operands[0]).getResult()}; + LLVM::createLLVMCallOp(rewriter, loc, funcOp, operands[0]).getResult()}; } private: