diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
index f5e12fd960d0b..55f7d56af71c0 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
@@ -34,6 +34,17 @@ AMDGPUMachineFunction::AMDGPUMachineFunction(const MachineFunction &MF)
   Attribute WaveLimitAttr = F.getFnAttribute("amdgpu-wave-limiter");
   WaveLimiter = WaveLimitAttr.getValueAsBool();
 
+  // Load the instruction-cost statistics stored as string function attributes.
+  // consumeInteger() only writes its output on a successful parse, so a cost
+  // keeps its in-class default when the attribute is absent or malformed.
+  auto ReadCostAttr = [&F](StringRef Name, uint32_t &Cost) {
+    F.getFnAttribute(Name).getValueAsString().consumeInteger(0, Cost);
+  };
+  ReadCostAttr("amdgpu-inst-cost", InstCost);
+  ReadCostAttr("amdgpu-mem-inst-cost", MemInstCost);
+  ReadCostAttr("amdgpu-indirect-access-inst-cost", IndirectAccessInstCost);
+  ReadCostAttr("amdgpu-large-stride-inst-cost", LargeStrideInstCost);
+
   // FIXME: How is this attribute supposed to interact with statically known
   // global sizes?
   StringRef S = F.getFnAttribute("amdgpu-gds-size").getValueAsString();
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h
index 97db8b7eb8d6b..3d9586c8c99c0 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h
@@ -17,6 +17,8 @@
 #include "llvm/IR/Function.h"
 #include "llvm/IR/GlobalValue.h"
 #include "llvm/IR/GlobalVariable.h"
+#include
+#include
 
 namespace llvm {
 
@@ -63,6 +65,14 @@ class AMDGPUMachineFunction : public MachineFunctionInfo {
   // Kernel may need limited waves per EU for better performance.
bool WaveLimiter = false;
 
+  // Per-function instruction-cost statistics. The constructor parses each
+  // one from its string function attribute ("amdgpu-inst-cost" and the
+  // related "amdgpu-*-inst-cost" attributes).
+  uint32_t InstCost = 0;
+  uint32_t MemInstCost = 0;
+  uint32_t IndirectAccessInstCost = 0;
+  uint32_t LargeStrideInstCost = 0;
+
 public:
   AMDGPUMachineFunction(const MachineFunction &MF);
 
@@ -102,6 +112,12 @@ class AMDGPUMachineFunction : public MachineFunctionInfo {
     return WaveLimiter;
   }
 
+  // Read-only accessors for the instruction-cost statistics.
+  uint32_t getInstCost() const { return InstCost; }
+  uint32_t getMemInstCost() const { return MemInstCost; }
+  uint32_t getIndirectAccessInstCost() const { return IndirectAccessInstCost; }
+  uint32_t getLargeStrideInstCost() const { return LargeStrideInstCost; }
+
   unsigned allocateLDSGlobal(const DataLayout &DL, const GlobalVariable &GV);
   void allocateModuleLDSGlobal(const Function &F);
 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp
index 94cd6047290b2..df28af1b87467 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp
@@ -317,6 +317,12 @@ bool AMDGPUPerfHint::runOnFunction(Function &F) {
     Changed = true;
   }
 
+  // Annotate function with stats about properties of its memory instructions.
+  F.addFnAttr("amdgpu-inst-cost", Twine(Info->InstCost).str());
+  F.addFnAttr("amdgpu-mem-inst-cost", Twine(Info->MemInstCost).str());
+  F.addFnAttr("amdgpu-indirect-access-inst-cost", Twine(Info->IAMInstCost).str());
+  F.addFnAttr("amdgpu-large-stride-inst-cost", Twine(Info->LSMInstCost).str());
+  Changed = true; // The addFnAttr calls above always modify F; report it.
   return Changed;
 }
 
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index 7108bdca93759..3718ea8f38f0c 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -611,6 +611,10 @@ yaml::SIMachineFunctionInfo::SIMachineFunctionInfo(
       DynLDSAlign(MFI.getDynLDSAlign()), IsEntryFunction(MFI.isEntryFunction()),
       NoSignedZerosFPMath(MFI.hasNoSignedZerosFPMath()),
       MemoryBound(MFI.isMemoryBound()), WaveLimiter(MFI.needsWaveLimiter()),
+      InstCost(MFI.getInstCost()),
+      MemInstCost(MFI.getMemInstCost()),
+      IndirectAccessInstCost(MFI.getIndirectAccessInstCost()),
+      LargeStrideInstCost(MFI.getLargeStrideInstCost()),
       HasSpilledSGPRs(MFI.hasSpilledSGPRs()),
       HasSpilledVGPRs(MFI.hasSpilledVGPRs()),
       HighBitsOf32BitAddress(MFI.get32BitAddressHighBits()),
@@ -649,6 +653,10 @@ bool SIMachineFunctionInfo::initializeBaseYamlFields(
   NoSignedZerosFPMath = YamlMFI.NoSignedZerosFPMath;
   MemoryBound = YamlMFI.MemoryBound;
   WaveLimiter = YamlMFI.WaveLimiter;
+  InstCost = YamlMFI.InstCost;
+  MemInstCost = YamlMFI.MemInstCost;
+  IndirectAccessInstCost = YamlMFI.IndirectAccessInstCost;
+  LargeStrideInstCost = YamlMFI.LargeStrideInstCost;
   HasSpilledSGPRs = YamlMFI.HasSpilledSGPRs;
   HasSpilledVGPRs = YamlMFI.HasSpilledVGPRs;
   BytesInStackArgArea = YamlMFI.BytesInStackArgArea;
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
index ff7fee8ba741a..3d7d86758bccd 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -282,6 +282,10 @@ struct SIMachineFunctionInfo final :
public yaml::MachineFunctionInfo {
   bool NoSignedZerosFPMath = false;
   bool MemoryBound = false;
   bool WaveLimiter = false;
+  uint32_t InstCost = 0;
+  uint32_t MemInstCost = 0;
+  uint32_t IndirectAccessInstCost = 0;
+  uint32_t LargeStrideInstCost = 0;
   bool HasSpilledSGPRs = false;
   bool HasSpilledVGPRs = false;
   uint32_t HighBitsOf32BitAddress = 0;
@@ -324,8 +328,13 @@ template <> struct MappingTraits<SIMachineFunctionInfo> {
     YamlIO.mapOptional("noSignedZerosFPMath", MFI.NoSignedZerosFPMath, false);
     YamlIO.mapOptional("memoryBound", MFI.MemoryBound, false);
     YamlIO.mapOptional("waveLimiter", MFI.WaveLimiter, false);
+    // Instruction-cost statistics; each key defaults to 0 when not present.
+    YamlIO.mapOptional("instCost", MFI.InstCost, 0u);
+    YamlIO.mapOptional("memInstCost", MFI.MemInstCost, 0u);
+    YamlIO.mapOptional("indirectAccessInstCost", MFI.IndirectAccessInstCost, 0u);
+    YamlIO.mapOptional("largeStrideInstCost", MFI.LargeStrideInstCost, 0u);
     YamlIO.mapOptional("hasSpilledSGPRs", MFI.HasSpilledSGPRs, false);
     YamlIO.mapOptional("hasSpilledVGPRs", MFI.HasSpilledVGPRs, false);
     YamlIO.mapOptional("scratchRSrcReg", MFI.ScratchRSrcReg,
                        StringValue("$private_rsrc_reg"));
     YamlIO.mapOptional("frameOffsetReg", MFI.FrameOffsetReg,