Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,18 @@ AMDGPUMachineFunction::AMDGPUMachineFunction(const MachineFunction &MF)
Attribute WaveLimitAttr = F.getFnAttribute("amdgpu-wave-limiter");
WaveLimiter = WaveLimitAttr.getValueAsBool();

Attribute InstCostAttr = F.getFnAttribute("amdgpu-inst-cost");
InstCostAttr.getValueAsString().consumeInteger(0, InstCost);

Attribute MemInstCostAttr = F.getFnAttribute("amdgpu-mem-inst-cost");
MemInstCostAttr.getValueAsString().consumeInteger(0, MemInstCost);

Attribute IndirectAccessInstCostAttr = F.getFnAttribute("amdgpu-indirect-access-inst-cost");
IndirectAccessInstCostAttr.getValueAsString().consumeInteger(0, IndirectAccessInstCost);

Attribute LargeStrideInstCostAttr = F.getFnAttribute("amdgpu-large-stride-inst-cost");
LargeStrideInstCostAttr.getValueAsString().consumeInteger(0, LargeStrideInstCost);

// FIXME: How is this attribute supposed to interact with statically known
// global sizes?
StringRef S = F.getFnAttribute("amdgpu-gds-size").getValueAsString();
Expand Down
26 changes: 26 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include <cstdint>
#include <sys/types.h>

namespace llvm {

Expand Down Expand Up @@ -63,6 +65,14 @@ class AMDGPUMachineFunction : public MachineFunctionInfo {
// Kernel may need limited waves per EU for better performance.
bool WaveLimiter = false;

// Parsed from the "amdgpu-inst-cost" function attribute. Defaults to 0.
uint32_t InstCost = 0;

// Parsed from the "amdgpu-mem-inst-cost" function attribute. Defaults to 0.
uint32_t MemInstCost = 0;

// Parsed from the "amdgpu-indirect-access-inst-cost" function attribute.
// Defaults to 0.
uint32_t IndirectAccessInstCost = 0;

// Parsed from the "amdgpu-large-stride-inst-cost" function attribute.
// Defaults to 0.
uint32_t LargeStrideInstCost = 0;

public:
AMDGPUMachineFunction(const MachineFunction &MF);

Expand Down Expand Up @@ -102,6 +112,22 @@ class AMDGPUMachineFunction : public MachineFunctionInfo {
return WaveLimiter;
}

/// \returns the stored InstCost value (see the "amdgpu-inst-cost" function
/// attribute).
uint32_t getInstCost() const { return InstCost; }

/// \returns the stored MemInstCost value (see the "amdgpu-mem-inst-cost"
/// function attribute).
uint32_t getMemInstCost() const { return MemInstCost; }

/// \returns the stored IndirectAccessInstCost value (see the
/// "amdgpu-indirect-access-inst-cost" function attribute).
uint32_t getIndirectAccessInstCost() const { return IndirectAccessInstCost; }

/// \returns the stored LargeStrideInstCost value (see the
/// "amdgpu-large-stride-inst-cost" function attribute).
uint32_t getLargeStrideInstCost() const { return LargeStrideInstCost; }

unsigned allocateLDSGlobal(const DataLayout &DL, const GlobalVariable &GV);
void allocateModuleLDSGlobal(const Function &F);

Expand Down
6 changes: 6 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -317,6 +317,12 @@ bool AMDGPUPerfHint::runOnFunction(Function &F) {
Changed = true;
}

// Annotate function with stats about properties of its memory instructions.
F.addFnAttr("amdgpu-inst-cost", Twine(Info->InstCost).str());
F.addFnAttr("amdgpu-mem-inst-cost", Twine(Info->MemInstCost).str());
F.addFnAttr("amdgpu-indirect-access-inst-cost", Twine(Info->IAMInstCost).str());
F.addFnAttr("amdgpu-large-stride-inst-cost", Twine(Info->LSMInstCost).str());

return Changed;
}

Expand Down
8 changes: 8 additions & 0 deletions llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -611,6 +611,10 @@ yaml::SIMachineFunctionInfo::SIMachineFunctionInfo(
DynLDSAlign(MFI.getDynLDSAlign()), IsEntryFunction(MFI.isEntryFunction()),
NoSignedZerosFPMath(MFI.hasNoSignedZerosFPMath()),
MemoryBound(MFI.isMemoryBound()), WaveLimiter(MFI.needsWaveLimiter()),
InstCost(MFI.getInstCost()),
MemInstCost(MFI.getMemInstCost()),
IndirectAccessInstCost(MFI.getIndirectAccessInstCost()),
LargeStrideInstCost(MFI.getLargeStrideInstCost()),
HasSpilledSGPRs(MFI.hasSpilledSGPRs()),
HasSpilledVGPRs(MFI.hasSpilledVGPRs()),
HighBitsOf32BitAddress(MFI.get32BitAddressHighBits()),
Expand Down Expand Up @@ -649,6 +653,10 @@ bool SIMachineFunctionInfo::initializeBaseYamlFields(
NoSignedZerosFPMath = YamlMFI.NoSignedZerosFPMath;
MemoryBound = YamlMFI.MemoryBound;
WaveLimiter = YamlMFI.WaveLimiter;
InstCost = YamlMFI.InstCost;
MemInstCost = YamlMFI.MemInstCost;
IndirectAccessInstCost = YamlMFI.IndirectAccessInstCost;
LargeStrideInstCost = YamlMFI.LargeStrideInstCost;
HasSpilledSGPRs = YamlMFI.HasSpilledSGPRs;
HasSpilledVGPRs = YamlMFI.HasSpilledVGPRs;
BytesInStackArgArea = YamlMFI.BytesInStackArgArea;
Expand Down
12 changes: 10 additions & 2 deletions llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -282,6 +282,10 @@ struct SIMachineFunctionInfo final : public yaml::MachineFunctionInfo {
bool NoSignedZerosFPMath = false;
bool MemoryBound = false;
bool WaveLimiter = false;
uint32_t InstCost = 0;
uint32_t MemInstCost = 0;
uint32_t IndirectAccessInstCost = 0;
uint32_t LargeStrideInstCost = 0;
bool HasSpilledSGPRs = false;
bool HasSpilledVGPRs = false;
uint32_t HighBitsOf32BitAddress = 0;
Expand Down Expand Up @@ -324,8 +328,12 @@ template <> struct MappingTraits<SIMachineFunctionInfo> {
YamlIO.mapOptional("noSignedZerosFPMath", MFI.NoSignedZerosFPMath, false);
YamlIO.mapOptional("memoryBound", MFI.MemoryBound, false);
YamlIO.mapOptional("waveLimiter", MFI.WaveLimiter, false);
YamlIO.mapOptional("hasSpilledSGPRs", MFI.HasSpilledSGPRs, false);
YamlIO.mapOptional("hasSpilledVGPRs", MFI.HasSpilledVGPRs, false);
// Defaults are spelled in the type of the mapped field: unsigned zero for
// the uint32_t cost counters, false for the bool spill flags.
YamlIO.mapOptional("instCost", MFI.InstCost, 0u);
YamlIO.mapOptional("memInstCost", MFI.MemInstCost, 0u);
YamlIO.mapOptional("indirectAccessInstCost", MFI.IndirectAccessInstCost,
                   0u);
YamlIO.mapOptional("largeStrideInstCost", MFI.LargeStrideInstCost, 0u);
YamlIO.mapOptional("hasSpilledSGPRs", MFI.HasSpilledSGPRs, false);
YamlIO.mapOptional("hasSpilledVGPRs", MFI.HasSpilledVGPRs, false);
YamlIO.mapOptional("scratchRSrcReg", MFI.ScratchRSrcReg,
StringValue("$private_rsrc_reg"));
YamlIO.mapOptional("frameOffsetReg", MFI.FrameOffsetReg,
Expand Down