JoshHuttonCode · kerbowa · May 26, 2023
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
@@ -34,6 +34,16 @@ AMDGPUMachineFunction::AMDGPUMachineFunction(const MachineFunction &MF)
   Attribute WaveLimitAttr = F.getFnAttribute("amdgpu-wave-limiter");
   WaveLimiter = WaveLimitAttr.getValueAsBool();
 
+  // Load the cost statistics from their string function attributes. The result
+  // of consumeInteger is discarded, so a parse failure is not reported.
+  const auto ReadCostAttr = [&F](StringRef Kind, uint32_t &Cost) {
+    F.getFnAttribute(Kind).getValueAsString().consumeInteger(0, Cost);
+  };
+  ReadCostAttr("amdgpu-inst-cost", InstCost);
+  ReadCostAttr("amdgpu-mem-inst-cost", MemInstCost);
+  ReadCostAttr("amdgpu-indirect-access-inst-cost", IndirectAccessInstCost);
+  ReadCostAttr("amdgpu-large-stride-inst-cost", LargeStrideInstCost);
+
   // FIXME: How is this attribute supposed to interact with statically known
   // global sizes?
   StringRef S = F.getFnAttribute("amdgpu-gds-size").getValueAsString();

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h
@@ -17,6 +17,8 @@
 #include "llvm/IR/Function.h"
 #include "llvm/IR/GlobalValue.h"
 #include "llvm/IR/GlobalVariable.h"
+#include <cstdint>
+#include <sys/types.h>
 
 namespace llvm {
 
@@ -63,6 +65,14 @@ class AMDGPUMachineFunction : public MachineFunctionInfo {
   // Kernel may need limited waves per EU for better performance.
   bool WaveLimiter = false;
 
+  uint32_t InstCost = 0; // Parsed from fn attribute "amdgpu-inst-cost".
+
+  uint32_t MemInstCost = 0; // Parsed from "amdgpu-mem-inst-cost".
+
+  uint32_t IndirectAccessInstCost = 0; // "amdgpu-indirect-access-inst-cost".
+
+  uint32_t LargeStrideInstCost = 0; // "amdgpu-large-stride-inst-cost".
+
 public:
   AMDGPUMachineFunction(const MachineFunction &MF);
 
@@ -102,6 +112,22 @@ class AMDGPUMachineFunction : public MachineFunctionInfo {
     return WaveLimiter;
   }
 
+  uint32_t getInstCost() const { // Returns the stored InstCost (default 0).
+    return InstCost;
+  }
+
+  uint32_t getMemInstCost() const { // Returns the stored MemInstCost.
+    return MemInstCost;
+  }
+
+  uint32_t getIndirectAccessInstCost() const { // Stored indirect-access cost.
+    return IndirectAccessInstCost;
+  }
+
+  uint32_t getLargeStrideInstCost() const { // Stored large-stride cost.
+    return LargeStrideInstCost;
+  }
+
   unsigned allocateLDSGlobal(const DataLayout &DL, const GlobalVariable &GV);
   void allocateModuleLDSGlobal(const Function &F);
 

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp
@@ -317,6 +317,16 @@ bool AMDGPUPerfHint::runOnFunction(Function &F) {
     Changed = true;
   }
 
+  // Annotate function with stats about properties of its memory instructions.
+  // The attributes are added unconditionally, so the function is always
+  // modified here and the pass must report a change.
+  F.addFnAttr("amdgpu-inst-cost", Twine(Info->InstCost).str());
+  F.addFnAttr("amdgpu-mem-inst-cost", Twine(Info->MemInstCost).str());
+  F.addFnAttr("amdgpu-indirect-access-inst-cost",
+              Twine(Info->IAMInstCost).str());
+  F.addFnAttr("amdgpu-large-stride-inst-cost", Twine(Info->LSMInstCost).str());
+  Changed = true;
+
   return Changed;
 }
 

diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -611,6 +611,10 @@ yaml::SIMachineFunctionInfo::SIMachineFunctionInfo(
       DynLDSAlign(MFI.getDynLDSAlign()), IsEntryFunction(MFI.isEntryFunction()),
       NoSignedZerosFPMath(MFI.hasNoSignedZerosFPMath()),
       MemoryBound(MFI.isMemoryBound()), WaveLimiter(MFI.needsWaveLimiter()),
+      InstCost(MFI.getInstCost()), // Cost statistics are copied from MFI.
+      MemInstCost(MFI.getMemInstCost()),
+      IndirectAccessInstCost(MFI.getIndirectAccessInstCost()),
+      LargeStrideInstCost(MFI.getLargeStrideInstCost()),
       HasSpilledSGPRs(MFI.hasSpilledSGPRs()),
       HasSpilledVGPRs(MFI.hasSpilledVGPRs()),
       HighBitsOf32BitAddress(MFI.get32BitAddressHighBits()),
@@ -649,6 +653,10 @@ bool SIMachineFunctionInfo::initializeBaseYamlFields(
   NoSignedZerosFPMath = YamlMFI.NoSignedZerosFPMath;
   MemoryBound = YamlMFI.MemoryBound;
   WaveLimiter = YamlMFI.WaveLimiter;
+  InstCost = YamlMFI.InstCost; // Cost statistics are taken from YamlMFI as-is.
+  MemInstCost = YamlMFI.MemInstCost;
+  IndirectAccessInstCost = YamlMFI.IndirectAccessInstCost;
+  LargeStrideInstCost = YamlMFI.LargeStrideInstCost;
   HasSpilledSGPRs = YamlMFI.HasSpilledSGPRs;
   HasSpilledVGPRs = YamlMFI.HasSpilledVGPRs;
   BytesInStackArgArea = YamlMFI.BytesInStackArgArea;

diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -282,6 +282,10 @@ struct SIMachineFunctionInfo final : public yaml::MachineFunctionInfo {
   bool NoSignedZerosFPMath = false;
   bool MemoryBound = false;
   bool WaveLimiter = false;
+  uint32_t InstCost = 0; // YAML key "instCost".
+  uint32_t MemInstCost = 0; // YAML key "memInstCost".
+  uint32_t IndirectAccessInstCost = 0; // YAML key "indirectAccessInstCost".
+  uint32_t LargeStrideInstCost = 0; // YAML key "largeStrideInstCost".
   bool HasSpilledSGPRs = false;
   bool HasSpilledVGPRs = false;
   uint32_t HighBitsOf32BitAddress = 0;
@@ -324,8 +328,14 @@ template <> struct MappingTraits<SIMachineFunctionInfo> {
     YamlIO.mapOptional("noSignedZerosFPMath", MFI.NoSignedZerosFPMath, false);
     YamlIO.mapOptional("memoryBound", MFI.MemoryBound, false);
     YamlIO.mapOptional("waveLimiter", MFI.WaveLimiter, false);
+    // Instruction-cost statistics; each key is optional with a default of 0.
+    YamlIO.mapOptional("instCost", MFI.InstCost, 0u);
+    YamlIO.mapOptional("memInstCost", MFI.MemInstCost, 0u);
+    YamlIO.mapOptional("indirectAccessInstCost", MFI.IndirectAccessInstCost,
+                       0u);
+    YamlIO.mapOptional("largeStrideInstCost", MFI.LargeStrideInstCost, 0u);
     YamlIO.mapOptional("hasSpilledSGPRs", MFI.HasSpilledSGPRs, false);
     YamlIO.mapOptional("hasSpilledVGPRs", MFI.HasSpilledVGPRs, false);
     YamlIO.mapOptional("scratchRSrcReg", MFI.ScratchRSrcReg,
                        StringValue("$private_rsrc_reg"));
     YamlIO.mapOptional("frameOffsetReg", MFI.FrameOffsetReg,