@@ -30,8 +30,9 @@ using namespace llvm::AMDGPU;
3030
3131#define DEBUG_TYPE " amdgpu-resource-usage"
3232
// Defines the pass's static ID member (initialized to 0) and
// AMDGPUResourceUsageAnalysisID, a char reference bound to that member.
// This change moves both from AMDGPUResourceUsageAnalysis to
// AMDGPUResourceUsageAnalysisWrapperPass.
// NOTE(review): presumably the address of ID, not its value, identifies the
// pass — confirm against the legacy pass manager documentation.
33- char llvm::AMDGPUResourceUsageAnalysis::ID = 0 ;
34- char &llvm::AMDGPUResourceUsageAnalysisID = AMDGPUResourceUsageAnalysis::ID;
33+ char llvm::AMDGPUResourceUsageAnalysisWrapperPass::ID = 0 ;
34+ char &llvm::AMDGPUResourceUsageAnalysisID =
35+ AMDGPUResourceUsageAnalysisWrapperPass::ID;
3536
3637// In code object v4 and older, we need to tell the runtime some amount ahead of
3738// time if we don't know the true stack size. Assume a smaller number if this is
@@ -47,7 +48,7 @@ static cl::opt<uint32_t> clAssumedStackSizeForDynamicSizeObjects(
4748 " variable sized objects (in bytes)" ),
4849 cl::Hidden, cl::init(4096 ));
4950
// Pass registration through the INITIALIZE_PASS macro; this change switches
// the registered class from AMDGPUResourceUsageAnalysis to
// AMDGPUResourceUsageAnalysisWrapperPass and leaves the other arguments as-is.
// NOTE(review): DEBUG_TYPE is presumably used as the pass's command-line
// argument and the two trailing `true` values as the macro's CFG-only and
// is-analysis flags — confirm against llvm/PassSupport.h.
50- INITIALIZE_PASS (AMDGPUResourceUsageAnalysis , DEBUG_TYPE,
51+ INITIALIZE_PASS (AMDGPUResourceUsageAnalysisWrapperPass , DEBUG_TYPE,
5152 " Function register usage analysis" , true , true )
5253
5354static const Function *getCalleeFunction(const MachineOperand &Op) {
@@ -68,7 +69,8 @@ static bool hasAnyNonFlatUseOfReg(const MachineRegisterInfo &MRI,
6869 return false ;
6970}
7071
71- bool AMDGPUResourceUsageAnalysis::runOnMachineFunction (MachineFunction &MF) {
72+ bool AMDGPUResourceUsageAnalysisWrapperPass::runOnMachineFunction (
73+ MachineFunction &MF) {
7274 auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
7375 if (!TPC)
7476 return false ;
@@ -90,14 +92,40 @@ bool AMDGPUResourceUsageAnalysis::runOnMachineFunction(MachineFunction &MF) {
9092 AssumedStackSizeForExternalCall = 0 ;
9193 }
9294
93- ResourceInfo = analyzeResourceUsage (MF, AssumedStackSizeForDynamicSizeObjects,
94- AssumedStackSizeForExternalCall);
95+ ResourceInfo = AMDGPUResourceUsageAnalysisImpl ().analyzeResourceUsage (
96+ MF, AssumedStackSizeForDynamicSizeObjects,
97+ AssumedStackSizeForExternalCall);
9598
9699 return false ;
97100}
98101
99- AMDGPUResourceUsageAnalysis::SIFunctionResourceInfo
100- AMDGPUResourceUsageAnalysis::analyzeResourceUsage (
// Out-of-line definition of the analysis' static Key member.
102+ AnalysisKey AMDGPUResourceUsageAnalysis::Key;
// Entry point of AMDGPUResourceUsageAnalysis for a single machine function.
//
// Chooses the two stack-size assumptions for MF and returns the result of
// AMDGPUResourceUsageAnalysisImpl::analyzeResourceUsage called with them.
//
//   MF   - the machine function to analyze.
//   MFAM - the machine-function analysis manager; not referenced in this body.
//
// Both assumptions start from the clAssumedStackSizeForDynamicSizeObjects and
// clAssumedStackSizeForExternalCall command-line options. When the module's
// AMDHSA code object version is at least AMDHSA_COV5, or the target OS is
// AMDPAL, each assumption is reset to 0 unless its option has a non-zero
// occurrence count.
103+ AMDGPUResourceUsageAnalysis::Result
104+ AMDGPUResourceUsageAnalysis::run (MachineFunction &MF,
105+ MachineFunctionAnalysisManager &MFAM) {
// NOTE(review): TM is not declared in this function; presumably a member
// holding the target machine — confirm in the class declaration.
106+ const MCSubtargetInfo &STI = *TM.getMCSubtargetInfo ();
107+
108+ // By default, for code object v5 and later, track only the minimum scratch
109+ // size
110+ uint32_t AssumedStackSizeForDynamicSizeObjects =
111+ clAssumedStackSizeForDynamicSizeObjects;
112+ uint32_t AssumedStackSizeForExternalCall = clAssumedStackSizeForExternalCall;
// The code object version is read from the module that contains MF's IR
// function; the AMDPAL check uses the subtarget's target triple.
113+ if (AMDGPU::getAMDHSACodeObjectVersion (*MF.getFunction ().getParent ()) >=
114+ AMDGPU::AMDHSA_COV5 ||
115+ STI.getTargetTriple ().getOS () == Triple::AMDPAL) {
// A zero occurrence count presumably means the option was not given
// explicitly, so an explicit user value is never overridden — confirm
// against the LLVM CommandLine documentation.
116+ if (!clAssumedStackSizeForDynamicSizeObjects.getNumOccurrences ())
117+ AssumedStackSizeForDynamicSizeObjects = 0 ;
118+ if (!clAssumedStackSizeForExternalCall.getNumOccurrences ())
119+ AssumedStackSizeForExternalCall = 0 ;
120+ }
121+
// The analysis itself is delegated to a temporary
// AMDGPUResourceUsageAnalysisImpl object.
122+ return AMDGPUResourceUsageAnalysisImpl ().analyzeResourceUsage (
123+ MF, AssumedStackSizeForDynamicSizeObjects,
124+ AssumedStackSizeForExternalCall);
125+ }
126+
127+ AMDGPUResourceUsageAnalysisImpl::SIFunctionResourceInfo
128+ AMDGPUResourceUsageAnalysisImpl::analyzeResourceUsage (
101129 const MachineFunction &MF, uint32_t AssumedStackSizeForDynamicSizeObjects,
102130 uint32_t AssumedStackSizeForExternalCall) const {
103131 SIFunctionResourceInfo Info;
0 commit comments