We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 09be8bf, commit fa7263c. Copy full SHA for fa7263c
llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -2610,8 +2610,8 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) {
2610
// waveslot limited kernel runs slower with the deallocation.
2611
if (!ReleaseVGPRInsts.empty() &&
2612
(MF.getFrameInfo().hasCalls() ||
2613
- AMDGPU::IsaInfo::getTotalNumVGPRs(ST) /
2614
- TRI->getNumUsedPhysRegs(*MRI, AMDGPU::VGPR_32RegClass) <
+ ST->getOccupancyWithNumVGPRs(
+ TRI->getNumUsedPhysRegs(*MRI, AMDGPU::VGPR_32RegClass)) <
2615
AMDGPU::IsaInfo::getMaxWavesPerEU(ST))) {
2616
for (MachineInstr *MI : ReleaseVGPRInsts) {
2617
if (ST->requiresNopBeforeDeallocVGPRs()) {
0 commit comments