Skip to content

Commit d27a6de

Browse files
committed
[AMDGPU] Use correct SlotIndex to calculate live-out register set.
SlotIndexes::getMBBEndIdx() is not the correct index for computing a block's live-out register set: it returns the start index of the next basic block, so the live-ins of the next block are calculated instead of the intended live-outs of the current block. Add a SlotIndexes::getMBBLastIdx() method that returns the last valid SlotIndex within a basic block, enabling correct live-out calculations.
1 parent 2512611 commit d27a6de

File tree

4 files changed

+45
-33
lines changed

4 files changed

+45
-33
lines changed

llvm/include/llvm/CodeGen/SlotIndexes.h

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -467,16 +467,29 @@ class raw_ostream;
467467
return getMBBRange(mbb).first;
468468
}
469469

470-
/// Returns the last index in the given basic block number.
470+
/// Returns the index past the last valid index in the given basic block.
471471
SlotIndex getMBBEndIdx(unsigned Num) const {
472472
return getMBBRange(Num).second;
473473
}
474474

475-
/// Returns the last index in the given basic block.
475+
/// Returns the index past the last valid index in the given basic block.
476476
SlotIndex getMBBEndIdx(const MachineBasicBlock *mbb) const {
477477
return getMBBRange(mbb).second;
478478
}
479479

480+
/// Returns the last valid index in the given basic block.
481+
/// This index corresponds to the dead slot of the last non-debug
482+
/// instruction and can be used to find live-out ranges of the block. Note
483+
/// that getMBBEndIdx returns the start index of the next block, which is
484+
/// also used as the start index for segments with phi-def values. Returns
485+
/// an invalid SlotIndex if the block has no non-debug instructions.
486+
SlotIndex getMBBLastIdx(const MachineBasicBlock *MBB) const {
487+
auto LastInstrI = MBB->getLastNonDebugInstr();
488+
return LastInstrI == MBB->end()
489+
? SlotIndex()
490+
: getInstructionIndex(*LastInstrI).getDeadSlot();
491+
}
492+
480493
/// Iterator over the idx2MBBMap (sorted pairs of slot index of basic block
481494
/// begin and basic block)
482495
using MBBIndexIterator = SmallVectorImpl<IdxMBBPair>::const_iterator;

llvm/lib/Target/AMDGPU/GCNRegPressure.cpp

Lines changed: 20 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -461,6 +461,8 @@ GCNRPTracker::LiveRegSet llvm::getLiveRegs(SlotIndex SI,
461461
const LiveIntervals &LIS,
462462
const MachineRegisterInfo &MRI) {
463463
GCNRPTracker::LiveRegSet LiveRegs;
464+
if (!SI.isValid())
465+
return LiveRegs;
464466
for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
465467
auto Reg = Register::index2VirtReg(I);
466468
if (!LIS.hasInterval(Reg))
@@ -906,32 +908,31 @@ bool GCNRegPressurePrinter::runOnMachineFunction(MachineFunction &MF) {
906908

907909
SlotIndex MBBStartSlot = LIS.getSlotIndexes()->getMBBStartIdx(&MBB);
908910
SlotIndex MBBEndSlot = LIS.getSlotIndexes()->getMBBEndIdx(&MBB);
911+
SlotIndex MBBLastSlot = LIS.getSlotIndexes()->getMBBLastIdx(&MBB);
909912

910913
GCNRPTracker::LiveRegSet LiveIn, LiveOut;
911914
GCNRegPressure RPAtMBBEnd;
912915

913-
if (UseDownwardTracker) {
914-
if (MBB.empty()) {
915-
LiveIn = LiveOut = getLiveRegs(MBBStartSlot, LIS, MRI);
916-
RPAtMBBEnd = getRegPressure(MRI, LiveIn);
917-
} else {
918-
GCNDownwardRPTracker RPT(LIS);
919-
RPT.reset(MBB.front());
916+
if (!MBBLastSlot.isValid()) { // MBB doesn't have any non-debug instrs.
917+
LiveIn = LiveOut = getLiveRegs(MBBStartSlot, LIS, MRI);
918+
RPAtMBBEnd = getRegPressure(MRI, LiveIn);
919+
} else if (UseDownwardTracker) {
920+
GCNDownwardRPTracker RPT(LIS);
921+
RPT.reset(MBB.front());
920922

921-
LiveIn = RPT.getLiveRegs();
922-
923-
while (!RPT.advanceBeforeNext()) {
924-
GCNRegPressure RPBeforeMI = RPT.getPressure();
925-
RPT.advanceToNext();
926-
RP.emplace_back(RPBeforeMI, RPT.getPressure());
927-
}
923+
LiveIn = RPT.getLiveRegs();
928924

929-
LiveOut = RPT.getLiveRegs();
930-
RPAtMBBEnd = RPT.getPressure();
925+
while (!RPT.advanceBeforeNext()) {
926+
GCNRegPressure RPBeforeMI = RPT.getPressure();
927+
RPT.advanceToNext();
928+
RP.emplace_back(RPBeforeMI, RPT.getPressure());
931929
}
930+
931+
LiveOut = RPT.getLiveRegs();
932+
RPAtMBBEnd = RPT.getPressure();
932933
} else {
933934
GCNUpwardRPTracker RPT(LIS);
934-
RPT.reset(MRI, MBBEndSlot);
935+
RPT.reset(MRI, MBBLastSlot);
935936

936937
LiveOut = RPT.getLiveRegs();
937938
RPAtMBBEnd = RPT.getPressure();
@@ -965,8 +966,8 @@ bool GCNRegPressurePrinter::runOnMachineFunction(MachineFunction &MF) {
965966
OS << printRP(RPAtMBBEnd) << '\n';
966967

967968
OS << PFX " Live-out:" << llvm::print(LiveOut, MRI);
968-
if (UseDownwardTracker)
969-
ReportLISMismatchIfAny(LiveOut, getLiveRegs(MBBEndSlot, LIS, MRI));
969+
if (UseDownwardTracker && MBBLastSlot.isValid())
970+
ReportLISMismatchIfAny(LiveOut, getLiveRegs(MBBLastSlot, LIS, MRI));
970971

971972
GCNRPTracker::LiveRegSet LiveThrough;
972973
for (auto [Reg, Mask] : LiveIn) {

llvm/lib/Target/AMDGPU/GCNRegPressure.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -313,8 +313,8 @@ class GCNUpwardRPTracker : public GCNRPTracker {
313313

314314
/// reset tracker to the end of the \p MBB.
315315
void reset(const MachineBasicBlock &MBB) {
316-
reset(MBB.getParent()->getRegInfo(),
317-
LIS.getSlotIndexes()->getMBBEndIdx(&MBB));
316+
SlotIndex MBBLastSlot = LIS.getSlotIndexes()->getMBBLastIdx(&MBB);
317+
reset(MBB.getParent()->getRegInfo(), MBBLastSlot);
318318
}
319319

320320
/// reset tracker to the point just after \p MI (in program order).

llvm/test/CodeGen/AMDGPU/regpressure_printer.mir

Lines changed: 8 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -510,14 +510,14 @@ body: |
510510
; RPU-NEXT: 0 0 $sgpr0 = S_BUFFER_LOAD_DWORD_IMM $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0
511511
; RPU-NEXT: 0 0
512512
; RPU-NEXT: 0 1 undef %0.sub5:vreg_512 = V_MOV_B32_e32 5, implicit $exec
513-
; RPU-NEXT: 0 0
514-
; RPU-NEXT: 0 0 S_CMP_GT_U32 $sgpr0, 15, implicit-def $scc
515-
; RPU-NEXT: 0 0
516-
; RPU-NEXT: 0 0 S_CBRANCH_SCC1 %bb.2, implicit $scc
517-
; RPU-NEXT: 0 0
518-
; RPU-NEXT: 0 0 S_BRANCH %bb.1
519-
; RPU-NEXT: 0 0
520-
; RPU-NEXT: Live-out:
513+
; RPU-NEXT: 0 1
514+
; RPU-NEXT: 0 1 S_CMP_GT_U32 $sgpr0, 15, implicit-def $scc
515+
; RPU-NEXT: 0 1
516+
; RPU-NEXT: 0 1 S_CBRANCH_SCC1 %bb.2, implicit $scc
517+
; RPU-NEXT: 0 1
518+
; RPU-NEXT: 0 1 S_BRANCH %bb.1
519+
; RPU-NEXT: 0 1
520+
; RPU-NEXT: Live-out: %0:0000000000000C00
521521
; RPU-NEXT: Live-thr:
522522
; RPU-NEXT: 0 0
523523
; RPU-NEXT: bb.1:
@@ -571,8 +571,6 @@ body: |
571571
; RPD-NEXT: 0 1 S_BRANCH %bb.1
572572
; RPD-NEXT: 0 1
573573
; RPD-NEXT: Live-out: %0:0000000000000C00
574-
; RPD-NEXT: mis LIS:
575-
; RPD-NEXT: %0:L0000000000000C00 isn't found in LIS reported set
576574
; RPD-NEXT: Live-thr:
577575
; RPD-NEXT: 0 0
578576
; RPD-NEXT: bb.1:

0 commit comments

Comments
 (0)