Skip to content

Commit 88005df

Browse files
committed
Revert "Revert "Reland "RegisterCoalescer: Add implicit-def of super register when coalescing SUBREG_TO_REG" (#134408)""
This reverts commit ed5bd23.
1 parent 456b051 commit 88005df

24 files changed

+1417
-72
lines changed

llvm/lib/CodeGen/RegisterCoalescer.cpp

Lines changed: 164 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -307,7 +307,12 @@ class RegisterCoalescer : private LiveRangeEdit::Delegate {
307307
/// number if it is not zero. If DstReg is a physical register and the
308308
/// existing subregister number of the def / use being updated is not zero,
309309
/// make sure to set it to the correct physical subregister.
310-
void updateRegDefsUses(Register SrcReg, Register DstReg, unsigned SubIdx);
310+
///
311+
/// If \p SubregToRegSrcInst is not empty, we are coalescing a
312+
/// `DstReg = SUBREG_TO_REG SrcReg`, which should introduce an
313+
/// implicit-def of DstReg on instructions that define SrcReg.
314+
void updateRegDefsUses(Register SrcReg, Register DstReg, unsigned SubIdx,
315+
ArrayRef<MachineInstr *> SubregToRegSrcInst = {});
311316

312317
/// If the given machine operand reads only undefined lanes add an undef
313318
/// flag.
@@ -1444,6 +1449,7 @@ bool RegisterCoalescer::reMaterializeDef(const CoalescerPair &CP,
14441449

14451450
// CopyMI may have implicit operands, save them so that we can transfer them
14461451
// over to the newly materialized instruction after CopyMI is removed.
1452+
LaneBitmask NewMIImplicitOpsMask;
14471453
SmallVector<MachineOperand, 4> ImplicitOps;
14481454
ImplicitOps.reserve(CopyMI->getNumOperands() -
14491455
CopyMI->getDesc().getNumOperands());
@@ -1458,6 +1464,9 @@ bool RegisterCoalescer::reMaterializeDef(const CoalescerPair &CP,
14581464
(MO.getSubReg() == 0 && MO.getReg() == DstOperand.getReg())) &&
14591465
"unexpected implicit virtual register def");
14601466
ImplicitOps.push_back(MO);
1467+
if (MO.isDef() && MO.getReg().isVirtual() &&
1468+
MRI->shouldTrackSubRegLiveness(DstReg))
1469+
NewMIImplicitOpsMask |= MRI->getMaxLaneMaskForVReg(MO.getReg());
14611470
}
14621471
}
14631472

@@ -1494,14 +1503,11 @@ bool RegisterCoalescer::reMaterializeDef(const CoalescerPair &CP,
14941503
} else {
14951504
assert(MO.getReg() == NewMI.getOperand(0).getReg());
14961505

1497-
// We're only expecting another def of the main output, so the range
1498-
// should get updated with the regular output range.
1499-
//
1500-
// FIXME: The range updating below probably needs updating to look at
1501-
// the super register if subranges are tracked.
1502-
assert(!MRI->shouldTrackSubRegLiveness(DstReg) &&
1503-
"subrange update for implicit-def of super register may not be "
1504-
"properly handled");
1506+
// If lanemasks need to be tracked, compile the lanemask of the NewMI
1507+
// implicit def operands to avoid subranges for the super-regs from
1508+
// being removed by code later on in this function.
1509+
if (MRI->shouldTrackSubRegLiveness(MO.getReg()))
1510+
NewMIImplicitOpsMask |= MRI->getMaxLaneMaskForVReg(MO.getReg());
15051511
}
15061512
}
15071513
}
@@ -1617,7 +1623,8 @@ bool RegisterCoalescer::reMaterializeDef(const CoalescerPair &CP,
16171623
*LIS->getSlotIndexes(), *TRI);
16181624

16191625
for (LiveInterval::SubRange &SR : DstInt.subranges()) {
1620-
if ((SR.LaneMask & DstMask).none()) {
1626+
if ((SR.LaneMask & DstMask).none() &&
1627+
(SR.LaneMask & NewMIImplicitOpsMask).none()) {
16211628
LLVM_DEBUG(dbgs()
16221629
<< "Removing undefined SubRange "
16231630
<< PrintLaneMask(SR.LaneMask) << " : " << SR << "\n");
@@ -1891,11 +1898,14 @@ void RegisterCoalescer::addUndefFlag(const LiveInterval &Int, SlotIndex UseIdx,
18911898
}
18921899
}
18931900

1894-
void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg,
1895-
unsigned SubIdx) {
1901+
void RegisterCoalescer::updateRegDefsUses(
1902+
Register SrcReg, Register DstReg, unsigned SubIdx,
1903+
ArrayRef<MachineInstr *> SubregToRegSrcInsts) {
18961904
bool DstIsPhys = DstReg.isPhysical();
18971905
LiveInterval *DstInt = DstIsPhys ? nullptr : &LIS->getInterval(DstReg);
18981906

1907+
// Coalescing a COPY may expose reads of 'undef' subregisters.
1908+
// If so, then explicitly propagate 'undef' to those operands.
18991909
if (DstInt && DstInt->hasSubRanges() && DstReg != SrcReg) {
19001910
for (MachineOperand &MO : MRI->reg_operands(DstReg)) {
19011911
if (MO.isUndef())
@@ -1912,6 +1922,15 @@ void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg,
19121922
}
19131923
}
19141924

1925+
// If DstInt already has a subrange for the unused lanes, then we shouldn't
1926+
// create duplicate subranges when we update the interval for unused lanes.
1927+
LaneBitmask DstIntLaneMask;
1928+
if (DstInt && MRI->shouldTrackSubRegLiveness(DstReg)) {
1929+
for (LiveInterval::SubRange &SR : DstInt->subranges())
1930+
DstIntLaneMask |= SR.LaneMask;
1931+
}
1932+
1933+
// Go through all instructions to replace uses of 'SrcReg' by 'DstReg'.
19151934
SmallPtrSet<MachineInstr *, 8> Visited;
19161935
for (MachineRegisterInfo::reg_instr_iterator I = MRI->reg_instr_begin(SrcReg),
19171936
E = MRI->reg_instr_end();
@@ -1935,6 +1954,80 @@ void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg,
19351954
if (DstInt && !Reads && SubIdx && !UseMI->isDebugInstr())
19361955
Reads = DstInt->liveAt(LIS->getInstructionIndex(*UseMI));
19371956

1957+
bool RequiresImplicitRedef = false;
1958+
if (!SubregToRegSrcInsts.empty()) {
1959+
// We can only add an implicit-def and undef if the sub registers match,
1960+
// e.g.
1961+
// %0:gr32 = INSTX
1962+
// %0.sub8:gr32 = INSTY // top 24 bits of %0 still defined
1963+
// %1:gr64 = SUBREG_TO_REG 0, %0, %subreg.sub32
1964+
//
1965+
// This cannot be transformed into:
1966+
// %1.sub32:gr64 = INSTX
1967+
// undef %1.sub8:gr64 = INSTY , implicit-def %1
1968+
//
1969+
// Because that would thrash the top 24 bits of %1.sub32.
1970+
if (is_contained(SubregToRegSrcInsts, UseMI) &&
1971+
all_of(UseMI->defs(),
1972+
[&SubIdx, &SrcReg](const MachineOperand &MO) -> bool {
1973+
if (MO.getReg() != SrcReg || !MO.getSubReg() || MO.isUndef())
1974+
return true;
1975+
return SubIdx == MO.getSubReg();
1976+
})) {
1977+
// Add implicit-def of super-register to express that the whole
1978+
// register is defined by the instruction.
1979+
MachineInstrBuilder MIB(*MF, UseMI);
1980+
MIB.addReg(DstReg, RegState::ImplicitDefine);
1981+
RequiresImplicitRedef = true;
1982+
}
1983+
1984+
// If the coalesed instruction doesn't fully define the register, we need
1985+
// to preserve the original super register liveness for SUBREG_TO_REG.
1986+
//
1987+
// We pretended SUBREG_TO_REG was a regular copy for coalescing purposes,
1988+
// but it introduces liveness for other subregisters. Downstream users may
1989+
// have been relying on those bits, so we need to ensure their liveness is
1990+
// captured with a def of other lanes.
1991+
if (DstInt && MRI->shouldTrackSubRegLiveness(DstReg)) {
1992+
// First check if there is sufficient granularity in terms of subranges.
1993+
LaneBitmask DstMask = MRI->getMaxLaneMaskForVReg(DstInt->reg());
1994+
LaneBitmask UsedLanes = TRI->getSubRegIndexLaneMask(SubIdx);
1995+
LaneBitmask UnusedLanes = DstMask & ~UsedLanes;
1996+
if ((UnusedLanes & ~DstIntLaneMask).any()) {
1997+
BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator();
1998+
DstInt->createSubRangeFrom(Allocator, UnusedLanes, *DstInt);
1999+
DstIntLaneMask |= UnusedLanes;
2000+
}
2001+
2002+
// After duplicating the live ranges for the low/hi bits, we
2003+
// need to update the subranges of the DstReg interval such that
2004+
// for a case like this:
2005+
//
2006+
// entry:
2007+
// 16B %1:gpr32 = INSTRUCTION (<=> UseMI)
2008+
// :
2009+
// if.then:
2010+
// 32B %1:gpr32 = MOVIMM32 ..
2011+
// 48B %0:gpr64 = SUBREG_TO_REG 0, %1, sub32
2012+
//
2013+
// Only the MOVIMM32 require a def of the top lanes and any intervals
2014+
// for the top 32-bits of the def at 16B should be removed.
2015+
for (LiveInterval::SubRange &SR : DstInt->subranges()) {
2016+
if (!Writes || RequiresImplicitRedef ||
2017+
(SR.LaneMask & UnusedLanes).none())
2018+
continue;
2019+
2020+
assert((SR.LaneMask & UnusedLanes) == SR.LaneMask &&
2021+
"Unexpected lanemask. Subrange needs finer granularity");
2022+
2023+
SlotIndex UseIdx = LIS->getInstructionIndex(*UseMI).getRegSlot(false);
2024+
auto SegmentI = SR.find(UseIdx);
2025+
if (SegmentI != SR.end())
2026+
SR.removeSegment(SegmentI, true);
2027+
}
2028+
}
2029+
}
2030+
19382031
// Replace SrcReg with DstReg in all UseMI operands.
19392032
for (unsigned Op : Ops) {
19402033
MachineOperand &MO = UseMI->getOperand(Op);
@@ -1943,7 +2036,7 @@ void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg,
19432036
// turn a full def into a read-modify-write sub-register def and vice
19442037
// versa.
19452038
if (SubIdx && MO.isDef())
1946-
MO.setIsUndef(!Reads);
2039+
MO.setIsUndef(!Reads || RequiresImplicitRedef);
19472040

19482041
// A subreg use of a partially undef (super) register may be a complete
19492042
// undef use now and then has to be marked that way.
@@ -2046,6 +2139,30 @@ void RegisterCoalescer::setUndefOnPrunedSubRegUses(LiveInterval &LI,
20462139
LIS->shrinkToUses(&LI);
20472140
}
20482141

2142+
/// For a given use of value \p Idx, it returns the def in the current block,
2143+
/// or otherwise all possible defs in preceding blocks.
2144+
static bool FindDefInBlock(SmallPtrSetImpl<MachineBasicBlock *> &VisitedBlocks,
2145+
SmallVector<MachineInstr *> &Instrs,
2146+
LiveIntervals *LIS, LiveInterval &SrcInt,
2147+
MachineBasicBlock *MBB, VNInfo *Idx) {
2148+
if (!Idx->isPHIDef()) {
2149+
MachineInstr *Def = LIS->getInstructionFromIndex(Idx->def);
2150+
assert(Def && "Unable to find a def for SUBREG_TO_REG source operand");
2151+
Instrs.push_back(Def);
2152+
return true;
2153+
}
2154+
2155+
bool Any = false;
2156+
if (VisitedBlocks.count(MBB))
2157+
return false;
2158+
VisitedBlocks.insert(MBB);
2159+
for (MachineBasicBlock *Pred : MBB->predecessors()) {
2160+
Any |= FindDefInBlock(VisitedBlocks, Instrs, LIS, SrcInt, Pred,
2161+
SrcInt.getVNInfoBefore(LIS->getMBBEndIdx(Pred)));
2162+
}
2163+
return Any;
2164+
}
2165+
20492166
bool RegisterCoalescer::joinCopy(
20502167
MachineInstr *CopyMI, bool &Again,
20512168
SmallPtrSetImpl<MachineInstr *> &CurrentErasedInstrs) {
@@ -2183,6 +2300,35 @@ bool RegisterCoalescer::joinCopy(
21832300
});
21842301
}
21852302

2303+
SmallVector<MachineInstr *> SubregToRegSrcInsts;
2304+
if (CopyMI->isSubregToReg()) {
2305+
// For the case where the copy instruction is a SUBREG_TO_REG, e.g.
2306+
//
2307+
// %0:gpr32 = movimm32 ..
2308+
// %1:gpr64 = SUBREG_TO_REG 0, %0, sub32
2309+
// ...
2310+
// %0:gpr32 = COPY <something>
2311+
//
2312+
// After joining liveranges, the original `movimm32` will need an
2313+
// implicit-def to make it explicit that the entire register is written,
2314+
// i.e.
2315+
//
2316+
// undef %0.sub32:gpr64 = movimm32 ..., implicit-def %0
2317+
// ...
2318+
// undef %0.sub32:gpr64 = COPY <something> // Note that this does not
2319+
// // require an implicit-def,
2320+
// // because it has nothing to
2321+
// // do with the SUBREG_TO_REG.
2322+
LiveInterval &SrcInt =
2323+
LIS->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg());
2324+
SlotIndex SubregToRegSlotIdx = LIS->getInstructionIndex(*CopyMI);
2325+
SmallPtrSet<MachineBasicBlock *, 8> VisitedBlocks;
2326+
if (!FindDefInBlock(VisitedBlocks, SubregToRegSrcInsts, LIS, SrcInt,
2327+
CopyMI->getParent(),
2328+
SrcInt.Query(SubregToRegSlotIdx).valueIn()))
2329+
llvm_unreachable("SUBREG_TO_REG src requires a def");
2330+
}
2331+
21862332
ShrinkMask = LaneBitmask::getNone();
21872333
ShrinkMainRange = false;
21882334

@@ -2251,9 +2397,12 @@ bool RegisterCoalescer::joinCopy(
22512397

22522398
// Rewrite all SrcReg operands to DstReg.
22532399
// Also update DstReg operands to include DstIdx if it is set.
2254-
if (CP.getDstIdx())
2400+
if (CP.getDstIdx()) {
2401+
assert(SubregToRegSrcInsts.empty() && "can this happen?");
22552402
updateRegDefsUses(CP.getDstReg(), CP.getDstReg(), CP.getDstIdx());
2256-
updateRegDefsUses(CP.getSrcReg(), CP.getDstReg(), CP.getSrcIdx());
2403+
}
2404+
updateRegDefsUses(CP.getSrcReg(), CP.getDstReg(), CP.getSrcIdx(),
2405+
SubregToRegSrcInsts);
22572406

22582407
// Shrink subregister ranges if necessary.
22592408
if (ShrinkMask.any()) {

0 commit comments

Comments
 (0)