Skip to content

Commit 0aa4349

Browse files
sdesmalen-armgithub-actions[bot]
authored andcommitted
Automerge: Reland "RegisterCoalescer: Add implicit-def of super register when coalescing SUBREG_TO_REG"
A SUBREG_TO_REG instruction expresses that the top bits of the result register are set to a certain value (e.g. 0). The example below expresses that the result of %1 will have the top 32 bits zeroed and the lower 32bits being equal to the result of INSTR. ``` %0:gpr32 = INSTR %1:gpr64 = SUBREG_TO_REG 0, %0, sub32 ``` When the RegisterCoalescer tries to remove SUBREG_TO_REG instructions by coalescing %0 into %1, it must keep the same semantics. Currently however, the RegisterCoalescer would emit: ``` %1.sub32:gpr64 = INSTR ``` which no longer expresses that the top 32-bits of the register are defined (zeroed) by INSTR. This may cause issues with e.g. machine copy propagation where the pass may think it can remove a COPY-like instruction because the MIR says only the bottom 32-bits are defined/used, even though other uses of the register rely on the top 32-bits being zeroed by the COPY-like instruction. This PR changes the RegisterCoalescer to instead emit: ``` undef %1.sub32:gpr64 = MOVimm32 42, implicit-def %1 ``` to express that the entire contents of %1:gpr64 are defined by the instruction. This tries to reland #134408 which had to be reverted due to a few reported failures.
2 parents 0c6407c + bb78728 commit 0aa4349

31 files changed

+1823
-79
lines changed

llvm/lib/CodeGen/RegisterCoalescer.cpp

Lines changed: 171 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -307,7 +307,13 @@ class RegisterCoalescer : private LiveRangeEdit::Delegate {
307307
/// number if it is not zero. If DstReg is a physical register and the
308308
/// existing subregister number of the def / use being updated is not zero,
309309
/// make sure to set it to the correct physical subregister.
310-
void updateRegDefsUses(Register SrcReg, Register DstReg, unsigned SubIdx);
310+
///
311+
/// If \p SubregToRegSrcInsts is not empty, we are coalescing a
312+
/// `DstReg = SUBREG_TO_REG SrcReg`, which should introduce an
313+
/// implicit-def of DstReg on instructions that define SrcReg.
314+
void updateRegDefsUses(
315+
Register SrcReg, Register DstReg, unsigned SubIdx,
316+
SmallPtrSetImpl<MachineInstr *> *SubregToRegSrcInsts = nullptr);
311317

312318
/// If the given machine operand reads only undefined lanes add an undef
313319
/// flag.
@@ -1444,6 +1450,7 @@ bool RegisterCoalescer::reMaterializeDef(const CoalescerPair &CP,
14441450

14451451
// CopyMI may have implicit operands, save them so that we can transfer them
14461452
// over to the newly materialized instruction after CopyMI is removed.
1453+
LaneBitmask NewMIImplicitOpsMask;
14471454
SmallVector<MachineOperand, 4> ImplicitOps;
14481455
ImplicitOps.reserve(CopyMI->getNumOperands() -
14491456
CopyMI->getDesc().getNumOperands());
@@ -1458,6 +1465,9 @@ bool RegisterCoalescer::reMaterializeDef(const CoalescerPair &CP,
14581465
(MO.getSubReg() == 0 && MO.getReg() == DstOperand.getReg())) &&
14591466
"unexpected implicit virtual register def");
14601467
ImplicitOps.push_back(MO);
1468+
if (MO.isDef() && MO.getReg().isVirtual() &&
1469+
MRI->shouldTrackSubRegLiveness(DstReg))
1470+
NewMIImplicitOpsMask |= MRI->getMaxLaneMaskForVReg(MO.getReg());
14611471
}
14621472
}
14631473

@@ -1494,14 +1504,11 @@ bool RegisterCoalescer::reMaterializeDef(const CoalescerPair &CP,
14941504
} else {
14951505
assert(MO.getReg() == NewMI.getOperand(0).getReg());
14961506

1497-
// We're only expecting another def of the main output, so the range
1498-
// should get updated with the regular output range.
1499-
//
1500-
// FIXME: The range updating below probably needs updating to look at
1501-
// the super register if subranges are tracked.
1502-
assert(!MRI->shouldTrackSubRegLiveness(DstReg) &&
1503-
"subrange update for implicit-def of super register may not be "
1504-
"properly handled");
1507+
// If lanemasks need to be tracked, compile the lanemask of the NewMI
1508+
// implicit def operands to avoid subranges for the super-regs from
1509+
// being removed by code later on in this function.
1510+
if (MRI->shouldTrackSubRegLiveness(MO.getReg()))
1511+
NewMIImplicitOpsMask |= MRI->getMaxLaneMaskForVReg(MO.getReg());
15051512
}
15061513
}
15071514
}
@@ -1617,7 +1624,8 @@ bool RegisterCoalescer::reMaterializeDef(const CoalescerPair &CP,
16171624
*LIS->getSlotIndexes(), *TRI);
16181625

16191626
for (LiveInterval::SubRange &SR : DstInt.subranges()) {
1620-
if ((SR.LaneMask & DstMask).none()) {
1627+
if ((SR.LaneMask & DstMask).none() &&
1628+
(SR.LaneMask & NewMIImplicitOpsMask).none()) {
16211629
LLVM_DEBUG(dbgs()
16221630
<< "Removing undefined SubRange "
16231631
<< PrintLaneMask(SR.LaneMask) << " : " << SR << "\n");
@@ -1891,11 +1899,14 @@ void RegisterCoalescer::addUndefFlag(const LiveInterval &Int, SlotIndex UseIdx,
18911899
}
18921900
}
18931901

1894-
void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg,
1895-
unsigned SubIdx) {
1902+
void RegisterCoalescer::updateRegDefsUses(
1903+
Register SrcReg, Register DstReg, unsigned SubIdx,
1904+
SmallPtrSetImpl<MachineInstr *> *SubregToRegSrcInsts) {
18961905
bool DstIsPhys = DstReg.isPhysical();
18971906
LiveInterval *DstInt = DstIsPhys ? nullptr : &LIS->getInterval(DstReg);
18981907

1908+
// Coalescing a COPY may expose reads of 'undef' subregisters.
1909+
// If so, then explicitly propagate 'undef' to those operands.
18991910
if (DstInt && DstInt->hasSubRanges() && DstReg != SrcReg) {
19001911
for (MachineOperand &MO : MRI->reg_operands(DstReg)) {
19011912
if (MO.isUndef())
@@ -1912,6 +1923,15 @@ void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg,
19121923
}
19131924
}
19141925

1926+
// If DstInt already has a subrange for the unused lanes, then we shouldn't
1927+
// create duplicate subranges when we update the interval for unused lanes.
1928+
LaneBitmask DstIntLaneMask;
1929+
if (DstInt && MRI->shouldTrackSubRegLiveness(DstReg)) {
1930+
for (LiveInterval::SubRange &SR : DstInt->subranges())
1931+
DstIntLaneMask |= SR.LaneMask;
1932+
}
1933+
1934+
// Go through all instructions to replace uses of 'SrcReg' by 'DstReg'.
19151935
SmallPtrSet<MachineInstr *, 8> Visited;
19161936
for (MachineRegisterInfo::reg_instr_iterator I = MRI->reg_instr_begin(SrcReg),
19171937
E = MRI->reg_instr_end();
@@ -1935,6 +1955,82 @@ void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg,
19351955
if (DstInt && !Reads && SubIdx && !UseMI->isDebugInstr())
19361956
Reads = DstInt->liveAt(LIS->getInstructionIndex(*UseMI));
19371957

1958+
bool RequiresImplicitRedef = false;
1959+
if (SubregToRegSrcInsts && !SubregToRegSrcInsts->empty()) {
1960+
// We can only add an implicit-def and undef if the sub registers match,
1961+
// e.g.
1962+
// %0:gr32 = INSTX
1963+
// %0.sub8:gr32 = INSTY // top 24 bits of %0 still defined
1964+
// %1:gr64 = SUBREG_TO_REG 0, %0, %subreg.sub32
1965+
//
1966+
// This cannot be transformed into:
1967+
// %1.sub32:gr64 = INSTX
1968+
// undef %1.sub8:gr64 = INSTY , implicit-def %1
1969+
//
1970+
// because the undef means that none of the bits of %1 are read, thus
1971+
// thrashing the top 24 bits of %1.sub32.
1972+
if (SubregToRegSrcInsts->contains(UseMI) &&
1973+
all_of(UseMI->all_defs(),
1974+
[&SubIdx, &SrcReg](const MachineOperand &MO) -> bool {
1975+
if (MO.getReg() != SrcReg) // Ignore unrelated registers
1976+
return true;
1977+
return MO.isUndef() ||
1978+
(SubIdx &&
1979+
(!MO.getSubReg() || SubIdx == MO.getSubReg()));
1980+
})) {
1981+
// Add implicit-def of super-register to express that the whole
1982+
// register is defined by the instruction.
1983+
UseMI->addRegisterDefined(DstReg);
1984+
RequiresImplicitRedef = true;
1985+
}
1986+
1987+
// If the coalesed instruction doesn't fully define the register, we need
1988+
// to preserve the original super register liveness for SUBREG_TO_REG.
1989+
//
1990+
// We pretended SUBREG_TO_REG was a regular copy for coalescing purposes,
1991+
// but it introduces liveness for other subregisters. Downstream users may
1992+
// have been relying on those bits, so we need to ensure their liveness is
1993+
// captured with a def of other lanes.
1994+
if (DstInt && MRI->shouldTrackSubRegLiveness(DstReg)) {
1995+
// First check if there is sufficient granularity in terms of subranges.
1996+
LaneBitmask DstMask = MRI->getMaxLaneMaskForVReg(DstInt->reg());
1997+
LaneBitmask UsedLanes = TRI->getSubRegIndexLaneMask(SubIdx);
1998+
LaneBitmask UnusedLanes = DstMask & ~UsedLanes;
1999+
if ((UnusedLanes & ~DstIntLaneMask).any()) {
2000+
BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator();
2001+
DstInt->createSubRangeFrom(Allocator, UnusedLanes, *DstInt);
2002+
DstIntLaneMask |= UnusedLanes;
2003+
}
2004+
2005+
// After duplicating the live ranges for the low/hi bits, we
2006+
// need to update the subranges of the DstReg interval such that
2007+
// for a case like this:
2008+
//
2009+
// entry:
2010+
// 16B %1:gpr32 = INSTRUCTION (<=> UseMI)
2011+
// :
2012+
// if.then:
2013+
// 32B %1:gpr32 = MOVIMM32 ..
2014+
// 48B %0:gpr64 = SUBREG_TO_REG 0, %1, sub32
2015+
//
2016+
// Only the MOVIMM32 requires a def of the top lanes and any intervals
2017+
// for the top 32-bits of the def at 16B should be removed.
2018+
for (LiveInterval::SubRange &SR : DstInt->subranges()) {
2019+
if (!Writes || RequiresImplicitRedef ||
2020+
(SR.LaneMask & UnusedLanes).none())
2021+
continue;
2022+
2023+
assert((SR.LaneMask & UnusedLanes) == SR.LaneMask &&
2024+
"Unexpected lanemask. Subrange needs finer granularity");
2025+
2026+
SlotIndex UseIdx = LIS->getInstructionIndex(*UseMI).getRegSlot();
2027+
auto SegmentI = SR.find(UseIdx);
2028+
if (SegmentI != SR.end())
2029+
SR.removeSegment(SegmentI, true);
2030+
}
2031+
}
2032+
}
2033+
19382034
// Replace SrcReg with DstReg in all UseMI operands.
19392035
for (unsigned Op : Ops) {
19402036
MachineOperand &MO = UseMI->getOperand(Op);
@@ -1943,7 +2039,7 @@ void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg,
19432039
// turn a full def into a read-modify-write sub-register def and vice
19442040
// versa.
19452041
if (SubIdx && MO.isDef())
1946-
MO.setIsUndef(!Reads);
2042+
MO.setIsUndef(!Reads || RequiresImplicitRedef);
19472043

19482044
// A subreg use of a partially undef (super) register may be a complete
19492045
// undef use now and then has to be marked that way.
@@ -2046,6 +2142,38 @@ void RegisterCoalescer::setUndefOnPrunedSubRegUses(LiveInterval &LI,
20462142
LIS->shrinkToUses(&LI);
20472143
}
20482144

2145+
/// For a given use of value \p Idx, it returns the def in the current block,
2146+
/// or otherwise all possible defs in preceding blocks.
2147+
static bool findPrecedingDefs(SmallPtrSetImpl<MachineInstr *> &Instrs,
2148+
LiveIntervals *LIS, LiveInterval &SrcInt,
2149+
MachineBasicBlock *MBB, VNInfo *Idx) {
2150+
auto IsPrecedingDef = [&](VNInfo *Idx) -> bool {
2151+
if (Idx->isPHIDef())
2152+
return false;
2153+
MachineInstr *Def = LIS->getInstructionFromIndex(Idx->def);
2154+
assert(Def && "Unable to find a def for SUBREG_TO_REG source operand");
2155+
Instrs.insert(Def);
2156+
return true;
2157+
};
2158+
2159+
if (IsPrecedingDef(Idx))
2160+
return true;
2161+
2162+
SmallVector<MachineBasicBlock *> Worklist(MBB->pred_begin(), MBB->pred_end());
2163+
SmallPtrSet<MachineBasicBlock *, 8> VisitedBlocks;
2164+
while (!Worklist.empty()) {
2165+
MachineBasicBlock *MBB = Worklist.pop_back_val();
2166+
auto [_, Inserted] = VisitedBlocks.insert(MBB);
2167+
if (!Inserted)
2168+
continue;
2169+
VNInfo *Idx = SrcInt.getVNInfoBefore(LIS->getMBBEndIdx(MBB));
2170+
if (!IsPrecedingDef(Idx))
2171+
Worklist.append(MBB->pred_begin(), MBB->pred_end());
2172+
}
2173+
2174+
return !Instrs.empty();
2175+
}
2176+
20492177
bool RegisterCoalescer::joinCopy(
20502178
MachineInstr *CopyMI, bool &Again,
20512179
SmallPtrSetImpl<MachineInstr *> &CurrentErasedInstrs) {
@@ -2183,6 +2311,34 @@ bool RegisterCoalescer::joinCopy(
21832311
});
21842312
}
21852313

2314+
SmallPtrSet<MachineInstr *, 4> SubregToRegSrcInsts;
2315+
Register SrcReg = CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg();
2316+
if (CopyMI->isSubregToReg() && !SrcReg.isPhysical()) {
2317+
// For the case where the copy instruction is a SUBREG_TO_REG, e.g.
2318+
//
2319+
// %0:gpr32 = movimm32 ..
2320+
// %1:gpr64 = SUBREG_TO_REG 0, %0, sub32
2321+
// ...
2322+
// %0:gpr32 = COPY <something>
2323+
//
2324+
// After joining liveranges, the original `movimm32` will need an
2325+
// implicit-def to make it explicit that the entire register is written,
2326+
// i.e.
2327+
//
2328+
// undef %0.sub32:gpr64 = movimm32 ..., implicit-def %0
2329+
// ...
2330+
// undef %0.sub32:gpr64 = COPY <something> // Note that this does not
2331+
// // require an implicit-def,
2332+
// // because it has nothing to
2333+
// // do with the SUBREG_TO_REG.
2334+
LiveInterval &SrcInt = LIS->getInterval(SrcReg);
2335+
SlotIndex SubregToRegSlotIdx = LIS->getInstructionIndex(*CopyMI);
2336+
if (!findPrecedingDefs(SubregToRegSrcInsts, LIS, SrcInt,
2337+
CopyMI->getParent(),
2338+
SrcInt.Query(SubregToRegSlotIdx).valueIn()))
2339+
llvm_unreachable("SUBREG_TO_REG src requires a def");
2340+
}
2341+
21862342
ShrinkMask = LaneBitmask::getNone();
21872343
ShrinkMainRange = false;
21882344

@@ -2253,7 +2409,8 @@ bool RegisterCoalescer::joinCopy(
22532409
// Also update DstReg operands to include DstIdx if it is set.
22542410
if (CP.getDstIdx())
22552411
updateRegDefsUses(CP.getDstReg(), CP.getDstReg(), CP.getDstIdx());
2256-
updateRegDefsUses(CP.getSrcReg(), CP.getDstReg(), CP.getSrcIdx());
2412+
updateRegDefsUses(CP.getSrcReg(), CP.getDstReg(), CP.getSrcIdx(),
2413+
&SubregToRegSrcInsts);
22572414

22582415
// Shrink subregister ranges if necessary.
22592416
if (ShrinkMask.any()) {

llvm/lib/CodeGen/SplitKit.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -448,7 +448,7 @@ void SplitEditor::addDeadDef(LiveInterval &LI, VNInfo *VNI, bool Original) {
448448
const MachineInstr *DefMI = LIS.getInstructionFromIndex(Def);
449449
assert(DefMI != nullptr);
450450
LaneBitmask LM;
451-
for (const MachineOperand &DefOp : DefMI->defs()) {
451+
for (const MachineOperand &DefOp : DefMI->all_defs()) {
452452
Register R = DefOp.getReg();
453453
if (R != LI.reg())
454454
continue;

0 commit comments

Comments
 (0)