@@ -307,7 +307,12 @@ class RegisterCoalescer : private LiveRangeEdit::Delegate {
307307 // / number if it is not zero. If DstReg is a physical register and the
308308 // / existing subregister number of the def / use being updated is not zero,
309309 // / make sure to set it to the correct physical subregister.
310- void updateRegDefsUses (Register SrcReg, Register DstReg, unsigned SubIdx);
310+ // /
311+ // / If \p SubregToRegSrcInst is not empty, we are coalescing a
312+ // / `DstReg = SUBREG_TO_REG SrcReg`, which should introduce an
313+ // / implicit-def of DstReg on instructions that define SrcReg.
314+ void updateRegDefsUses (Register SrcReg, Register DstReg, unsigned SubIdx,
315+ ArrayRef<MachineInstr *> SubregToRegSrcInst = {});
311316
312317 // / If the given machine operand reads only undefined lanes add an undef
313318 // / flag.
@@ -1444,6 +1449,7 @@ bool RegisterCoalescer::reMaterializeDef(const CoalescerPair &CP,
14441449
14451450 // CopyMI may have implicit operands, save them so that we can transfer them
14461451 // over to the newly materialized instruction after CopyMI is removed.
1452+ LaneBitmask NewMIImplicitOpsMask;
14471453 SmallVector<MachineOperand, 4 > ImplicitOps;
14481454 ImplicitOps.reserve (CopyMI->getNumOperands () -
14491455 CopyMI->getDesc ().getNumOperands ());
@@ -1458,6 +1464,9 @@ bool RegisterCoalescer::reMaterializeDef(const CoalescerPair &CP,
14581464 (MO.getSubReg () == 0 && MO.getReg () == DstOperand.getReg ())) &&
14591465 " unexpected implicit virtual register def" );
14601466 ImplicitOps.push_back (MO);
1467+ if (MO.isDef () && MO.getReg ().isVirtual () &&
1468+ MRI->shouldTrackSubRegLiveness (DstReg))
1469+ NewMIImplicitOpsMask |= MRI->getMaxLaneMaskForVReg (MO.getReg ());
14611470 }
14621471 }
14631472
@@ -1494,14 +1503,11 @@ bool RegisterCoalescer::reMaterializeDef(const CoalescerPair &CP,
14941503 } else {
14951504 assert (MO.getReg () == NewMI.getOperand (0 ).getReg ());
14961505
1497- // We're only expecting another def of the main output, so the range
1498- // should get updated with the regular output range.
1499- //
1500- // FIXME: The range updating below probably needs updating to look at
1501- // the super register if subranges are tracked.
1502- assert (!MRI->shouldTrackSubRegLiveness (DstReg) &&
1503- " subrange update for implicit-def of super register may not be "
1504- " properly handled" );
1506+ // If lanemasks need to be tracked, compile the lanemask of the NewMI
1507+ // implicit def operands to avoid subranges for the super-regs from
1508+ // being removed by code later on in this function.
1509+ if (MRI->shouldTrackSubRegLiveness (MO.getReg ()))
1510+ NewMIImplicitOpsMask |= MRI->getMaxLaneMaskForVReg (MO.getReg ());
15051511 }
15061512 }
15071513 }
@@ -1617,7 +1623,8 @@ bool RegisterCoalescer::reMaterializeDef(const CoalescerPair &CP,
16171623 *LIS->getSlotIndexes (), *TRI);
16181624
16191625 for (LiveInterval::SubRange &SR : DstInt.subranges ()) {
1620- if ((SR.LaneMask & DstMask).none ()) {
1626+ if ((SR.LaneMask & DstMask).none () &&
1627+ (SR.LaneMask & NewMIImplicitOpsMask).none ()) {
16211628 LLVM_DEBUG (dbgs ()
16221629 << " Removing undefined SubRange "
16231630 << PrintLaneMask (SR.LaneMask ) << " : " << SR << " \n " );
@@ -1891,11 +1898,14 @@ void RegisterCoalescer::addUndefFlag(const LiveInterval &Int, SlotIndex UseIdx,
18911898 }
18921899}
18931900
1894- void RegisterCoalescer::updateRegDefsUses (Register SrcReg, Register DstReg,
1895- unsigned SubIdx) {
1901+ void RegisterCoalescer::updateRegDefsUses (
1902+ Register SrcReg, Register DstReg, unsigned SubIdx,
1903+ ArrayRef<MachineInstr *> SubregToRegSrcInsts) {
18961904 bool DstIsPhys = DstReg.isPhysical ();
18971905 LiveInterval *DstInt = DstIsPhys ? nullptr : &LIS->getInterval (DstReg);
18981906
1907+ // Coalescing a COPY may expose reads of 'undef' subregisters.
1908+ // If so, then explicitly propagate 'undef' to those operands.
18991909 if (DstInt && DstInt->hasSubRanges () && DstReg != SrcReg) {
19001910 for (MachineOperand &MO : MRI->reg_operands (DstReg)) {
19011911 if (MO.isUndef ())
@@ -1912,6 +1922,15 @@ void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg,
19121922 }
19131923 }
19141924
1925+ // If DstInt already has a subrange for the unused lanes, then we shouldn't
1926+ // create duplicate subranges when we update the interval for unused lanes.
1927+ LaneBitmask DstIntLaneMask;
1928+ if (DstInt && MRI->shouldTrackSubRegLiveness (DstReg)) {
1929+ for (LiveInterval::SubRange &SR : DstInt->subranges ())
1930+ DstIntLaneMask |= SR.LaneMask ;
1931+ }
1932+
1933+ // Go through all instructions to replace uses of 'SrcReg' by 'DstReg'.
19151934 SmallPtrSet<MachineInstr *, 8 > Visited;
19161935 for (MachineRegisterInfo::reg_instr_iterator I = MRI->reg_instr_begin (SrcReg),
19171936 E = MRI->reg_instr_end ();
@@ -1935,6 +1954,80 @@ void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg,
19351954 if (DstInt && !Reads && SubIdx && !UseMI->isDebugInstr ())
19361955 Reads = DstInt->liveAt (LIS->getInstructionIndex (*UseMI));
19371956
1957+ bool RequiresImplicitRedef = false ;
1958+ if (!SubregToRegSrcInsts.empty ()) {
1959+ // We can only add an implicit-def and undef if the sub registers match,
1960+ // e.g.
1961+ // %0:gr32 = INSTX
1962+ // %0.sub8:gr32 = INSTY // top 24 bits of %0 still defined
1963+ // %1:gr64 = SUBREG_TO_REG 0, %0, %subreg.sub32
1964+ //
1965+ // This cannot be transformed into:
1966+ // %1.sub32:gr64 = INSTX
1967+ // undef %1.sub8:gr64 = INSTY , implicit-def %1
1968+ //
1969+ // Because that would thrash the top 24 bits of %1.sub32.
1970+ if (is_contained (SubregToRegSrcInsts, UseMI) &&
1971+ all_of (UseMI->defs (),
1972+ [&SubIdx, &SrcReg](const MachineOperand &MO) -> bool {
1973+ if (MO.getReg () != SrcReg || !MO.getSubReg () || MO.isUndef ())
1974+ return true ;
1975+ return SubIdx == MO.getSubReg ();
1976+ })) {
1977+ // Add implicit-def of super-register to express that the whole
1978+ // register is defined by the instruction.
1979+ MachineInstrBuilder MIB (*MF, UseMI);
1980+ MIB.addReg (DstReg, RegState::ImplicitDefine);
1981+ RequiresImplicitRedef = true ;
1982+ }
1983+
1984+ // If the coalesed instruction doesn't fully define the register, we need
1985+ // to preserve the original super register liveness for SUBREG_TO_REG.
1986+ //
1987+ // We pretended SUBREG_TO_REG was a regular copy for coalescing purposes,
1988+ // but it introduces liveness for other subregisters. Downstream users may
1989+ // have been relying on those bits, so we need to ensure their liveness is
1990+ // captured with a def of other lanes.
1991+ if (DstInt && MRI->shouldTrackSubRegLiveness (DstReg)) {
1992+ // First check if there is sufficient granularity in terms of subranges.
1993+ LaneBitmask DstMask = MRI->getMaxLaneMaskForVReg (DstInt->reg ());
1994+ LaneBitmask UsedLanes = TRI->getSubRegIndexLaneMask (SubIdx);
1995+ LaneBitmask UnusedLanes = DstMask & ~UsedLanes;
1996+ if ((UnusedLanes & ~DstIntLaneMask).any ()) {
1997+ BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator ();
1998+ DstInt->createSubRangeFrom (Allocator, UnusedLanes, *DstInt);
1999+ DstIntLaneMask |= UnusedLanes;
2000+ }
2001+
2002+ // After duplicating the live ranges for the low/hi bits, we
2003+ // need to update the subranges of the DstReg interval such that
2004+ // for a case like this:
2005+ //
2006+ // entry:
2007+ // 16B %1:gpr32 = INSTRUCTION (<=> UseMI)
2008+ // :
2009+ // if.then:
2010+ // 32B %1:gpr32 = MOVIMM32 ..
2011+ // 48B %0:gpr64 = SUBREG_TO_REG 0, %1, sub32
2012+ //
2013+ // Only the MOVIMM32 require a def of the top lanes and any intervals
2014+ // for the top 32-bits of the def at 16B should be removed.
2015+ for (LiveInterval::SubRange &SR : DstInt->subranges ()) {
2016+ if (!Writes || RequiresImplicitRedef ||
2017+ (SR.LaneMask & UnusedLanes).none ())
2018+ continue ;
2019+
2020+ assert ((SR.LaneMask & UnusedLanes) == SR.LaneMask &&
2021+ " Unexpected lanemask. Subrange needs finer granularity" );
2022+
2023+ SlotIndex UseIdx = LIS->getInstructionIndex (*UseMI).getRegSlot (false );
2024+ auto SegmentI = SR.find (UseIdx);
2025+ if (SegmentI != SR.end ())
2026+ SR.removeSegment (SegmentI, true );
2027+ }
2028+ }
2029+ }
2030+
19382031 // Replace SrcReg with DstReg in all UseMI operands.
19392032 for (unsigned Op : Ops) {
19402033 MachineOperand &MO = UseMI->getOperand (Op);
@@ -1943,7 +2036,7 @@ void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg,
19432036 // turn a full def into a read-modify-write sub-register def and vice
19442037 // versa.
19452038 if (SubIdx && MO.isDef ())
1946- MO.setIsUndef (!Reads);
2039+ MO.setIsUndef (!Reads || RequiresImplicitRedef );
19472040
19482041 // A subreg use of a partially undef (super) register may be a complete
19492042 // undef use now and then has to be marked that way.
@@ -2046,6 +2139,30 @@ void RegisterCoalescer::setUndefOnPrunedSubRegUses(LiveInterval &LI,
20462139 LIS->shrinkToUses (&LI);
20472140}
20482141
2142+ // / For a given use of value \p Idx, it returns the def in the current block,
2143+ // / or otherwise all possible defs in preceding blocks.
2144+ static bool FindDefInBlock (SmallPtrSetImpl<MachineBasicBlock *> &VisitedBlocks,
2145+ SmallVector<MachineInstr *> &Instrs,
2146+ LiveIntervals *LIS, LiveInterval &SrcInt,
2147+ MachineBasicBlock *MBB, VNInfo *Idx) {
2148+ if (!Idx->isPHIDef ()) {
2149+ MachineInstr *Def = LIS->getInstructionFromIndex (Idx->def );
2150+ assert (Def && " Unable to find a def for SUBREG_TO_REG source operand" );
2151+ Instrs.push_back (Def);
2152+ return true ;
2153+ }
2154+
2155+ bool Any = false ;
2156+ if (VisitedBlocks.count (MBB))
2157+ return false ;
2158+ VisitedBlocks.insert (MBB);
2159+ for (MachineBasicBlock *Pred : MBB->predecessors ()) {
2160+ Any |= FindDefInBlock (VisitedBlocks, Instrs, LIS, SrcInt, Pred,
2161+ SrcInt.getVNInfoBefore (LIS->getMBBEndIdx (Pred)));
2162+ }
2163+ return Any;
2164+ }
2165+
20492166bool RegisterCoalescer::joinCopy (
20502167 MachineInstr *CopyMI, bool &Again,
20512168 SmallPtrSetImpl<MachineInstr *> &CurrentErasedInstrs) {
@@ -2183,6 +2300,35 @@ bool RegisterCoalescer::joinCopy(
21832300 });
21842301 }
21852302
2303+ SmallVector<MachineInstr *> SubregToRegSrcInsts;
2304+ if (CopyMI->isSubregToReg ()) {
2305+ // For the case where the copy instruction is a SUBREG_TO_REG, e.g.
2306+ //
2307+ // %0:gpr32 = movimm32 ..
2308+ // %1:gpr64 = SUBREG_TO_REG 0, %0, sub32
2309+ // ...
2310+ // %0:gpr32 = COPY <something>
2311+ //
2312+ // After joining liveranges, the original `movimm32` will need an
2313+ // implicit-def to make it explicit that the entire register is written,
2314+ // i.e.
2315+ //
2316+ // undef %0.sub32:gpr64 = movimm32 ..., implicit-def %0
2317+ // ...
2318+ // undef %0.sub32:gpr64 = COPY <something> // Note that this does not
2319+ // // require an implicit-def,
2320+ // // because it has nothing to
2321+ // // do with the SUBREG_TO_REG.
2322+ LiveInterval &SrcInt =
2323+ LIS->getInterval (CP.isFlipped () ? CP.getDstReg () : CP.getSrcReg ());
2324+ SlotIndex SubregToRegSlotIdx = LIS->getInstructionIndex (*CopyMI);
2325+ SmallPtrSet<MachineBasicBlock *, 8 > VisitedBlocks;
2326+ if (!FindDefInBlock (VisitedBlocks, SubregToRegSrcInsts, LIS, SrcInt,
2327+ CopyMI->getParent (),
2328+ SrcInt.Query (SubregToRegSlotIdx).valueIn ()))
2329+ llvm_unreachable (" SUBREG_TO_REG src requires a def" );
2330+ }
2331+
21862332 ShrinkMask = LaneBitmask::getNone ();
21872333 ShrinkMainRange = false ;
21882334
@@ -2251,9 +2397,12 @@ bool RegisterCoalescer::joinCopy(
22512397
22522398 // Rewrite all SrcReg operands to DstReg.
22532399 // Also update DstReg operands to include DstIdx if it is set.
2254- if (CP.getDstIdx ())
2400+ if (CP.getDstIdx ()) {
2401+ assert (SubregToRegSrcInsts.empty () && " can this happen?" );
22552402 updateRegDefsUses (CP.getDstReg (), CP.getDstReg (), CP.getDstIdx ());
2256- updateRegDefsUses (CP.getSrcReg (), CP.getDstReg (), CP.getSrcIdx ());
2403+ }
2404+ updateRegDefsUses (CP.getSrcReg (), CP.getDstReg (), CP.getSrcIdx (),
2405+ SubregToRegSrcInsts);
22572406
22582407 // Shrink subregister ranges if necessary.
22592408 if (ShrinkMask.any ()) {
0 commit comments