@@ -307,7 +307,13 @@ class RegisterCoalescer : private LiveRangeEdit::Delegate {
307307 // / number if it is not zero. If DstReg is a physical register and the
308308 // / existing subregister number of the def / use being updated is not zero,
309309 // / make sure to set it to the correct physical subregister.
310- void updateRegDefsUses (Register SrcReg, Register DstReg, unsigned SubIdx);
310+ // /
311+ // / If \p SubregToRegSrcInsts is not empty, we are coalescing a
312+ // / `DstReg = SUBREG_TO_REG SrcReg`, which should introduce an
313+ // / implicit-def of DstReg on instructions that define SrcReg.
314+ void updateRegDefsUses (
315+ Register SrcReg, Register DstReg, unsigned SubIdx,
316+ SmallPtrSetImpl<MachineInstr *> *SubregToRegSrcInsts = nullptr );
311317
312318 // / If the given machine operand reads only undefined lanes add an undef
313319 // / flag.
@@ -1444,6 +1450,7 @@ bool RegisterCoalescer::reMaterializeDef(const CoalescerPair &CP,
14441450
14451451 // CopyMI may have implicit operands, save them so that we can transfer them
14461452 // over to the newly materialized instruction after CopyMI is removed.
1453+ LaneBitmask NewMIImplicitOpsMask;
14471454 SmallVector<MachineOperand, 4 > ImplicitOps;
14481455 ImplicitOps.reserve (CopyMI->getNumOperands () -
14491456 CopyMI->getDesc ().getNumOperands ());
@@ -1458,6 +1465,9 @@ bool RegisterCoalescer::reMaterializeDef(const CoalescerPair &CP,
14581465 (MO.getSubReg () == 0 && MO.getReg () == DstOperand.getReg ())) &&
14591466 " unexpected implicit virtual register def" );
14601467 ImplicitOps.push_back (MO);
1468+ if (MO.isDef () && MO.getReg ().isVirtual () &&
1469+ MRI->shouldTrackSubRegLiveness (DstReg))
1470+ NewMIImplicitOpsMask |= MRI->getMaxLaneMaskForVReg (MO.getReg ());
14611471 }
14621472 }
14631473
@@ -1494,14 +1504,11 @@ bool RegisterCoalescer::reMaterializeDef(const CoalescerPair &CP,
14941504 } else {
14951505 assert (MO.getReg () == NewMI.getOperand (0 ).getReg ());
14961506
1497- // We're only expecting another def of the main output, so the range
1498- // should get updated with the regular output range.
1499- //
1500- // FIXME: The range updating below probably needs updating to look at
1501- // the super register if subranges are tracked.
1502- assert (!MRI->shouldTrackSubRegLiveness (DstReg) &&
1503- " subrange update for implicit-def of super register may not be "
1504- " properly handled" );
1507+ // If lanemasks need to be tracked, compile the lanemask of the NewMI
1508+ // implicit def operands to avoid subranges for the super-regs from
1509+ // being removed by code later on in this function.
1510+ if (MRI->shouldTrackSubRegLiveness (MO.getReg ()))
1511+ NewMIImplicitOpsMask |= MRI->getMaxLaneMaskForVReg (MO.getReg ());
15051512 }
15061513 }
15071514 }
@@ -1617,7 +1624,8 @@ bool RegisterCoalescer::reMaterializeDef(const CoalescerPair &CP,
16171624 *LIS->getSlotIndexes (), *TRI);
16181625
16191626 for (LiveInterval::SubRange &SR : DstInt.subranges ()) {
1620- if ((SR.LaneMask & DstMask).none ()) {
1627+ if ((SR.LaneMask & DstMask).none () &&
1628+ (SR.LaneMask & NewMIImplicitOpsMask).none ()) {
16211629 LLVM_DEBUG (dbgs ()
16221630 << " Removing undefined SubRange "
16231631 << PrintLaneMask (SR.LaneMask ) << " : " << SR << " \n " );
@@ -1891,11 +1899,14 @@ void RegisterCoalescer::addUndefFlag(const LiveInterval &Int, SlotIndex UseIdx,
18911899 }
18921900}
18931901
1894- void RegisterCoalescer::updateRegDefsUses (Register SrcReg, Register DstReg,
1895- unsigned SubIdx) {
1902+ void RegisterCoalescer::updateRegDefsUses (
1903+ Register SrcReg, Register DstReg, unsigned SubIdx,
1904+ SmallPtrSetImpl<MachineInstr *> *SubregToRegSrcInsts) {
18961905 bool DstIsPhys = DstReg.isPhysical ();
18971906 LiveInterval *DstInt = DstIsPhys ? nullptr : &LIS->getInterval (DstReg);
18981907
1908+ // Coalescing a COPY may expose reads of 'undef' subregisters.
1909+ // If so, then explicitly propagate 'undef' to those operands.
18991910 if (DstInt && DstInt->hasSubRanges () && DstReg != SrcReg) {
19001911 for (MachineOperand &MO : MRI->reg_operands (DstReg)) {
19011912 if (MO.isUndef ())
@@ -1912,6 +1923,15 @@ void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg,
19121923 }
19131924 }
19141925
1926+ // If DstInt already has a subrange for the unused lanes, then we shouldn't
1927+ // create duplicate subranges when we update the interval for unused lanes.
1928+ LaneBitmask DstIntLaneMask;
1929+ if (DstInt && MRI->shouldTrackSubRegLiveness (DstReg)) {
1930+ for (LiveInterval::SubRange &SR : DstInt->subranges ())
1931+ DstIntLaneMask |= SR.LaneMask ;
1932+ }
1933+
1934+ // Go through all instructions to replace uses of 'SrcReg' by 'DstReg'.
19151935 SmallPtrSet<MachineInstr *, 8 > Visited;
19161936 for (MachineRegisterInfo::reg_instr_iterator I = MRI->reg_instr_begin (SrcReg),
19171937 E = MRI->reg_instr_end ();
@@ -1935,6 +1955,82 @@ void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg,
19351955 if (DstInt && !Reads && SubIdx && !UseMI->isDebugInstr ())
19361956 Reads = DstInt->liveAt (LIS->getInstructionIndex (*UseMI));
19371957
1958+ bool RequiresImplicitRedef = false ;
1959+ if (SubregToRegSrcInsts && !SubregToRegSrcInsts->empty ()) {
1960+ // We can only add an implicit-def and undef if the sub registers match,
1961+ // e.g.
1962+ // %0:gr32 = INSTX
1963+ // %0.sub8:gr32 = INSTY // top 24 bits of %0 still defined
1964+ // %1:gr64 = SUBREG_TO_REG 0, %0, %subreg.sub32
1965+ //
1966+ // This cannot be transformed into:
1967+ // %1.sub32:gr64 = INSTX
1968+ // undef %1.sub8:gr64 = INSTY , implicit-def %1
1969+ //
1970+ // because the undef means that none of the bits of %1 are read, thus
1971+ // thrashing the top 24 bits of %1.sub32.
1972+ if (SubregToRegSrcInsts->contains (UseMI) &&
1973+ all_of (UseMI->all_defs (),
1974+ [&SubIdx, &SrcReg](const MachineOperand &MO) -> bool {
1975+ if (MO.getReg () != SrcReg) // Ignore unrelated registers
1976+ return true ;
1977+ return MO.isUndef () ||
1978+ (SubIdx &&
1979+ (!MO.getSubReg () || SubIdx == MO.getSubReg ()));
1980+ })) {
1981+ // Add implicit-def of super-register to express that the whole
1982+ // register is defined by the instruction.
1983+ UseMI->addRegisterDefined (DstReg);
1984+ RequiresImplicitRedef = true ;
1985+ }
1986+
1987+ // If the coalesed instruction doesn't fully define the register, we need
1988+ // to preserve the original super register liveness for SUBREG_TO_REG.
1989+ //
1990+ // We pretended SUBREG_TO_REG was a regular copy for coalescing purposes,
1991+ // but it introduces liveness for other subregisters. Downstream users may
1992+ // have been relying on those bits, so we need to ensure their liveness is
1993+ // captured with a def of other lanes.
1994+ if (DstInt && MRI->shouldTrackSubRegLiveness (DstReg)) {
1995+ // First check if there is sufficient granularity in terms of subranges.
1996+ LaneBitmask DstMask = MRI->getMaxLaneMaskForVReg (DstInt->reg ());
1997+ LaneBitmask UsedLanes = TRI->getSubRegIndexLaneMask (SubIdx);
1998+ LaneBitmask UnusedLanes = DstMask & ~UsedLanes;
1999+ if ((UnusedLanes & ~DstIntLaneMask).any ()) {
2000+ BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator ();
2001+ DstInt->createSubRangeFrom (Allocator, UnusedLanes, *DstInt);
2002+ DstIntLaneMask |= UnusedLanes;
2003+ }
2004+
2005+ // After duplicating the live ranges for the low/hi bits, we
2006+ // need to update the subranges of the DstReg interval such that
2007+ // for a case like this:
2008+ //
2009+ // entry:
2010+ // 16B %1:gpr32 = INSTRUCTION (<=> UseMI)
2011+ // :
2012+ // if.then:
2013+ // 32B %1:gpr32 = MOVIMM32 ..
2014+ // 48B %0:gpr64 = SUBREG_TO_REG 0, %1, sub32
2015+ //
2016+ // Only the MOVIMM32 requires a def of the top lanes and any intervals
2017+ // for the top 32-bits of the def at 16B should be removed.
2018+ for (LiveInterval::SubRange &SR : DstInt->subranges ()) {
2019+ if (!Writes || RequiresImplicitRedef ||
2020+ (SR.LaneMask & UnusedLanes).none ())
2021+ continue ;
2022+
2023+ assert ((SR.LaneMask & UnusedLanes) == SR.LaneMask &&
2024+ " Unexpected lanemask. Subrange needs finer granularity" );
2025+
2026+ SlotIndex UseIdx = LIS->getInstructionIndex (*UseMI).getRegSlot ();
2027+ auto SegmentI = SR.find (UseIdx);
2028+ if (SegmentI != SR.end ())
2029+ SR.removeSegment (SegmentI, true );
2030+ }
2031+ }
2032+ }
2033+
19382034 // Replace SrcReg with DstReg in all UseMI operands.
19392035 for (unsigned Op : Ops) {
19402036 MachineOperand &MO = UseMI->getOperand (Op);
@@ -1943,7 +2039,7 @@ void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg,
19432039 // turn a full def into a read-modify-write sub-register def and vice
19442040 // versa.
19452041 if (SubIdx && MO.isDef ())
1946- MO.setIsUndef (!Reads);
2042+ MO.setIsUndef (!Reads || RequiresImplicitRedef );
19472043
19482044 // A subreg use of a partially undef (super) register may be a complete
19492045 // undef use now and then has to be marked that way.
@@ -2046,6 +2142,38 @@ void RegisterCoalescer::setUndefOnPrunedSubRegUses(LiveInterval &LI,
20462142 LIS->shrinkToUses (&LI);
20472143}
20482144
2145+ // / For a given use of value \p Idx, it returns the def in the current block,
2146+ // / or otherwise all possible defs in preceding blocks.
2147+ static bool findPrecedingDefs (SmallPtrSetImpl<MachineInstr *> &Instrs,
2148+ LiveIntervals *LIS, LiveInterval &SrcInt,
2149+ MachineBasicBlock *MBB, VNInfo *Idx) {
2150+ auto IsPrecedingDef = [&](VNInfo *Idx) -> bool {
2151+ if (Idx->isPHIDef ())
2152+ return false ;
2153+ MachineInstr *Def = LIS->getInstructionFromIndex (Idx->def );
2154+ assert (Def && " Unable to find a def for SUBREG_TO_REG source operand" );
2155+ Instrs.insert (Def);
2156+ return true ;
2157+ };
2158+
2159+ if (IsPrecedingDef (Idx))
2160+ return true ;
2161+
2162+ SmallVector<MachineBasicBlock *> Worklist (MBB->pred_begin (), MBB->pred_end ());
2163+ SmallPtrSet<MachineBasicBlock *, 8 > VisitedBlocks;
2164+ while (!Worklist.empty ()) {
2165+ MachineBasicBlock *MBB = Worklist.pop_back_val ();
2166+ auto [_, Inserted] = VisitedBlocks.insert (MBB);
2167+ if (!Inserted)
2168+ continue ;
2169+ VNInfo *Idx = SrcInt.getVNInfoBefore (LIS->getMBBEndIdx (MBB));
2170+ if (!IsPrecedingDef (Idx))
2171+ Worklist.append (MBB->pred_begin (), MBB->pred_end ());
2172+ }
2173+
2174+ return !Instrs.empty ();
2175+ }
2176+
20492177bool RegisterCoalescer::joinCopy (
20502178 MachineInstr *CopyMI, bool &Again,
20512179 SmallPtrSetImpl<MachineInstr *> &CurrentErasedInstrs) {
@@ -2183,6 +2311,34 @@ bool RegisterCoalescer::joinCopy(
21832311 });
21842312 }
21852313
2314+ SmallPtrSet<MachineInstr *, 4 > SubregToRegSrcInsts;
2315+ Register SrcReg = CP.isFlipped () ? CP.getDstReg () : CP.getSrcReg ();
2316+ if (CopyMI->isSubregToReg () && !SrcReg.isPhysical ()) {
2317+ // For the case where the copy instruction is a SUBREG_TO_REG, e.g.
2318+ //
2319+ // %0:gpr32 = movimm32 ..
2320+ // %1:gpr64 = SUBREG_TO_REG 0, %0, sub32
2321+ // ...
2322+ // %0:gpr32 = COPY <something>
2323+ //
2324+ // After joining liveranges, the original `movimm32` will need an
2325+ // implicit-def to make it explicit that the entire register is written,
2326+ // i.e.
2327+ //
2328+ // undef %0.sub32:gpr64 = movimm32 ..., implicit-def %0
2329+ // ...
2330+ // undef %0.sub32:gpr64 = COPY <something> // Note that this does not
2331+ // // require an implicit-def,
2332+ // // because it has nothing to
2333+ // // do with the SUBREG_TO_REG.
2334+ LiveInterval &SrcInt = LIS->getInterval (SrcReg);
2335+ SlotIndex SubregToRegSlotIdx = LIS->getInstructionIndex (*CopyMI);
2336+ if (!findPrecedingDefs (SubregToRegSrcInsts, LIS, SrcInt,
2337+ CopyMI->getParent (),
2338+ SrcInt.Query (SubregToRegSlotIdx).valueIn ()))
2339+ llvm_unreachable (" SUBREG_TO_REG src requires a def" );
2340+ }
2341+
21862342 ShrinkMask = LaneBitmask::getNone ();
21872343 ShrinkMainRange = false ;
21882344
@@ -2253,7 +2409,8 @@ bool RegisterCoalescer::joinCopy(
22532409 // Also update DstReg operands to include DstIdx if it is set.
22542410 if (CP.getDstIdx ())
22552411 updateRegDefsUses (CP.getDstReg (), CP.getDstReg (), CP.getDstIdx ());
2256- updateRegDefsUses (CP.getSrcReg (), CP.getDstReg (), CP.getSrcIdx ());
2412+ updateRegDefsUses (CP.getSrcReg (), CP.getDstReg (), CP.getSrcIdx (),
2413+ &SubregToRegSrcInsts);
22572414
22582415 // Shrink subregister ranges if necessary.
22592416 if (ShrinkMask.any ()) {
0 commit comments