@@ -306,7 +306,12 @@ class RegisterCoalescer : private LiveRangeEdit::Delegate {
306306 // / number if it is not zero. If DstReg is a physical register and the
307307 // / existing subregister number of the def / use being updated is not zero,
308308 // / make sure to set it to the correct physical subregister.
309- void updateRegDefsUses (Register SrcReg, Register DstReg, unsigned SubIdx);
309+ // /
310+ // / If \p SubregToRegSrcInst is not empty, we are coalescing a
311+ // / `DstReg = SUBREG_TO_REG SrcReg`, which should introduce an
312+ // / implicit-def of DstReg on instructions that define SrcReg.
313+ void updateRegDefsUses (Register SrcReg, Register DstReg, unsigned SubIdx,
314+ ArrayRef<MachineInstr *> SubregToRegSrcInst = {});
310315
311316 // / If the given machine operand reads only undefined lanes add an undef
312317 // / flag.
@@ -1443,6 +1448,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
14431448
14441449 // CopyMI may have implicit operands, save them so that we can transfer them
14451450 // over to the newly materialized instruction after CopyMI is removed.
1451+ LaneBitmask NewMIImplicitOpsMask;
14461452 SmallVector<MachineOperand, 4 > ImplicitOps;
14471453 ImplicitOps.reserve (CopyMI->getNumOperands () -
14481454 CopyMI->getDesc ().getNumOperands ());
@@ -1457,6 +1463,9 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
14571463 (MO.getSubReg () == 0 && MO.getReg () == DstOperand.getReg ())) &&
14581464 " unexpected implicit virtual register def" );
14591465 ImplicitOps.push_back (MO);
1466+ if (MO.isDef () && MO.getReg ().isVirtual () &&
1467+ MRI->shouldTrackSubRegLiveness (DstReg))
1468+ NewMIImplicitOpsMask |= MRI->getMaxLaneMaskForVReg (MO.getReg ());
14601469 }
14611470 }
14621471
@@ -1499,14 +1508,11 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
14991508 } else {
15001509 assert (MO.getReg () == NewMI.getOperand (0 ).getReg ());
15011510
1502- // We're only expecting another def of the main output, so the range
1503- // should get updated with the regular output range.
1504- //
1505- // FIXME: The range updating below probably needs updating to look at
1506- // the super register if subranges are tracked.
1507- assert (!MRI->shouldTrackSubRegLiveness (DstReg) &&
1508- " subrange update for implicit-def of super register may not be "
1509- " properly handled" );
1511+ // If lanemasks need to be tracked, compile the lanemask of the NewMI
1512+ // implicit def operands to avoid subranges for the super-regs from
1513+ // being removed by code later on in this function.
1514+ if (MRI->shouldTrackSubRegLiveness (MO.getReg ()))
1515+ NewMIImplicitOpsMask |= MRI->getMaxLaneMaskForVReg (MO.getReg ());
15101516 }
15111517 }
15121518 }
@@ -1606,7 +1612,8 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
16061612 CurrIdx.getRegSlot (NewMI.getOperand (0 ).isEarlyClobber ());
16071613 VNInfo::Allocator &Alloc = LIS->getVNInfoAllocator ();
16081614 for (LiveInterval::SubRange &SR : DstInt.subranges ()) {
1609- if ((SR.LaneMask & DstMask).none ()) {
1615+ if ((SR.LaneMask & DstMask).none () &&
1616+ (SR.LaneMask & NewMIImplicitOpsMask).none ()) {
16101617 LLVM_DEBUG (dbgs ()
16111618 << " Removing undefined SubRange "
16121619 << PrintLaneMask (SR.LaneMask ) << " : " << SR << " \n " );
@@ -1870,11 +1877,14 @@ void RegisterCoalescer::addUndefFlag(const LiveInterval &Int, SlotIndex UseIdx,
18701877 }
18711878}
18721879
1873- void RegisterCoalescer::updateRegDefsUses (Register SrcReg, Register DstReg,
1874- unsigned SubIdx) {
1880+ void RegisterCoalescer::updateRegDefsUses (
1881+ Register SrcReg, Register DstReg, unsigned SubIdx,
1882+ ArrayRef<MachineInstr *> SubregToRegSrcInsts) {
18751883 bool DstIsPhys = DstReg.isPhysical ();
18761884 LiveInterval *DstInt = DstIsPhys ? nullptr : &LIS->getInterval (DstReg);
18771885
1886+ // Coalescing a COPY may expose reads of 'undef' subregisters.
1887+ // If so, then explicitly propagate 'undef' to those operands.
18781888 if (DstInt && DstInt->hasSubRanges () && DstReg != SrcReg) {
18791889 for (MachineOperand &MO : MRI->reg_operands (DstReg)) {
18801890 if (MO.isUndef ())
@@ -1891,6 +1901,15 @@ void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg,
18911901 }
18921902 }
18931903
1904+ // If DstInt already has a subrange for the unused lanes, then we shouldn't
1905+ // create duplicate subranges when we update the interval for unused lanes.
1906+ LaneBitmask DstIntLaneMask;
1907+ if (DstInt && MRI->shouldTrackSubRegLiveness (DstReg)) {
1908+ for (LiveInterval::SubRange &SR : DstInt->subranges ())
1909+ DstIntLaneMask |= SR.LaneMask ;
1910+ }
1911+
1912+ // Go through all instructions to replace uses of 'SrcReg' by 'DstReg'.
18941913 SmallPtrSet<MachineInstr *, 8 > Visited;
18951914 for (MachineRegisterInfo::reg_instr_iterator I = MRI->reg_instr_begin (SrcReg),
18961915 E = MRI->reg_instr_end ();
@@ -1914,6 +1933,80 @@ void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg,
19141933 if (DstInt && !Reads && SubIdx && !UseMI->isDebugInstr ())
19151934 Reads = DstInt->liveAt (LIS->getInstructionIndex (*UseMI));
19161935
1936+ bool RequiresImplicitRedef = false ;
1937+ if (!SubregToRegSrcInsts.empty ()) {
1938+ // We can only add an implicit-def and undef if the sub registers match,
1939+ // e.g.
1940+ // %0:gr32 = INSTX
1941+ // %0.sub8:gr32 = INSTY // top 24 bits of %0 still defined
1942+ // %1:gr64 = SUBREG_TO_REG 0, %0, %subreg.sub32
1943+ //
1944+ // This cannot be transformed into:
1945+ // %1.sub32:gr64 = INSTX
1946+ // undef %1.sub8:gr64 = INSTY , implicit-def %1
1947+ //
1948+ // Because that would thrash the top 24 bits of %1.sub32.
1949+ if (is_contained (SubregToRegSrcInsts, UseMI) &&
1950+ all_of (UseMI->defs (),
1951+ [&SubIdx, &SrcReg](const MachineOperand &MO) -> bool {
1952+ if (MO.getReg () != SrcReg || !MO.getSubReg () || MO.isUndef ())
1953+ return true ;
1954+ return SubIdx == MO.getSubReg ();
1955+ })) {
1956+ // Add implicit-def of super-register to express that the whole
1957+ // register is defined by the instruction.
1958+ MachineInstrBuilder MIB (*MF, UseMI);
1959+ MIB.addReg (DstReg, RegState::ImplicitDefine);
1960+ RequiresImplicitRedef = true ;
1961+ }
1962+
1963+ // If the coalesed instruction doesn't fully define the register, we need
1964+ // to preserve the original super register liveness for SUBREG_TO_REG.
1965+ //
1966+ // We pretended SUBREG_TO_REG was a regular copy for coalescing purposes,
1967+ // but it introduces liveness for other subregisters. Downstream users may
1968+ // have been relying on those bits, so we need to ensure their liveness is
1969+ // captured with a def of other lanes.
1970+ if (DstInt && MRI->shouldTrackSubRegLiveness (DstReg)) {
1971+ // First check if there is sufficient granularity in terms of subranges.
1972+ LaneBitmask DstMask = MRI->getMaxLaneMaskForVReg (DstInt->reg ());
1973+ LaneBitmask UsedLanes = TRI->getSubRegIndexLaneMask (SubIdx);
1974+ LaneBitmask UnusedLanes = DstMask & ~UsedLanes;
1975+ if ((UnusedLanes & ~DstIntLaneMask).any ()) {
1976+ BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator ();
1977+ DstInt->createSubRangeFrom (Allocator, UnusedLanes, *DstInt);
1978+ DstIntLaneMask |= UnusedLanes;
1979+ }
1980+
1981+ // After duplicating the live ranges for the low/hi bits, we
1982+ // need to update the subranges of the DstReg interval such that
1983+ // for a case like this:
1984+ //
1985+ // entry:
1986+ // 16B %1:gpr32 = INSTRUCTION (<=> UseMI)
1987+ // :
1988+ // if.then:
1989+ // 32B %1:gpr32 = MOVIMM32 ..
1990+ // 48B %0:gpr64 = SUBREG_TO_REG 0, %1, sub32
1991+ //
1992+ // Only the MOVIMM32 require a def of the top lanes and any intervals
1993+ // for the top 32-bits of the def at 16B should be removed.
1994+ for (LiveInterval::SubRange &SR : DstInt->subranges ()) {
1995+ if (!Writes || RequiresImplicitRedef ||
1996+ (SR.LaneMask & UnusedLanes).none ())
1997+ continue ;
1998+
1999+ assert ((SR.LaneMask & UnusedLanes) == SR.LaneMask &&
2000+ " Unexpected lanemask. Subrange needs finer granularity" );
2001+
2002+ SlotIndex UseIdx = LIS->getInstructionIndex (*UseMI).getRegSlot (false );
2003+ auto SegmentI = SR.find (UseIdx);
2004+ if (SegmentI != SR.end ())
2005+ SR.removeSegment (SegmentI, true );
2006+ }
2007+ }
2008+ }
2009+
19172010 // Replace SrcReg with DstReg in all UseMI operands.
19182011 for (unsigned Op : Ops) {
19192012 MachineOperand &MO = UseMI->getOperand (Op);
@@ -1922,7 +2015,7 @@ void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg,
19222015 // turn a full def into a read-modify-write sub-register def and vice
19232016 // versa.
19242017 if (SubIdx && MO.isDef ())
1925- MO.setIsUndef (!Reads);
2018+ MO.setIsUndef (!Reads || RequiresImplicitRedef );
19262019
19272020 // A subreg use of a partially undef (super) register may be a complete
19282021 // undef use now and then has to be marked that way.
@@ -2025,6 +2118,30 @@ void RegisterCoalescer::setUndefOnPrunedSubRegUses(LiveInterval &LI,
20252118 LIS->shrinkToUses (&LI);
20262119}
20272120
2121+ // / For a given use of value \p Idx, it returns the def in the current block,
2122+ // / or otherwise all possible defs in preceding blocks.
2123+ static bool FindDefInBlock (SmallPtrSetImpl<MachineBasicBlock *> &VisitedBlocks,
2124+ SmallVector<MachineInstr *> &Instrs,
2125+ LiveIntervals *LIS, LiveInterval &SrcInt,
2126+ MachineBasicBlock *MBB, VNInfo *Idx) {
2127+ if (!Idx->isPHIDef ()) {
2128+ MachineInstr *Def = LIS->getInstructionFromIndex (Idx->def );
2129+ assert (Def && " Unable to find a def for SUBREG_TO_REG source operand" );
2130+ Instrs.push_back (Def);
2131+ return true ;
2132+ }
2133+
2134+ bool Any = false ;
2135+ if (VisitedBlocks.count (MBB))
2136+ return false ;
2137+ VisitedBlocks.insert (MBB);
2138+ for (MachineBasicBlock *Pred : MBB->predecessors ()) {
2139+ Any |= FindDefInBlock (VisitedBlocks, Instrs, LIS, SrcInt, Pred,
2140+ SrcInt.getVNInfoBefore (LIS->getMBBEndIdx (Pred)));
2141+ }
2142+ return Any;
2143+ }
2144+
20282145bool RegisterCoalescer::joinCopy (
20292146 MachineInstr *CopyMI, bool &Again,
20302147 SmallPtrSetImpl<MachineInstr *> &CurrentErasedInstrs) {
@@ -2156,6 +2273,35 @@ bool RegisterCoalescer::joinCopy(
21562273 });
21572274 }
21582275
2276+ SmallVector<MachineInstr *> SubregToRegSrcInsts;
2277+ if (CopyMI->isSubregToReg ()) {
2278+ // For the case where the copy instruction is a SUBREG_TO_REG, e.g.
2279+ //
2280+ // %0:gpr32 = movimm32 ..
2281+ // %1:gpr64 = SUBREG_TO_REG 0, %0, sub32
2282+ // ...
2283+ // %0:gpr32 = COPY <something>
2284+ //
2285+ // After joining liveranges, the original `movimm32` will need an
2286+ // implicit-def to make it explicit that the entire register is written,
2287+ // i.e.
2288+ //
2289+ // undef %0.sub32:gpr64 = movimm32 ..., implicit-def %0
2290+ // ...
2291+ // undef %0.sub32:gpr64 = COPY <something> // Note that this does not
2292+ // // require an implicit-def,
2293+ // // because it has nothing to
2294+ // // do with the SUBREG_TO_REG.
2295+ LiveInterval &SrcInt =
2296+ LIS->getInterval (CP.isFlipped () ? CP.getDstReg () : CP.getSrcReg ());
2297+ SlotIndex SubregToRegSlotIdx = LIS->getInstructionIndex (*CopyMI);
2298+ SmallPtrSet<MachineBasicBlock *, 8 > VisitedBlocks;
2299+ if (!FindDefInBlock (VisitedBlocks, SubregToRegSrcInsts, LIS, SrcInt,
2300+ CopyMI->getParent (),
2301+ SrcInt.Query (SubregToRegSlotIdx).valueIn ()))
2302+ llvm_unreachable (" SUBREG_TO_REG src requires a def" );
2303+ }
2304+
21592305 ShrinkMask = LaneBitmask::getNone ();
21602306 ShrinkMainRange = false ;
21612307
@@ -2225,9 +2371,12 @@ bool RegisterCoalescer::joinCopy(
22252371
22262372 // Rewrite all SrcReg operands to DstReg.
22272373 // Also update DstReg operands to include DstIdx if it is set.
2228- if (CP.getDstIdx ())
2374+ if (CP.getDstIdx ()) {
2375+ assert (SubregToRegSrcInsts.empty () && " can this happen?" );
22292376 updateRegDefsUses (CP.getDstReg (), CP.getDstReg (), CP.getDstIdx ());
2230- updateRegDefsUses (CP.getSrcReg (), CP.getDstReg (), CP.getSrcIdx ());
2377+ }
2378+ updateRegDefsUses (CP.getSrcReg (), CP.getDstReg (), CP.getSrcIdx (),
2379+ SubregToRegSrcInsts);
22312380
22322381 // Shrink subregister ranges if necessary.
22332382 if (ShrinkMask.any ()) {
0 commit comments