Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
179 changes: 164 additions & 15 deletions llvm/lib/CodeGen/RegisterCoalescer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -306,7 +306,12 @@ class RegisterCoalescer : private LiveRangeEdit::Delegate {
/// number if it is not zero. If DstReg is a physical register and the
/// existing subregister number of the def / use being updated is not zero,
/// make sure to set it to the correct physical subregister.
void updateRegDefsUses(Register SrcReg, Register DstReg, unsigned SubIdx);
///
/// If \p SubregToRegSrcInst is not empty, we are coalescing a
/// `DstReg = SUBREG_TO_REG SrcReg`, which should introduce an
/// implicit-def of DstReg on instructions that define SrcReg.
void updateRegDefsUses(Register SrcReg, Register DstReg, unsigned SubIdx,
ArrayRef<MachineInstr *> SubregToRegSrcInst = {});

/// If the given machine operand reads only undefined lanes add an undef
/// flag.
Expand Down Expand Up @@ -1443,6 +1448,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,

// CopyMI may have implicit operands, save them so that we can transfer them
// over to the newly materialized instruction after CopyMI is removed.
LaneBitmask NewMIImplicitOpsMask;
SmallVector<MachineOperand, 4> ImplicitOps;
ImplicitOps.reserve(CopyMI->getNumOperands() -
CopyMI->getDesc().getNumOperands());
Expand All @@ -1457,6 +1463,9 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
(MO.getSubReg() == 0 && MO.getReg() == DstOperand.getReg())) &&
"unexpected implicit virtual register def");
ImplicitOps.push_back(MO);
if (MO.isDef() && MO.getReg().isVirtual() &&
MRI->shouldTrackSubRegLiveness(DstReg))
NewMIImplicitOpsMask |= MRI->getMaxLaneMaskForVReg(MO.getReg());
}
}

Expand Down Expand Up @@ -1499,14 +1508,11 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
} else {
assert(MO.getReg() == NewMI.getOperand(0).getReg());

// We're only expecting another def of the main output, so the range
// should get updated with the regular output range.
//
// FIXME: The range updating below probably needs updating to look at
// the super register if subranges are tracked.
assert(!MRI->shouldTrackSubRegLiveness(DstReg) &&
"subrange update for implicit-def of super register may not be "
"properly handled");
// If lanemasks need to be tracked, compile the lanemask of the NewMI
// implicit def operands to avoid subranges for the super-regs from
// being removed by code later on in this function.
if (MRI->shouldTrackSubRegLiveness(MO.getReg()))
NewMIImplicitOpsMask |= MRI->getMaxLaneMaskForVReg(MO.getReg());
}
}
}
Expand Down Expand Up @@ -1606,7 +1612,8 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
CurrIdx.getRegSlot(NewMI.getOperand(0).isEarlyClobber());
VNInfo::Allocator &Alloc = LIS->getVNInfoAllocator();
for (LiveInterval::SubRange &SR : DstInt.subranges()) {
if ((SR.LaneMask & DstMask).none()) {
if ((SR.LaneMask & DstMask).none() &&
(SR.LaneMask & NewMIImplicitOpsMask).none()) {
LLVM_DEBUG(dbgs()
<< "Removing undefined SubRange "
<< PrintLaneMask(SR.LaneMask) << " : " << SR << "\n");
Expand Down Expand Up @@ -1870,11 +1877,14 @@ void RegisterCoalescer::addUndefFlag(const LiveInterval &Int, SlotIndex UseIdx,
}
}

void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg,
unsigned SubIdx) {
void RegisterCoalescer::updateRegDefsUses(
Register SrcReg, Register DstReg, unsigned SubIdx,
ArrayRef<MachineInstr *> SubregToRegSrcInsts) {
bool DstIsPhys = DstReg.isPhysical();
LiveInterval *DstInt = DstIsPhys ? nullptr : &LIS->getInterval(DstReg);

// Coalescing a COPY may expose reads of 'undef' subregisters.
// If so, then explicitly propagate 'undef' to those operands.
if (DstInt && DstInt->hasSubRanges() && DstReg != SrcReg) {
for (MachineOperand &MO : MRI->reg_operands(DstReg)) {
if (MO.isUndef())
Expand All @@ -1891,6 +1901,15 @@ void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg,
}
}

// If DstInt already has a subrange for the unused lanes, then we shouldn't
// create duplicate subranges when we update the interval for unused lanes.
LaneBitmask DstIntLaneMask;
if (DstInt && MRI->shouldTrackSubRegLiveness(DstReg)) {
for (LiveInterval::SubRange &SR : DstInt->subranges())
DstIntLaneMask |= SR.LaneMask;
}

// Go through all instructions to replace uses of 'SrcReg' by 'DstReg'.
SmallPtrSet<MachineInstr *, 8> Visited;
for (MachineRegisterInfo::reg_instr_iterator I = MRI->reg_instr_begin(SrcReg),
E = MRI->reg_instr_end();
Expand All @@ -1914,6 +1933,80 @@ void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg,
if (DstInt && !Reads && SubIdx && !UseMI->isDebugInstr())
Reads = DstInt->liveAt(LIS->getInstructionIndex(*UseMI));

bool RequiresImplicitRedef = false;
if (!SubregToRegSrcInsts.empty()) {
// We can only add an implicit-def and undef if the sub registers match,
// e.g.
// %0:gr32 = INSTX
// %0.sub8:gr32 = INSTY // top 24 bits of %0 still defined
// %1:gr64 = SUBREG_TO_REG 0, %0, %subreg.sub32
//
// This cannot be transformed into:
// %1.sub32:gr64 = INSTX
// undef %1.sub8:gr64 = INSTY , implicit-def %1
//
// Because that would thrash the top 24 bits of %1.sub32.
if (is_contained(SubregToRegSrcInsts, UseMI) &&
all_of(UseMI->defs(),
[&SubIdx, &SrcReg](const MachineOperand &MO) -> bool {
if (MO.getReg() != SrcReg || !MO.getSubReg() || MO.isUndef())
return true;
return SubIdx == MO.getSubReg();
})) {
// Add implicit-def of super-register to express that the whole
// register is defined by the instruction.
MachineInstrBuilder MIB(*MF, UseMI);
MIB.addReg(DstReg, RegState::ImplicitDefine);
RequiresImplicitRedef = true;
}

// If the coalesed instruction doesn't fully define the register, we need
// to preserve the original super register liveness for SUBREG_TO_REG.
//
// We pretended SUBREG_TO_REG was a regular copy for coalescing purposes,
// but it introduces liveness for other subregisters. Downstream users may
// have been relying on those bits, so we need to ensure their liveness is
// captured with a def of other lanes.
if (DstInt && MRI->shouldTrackSubRegLiveness(DstReg)) {
// First check if there is sufficient granularity in terms of subranges.
LaneBitmask DstMask = MRI->getMaxLaneMaskForVReg(DstInt->reg());
LaneBitmask UsedLanes = TRI->getSubRegIndexLaneMask(SubIdx);
LaneBitmask UnusedLanes = DstMask & ~UsedLanes;
if ((UnusedLanes & ~DstIntLaneMask).any()) {
BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator();
DstInt->createSubRangeFrom(Allocator, UnusedLanes, *DstInt);
DstIntLaneMask |= UnusedLanes;
}

// After duplicating the live ranges for the low/hi bits, we
// need to update the subranges of the DstReg interval such that
// for a case like this:
//
// entry:
// 16B %1:gpr32 = INSTRUCTION (<=> UseMI)
// :
// if.then:
// 32B %1:gpr32 = MOVIMM32 ..
// 48B %0:gpr64 = SUBREG_TO_REG 0, %1, sub32
//
// Only the MOVIMM32 require a def of the top lanes and any intervals
// for the top 32-bits of the def at 16B should be removed.
for (LiveInterval::SubRange &SR : DstInt->subranges()) {
if (!Writes || RequiresImplicitRedef ||
(SR.LaneMask & UnusedLanes).none())
continue;

assert((SR.LaneMask & UnusedLanes) == SR.LaneMask &&
"Unexpected lanemask. Subrange needs finer granularity");

SlotIndex UseIdx = LIS->getInstructionIndex(*UseMI).getRegSlot(false);
auto SegmentI = SR.find(UseIdx);
if (SegmentI != SR.end())
SR.removeSegment(SegmentI, true);
}
}
}

// Replace SrcReg with DstReg in all UseMI operands.
for (unsigned Op : Ops) {
MachineOperand &MO = UseMI->getOperand(Op);
Expand All @@ -1922,7 +2015,7 @@ void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg,
// turn a full def into a read-modify-write sub-register def and vice
// versa.
if (SubIdx && MO.isDef())
MO.setIsUndef(!Reads);
MO.setIsUndef(!Reads || RequiresImplicitRedef);

// A subreg use of a partially undef (super) register may be a complete
// undef use now and then has to be marked that way.
Expand Down Expand Up @@ -2025,6 +2118,30 @@ void RegisterCoalescer::setUndefOnPrunedSubRegUses(LiveInterval &LI,
LIS->shrinkToUses(&LI);
}

/// For a given use of value \p Idx, it returns the def in the current block,
/// or otherwise all possible defs in preceding blocks.
static bool FindDefInBlock(SmallPtrSetImpl<MachineBasicBlock *> &VisitedBlocks,
SmallVector<MachineInstr *> &Instrs,
LiveIntervals *LIS, LiveInterval &SrcInt,
MachineBasicBlock *MBB, VNInfo *Idx) {
if (!Idx->isPHIDef()) {
MachineInstr *Def = LIS->getInstructionFromIndex(Idx->def);
assert(Def && "Unable to find a def for SUBREG_TO_REG source operand");
Instrs.push_back(Def);
return true;
}

bool Any = false;
if (VisitedBlocks.count(MBB))
return false;
VisitedBlocks.insert(MBB);
for (MachineBasicBlock *Pred : MBB->predecessors()) {
Any |= FindDefInBlock(VisitedBlocks, Instrs, LIS, SrcInt, Pred,
SrcInt.getVNInfoBefore(LIS->getMBBEndIdx(Pred)));
}
return Any;
}

bool RegisterCoalescer::joinCopy(
MachineInstr *CopyMI, bool &Again,
SmallPtrSetImpl<MachineInstr *> &CurrentErasedInstrs) {
Expand Down Expand Up @@ -2156,6 +2273,35 @@ bool RegisterCoalescer::joinCopy(
});
}

SmallVector<MachineInstr *> SubregToRegSrcInsts;
if (CopyMI->isSubregToReg()) {
// For the case where the copy instruction is a SUBREG_TO_REG, e.g.
//
// %0:gpr32 = movimm32 ..
// %1:gpr64 = SUBREG_TO_REG 0, %0, sub32
// ...
// %0:gpr32 = COPY <something>
//
// After joining liveranges, the original `movimm32` will need an
// implicit-def to make it explicit that the entire register is written,
// i.e.
//
// undef %0.sub32:gpr64 = movimm32 ..., implicit-def %0
// ...
// undef %0.sub32:gpr64 = COPY <something> // Note that this does not
// // require an implicit-def,
// // because it has nothing to
// // do with the SUBREG_TO_REG.
LiveInterval &SrcInt =
LIS->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg());
SlotIndex SubregToRegSlotIdx = LIS->getInstructionIndex(*CopyMI);
SmallPtrSet<MachineBasicBlock *, 8> VisitedBlocks;
if (!FindDefInBlock(VisitedBlocks, SubregToRegSrcInsts, LIS, SrcInt,
CopyMI->getParent(),
SrcInt.Query(SubregToRegSlotIdx).valueIn()))
llvm_unreachable("SUBREG_TO_REG src requires a def");
}

ShrinkMask = LaneBitmask::getNone();
ShrinkMainRange = false;

Expand Down Expand Up @@ -2225,9 +2371,12 @@ bool RegisterCoalescer::joinCopy(

// Rewrite all SrcReg operands to DstReg.
// Also update DstReg operands to include DstIdx if it is set.
if (CP.getDstIdx())
if (CP.getDstIdx()) {
assert(SubregToRegSrcInsts.empty() && "can this happen?");
updateRegDefsUses(CP.getDstReg(), CP.getDstReg(), CP.getDstIdx());
updateRegDefsUses(CP.getSrcReg(), CP.getDstReg(), CP.getSrcIdx());
}
updateRegDefsUses(CP.getSrcReg(), CP.getDstReg(), CP.getSrcIdx(),
SubregToRegSrcInsts);

// Shrink subregister ranges if necessary.
if (ShrinkMask.any()) {
Expand Down
Loading