Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions llvm/include/llvm/CodeGen/LiveRegMatrix.h
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,16 @@ class LiveRegMatrix : public MachineFunctionPass {
/// the segment [Start, End).
bool checkInterference(SlotIndex Start, SlotIndex End, MCRegister PhysReg);

/// Check for interference in the segment [Start, End) that may prevent
/// assignment to PhysReg, like checkInterference. Returns a lane mask of
/// which lanes of the physical register interfere in the segment [Start, End)
/// of some other interval already assigned to PhysReg.
///
/// If this function returns LaneBitmask::getNone(), PhysReg is completely
/// free at the segment [Start, End).
LaneBitmask checkInterferenceLanes(SlotIndex Start, SlotIndex End,
MCRegister PhysReg);

/// Assign VirtReg to PhysReg.
/// This will mark VirtReg's live range as occupied in the LiveRegMatrix and
/// update VirtRegMap. The live range is expected to be available in PhysReg.
Expand Down
35 changes: 35 additions & 0 deletions llvm/lib/CodeGen/LiveRegMatrix.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -237,6 +237,41 @@ bool LiveRegMatrix::checkInterference(SlotIndex Start, SlotIndex End,
return false;
}

LaneBitmask LiveRegMatrix::checkInterferenceLanes(SlotIndex Start,
SlotIndex End,
MCRegister PhysReg) {
// Construct artificial live range containing only one segment [Start, End).
VNInfo valno(0, Start);
LiveRange::Segment Seg(Start, End, &valno);
LiveRange LR;
LR.addSegment(Seg);

LaneBitmask InterferingLanes;

// Check for interference with that segment
for (MCRegUnitMaskIterator MCRU(PhysReg, TRI); MCRU.isValid(); ++MCRU) {
auto [Unit, Lanes] = *MCRU;
// LR is stack-allocated. LiveRegMatrix caches queries by a key that
// includes the address of the live range. If (for the same reg unit) this
// checkInterference overload is called twice, without any other query()
// calls in between (on heap-allocated LiveRanges) - which would invalidate
// the cached query - the LR address seen the second time may well be the
// same as that seen the first time, while the Start/End/valno may not - yet
// the same cached result would be fetched. To avoid that, we don't cache
// this query.
//
// FIXME: the usability of the Query API needs to be improved to avoid
// subtle bugs due to query identity. Avoiding caching, for example, would
// greatly simplify things.
LiveIntervalUnion::Query Q;
Q.reset(UserTag, LR, Matrix[Unit]);
if (Q.checkInterference())
InterferingLanes |= Lanes;
}

return InterferingLanes;
}

Register LiveRegMatrix::getOneVReg(unsigned PhysReg) const {
const LiveInterval *VRegInterval = nullptr;
for (MCRegUnit Unit : TRI->regunits(PhysReg)) {
Expand Down
64 changes: 51 additions & 13 deletions llvm/lib/CodeGen/VirtRegMap.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#include "llvm/CodeGen/LiveDebugVariables.h"
#include "llvm/CodeGen/LiveInterval.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LiveRegMatrix.h"
#include "llvm/CodeGen/LiveStacks.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
Expand Down Expand Up @@ -187,6 +188,7 @@ class VirtRegRewriter : public MachineFunctionPass {
MachineRegisterInfo *MRI = nullptr;
SlotIndexes *Indexes = nullptr;
LiveIntervals *LIS = nullptr;
LiveRegMatrix *LRM = nullptr;
VirtRegMap *VRM = nullptr;
LiveDebugVariables *DebugVars = nullptr;
DenseSet<Register> RewriteRegs;
Expand All @@ -199,9 +201,9 @@ class VirtRegRewriter : public MachineFunctionPass {
void handleIdentityCopy(MachineInstr &MI);
void expandCopyBundle(MachineInstr &MI) const;
bool subRegLiveThrough(const MachineInstr &MI, MCRegister SuperPhysReg) const;
bool needLiveOutUndefSubregDef(const LiveInterval &LI,
const MachineBasicBlock &MBB, unsigned SubReg,
MCPhysReg PhysReg) const;
LaneBitmask liveOutUndefPhiLanesForUndefSubregDef(
const LiveInterval &LI, const MachineBasicBlock &MBB, unsigned SubReg,
MCPhysReg PhysReg, const MachineInstr &MI) const;

public:
static char ID;
Expand Down Expand Up @@ -234,6 +236,7 @@ INITIALIZE_PASS_BEGIN(VirtRegRewriter, "virtregrewriter",
INITIALIZE_PASS_DEPENDENCY(SlotIndexesWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LiveDebugVariables)
INITIALIZE_PASS_DEPENDENCY(LiveRegMatrix)
INITIALIZE_PASS_DEPENDENCY(LiveStacks)
INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
INITIALIZE_PASS_END(VirtRegRewriter, "virtregrewriter",
Expand All @@ -249,6 +252,7 @@ void VirtRegRewriter::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<LiveStacks>();
AU.addPreserved<LiveStacks>();
AU.addRequired<VirtRegMap>();
AU.addRequired<LiveRegMatrix>();

if (!ClearVirtRegs)
AU.addPreserved<LiveDebugVariables>();
Expand All @@ -263,6 +267,7 @@ bool VirtRegRewriter::runOnMachineFunction(MachineFunction &fn) {
MRI = &MF->getRegInfo();
Indexes = &getAnalysis<SlotIndexesWrapperPass>().getSI();
LIS = &getAnalysis<LiveIntervalsWrapperPass>().getLIS();
LRM = &getAnalysis<LiveRegMatrix>();
VRM = &getAnalysis<VirtRegMap>();
DebugVars = &getAnalysis<LiveDebugVariables>();
LLVM_DEBUG(dbgs() << "********** REWRITE VIRTUAL REGISTERS **********\n"
Expand Down Expand Up @@ -535,24 +540,36 @@ bool VirtRegRewriter::subRegLiveThrough(const MachineInstr &MI,
return false;
}

/// Check if we need to maintain liveness for undef subregister lanes that are
/// live out of a block.
bool VirtRegRewriter::needLiveOutUndefSubregDef(const LiveInterval &LI,
const MachineBasicBlock &MBB,
unsigned SubReg,
MCPhysReg PhysReg) const {
/// Compute a lanemask for undef lanes which need to be preserved out of the
/// defining block for a register assignment.
LaneBitmask VirtRegRewriter::liveOutUndefPhiLanesForUndefSubregDef(
const LiveInterval &LI, const MachineBasicBlock &MBB, unsigned SubReg,
MCPhysReg PhysReg, const MachineInstr &MI) const {
LaneBitmask UndefMask = ~TRI->getSubRegIndexLaneMask(SubReg);
LaneBitmask LiveOutUndefLanes;

for (const LiveInterval::SubRange &SR : LI.subranges()) {
LaneBitmask NeedImpDefLanes = UndefMask & SR.LaneMask;
if (NeedImpDefLanes.any() && !LIS->isLiveOutOfMBB(SR, &MBB)) {
for (const MachineBasicBlock *Succ : MBB.successors()) {
if (LIS->isLiveInToMBB(SR, Succ))
return true;
LiveOutUndefLanes |= NeedImpDefLanes;
}
}
}

return false;
SlotIndex MIIndex = LIS->getInstructionIndex(MI);
SlotIndex BeforeMIUses = MIIndex.getBaseIndex();
LaneBitmask InterferingLanes =
LRM->checkInterferenceLanes(BeforeMIUses, MIIndex.getRegSlot(), PhysReg);
LiveOutUndefLanes &= ~InterferingLanes;

LLVM_DEBUG(if (LiveOutUndefLanes.any()) {
dbgs() << "Need live out undef defs for " << printReg(PhysReg)
<< LiveOutUndefLanes << " from " << printMBBReference(MBB) << '\n';
});

return LiveOutUndefLanes;
}

void VirtRegRewriter::rewrite() {
Expand Down Expand Up @@ -611,8 +628,29 @@ void VirtRegRewriter::rewrite() {
assert(MO.isDef());
if (MO.isUndef()) {
const LiveInterval &LI = LIS->getInterval(VirtReg);
if (needLiveOutUndefSubregDef(LI, *MBBI, SubReg, PhysReg))
SuperDefs.push_back(PhysReg);

LaneBitmask LiveOutUndefLanes =
liveOutUndefPhiLanesForUndefSubregDef(LI, *MBBI, SubReg,
PhysReg, MI);
if (LiveOutUndefLanes.any()) {
SmallVector<unsigned, 16> CoveringIndexes;

// TODO: Just use one super register def if none of the lanes
// are needed?
if (!TRI->getCoveringSubRegIndexes(
*MRI, MRI->getRegClass(VirtReg), LiveOutUndefLanes,
CoveringIndexes))
llvm_unreachable(
"cannot represent required subregister defs");

// Try to represent the minimum needed live out def as a
// sequence of subregister defs.
//
// FIXME: It would be better if we could directly represent
// liveness with a lanemask instead of spamming operands.
for (unsigned SubIdx : CoveringIndexes)
SuperDefs.push_back(TRI->getSubReg(PhysReg, SubIdx));
}
}
}
}
Expand Down
2 changes: 2 additions & 0 deletions llvm/test/CodeGen/AMDGPU/indirect-call.ll
Original file line number Diff line number Diff line change
Expand Up @@ -603,6 +603,7 @@ define i32 @test_indirect_call_vgpr_ptr_ret(ptr %fptr) {
; GISEL-NEXT: s_mov_b32 s14, s43
; GISEL-NEXT: s_mov_b32 s15, s42
; GISEL-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GISEL-NEXT: v_mov_b32_e32 v1, v0
; GISEL-NEXT: ; implicit-def: $vgpr0
; GISEL-NEXT: ; implicit-def: $vgpr31
; GISEL-NEXT: s_xor_b64 exec, exec, s[48:49]
Expand Down Expand Up @@ -1383,6 +1384,7 @@ define i32 @test_indirect_call_vgpr_ptr_arg_and_return(i32 %i, ptr %fptr) {
; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[8:9], v[1:2]
; GISEL-NEXT: s_and_saveexec_b64 s[6:7], vcc
; GISEL-NEXT: s_swappc_b64 s[30:31], s[8:9]
; GISEL-NEXT: v_mov_b32_e32 v2, v0
; GISEL-NEXT: ; implicit-def: $vgpr1
; GISEL-NEXT: ; implicit-def: $vgpr0
; GISEL-NEXT: s_xor_b64 exec, exec, s[6:7]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ body: |
; CHECK-NEXT: dead [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: dead undef [[DEF2:%[0-9]+]].sub0:vreg_64 = IMPLICIT_DEF
; CHECK-NEXT: renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51 = S_LOAD_DWORDX16_IMM renamable $sgpr4_sgpr5, 0, 0 :: (invariant load (s512), align 32, addrspace 4)
; CHECK-NEXT: renamable $sgpr24 = IMPLICIT_DEF implicit-def $sgpr24_sgpr25_sgpr26_sgpr27
; CHECK-NEXT: renamable $sgpr24 = IMPLICIT_DEF implicit-def $sgpr25
; CHECK-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = S_LOAD_DWORDX16_IMM undef renamable $sgpr4_sgpr5, 0, 0 :: (invariant load (s512), align 32, addrspace 4)
; CHECK-NEXT: $exec = S_MOV_B64_term undef renamable $sgpr4_sgpr5
; CHECK-NEXT: S_CBRANCH_EXECZ %bb.6, implicit $exec
Expand Down Expand Up @@ -83,7 +83,7 @@ body: |
; CHECK-NEXT: liveins: $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19:0x000000000000FFFF, $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51:0x00000000FFFFFFFF
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: dead [[IMAGE_SAMPLE_LZ_V1_V2_5:%[0-9]+]]:vgpr_32 = IMAGE_SAMPLE_LZ_V1_V2 undef [[DEF]], renamable $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51, undef renamable $sgpr8_sgpr9_sgpr10_sgpr11, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 8)
; CHECK-NEXT: renamable $sgpr25 = COPY undef renamable $sgpr24, implicit-def $sgpr24_sgpr25_sgpr26_sgpr27
; CHECK-NEXT: renamable $sgpr25 = COPY undef renamable $sgpr24, implicit-def $sgpr24
; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.7, implicit undef $vcc
; CHECK-NEXT: S_BRANCH %bb.6
; CHECK-NEXT: {{ $}}
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/infloop-subrange-spill.mir
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ body: |
; CHECK-NEXT: dead undef [[DEF3:%[0-9]+]].sub1:vreg_64 = IMPLICIT_DEF
; CHECK-NEXT: dead renamable $sgpr5 = IMPLICIT_DEF
; CHECK-NEXT: renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51 = S_LOAD_DWORDX16_IMM undef renamable $sgpr4_sgpr5, 0, 0 :: (invariant load (s512), align 32, addrspace 4)
; CHECK-NEXT: renamable $sgpr24 = IMPLICIT_DEF implicit-def $sgpr24_sgpr25_sgpr26_sgpr27
; CHECK-NEXT: renamable $sgpr24 = IMPLICIT_DEF implicit-def $sgpr25
; CHECK-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = S_LOAD_DWORDX16_IMM undef renamable $sgpr4_sgpr5, 0, 0 :: (invariant load (s512), align 32, addrspace 4)
; CHECK-NEXT: $exec = S_MOV_B64_term undef renamable $sgpr4_sgpr5
; CHECK-NEXT: S_CBRANCH_EXECZ %bb.5, implicit $exec
Expand Down Expand Up @@ -80,7 +80,7 @@ body: |
; CHECK-NEXT: liveins: $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19:0x000000000000FFFF, $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51:0x00000000FFFFFFFF
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: dead [[IMAGE_SAMPLE_LZ_V1_V2_5:%[0-9]+]]:vgpr_32 = IMAGE_SAMPLE_LZ_V1_V2 undef [[DEF]], renamable $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51, undef renamable $sgpr8_sgpr9_sgpr10_sgpr11, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 8)
; CHECK-NEXT: renamable $sgpr25 = COPY undef renamable $sgpr24, implicit-def $sgpr24_sgpr25_sgpr26_sgpr27
; CHECK-NEXT: renamable $sgpr25 = COPY undef renamable $sgpr24, implicit-def $sgpr24
; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.6, implicit undef $vcc
; CHECK-NEXT: S_BRANCH %bb.5
; CHECK-NEXT: {{ $}}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
# RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -start-before=greedy,2 -stop-after=tailduplication -verify-machineinstrs -o - %s | FileCheck %s

---
name: undef_subreg_def_live_out_tailduplicate_vreg96_undef_sub1_sub2_assigned_physreg_interference
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: true
scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
stackPtrOffsetReg: '$sgpr32'
body: |
; CHECK-LABEL: name: undef_subreg_def_live_out_tailduplicate_vreg96_undef_sub1_sub2_assigned_physreg_interference
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
; CHECK-NEXT: liveins: $sgpr0, $vgpr2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: S_CMP_EQ_U32 $sgpr0, 0, implicit-def $scc
; CHECK-NEXT: S_CBRANCH_SCC0 %bb.2, implicit killed $scc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: liveins: $vgpr2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: renamable $vgpr3 = V_MOV_B32_e32 0, implicit $exec, implicit-def $vgpr4_vgpr5
; CHECK-NEXT: EXP 0, killed renamable $vgpr3, renamable $vgpr4, renamable $vgpr5, killed renamable $vgpr2, 0, 0, 0, implicit $exec
; CHECK-NEXT: S_ENDPGM 0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: liveins: $vgpr2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: S_NOP 0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7
; CHECK-NEXT: renamable $vgpr0 = V_MOV_B32_e32 0, implicit $exec
; CHECK-NEXT: renamable $vgpr3_vgpr4_vgpr5 = BUFFER_LOAD_FORMAT_XYZ_IDXEN killed renamable $vgpr0, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 8)
; CHECK-NEXT: EXP 0, killed renamable $vgpr3, renamable $vgpr4, renamable $vgpr5, killed renamable $vgpr2, 0, 0, 0, implicit $exec
; CHECK-NEXT: S_ENDPGM 0
bb.0:
liveins: $sgpr0, $vgpr2

%2:vgpr_32 = COPY $vgpr2
S_CMP_EQ_U32 killed $sgpr0, 0, implicit-def $scc
S_CBRANCH_SCC0 %bb.2, implicit killed $scc

bb.1:
undef %0.sub0:vreg_96 = V_MOV_B32_e32 0, implicit $exec
S_BRANCH %bb.3

bb.2:
S_NOP 0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7
%1:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
%0:vreg_96 = BUFFER_LOAD_FORMAT_XYZ_IDXEN killed %1, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), addrspace 8)

bb.3:
EXP 0, killed %0.sub0, killed %0.sub1, killed %0.sub2, %2:vgpr_32, 0, 0, 0, implicit $exec
S_ENDPGM 0

...
Loading
Loading