Skip to content

Commit f3f78c3

Browse files
perlfu authored and Priyanshu3820 committed
RenameIndependentSubregs: try to only implicit def used subregs (llvm#167486)
Attempt to define only the used subregisters when creating IMPLICIT_DEF fix-ups for live interval subranges. This prevents entire (wide) registers from appearing live at the MIR level, where previously the unused subregisters were marked only by transient dead definitions in LiveIntervals.
1 parent 6c97e76 commit f3f78c3

12 files changed

+10145
-22015
lines changed

llvm/lib/CodeGen/RenameIndependentSubregs.cpp

Lines changed: 26 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -306,6 +306,7 @@ void RenameIndependentSubregs::computeMainRangesFixFlags(
306306
const IntEqClasses &Classes,
307307
const SmallVectorImpl<SubRangeInfo> &SubRangeInfos,
308308
const SmallVectorImpl<LiveInterval*> &Intervals) const {
309+
const TargetRegisterInfo &TRI = TII->getRegisterInfo();
309310
BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator();
310311
const SlotIndexes &Indexes = *LIS->getSlotIndexes();
311312
for (size_t I = 0, E = Intervals.size(); I < E; ++I) {
@@ -314,6 +315,25 @@ void RenameIndependentSubregs::computeMainRangesFixFlags(
314315

315316
LI.removeEmptySubRanges();
316317

318+
// Try to establish a single subregister which covers all uses.
319+
// Note: this is assuming the selected subregister will only be
320+
// used for fixing up live intervals issues created by this pass.
321+
LaneBitmask UsedMask, UnusedMask;
322+
for (LiveInterval::SubRange &SR : LI.subranges())
323+
UsedMask |= SR.LaneMask;
324+
SmallVector<unsigned> SubRegIdxs;
325+
unsigned Flags = 0;
326+
unsigned SubReg = 0;
327+
// TODO: Handle SubRegIdxs.size() > 1
328+
if (TRI.getCoveringSubRegIndexes(MRI->getRegClass(Reg), UsedMask,
329+
SubRegIdxs) &&
330+
SubRegIdxs.size() == 1) {
331+
SubReg = SubRegIdxs.front();
332+
Flags = RegState::Undef;
333+
} else {
334+
UnusedMask = MRI->getMaxLaneMaskForVReg(Reg) & ~UsedMask;
335+
}
336+
317337
// There must be a def (or live-in) before every use. Splitting vregs may
318338
// violate this principle as the splitted vreg may not have a definition on
319339
// every path. Fix this by creating IMPLICIT_DEF instruction as necessary.
@@ -336,19 +356,18 @@ void RenameIndependentSubregs::computeMainRangesFixFlags(
336356
MachineBasicBlock::iterator InsertPos =
337357
llvm::findPHICopyInsertPoint(PredMBB, &MBB, Reg);
338358
const MCInstrDesc &MCDesc = TII->get(TargetOpcode::IMPLICIT_DEF);
339-
MachineInstrBuilder ImpDef = BuildMI(*PredMBB, InsertPos,
340-
DebugLoc(), MCDesc, Reg);
359+
MachineInstrBuilder ImpDef =
360+
BuildMI(*PredMBB, InsertPos, DebugLoc(), MCDesc)
361+
.addDef(Reg, Flags, SubReg);
341362
SlotIndex DefIdx = LIS->InsertMachineInstrInMaps(*ImpDef);
342363
SlotIndex RegDefIdx = DefIdx.getRegSlot();
343-
LaneBitmask Mask = MRI->getMaxLaneMaskForVReg(Reg);
344364
for (LiveInterval::SubRange &SR : LI.subranges()) {
345-
Mask = Mask & ~SR.LaneMask;
346365
VNInfo *SRVNI = SR.getNextValue(RegDefIdx, Allocator);
347366
SR.addSegment(LiveRange::Segment(RegDefIdx, PredEnd, SRVNI));
348367
}
349-
350-
if (!Mask.none()) {
351-
LiveInterval::SubRange *SR = LI.createSubRange(Allocator, Mask);
368+
if (!UnusedMask.none()) {
369+
LiveInterval::SubRange *SR =
370+
LI.createSubRange(Allocator, UnusedMask);
352371
SR->createDeadDef(RegDefIdx, Allocator);
353372
}
354373
}

llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.intersect_ray.ll

Lines changed: 147 additions & 168 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.1024bit.ll

Lines changed: 5488 additions & 8704 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.640bit.ll

Lines changed: 688 additions & 2040 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.704bit.ll

Lines changed: 712 additions & 2104 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.768bit.ll

Lines changed: 736 additions & 2168 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.832bit.ll

Lines changed: 752 additions & 2232 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.896bit.ll

Lines changed: 768 additions & 2256 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.960bit.ll

Lines changed: 768 additions & 2280 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/collapse-endcf.ll

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -979,7 +979,7 @@ define void @scc_liveness(i32 %arg) local_unnamed_addr #0 {
979979
; GCN-NEXT: v_cmp_gt_i32_e32 vcc, s4, v0
980980
; GCN-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
981981
; GCN-NEXT: s_mov_b64 s[8:9], 0
982-
; GCN-NEXT: v_mov_b32_e32 v7, 0
982+
; GCN-NEXT: v_mov_b32_e32 v0, 0
983983
; GCN-NEXT: s_mov_b64 s[6:7], 0
984984
; GCN-NEXT: s_branch .LBB5_3
985985
; GCN-NEXT: .LBB5_1: ; %Flow
@@ -1002,36 +1002,36 @@ define void @scc_liveness(i32 %arg) local_unnamed_addr #0 {
10021002
; GCN-NEXT: ; %bb.4: ; %bb2
10031003
; GCN-NEXT: ; in Loop: Header=BB5_3 Depth=1
10041004
; GCN-NEXT: s_or_b64 exec, exec, s[6:7]
1005-
; GCN-NEXT: v_mov_b32_e32 v8, v7
1006-
; GCN-NEXT: v_mov_b32_e32 v2, v7
1007-
; GCN-NEXT: v_mov_b32_e32 v6, v7
1005+
; GCN-NEXT: v_mov_b32_e32 v1, v0
1006+
; GCN-NEXT: v_mov_b32_e32 v2, v0
1007+
; GCN-NEXT: v_mov_b32_e32 v3, v0
10081008
; GCN-NEXT: s_and_saveexec_b64 s[10:11], s[4:5]
10091009
; GCN-NEXT: s_cbranch_execz .LBB5_2
10101010
; GCN-NEXT: ; %bb.5: ; %bb4
10111011
; GCN-NEXT: ; in Loop: Header=BB5_3 Depth=1
1012-
; GCN-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
1013-
; GCN-NEXT: v_mov_b32_e32 v8, v7
1014-
; GCN-NEXT: v_mov_b32_e32 v2, v7
1015-
; GCN-NEXT: v_mov_b32_e32 v6, v7
1012+
; GCN-NEXT: buffer_load_dword v3, v0, s[0:3], 0 offen
1013+
; GCN-NEXT: v_mov_b32_e32 v1, v0
1014+
; GCN-NEXT: v_mov_b32_e32 v2, v0
10161015
; GCN-NEXT: s_waitcnt vmcnt(0)
1017-
; GCN-NEXT: v_cmp_gt_f32_e64 s[6:7], 0, v0
1016+
; GCN-NEXT: v_cmp_gt_f32_e64 s[6:7], 0, v3
1017+
; GCN-NEXT: v_mov_b32_e32 v3, v0
10181018
; GCN-NEXT: s_and_saveexec_b64 s[12:13], s[6:7]
10191019
; GCN-NEXT: s_cbranch_execz .LBB5_1
10201020
; GCN-NEXT: ; %bb.6: ; %bb8
10211021
; GCN-NEXT: ; in Loop: Header=BB5_3 Depth=1
1022-
; GCN-NEXT: v_mov_b32_e32 v8, v7
1023-
; GCN-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
1024-
; GCN-NEXT: ; implicit-def: $vgpr3_vgpr4_vgpr5_vgpr6
1022+
; GCN-NEXT: v_mov_b32_e32 v1, v0
1023+
; GCN-NEXT: ; implicit-def: $vgpr2
1024+
; GCN-NEXT: ; implicit-def: $vgpr3
10251025
; GCN-NEXT: s_branch .LBB5_1
10261026
; GCN-NEXT: .LBB5_7: ; %bb12
10271027
; GCN-NEXT: s_or_b64 exec, exec, s[8:9]
1028-
; GCN-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen
1028+
; GCN-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen
10291029
; GCN-NEXT: s_waitcnt vmcnt(0)
10301030
; GCN-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen
10311031
; GCN-NEXT: s_waitcnt vmcnt(0)
1032-
; GCN-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen
1032+
; GCN-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
10331033
; GCN-NEXT: s_waitcnt vmcnt(0)
1034-
; GCN-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen
1034+
; GCN-NEXT: buffer_store_dword v0, v0, s[0:3], 0 offen
10351035
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0)
10361036
; GCN-NEXT: s_setpc_b64 s[30:31]
10371037
;

0 commit comments

Comments
 (0)