Skip to content

Commit a274ffe

Browse files
authored
[MachineSink] Remove subrange of live-ins from super register as well. (#159145)
Post-RA machine sinking can sink a copy of a sub-register into a successor block. However, the sub-register might not be removed from the live-in lane mask of its super-register in the successor, and a later pass (e.g., the if-converter) may then add an implicit use of the register based on that live-in, resulting in a use of an undefined register. This change makes sure the corresponding subrange of the super-register's live-in is removed as well.
1 parent de59bc4 commit a274ffe

File tree

5 files changed

+141
-5
lines changed

5 files changed

+141
-5
lines changed

llvm/include/llvm/CodeGen/MachineBasicBlock.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -505,6 +505,11 @@ class MachineBasicBlock
505505
LLVM_ABI void removeLiveIn(MCRegister Reg,
506506
LaneBitmask LaneMask = LaneBitmask::getAll());
507507

508+
/// Remove the specified register from any overlapping live-in. The method is
509+
/// subreg-aware and removes Reg and its subregs from the live-in set. It also
510+
/// clears the corresponding lane mask bits from its live-in super registers.
511+
LLVM_ABI void removeLiveInOverlappedWith(MCRegister Reg);
512+
508513
/// Return true if the specified register is in the live in set.
509514
LLVM_ABI bool isLiveIn(MCRegister Reg,
510515
LaneBitmask LaneMask = LaneBitmask::getAll()) const;

llvm/lib/CodeGen/MachineBasicBlock.cpp

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -606,6 +606,26 @@ void MachineBasicBlock::removeLiveIn(MCRegister Reg, LaneBitmask LaneMask) {
606606
LiveIns.erase(I);
607607
}
608608

609+
// Removes Reg from this block's live-in information in two passes:
//   1. removeLiveIn() is called with its default (all-lanes) mask on Reg and
//      on every sub-register of Reg.
//   2. For each super-register of Reg, removeLiveIn() is called on that
//      super-register with the lane mask of the sub-register index under
//      which Reg appears in it.
// Only the registers reported by TRI->superregs(Reg) are visited in the
// second pass.
void MachineBasicBlock::removeLiveInOverlappedWith(MCRegister Reg) {
610+
const MachineFunction *MF = getParent();
611+
const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
612+
// Remove Reg and its subregs from live in set.
613+
for (MCPhysReg S : TRI->subregs_inclusive(Reg))
614+
removeLiveIn(S);
615+
616+
// Remove live-in bitmask in super registers as well.
617+
for (MCPhysReg Super : TRI->superregs(Reg)) {
618+
// Walk Super's (sub-register, sub-register index) pairs looking for the
// entry whose sub-register is exactly Reg.
for (MCSubRegIndexIterator SRI(Super, TRI); SRI.isValid(); ++SRI) {
619+
if (Reg == SRI.getSubReg()) {
620+
unsigned SubRegIndex = SRI.getSubRegIndex();
621+
// Lane mask associated with that sub-register index; these are the lanes
// passed to removeLiveIn() for Super.
LaneBitmask SubRegLaneMask = TRI->getSubRegIndexLaneMask(SubRegIndex);
622+
removeLiveIn(Super, SubRegLaneMask);
623+
// Reg has been matched for this Super; the remaining indices are not
// examined.
break;
624+
}
625+
}
626+
}
627+
}
628+
609629
MachineBasicBlock::livein_iterator
610630
MachineBasicBlock::removeLiveIn(MachineBasicBlock::livein_iterator I) {
611631
// Get non-const version of iterator.

llvm/lib/CodeGen/MachineSink.cpp

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2187,11 +2187,9 @@ static void clearKillFlags(MachineInstr *MI, MachineBasicBlock &CurBB,
21872187
static void updateLiveIn(MachineInstr *MI, MachineBasicBlock *SuccBB,
21882188
const SmallVectorImpl<unsigned> &UsedOpsInCopy,
21892189
const SmallVectorImpl<Register> &DefedRegsInCopy) {
2190-
MachineFunction &MF = *SuccBB->getParent();
2191-
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
21922190
for (Register DefReg : DefedRegsInCopy)
2193-
for (MCPhysReg S : TRI->subregs_inclusive(DefReg))
2194-
SuccBB->removeLiveIn(S);
2191+
SuccBB->removeLiveInOverlappedWith(DefReg);
2192+
21952193
for (auto U : UsedOpsInCopy)
21962194
SuccBB->addLiveIn(MI->getOperand(U).getReg());
21972195
SuccBB->sortUniqueLiveIns();

llvm/test/CodeGen/AMDGPU/av_spill_cross_bb_usage.mir

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ body: |
4949
; GCN-NEXT: {{ $}}
5050
; GCN-NEXT: bb.1:
5151
; GCN-NEXT: successors: %bb.2(0x80000000)
52-
; GCN-NEXT: liveins: $exec, $sgpr30, $sgpr31, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr40, $sgpr30_sgpr31, $vgpr10_vgpr11:0x000000000000000F, $vgpr14_vgpr15:0x000000000000000F, $vgpr41_vgpr42:0x000000000000000F, $vgpr43_vgpr44:0x000000000000000F, $vgpr45_vgpr46:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F
52+
; GCN-NEXT: liveins: $exec, $sgpr30, $sgpr31, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr40, $sgpr30_sgpr31, $vgpr10_vgpr11:0x000000000000000F, $vgpr14_vgpr15:0x000000000000000F, $vgpr43_vgpr44:0x000000000000000F
5353
; GCN-NEXT: {{ $}}
5454
; GCN-NEXT: renamable $vgpr57 = COPY $vgpr9, implicit $exec
5555
; GCN-NEXT: renamable $vgpr56 = COPY $vgpr8, implicit $exec
Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
2+
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -run-pass=postra-machine-sink -verify-machineinstrs -o - %s | FileCheck --check-prefixes=GCN %s
3+
4+
# Test that a live-in with a subrange is updated accordingly by postra-machine-sink.
5+
---
6+
name: test_postra_machine_sink_livein_update
7+
tracksRegLiveness: true
8+
frameInfo:
9+
adjustsStack: true
10+
stack:
11+
- { id: 0, name: '', type: spill-slot, offset: 0, size: 4, alignment: 4,
12+
stack-id: default, callee-saved-register: '', callee-saved-restored: true,
13+
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
14+
- { id: 1, name: '', type: spill-slot, offset: 0, size: 8, alignment: 4,
15+
stack-id: default, callee-saved-register: '', callee-saved-restored: true,
16+
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
17+
- { id: 2, name: '', type: spill-slot, offset: 0, size: 8, alignment: 4,
18+
stack-id: default, callee-saved-register: '', callee-saved-restored: true,
19+
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
20+
machineFunctionInfo:
21+
scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
22+
stackPtrOffsetReg: '$sgpr32'
23+
body: |
24+
; GCN-LABEL: name: test_postra_machine_sink_livein_update
25+
; GCN: bb.0:
26+
; GCN-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
27+
; GCN-NEXT: liveins: $sgpr30, $sgpr31, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr40, $sgpr30_sgpr31
28+
; GCN-NEXT: {{ $}}
29+
; GCN-NEXT: renamable $vgpr44 = COPY $vgpr13, implicit $exec
30+
; GCN-NEXT: renamable $vgpr43 = COPY $vgpr12, implicit $exec
31+
; GCN-NEXT: S_CBRANCH_SCC1 %bb.2, implicit undef $scc
32+
; GCN-NEXT: S_BRANCH %bb.1
33+
; GCN-NEXT: {{ $}}
34+
; GCN-NEXT: bb.1:
35+
; GCN-NEXT: successors: %bb.2(0x80000000)
36+
; GCN-NEXT: liveins: $exec, $sgpr30, $sgpr31, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr40, $sgpr30_sgpr31, $vgpr10_vgpr11:0x000000000000000F, $vgpr14_vgpr15:0x000000000000000F, $vgpr43_vgpr44:0x000000000000000F
37+
; GCN-NEXT: {{ $}}
38+
; GCN-NEXT: renamable $vgpr57 = COPY $vgpr9, implicit $exec
39+
; GCN-NEXT: renamable $vgpr56 = COPY $vgpr8, implicit $exec
40+
; GCN-NEXT: renamable $vgpr59 = COPY $vgpr7, implicit $exec
41+
; GCN-NEXT: renamable $vgpr58 = COPY $vgpr6, implicit $exec
42+
; GCN-NEXT: renamable $vgpr61 = COPY $vgpr5, implicit $exec
43+
; GCN-NEXT: renamable $vgpr60 = COPY $vgpr4, implicit $exec
44+
; GCN-NEXT: renamable $vgpr42 = COPY $vgpr3, implicit $exec
45+
; GCN-NEXT: renamable $vgpr41 = COPY $vgpr2, implicit $exec
46+
; GCN-NEXT: renamable $vgpr46 = COPY $vgpr1, implicit $exec
47+
; GCN-NEXT: renamable $vgpr45 = COPY $vgpr0, implicit $exec
48+
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
49+
; GCN-NEXT: renamable $sgpr16_sgpr17 = IMPLICIT_DEF
50+
; GCN-NEXT: $vgpr40 = SI_SPILL_S32_TO_VGPR $sgpr30, 0, $vgpr40, implicit-def $sgpr30_sgpr31, implicit $sgpr30_sgpr31
51+
; GCN-NEXT: $vgpr40 = SI_SPILL_S32_TO_VGPR $sgpr31, 1, $vgpr40, implicit $sgpr30_sgpr31
52+
; GCN-NEXT: SI_SPILL_AV64_SAVE killed $vgpr14_vgpr15, %stack.1, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.1, align 4, addrspace 5)
53+
; GCN-NEXT: SI_SPILL_AV64_SAVE killed $vgpr10_vgpr11, %stack.2, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.2, align 4, addrspace 5)
54+
; GCN-NEXT: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr16_sgpr17, 0, csr_amdgpu, implicit-def dead $vgpr0
55+
; GCN-NEXT: renamable $vgpr14_vgpr15 = SI_SPILL_AV64_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s64) from %stack.1, align 4, addrspace 5)
56+
; GCN-NEXT: renamable $vgpr0_vgpr1 = nofpexcept V_FMA_F64_e64 0, killed $vgpr45_vgpr46, 0, killed $vgpr41_vgpr42, 0, killed $vgpr60_vgpr61, 0, 0, implicit $mode, implicit $exec
57+
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
58+
; GCN-NEXT: FLAT_STORE_DWORDX2 killed renamable $vgpr58_vgpr59, killed renamable $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64))
59+
; GCN-NEXT: renamable $vgpr0_vgpr1 = SI_SPILL_AV64_RESTORE %stack.2, $sgpr32, 0, implicit $exec :: (load (s64) from %stack.2, align 4, addrspace 5)
60+
; GCN-NEXT: FLAT_STORE_DWORDX2 killed renamable $vgpr0_vgpr1, killed renamable $vgpr56_vgpr57, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64))
61+
; GCN-NEXT: {{ $}}
62+
; GCN-NEXT: bb.2:
63+
; GCN-NEXT: liveins: $vgpr40, $vgpr14_vgpr15:0x000000000000000F, $vgpr43_vgpr44:0x000000000000000F
64+
; GCN-NEXT: {{ $}}
65+
; GCN-NEXT: renamable $vgpr0_vgpr1 = V_MOV_B64_PSEUDO 0, implicit $exec
66+
; GCN-NEXT: FLAT_STORE_DWORDX2 undef renamable $vgpr0_vgpr1, killed renamable $vgpr43_vgpr44, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64))
67+
; GCN-NEXT: FLAT_STORE_DWORDX2 killed renamable $vgpr0_vgpr1, killed renamable $vgpr14_vgpr15, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64))
68+
; GCN-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31
69+
bb.0:
70+
successors: %bb.2(0x40000000), %bb.1(0x40000000)
71+
liveins: $sgpr30, $sgpr31, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr40, $sgpr30_sgpr31
72+
73+
renamable $vgpr44 = COPY $vgpr13, implicit $exec
74+
renamable $vgpr43 = COPY $vgpr12, implicit $exec
75+
renamable $vgpr57 = COPY $vgpr9, implicit $exec
76+
renamable $vgpr56 = COPY $vgpr8, implicit $exec
77+
renamable $vgpr59 = COPY $vgpr7, implicit $exec
78+
renamable $vgpr58 = COPY $vgpr6, implicit $exec
79+
renamable $vgpr61 = COPY $vgpr5, implicit $exec
80+
renamable $vgpr60 = COPY $vgpr4, implicit $exec
81+
renamable $vgpr42 = COPY $vgpr3, implicit $exec
82+
renamable $vgpr41 = COPY $vgpr2, implicit $exec
83+
renamable $vgpr46 = COPY $vgpr1, implicit $exec
84+
renamable $vgpr45 = COPY $vgpr0, implicit $exec
85+
S_CBRANCH_SCC1 %bb.2, implicit undef $scc
86+
S_BRANCH %bb.1
87+
88+
bb.1:
89+
successors: %bb.2(0x80000000)
90+
liveins: $sgpr30, $sgpr31, $vgpr40, $sgpr30_sgpr31, $vgpr10_vgpr11:0x000000000000000F, $vgpr14_vgpr15:0x000000000000000F, $vgpr41_vgpr42:0x000000000000000F, $vgpr43_vgpr44:0x000000000000000F, $vgpr45_vgpr46:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F
91+
92+
ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
93+
renamable $sgpr16_sgpr17 = IMPLICIT_DEF
94+
$vgpr40 = SI_SPILL_S32_TO_VGPR $sgpr30, 0, $vgpr40, implicit-def $sgpr30_sgpr31, implicit $sgpr30_sgpr31
95+
$vgpr40 = SI_SPILL_S32_TO_VGPR $sgpr31, 1, $vgpr40, implicit $sgpr30_sgpr31
96+
SI_SPILL_AV64_SAVE killed $vgpr14_vgpr15, %stack.1, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.1, align 4, addrspace 5)
97+
SI_SPILL_AV64_SAVE killed $vgpr10_vgpr11, %stack.2, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.2, align 4, addrspace 5)
98+
dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr16_sgpr17, 0, csr_amdgpu, implicit-def dead $vgpr0
99+
renamable $vgpr14_vgpr15 = SI_SPILL_AV64_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s64) from %stack.1, align 4, addrspace 5)
100+
renamable $vgpr0_vgpr1 = nofpexcept V_FMA_F64_e64 0, killed $vgpr45_vgpr46, 0, killed $vgpr41_vgpr42, 0, killed $vgpr60_vgpr61, 0, 0, implicit $mode, implicit $exec
101+
ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
102+
FLAT_STORE_DWORDX2 killed renamable $vgpr58_vgpr59, killed renamable $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64))
103+
renamable $vgpr0_vgpr1 = SI_SPILL_AV64_RESTORE %stack.2, $sgpr32, 0, implicit $exec :: (load (s64) from %stack.2, align 4, addrspace 5)
104+
FLAT_STORE_DWORDX2 killed renamable $vgpr0_vgpr1, killed renamable $vgpr56_vgpr57, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64))
105+
106+
bb.2:
107+
liveins: $vgpr40, $vgpr14_vgpr15:0x000000000000000F, $vgpr43_vgpr44:0x000000000000000F
108+
109+
renamable $vgpr0_vgpr1 = V_MOV_B64_PSEUDO 0, implicit $exec
110+
FLAT_STORE_DWORDX2 undef renamable $vgpr0_vgpr1, killed renamable $vgpr43_vgpr44, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64))
111+
FLAT_STORE_DWORDX2 killed renamable $vgpr0_vgpr1, killed renamable $vgpr14_vgpr15, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64))
112+
S_SETPC_B64_return undef $sgpr30_sgpr31
113+
...

0 commit comments

Comments
 (0)