Skip to content

Commit c8af241

Browse files
committed
Merging r359898:
------------------------------------------------------------------------
r359898 | arsenm | 2019-05-03 08:21:53 -0700 (Fri, 03 May 2019) | 3 lines

AMDGPU: Support shrinking add with FI in SIFoldOperands

Avoids test regression in a future patch
------------------------------------------------------------------------

llvm-svn: 362648
1 parent b73bafa commit c8af241

File tree

2 files changed

+267
-35
lines changed

2 files changed

+267
-35
lines changed

llvm/lib/Target/AMDGPU/SIFoldOperands.cpp

Lines changed: 37 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -201,53 +201,55 @@ static bool updateOperand(FoldCandidate &Fold,
201201
Mod.setImm(Mod.getImm() & ~SISrcMods::OP_SEL_1);
202202
}
203203
}
204+
}
204205

205-
if (Fold.needsShrink()) {
206-
MachineBasicBlock *MBB = MI->getParent();
207-
auto Liveness = MBB->computeRegisterLiveness(&TRI, AMDGPU::VCC, MI);
208-
if (Liveness != MachineBasicBlock::LQR_Dead)
209-
return false;
206+
if ((Fold.isImm() || Fold.isFI()) && Fold.needsShrink()) {
207+
MachineBasicBlock *MBB = MI->getParent();
208+
auto Liveness = MBB->computeRegisterLiveness(&TRI, AMDGPU::VCC, MI);
209+
if (Liveness != MachineBasicBlock::LQR_Dead)
210+
return false;
210211

211-
MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
212-
int Op32 = Fold.getShrinkOpcode();
213-
MachineOperand &Dst0 = MI->getOperand(0);
214-
MachineOperand &Dst1 = MI->getOperand(1);
215-
assert(Dst0.isDef() && Dst1.isDef());
212+
MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
213+
int Op32 = Fold.getShrinkOpcode();
214+
MachineOperand &Dst0 = MI->getOperand(0);
215+
MachineOperand &Dst1 = MI->getOperand(1);
216+
assert(Dst0.isDef() && Dst1.isDef());
216217

217-
bool HaveNonDbgCarryUse = !MRI.use_nodbg_empty(Dst1.getReg());
218+
bool HaveNonDbgCarryUse = !MRI.use_nodbg_empty(Dst1.getReg());
218219

219-
const TargetRegisterClass *Dst0RC = MRI.getRegClass(Dst0.getReg());
220-
unsigned NewReg0 = MRI.createVirtualRegister(Dst0RC);
220+
const TargetRegisterClass *Dst0RC = MRI.getRegClass(Dst0.getReg());
221+
unsigned NewReg0 = MRI.createVirtualRegister(Dst0RC);
221222

222-
MachineInstr *Inst32 = TII.buildShrunkInst(*MI, Op32);
223+
MachineInstr *Inst32 = TII.buildShrunkInst(*MI, Op32);
223224

224-
if (HaveNonDbgCarryUse) {
225-
BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), Dst1.getReg())
226-
.addReg(AMDGPU::VCC, RegState::Kill);
227-
}
228-
229-
// Keep the old instruction around to avoid breaking iterators, but
230-
// replace it with a dummy instruction to remove uses.
231-
//
232-
// FIXME: We should not invert how this pass looks at operands to avoid
233-
// this. Should track set of foldable movs instead of looking for uses
234-
// when looking at a use.
235-
Dst0.setReg(NewReg0);
236-
for (unsigned I = MI->getNumOperands() - 1; I > 0; --I)
237-
MI->RemoveOperand(I);
238-
MI->setDesc(TII.get(AMDGPU::IMPLICIT_DEF));
239-
240-
if (Fold.isCommuted())
241-
TII.commuteInstruction(*Inst32, false);
242-
return true;
225+
if (HaveNonDbgCarryUse) {
226+
BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), Dst1.getReg())
227+
.addReg(AMDGPU::VCC, RegState::Kill);
243228
}
244229

245-
Old.ChangeToImmediate(Fold.ImmToFold);
230+
// Keep the old instruction around to avoid breaking iterators, but
231+
// replace it with a dummy instruction to remove uses.
232+
//
233+
// FIXME: We should not invert how this pass looks at operands to avoid
234+
// this. Should track set of foldable movs instead of looking for uses
235+
// when looking at a use.
236+
Dst0.setReg(NewReg0);
237+
for (unsigned I = MI->getNumOperands() - 1; I > 0; --I)
238+
MI->RemoveOperand(I);
239+
MI->setDesc(TII.get(AMDGPU::IMPLICIT_DEF));
240+
241+
if (Fold.isCommuted())
242+
TII.commuteInstruction(*Inst32, false);
246243
return true;
247244
}
248245

249246
assert(!Fold.needsShrink() && "not handled");
250247

248+
if (Fold.isImm()) {
249+
Old.ChangeToImmediate(Fold.ImmToFold);
250+
return true;
251+
}
252+
251253
if (Fold.isFI()) {
252254
Old.ChangeToFrameIndex(Fold.FrameIndexToFold);
253255
return true;
@@ -348,7 +350,7 @@ static bool tryAddToFoldList(SmallVectorImpl<FoldCandidate> &FoldList,
348350
if ((Opc == AMDGPU::V_ADD_I32_e64 ||
349351
Opc == AMDGPU::V_SUB_I32_e64 ||
350352
Opc == AMDGPU::V_SUBREV_I32_e64) && // FIXME
351-
OpToFold->isImm()) {
353+
(OpToFold->isImm() || OpToFold->isFI())) {
352354
MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
353355

354356
// Verify the other operand is a VGPR, otherwise we would violate the
Lines changed: 230 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,230 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2+
# RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -run-pass si-fold-operands,dead-mi-elimination %s -o - | FileCheck -check-prefix=GCN %s
3+
4+
---
5+
6+
# First operand is FI in a VGPR, other operand is a VGPR
7+
name: shrink_vgpr_fi_vgpr_v_add_i32_e64_no_carry_out_use
8+
tracksRegLiveness: true
9+
stack:
10+
- { id: 0, type: default, offset: 0, size: 64, alignment: 16 }
11+
body: |
12+
bb.0:
13+
liveins: $vgpr0
14+
15+
; GCN-LABEL: name: shrink_vgpr_fi_vgpr_v_add_i32_e64_no_carry_out_use
16+
; GCN: liveins: $vgpr0
17+
; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
18+
; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
19+
; GCN: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[V_MOV_B32_e32_]], [[COPY]], implicit-def $vcc, implicit $exec
20+
; GCN: S_ENDPGM implicit [[V_ADD_I32_e32_]]
21+
%0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
22+
%1:vgpr_32 = COPY $vgpr0
23+
%2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, implicit $exec
24+
S_ENDPGM implicit %2
25+
26+
...
27+
28+
---
29+
30+
# First operand is a VGPR, other operand FI is in a VGPR
31+
name: shrink_vgpr_vgpr_fi_v_add_i32_e64_no_carry_out_use
32+
tracksRegLiveness: true
33+
stack:
34+
- { id: 0, type: default, offset: 0, size: 64, alignment: 16 }
35+
body: |
36+
bb.0:
37+
liveins: $vgpr0
38+
39+
; GCN-LABEL: name: shrink_vgpr_vgpr_fi_v_add_i32_e64_no_carry_out_use
40+
; GCN: liveins: $vgpr0
41+
; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
42+
; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
43+
; GCN: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[COPY]], [[V_MOV_B32_e32_]], implicit-def $vcc, implicit $exec
44+
; GCN: S_ENDPGM implicit [[V_ADD_I32_e32_]]
45+
%0:vgpr_32 = COPY $vgpr0
46+
%1:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
47+
%2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, implicit $exec
48+
S_ENDPGM implicit %2
49+
50+
...
51+
52+
---
53+
54+
# First operand is FI in a VGPR, other operand is an SGPR
55+
name: shrink_vgpr_fi_sgpr_v_add_i32_e64_no_carry_out_use
56+
tracksRegLiveness: true
57+
stack:
58+
- { id: 0, type: default, offset: 0, size: 64, alignment: 16 }
59+
body: |
60+
bb.0:
61+
liveins: $sgpr0
62+
63+
; GCN-LABEL: name: shrink_vgpr_fi_sgpr_v_add_i32_e64_no_carry_out_use
64+
; GCN: liveins: $sgpr0
65+
; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
66+
; GCN: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
67+
; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_I32_e64 [[COPY]], [[V_MOV_B32_e32_]], implicit $exec
68+
; GCN: S_ENDPGM implicit [[V_ADD_I32_e64_]]
69+
%0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
70+
%1:sreg_32_xm0 = COPY $sgpr0
71+
%2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, implicit $exec
72+
S_ENDPGM implicit %2
73+
74+
...
75+
76+
---
77+
78+
# First operand is an SGPR, other operand FI is in a VGPR
79+
name: shrink_sgpr_vgpr_fi_v_add_i32_e64_no_carry_out_use
80+
tracksRegLiveness: true
81+
stack:
82+
- { id: 0, type: default, offset: 0, size: 64, alignment: 16 }
83+
body: |
84+
bb.0:
85+
liveins: $sgpr0
86+
87+
; GCN-LABEL: name: shrink_sgpr_vgpr_fi_v_add_i32_e64_no_carry_out_use
88+
; GCN: liveins: $sgpr0
89+
; GCN: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
90+
; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
91+
; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_I32_e64 [[V_MOV_B32_e32_]], [[COPY]], implicit $exec
92+
; GCN: S_ENDPGM implicit [[V_ADD_I32_e64_]]
93+
%0:sreg_32_xm0 = COPY $sgpr0
94+
%1:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
95+
%2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, implicit $exec
96+
S_ENDPGM implicit %2
97+
98+
...
99+
100+
---
101+
102+
# First operand is FI in an SGPR, other operand is a VGPR
103+
name: shrink_sgpr_fi_vgpr_v_add_i32_e64_no_carry_out_use
104+
tracksRegLiveness: true
105+
stack:
106+
- { id: 0, type: default, offset: 0, size: 64, alignment: 16 }
107+
body: |
108+
bb.0:
109+
liveins: $vgpr0
110+
111+
; GCN-LABEL: name: shrink_sgpr_fi_vgpr_v_add_i32_e64_no_carry_out_use
112+
; GCN: liveins: $vgpr0
113+
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 %stack.0
114+
; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
115+
; GCN: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[S_MOV_B32_]], [[COPY]], implicit-def $vcc, implicit $exec
116+
; GCN: S_ENDPGM implicit [[V_ADD_I32_e32_]]
117+
%0:sreg_32_xm0 = S_MOV_B32 %stack.0
118+
%1:vgpr_32 = COPY $vgpr0
119+
%2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, implicit $exec
120+
S_ENDPGM implicit %2
121+
122+
...
123+
124+
---
125+
126+
# First operand is a VGPR, other operand FI is in an SGPR
127+
name: shrink_vgpr_sgpr_fi_v_add_i32_e64_no_carry_out_use
128+
tracksRegLiveness: true
129+
stack:
130+
- { id: 0, type: default, offset: 0, size: 64, alignment: 16}
131+
body: |
132+
bb.0:
133+
liveins: $vgpr0
134+
135+
; GCN-LABEL: name: shrink_vgpr_sgpr_fi_v_add_i32_e64_no_carry_out_use
136+
; GCN: liveins: $vgpr0
137+
; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
138+
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 %stack.0
139+
; GCN: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[S_MOV_B32_]], [[COPY]], implicit-def $vcc, implicit $exec
140+
; GCN: S_ENDPGM implicit [[V_ADD_I32_e32_]]
141+
%0:vgpr_32 = COPY $vgpr0
142+
%1:sreg_32_xm0 = S_MOV_B32 %stack.0
143+
%2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, implicit $exec
144+
S_ENDPGM implicit %2
145+
146+
...
147+
148+
---
149+
150+
# First operand is FI in a VGPR, other operand is an inline imm in a VGPR
151+
name: shrink_vgpr_imm_fi_vgpr_v_add_i32_e64_no_carry_out_use
152+
tracksRegLiveness: true
153+
stack:
154+
- { id: 0, type: default, offset: 0, size: 64, alignment: 16 }
155+
body: |
156+
bb.0:
157+
158+
; GCN-LABEL: name: shrink_vgpr_imm_fi_vgpr_v_add_i32_e64_no_carry_out_use
159+
; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
160+
; GCN: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 16, [[V_MOV_B32_e32_]], implicit-def $vcc, implicit $exec
161+
; GCN: S_ENDPGM implicit [[V_ADD_I32_e32_]]
162+
%0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
163+
%1:vgpr_32 = V_MOV_B32_e32 16, implicit $exec
164+
%2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, implicit $exec
165+
S_ENDPGM implicit %2
166+
167+
...
168+
169+
---
170+
171+
# First operand is an inline imm in a VGPR, other operand FI is in a VGPR
172+
name: shrink_vgpr_imm_vgpr_fi_v_add_i32_e64_no_carry_out_use
173+
tracksRegLiveness: true
174+
stack:
175+
- { id: 0, type: default, offset: 0, size: 64, alignment: 16 }
176+
body: |
177+
bb.0:
178+
179+
; GCN-LABEL: name: shrink_vgpr_imm_vgpr_fi_v_add_i32_e64_no_carry_out_use
180+
; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
181+
; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_I32_e64 16, [[V_MOV_B32_e32_]], implicit $exec
182+
; GCN: S_ENDPGM implicit [[V_ADD_I32_e64_]]
183+
%0:vgpr_32 = V_MOV_B32_e32 16, implicit $exec
184+
%1:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
185+
%2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, implicit $exec
186+
S_ENDPGM implicit %2
187+
188+
...
189+
190+
---
191+
192+
# First operand is FI in a VGPR, other operand is a literal constant in a VGPR
193+
name: shrink_vgpr_k_fi_vgpr_v_add_i32_e64_no_carry_out_use
194+
tracksRegLiveness: true
195+
stack:
196+
- { id: 0, type: default, offset: 0, size: 64, alignment: 16 }
197+
body: |
198+
bb.0:
199+
200+
; GCN-LABEL: name: shrink_vgpr_k_fi_vgpr_v_add_i32_e64_no_carry_out_use
201+
; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
202+
; GCN: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 1234, [[V_MOV_B32_e32_]], implicit-def $vcc, implicit $exec
203+
; GCN: S_ENDPGM implicit [[V_ADD_I32_e32_]]
204+
%0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
205+
%1:vgpr_32 = V_MOV_B32_e32 1234, implicit $exec
206+
%2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, implicit $exec
207+
S_ENDPGM implicit %2
208+
209+
...
210+
211+
---
212+
213+
# First operand is a literal constant in a VGPR, other operand FI is in a VGPR
214+
name: shrink_vgpr_k_vgpr_fi_v_add_i32_e64_no_carry_out_use
215+
tracksRegLiveness: true
216+
stack:
217+
- { id: 0, type: default, offset: 0, size: 64, alignment: 16 }
218+
body: |
219+
bb.0:
220+
221+
; GCN-LABEL: name: shrink_vgpr_k_vgpr_fi_v_add_i32_e64_no_carry_out_use
222+
; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1234, implicit $exec
223+
; GCN: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 %stack.0, [[V_MOV_B32_e32_]], implicit-def $vcc, implicit $exec
224+
; GCN: S_ENDPGM implicit [[V_ADD_I32_e32_]]
225+
%0:vgpr_32 = V_MOV_B32_e32 1234, implicit $exec
226+
%1:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
227+
%2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, implicit $exec
228+
S_ENDPGM implicit %2
229+
230+
...

0 commit comments

Comments
 (0)