Skip to content

Commit f79cef3

Browse files
committed
AMDGPU: Fold mov imm to copy to av_32 class
Previously we special-cased folding immediates into copies to AGPR_32 and ignored AV_32. Instead, try folding into the mov pseudos (V_ACCVGPR_WRITE_B32_e64 and AV_MOV_B32_IMM_PSEUDO). Not sure why the true16 case regressed.
1 parent 4454152 commit f79cef3

File tree

5 files changed

+403
-116
lines changed

5 files changed

+403
-116
lines changed

llvm/lib/Target/AMDGPU/SIFoldOperands.cpp

Lines changed: 10 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1260,30 +1260,13 @@ void SIFoldOperandsImpl::foldOperand(
12601260
return;
12611261

12621262
const TargetRegisterClass *DestRC = TRI->getRegClassForReg(*MRI, DestReg);
1263-
if (!DestReg.isPhysical() && DestRC == &AMDGPU::AGPR_32RegClass) {
1264-
std::optional<int64_t> UseImmVal = OpToFold.getEffectiveImmVal();
1265-
if (UseImmVal && TII->isInlineConstant(
1266-
*UseImmVal, AMDGPU::OPERAND_REG_INLINE_C_INT32)) {
1267-
UseMI->setDesc(TII->get(AMDGPU::V_ACCVGPR_WRITE_B32_e64));
1268-
UseMI->getOperand(1).ChangeToImmediate(*UseImmVal);
1269-
CopiesToReplace.push_back(UseMI);
1270-
return;
1271-
}
1272-
}
1273-
1274-
// Allow immediates COPYd into sgpr_lo16 to be further folded while
1275-
// still being legal if not further folded
1276-
if (DestRC == &AMDGPU::SGPR_LO16RegClass) {
1277-
assert(ST->useRealTrue16Insts());
1278-
MRI->setRegClass(DestReg, &AMDGPU::SGPR_32RegClass);
1279-
DestRC = &AMDGPU::SGPR_32RegClass;
1280-
}
12811263

12821264
// In order to fold immediates into copies, we need to change the copy to a
12831265
// MOV. Find a compatible mov instruction with the value.
12841266
for (unsigned MovOp :
12851267
{AMDGPU::S_MOV_B32, AMDGPU::V_MOV_B32_e32, AMDGPU::S_MOV_B64,
1286-
AMDGPU::V_MOV_B64_PSEUDO, AMDGPU::V_MOV_B16_t16_e64}) {
1268+
AMDGPU::V_MOV_B64_PSEUDO, AMDGPU::V_MOV_B16_t16_e64,
1269+
AMDGPU::V_ACCVGPR_WRITE_B32_e64, AMDGPU::AV_MOV_B32_IMM_PSEUDO}) {
12871270
const MCInstrDesc &MovDesc = TII->get(MovOp);
12881271
assert(MovDesc.getNumDefs() > 0 && MovDesc.operands()[0].RegClass != -1);
12891272

@@ -1315,6 +1298,14 @@ void SIFoldOperandsImpl::foldOperand(
13151298
UseMI->setDesc(MovDesc);
13161299

13171300
if (MovOp == AMDGPU::V_MOV_B16_t16_e64) {
1301+
// Allow immediates COPYd into sgpr_lo16 to be further folded while
1302+
// still being legal if not further folded
1303+
if (DestRC == &AMDGPU::SGPR_LO16RegClass) {
1304+
assert(ST->useRealTrue16Insts());
1305+
MRI->setRegClass(DestReg, &AMDGPU::SGPR_32RegClass);
1306+
DestRC = &AMDGPU::SGPR_32RegClass;
1307+
}
1308+
13181309
const auto &SrcOp = UseMI->getOperand(UseOpIdx);
13191310
MachineOperand NewSrcOp(SrcOp);
13201311
MachineFunction *MF = UseMI->getParent()->getParent();

llvm/test/CodeGen/AMDGPU/fold-imm-copy-agpr.mir

Lines changed: 330 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -91,8 +91,8 @@ body: |
9191
bb.0:
9292
; GCN-LABEL: name: v_mov_b64_pseudo_lit_copy_sub0_to_agpr_32
9393
; GCN: [[V_MOV_B:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 4290672329592, implicit $exec
94-
; GCN-NEXT: [[COPY:%[0-9]+]]:agpr_32 = COPY [[V_MOV_B]].sub0
95-
; GCN-NEXT: $agpr0 = COPY [[COPY]]
94+
; GCN-NEXT: [[V_ACCVGPR_WRITE_B32_e64_:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 [[V_MOV_B]].sub0, implicit $exec
95+
; GCN-NEXT: $agpr0 = COPY [[V_ACCVGPR_WRITE_B32_e64_]]
9696
; GCN-NEXT: S_ENDPGM 0
9797
%0:vreg_64_align2 = V_MOV_B64_PSEUDO 4290672329592, implicit $exec
9898
%1:agpr_32 = COPY %0.sub0
@@ -108,8 +108,8 @@ body: |
108108
bb.0:
109109
; GCN-LABEL: name: v_mov_b64_pseudo_lit_copy_sub1_to_agpr_32
110110
; GCN: [[V_MOV_B:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 4290672329592, implicit $exec
111-
; GCN-NEXT: [[COPY:%[0-9]+]]:agpr_32 = COPY [[V_MOV_B]].sub1
112-
; GCN-NEXT: $agpr0 = COPY [[COPY]]
111+
; GCN-NEXT: [[V_ACCVGPR_WRITE_B32_e64_:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 [[V_MOV_B]].sub1, implicit $exec
112+
; GCN-NEXT: $agpr0 = COPY [[V_ACCVGPR_WRITE_B32_e64_]]
113113
; GCN-NEXT: S_ENDPGM 0
114114
%0:vreg_64_align2 = V_MOV_B64_PSEUDO 4290672329592, implicit $exec
115115
%1:agpr_32 = COPY %0.sub1
@@ -133,3 +133,329 @@ body: |
133133
S_ENDPGM 0, implicit %1
134134
135135
...
136+
137+
---
138+
name: s_mov_b32_imm_0_copy_to_agpr_32
139+
tracksRegLiveness: true
140+
body: |
141+
bb.0:
142+
; GCN-LABEL: name: s_mov_b32_imm_0_copy_to_agpr_32
143+
; GCN: [[V_ACCVGPR_WRITE_B32_e64_:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 0, implicit $exec
144+
; GCN-NEXT: S_ENDPGM 0, implicit [[V_ACCVGPR_WRITE_B32_e64_]]
145+
%0:sreg_32 = S_MOV_B32 0, implicit $exec
146+
%1:agpr_32 = COPY %0
147+
S_ENDPGM 0, implicit %1
148+
149+
...
150+
151+
---
152+
name: s_mov_b32_imm_neg16_copy_to_agpr_32
153+
tracksRegLiveness: true
154+
body: |
155+
bb.0:
156+
; GCN-LABEL: name: s_mov_b32_imm_neg16_copy_to_agpr_32
157+
; GCN: [[V_ACCVGPR_WRITE_B32_e64_:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 -16, implicit $exec
158+
; GCN-NEXT: S_ENDPGM 0, implicit [[V_ACCVGPR_WRITE_B32_e64_]]
159+
%0:sreg_32 = S_MOV_B32 -16, implicit $exec
160+
%1:agpr_32 = COPY %0
161+
S_ENDPGM 0, implicit %1
162+
163+
...
164+
165+
---
166+
name: s_mov_b32_imm_65_copy_to_agpr_32
167+
tracksRegLiveness: true
168+
body: |
169+
bb.0:
170+
; GCN-LABEL: name: s_mov_b32_imm_65_copy_to_agpr_32
171+
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65, implicit $exec
172+
; GCN-NEXT: [[V_ACCVGPR_WRITE_B32_e64_:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 [[S_MOV_B32_]], implicit $exec
173+
; GCN-NEXT: S_ENDPGM 0, implicit [[V_ACCVGPR_WRITE_B32_e64_]]
174+
%0:sreg_32 = S_MOV_B32 65, implicit $exec
175+
%1:agpr_32 = COPY %0
176+
S_ENDPGM 0, implicit %1
177+
178+
...
179+
180+
---
181+
name: s_mov_b32_imm_0_copy_to_av_32
182+
tracksRegLiveness: true
183+
body: |
184+
bb.0:
185+
; GCN-LABEL: name: s_mov_b32_imm_0_copy_to_av_32
186+
; GCN: [[AV_MOV_:%[0-9]+]]:av_32 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
187+
; GCN-NEXT: S_ENDPGM 0, implicit [[AV_MOV_]]
188+
%0:sreg_32 = S_MOV_B32 0, implicit $exec
189+
%1:av_32 = COPY %0
190+
S_ENDPGM 0, implicit %1
191+
192+
...
193+
194+
---
195+
name: s_mov_b32_imm_neg16_copy_to_av_32
196+
tracksRegLiveness: true
197+
body: |
198+
bb.0:
199+
; GCN-LABEL: name: s_mov_b32_imm_neg16_copy_to_av_32
200+
; GCN: [[AV_MOV_:%[0-9]+]]:av_32 = AV_MOV_B32_IMM_PSEUDO -16, implicit $exec
201+
; GCN-NEXT: S_ENDPGM 0, implicit [[AV_MOV_]]
202+
%0:sreg_32 = S_MOV_B32 -16, implicit $exec
203+
%1:av_32 = COPY %0
204+
S_ENDPGM 0, implicit %1
205+
206+
...
207+
208+
---
209+
name: s_mov_b32_imm_65_copy_to_av_32
210+
tracksRegLiveness: true
211+
body: |
212+
bb.0:
213+
; GCN-LABEL: name: s_mov_b32_imm_65_copy_to_av_32
214+
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65, implicit $exec
215+
; GCN-NEXT: [[AV_MOV_:%[0-9]+]]:av_32 = AV_MOV_B32_IMM_PSEUDO [[S_MOV_B32_]], implicit $exec
216+
; GCN-NEXT: S_ENDPGM 0, implicit [[AV_MOV_]]
217+
%0:sreg_32 = S_MOV_B32 65, implicit $exec
218+
%1:av_32 = COPY %0
219+
S_ENDPGM 0, implicit %1
220+
221+
...
222+
223+
---
224+
name: s_mov_b64_imm_0_copy_to_areg_64
225+
tracksRegLiveness: true
226+
body: |
227+
bb.0:
228+
; GCN-LABEL: name: s_mov_b64_imm_0_copy_to_areg_64
229+
; GCN: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0, implicit $exec
230+
; GCN-NEXT: [[COPY:%[0-9]+]]:areg_64 = COPY [[S_MOV_B64_]]
231+
; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
232+
%0:sreg_64 = S_MOV_B64 0, implicit $exec
233+
%1:areg_64 = COPY %0
234+
S_ENDPGM 0, implicit %1
235+
236+
...
237+
238+
---
239+
name: s_mov_b64_imm_0_copy_to_areg_64_align2
240+
tracksRegLiveness: true
241+
body: |
242+
bb.0:
243+
; GCN-LABEL: name: s_mov_b64_imm_0_copy_to_areg_64_align2
244+
; GCN: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0, implicit $exec
245+
; GCN-NEXT: [[COPY:%[0-9]+]]:areg_64_align2 = COPY [[S_MOV_B64_]]
246+
; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
247+
%0:sreg_64 = S_MOV_B64 0, implicit $exec
248+
%1:areg_64_align2 = COPY %0
249+
S_ENDPGM 0, implicit %1
250+
251+
...
252+
253+
---
254+
name: s_mov_b64_imm_neg16_copy_to_areg_64
255+
tracksRegLiveness: true
256+
body: |
257+
bb.0:
258+
; GCN-LABEL: name: s_mov_b64_imm_neg16_copy_to_areg_64
259+
; GCN: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 -16, implicit $exec
260+
; GCN-NEXT: [[COPY:%[0-9]+]]:areg_64 = COPY [[S_MOV_B64_]]
261+
; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
262+
%0:sreg_64 = S_MOV_B64 -16, implicit $exec
263+
%1:areg_64 = COPY %0
264+
S_ENDPGM 0, implicit %1
265+
266+
...
267+
268+
---
269+
name: s_mov_b64_imm_neg16_copy_to_areg_64_align2
270+
tracksRegLiveness: true
271+
body: |
272+
bb.0:
273+
; GCN-LABEL: name: s_mov_b64_imm_neg16_copy_to_areg_64_align2
274+
; GCN: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 -16, implicit $exec
275+
; GCN-NEXT: [[COPY:%[0-9]+]]:areg_64_align2 = COPY [[S_MOV_B64_]]
276+
; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
277+
%0:sreg_64 = S_MOV_B64 -16, implicit $exec
278+
%1:areg_64_align2 = COPY %0
279+
S_ENDPGM 0, implicit %1
280+
281+
...
282+
283+
---
284+
name: s_mov_b64_imm_0_copy_to_av_64
285+
tracksRegLiveness: true
286+
body: |
287+
bb.0:
288+
; GCN-LABEL: name: s_mov_b64_imm_0_copy_to_av_64
289+
; GCN: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0, implicit $exec
290+
; GCN-NEXT: [[COPY:%[0-9]+]]:av_64 = COPY [[S_MOV_B64_]]
291+
; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
292+
%0:sreg_64 = S_MOV_B64 0, implicit $exec
293+
%1:av_64 = COPY %0
294+
S_ENDPGM 0, implicit %1
295+
296+
...
297+
298+
---
299+
name: s_mov_b64_imm_0_copy_to_av_64_align2
300+
tracksRegLiveness: true
301+
body: |
302+
bb.0:
303+
; GCN-LABEL: name: s_mov_b64_imm_0_copy_to_av_64_align2
304+
; GCN: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0, implicit $exec
305+
; GCN-NEXT: [[COPY:%[0-9]+]]:av_64_align2 = COPY [[S_MOV_B64_]]
306+
; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
307+
%0:sreg_64 = S_MOV_B64 0, implicit $exec
308+
%1:av_64_align2 = COPY %0
309+
S_ENDPGM 0, implicit %1
310+
311+
...
312+
313+
---
314+
name: s_mov_b64_imm_neg16_copy_to_av_64
315+
tracksRegLiveness: true
316+
body: |
317+
bb.0:
318+
; GCN-LABEL: name: s_mov_b64_imm_neg16_copy_to_av_64
319+
; GCN: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 -16, implicit $exec
320+
; GCN-NEXT: [[COPY:%[0-9]+]]:av_64 = COPY [[S_MOV_B64_]]
321+
; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
322+
%0:sreg_64 = S_MOV_B64 -16, implicit $exec
323+
%1:av_64 = COPY %0
324+
S_ENDPGM 0, implicit %1
325+
326+
...
327+
328+
---
329+
name: s_mov_b64_imm_neg16_copy_to_av_64_align2
330+
tracksRegLiveness: true
331+
body: |
332+
bb.0:
333+
; GCN-LABEL: name: s_mov_b64_imm_neg16_copy_to_av_64_align2
334+
; GCN: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 -16, implicit $exec
335+
; GCN-NEXT: [[COPY:%[0-9]+]]:av_64_align2 = COPY [[S_MOV_B64_]]
336+
; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
337+
%0:sreg_64 = S_MOV_B64 -16, implicit $exec
338+
%1:av_64_align2 = COPY %0
339+
S_ENDPGM 0, implicit %1
340+
341+
...
342+
343+
---
344+
name: s_mov_b64_imm_pseudo_literal_32_halves_copy_to_areg_64
345+
tracksRegLiveness: true
346+
body: |
347+
bb.0:
348+
; GCN-LABEL: name: s_mov_b64_imm_pseudo_literal_32_halves_copy_to_areg_64
349+
; GCN: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -42949672960, implicit $exec
350+
; GCN-NEXT: [[COPY:%[0-9]+]]:areg_64 = COPY [[S_MOV_B]]
351+
; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
352+
%0:sreg_64 = S_MOV_B64_IMM_PSEUDO 18446744030759878656, implicit $exec
353+
%1:areg_64 = COPY %0
354+
S_ENDPGM 0, implicit %1
355+
356+
...
357+
358+
---
359+
name: s_mov_b64_imm_pseudo_literal_32_halves_copy_to_areg_64_align2
360+
tracksRegLiveness: true
361+
body: |
362+
bb.0:
363+
; GCN-LABEL: name: s_mov_b64_imm_pseudo_literal_32_halves_copy_to_areg_64_align2
364+
; GCN: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -42949672960, implicit $exec
365+
; GCN-NEXT: [[COPY:%[0-9]+]]:areg_64_align2 = COPY [[S_MOV_B]]
366+
; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
367+
%0:sreg_64 = S_MOV_B64_IMM_PSEUDO 18446744030759878656, implicit $exec
368+
%1:areg_64_align2 = COPY %0
369+
S_ENDPGM 0, implicit %1
370+
371+
...
372+
373+
---
374+
name: s_mov_b64_imm_pseudo_inlineimm_32_halves_copy_to_areg_64
375+
tracksRegLiveness: true
376+
body: |
377+
bb.0:
378+
; GCN-LABEL: name: s_mov_b64_imm_pseudo_inlineimm_32_halves_copy_to_areg_64
379+
; GCN: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -21474836480, implicit $exec
380+
; GCN-NEXT: [[COPY:%[0-9]+]]:areg_64 = COPY [[S_MOV_B]]
381+
; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
382+
%0:sreg_64 = S_MOV_B64_IMM_PSEUDO 18446744052234715136, implicit $exec
383+
%1:areg_64 = COPY %0
384+
S_ENDPGM 0, implicit %1
385+
386+
...
387+
388+
---
389+
name: s_mov_b64_imm_pseudo_inlineimm_32_halves_copy_to_areg_64_align2
390+
tracksRegLiveness: true
391+
body: |
392+
bb.0:
393+
; GCN-LABEL: name: s_mov_b64_imm_pseudo_inlineimm_32_halves_copy_to_areg_64_align2
394+
; GCN: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -21474836480, implicit $exec
395+
; GCN-NEXT: [[COPY:%[0-9]+]]:areg_64_align2 = COPY [[S_MOV_B]]
396+
; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
397+
%0:sreg_64 = S_MOV_B64_IMM_PSEUDO 18446744052234715136, implicit $exec
398+
%1:areg_64_align2 = COPY %0
399+
S_ENDPGM 0, implicit %1
400+
401+
...
402+
403+
---
404+
name: s_mov_b64_imm_pseudo_literal_32_halves_copy_to_av_64
405+
tracksRegLiveness: true
406+
body: |
407+
bb.0:
408+
; GCN-LABEL: name: s_mov_b64_imm_pseudo_literal_32_halves_copy_to_av_64
409+
; GCN: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -42949672960, implicit $exec
410+
; GCN-NEXT: [[COPY:%[0-9]+]]:av_64 = COPY [[S_MOV_B]]
411+
; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
412+
%0:sreg_64 = S_MOV_B64_IMM_PSEUDO 18446744030759878656, implicit $exec
413+
%1:av_64 = COPY %0
414+
S_ENDPGM 0, implicit %1
415+
416+
...
417+
418+
---
419+
name: s_mov_b64_imm_pseudo_literal_32_halves_copy_to_av_64_align2
420+
tracksRegLiveness: true
421+
body: |
422+
bb.0:
423+
; GCN-LABEL: name: s_mov_b64_imm_pseudo_literal_32_halves_copy_to_av_64_align2
424+
; GCN: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -42949672960, implicit $exec
425+
; GCN-NEXT: [[COPY:%[0-9]+]]:av_64_align2 = COPY [[S_MOV_B]]
426+
; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
427+
%0:sreg_64 = S_MOV_B64_IMM_PSEUDO 18446744030759878656, implicit $exec
428+
%1:av_64_align2 = COPY %0
429+
S_ENDPGM 0, implicit %1
430+
431+
...
432+
433+
---
434+
name: s_mov_b64_imm_pseudo_inlineimm_32_halves_copy_to_av_64
435+
tracksRegLiveness: true
436+
body: |
437+
bb.0:
438+
; GCN-LABEL: name: s_mov_b64_imm_pseudo_inlineimm_32_halves_copy_to_av_64
439+
; GCN: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -9223372036854775784, implicit $exec
440+
; GCN-NEXT: [[COPY:%[0-9]+]]:av_64 = COPY [[S_MOV_B]]
441+
; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
442+
%0:sreg_64 = S_MOV_B64_IMM_PSEUDO -9223372036854775784, implicit $exec
443+
%1:av_64 = COPY %0
444+
S_ENDPGM 0, implicit %1
445+
446+
...
447+
448+
---
449+
name: s_mov_b64_imm_pseudo_inlineimm_32_halves_copy_to_av_64_align2
450+
tracksRegLiveness: true
451+
body: |
452+
bb.0:
453+
; GCN-LABEL: name: s_mov_b64_imm_pseudo_inlineimm_32_halves_copy_to_av_64_align2
454+
; GCN: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -9223372036854775784, implicit $exec
455+
; GCN-NEXT: [[COPY:%[0-9]+]]:av_64_align2 = COPY [[S_MOV_B]]
456+
; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
457+
%0:sreg_64 = S_MOV_B64_IMM_PSEUDO -9223372036854775784, implicit $exec
458+
%1:av_64_align2 = COPY %0
459+
S_ENDPGM 0, implicit %1
460+
461+
...

0 commit comments

Comments
 (0)