Skip to content

Commit 836f1e2

Browse files
committed
Merging r359891:
------------------------------------------------------------------------
r359891 | arsenm | 2019-05-03 07:40:10 -0700 (Fri, 03 May 2019) | 9 lines

AMDGPU: Replace shrunk instruction with dummy implicit_def

This was broken if the original operand was killed. The kill flag
would appear on both instructions, and fail the verifier. Keep the
kill flag, but remove the operands from the old instruction. This has
an added benefit of really reducing the use count for future folds.

Ideally the pass would be structured more like what PeepholeOptimizer
does, making this hack to avoid breaking instruction iterators unnecessary.
------------------------------------------------------------------------

llvm-svn: 362634
1 parent 0489682 commit 836f1e2

File tree

2 files changed

+64
-4
lines changed

2 files changed

+64
-4
lines changed

llvm/lib/Target/AMDGPU/SIFoldOperands.cpp

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -218,8 +218,6 @@ static bool updateOperand(FoldCandidate &Fold,
218218

219219
const TargetRegisterClass *Dst0RC = MRI.getRegClass(Dst0.getReg());
220220
unsigned NewReg0 = MRI.createVirtualRegister(Dst0RC);
221-
const TargetRegisterClass *Dst1RC = MRI.getRegClass(Dst1.getReg());
222-
unsigned NewReg1 = MRI.createVirtualRegister(Dst1RC);
223221

224222
MachineInstr *Inst32 = TII.buildShrunkInst(*MI, Op32);
225223

@@ -229,9 +227,15 @@ static bool updateOperand(FoldCandidate &Fold,
229227
}
230228

231229
// Keep the old instruction around to avoid breaking iterators, but
232-
// replace the outputs with dummy registers.
230+
// replace it with a dummy instruction to remove uses.
231+
//
232+
// FIXME: We should not invert how this pass looks at operands to avoid
233+
// this. Should track set of foldable movs instead of looking for uses
234+
// when looking at a use.
233235
Dst0.setReg(NewReg0);
234-
Dst1.setReg(NewReg1);
236+
for (unsigned I = MI->getNumOperands() - 1; I > 0; --I)
237+
MI->RemoveOperand(I);
238+
MI->setDesc(TII.get(AMDGPU::IMPLICIT_DEF));
235239

236240
if (Fold.isCommuted())
237241
TII.commuteInstruction(*Inst32, false);

llvm/test/CodeGen/AMDGPU/fold-immediate-operand-shrink.mir

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -590,3 +590,59 @@ body: |
590590
S_ENDPGM implicit %2
591591
592592
...
593+
594+
---
595+
name: shrink_add_kill_flags_src0
596+
tracksRegLiveness: true
597+
body: |
598+
bb.0:
599+
liveins: $vgpr0
600+
; GCN-LABEL: name: shrink_add_kill_flags_src0
601+
; GCN: liveins: $vgpr0
602+
; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
603+
; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 518144, implicit $exec
604+
; GCN: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 killed [[V_MOV_B32_e32_]], [[COPY]], implicit-def $vcc, implicit $exec
605+
; GCN: S_ENDPGM 0, implicit [[V_ADD_I32_e32_]]
606+
%0:vgpr_32 = COPY $vgpr0
607+
%1:vgpr_32 = V_MOV_B32_e32 518144, implicit $exec
608+
%2:vgpr_32, %3:sreg_64_xexec = V_ADD_I32_e64 killed %1, %0, 0, implicit $exec
609+
S_ENDPGM 0, implicit %2
610+
...
611+
612+
---
613+
name: shrink_add_kill_flags_src1
614+
tracksRegLiveness: true
615+
body: |
616+
bb.0:
617+
liveins: $vgpr0
618+
; GCN-LABEL: name: shrink_add_kill_flags_src1
619+
; GCN: liveins: $vgpr0
620+
; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
621+
; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 518144, implicit $exec
622+
; GCN: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[V_MOV_B32_e32_]], killed [[COPY]], implicit-def $vcc, implicit $exec
623+
; GCN: S_ENDPGM 0, implicit [[V_ADD_I32_e32_]]
624+
%0:vgpr_32 = COPY $vgpr0
625+
%1:vgpr_32 = V_MOV_B32_e32 518144, implicit $exec
626+
%2:vgpr_32, %3:sreg_64_xexec = V_ADD_I32_e64 %1, killed %0, 0, implicit $exec
627+
S_ENDPGM 0, implicit %2
628+
...
629+
630+
---
631+
name: shrink_addc_kill_flags_src2
632+
tracksRegLiveness: true
633+
body: |
634+
bb.0:
635+
liveins: $vgpr0, $vcc
636+
; GCN-LABEL: name: shrink_addc_kill_flags_src2
637+
; GCN: liveins: $vgpr0, $vcc
638+
; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
639+
; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 518144, implicit $exec
640+
; GCN: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY $vcc
641+
; GCN: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[V_MOV_B32_e32_]], [[COPY]], [[COPY1]], 0, implicit $exec
642+
; GCN: S_ENDPGM 0, implicit [[V_ADDC_U32_e64_]]
643+
%0:vgpr_32 = COPY $vgpr0
644+
%1:vgpr_32 = V_MOV_B32_e32 518144, implicit $exec
645+
%2:sreg_64_xexec = COPY $vcc
646+
%3:vgpr_32, %4:sreg_64_xexec = V_ADDC_U32_e64 %1, %0, %2, 0, implicit $exec
647+
S_ENDPGM 0, implicit %3
648+
...

0 commit comments

Comments
 (0)