
Commit fc37dcc

AMDGPU: More consistently use the fold list instead of direct mutation
There were two parallel fold-check mechanisms, so consistently use the fold list. The worklist management here is still not good: other types of folds are not using it, and we should probably rewrite the pass to look more like peephole-opt.

This should be an alternative fix to skipping the commute when the operands are the same (#127562). The new test is not broken as-is, but it demonstrates failures in a future patch.
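To make the direction of the change concrete, below is a minimal, standalone sketch of the "record the fold, apply it later" pattern that tryToFoldACImm now follows, using hypothetical stand-in types (Operand, Instr, and the simplified FoldCandidate here are illustrative only, not the LLVM classes); this is not the pass's actual implementation.

#include <cstdint>
#include <optional>
#include <vector>

// Hypothetical stand-ins for MachineOperand/MachineInstr/FoldCandidate.
struct Operand {
  std::optional<int64_t> Imm; // set when the operand is an immediate
};
struct Instr {
  std::vector<Operand> Operands;
};
struct FoldCandidate {
  Instr *UseMI;      // instruction being folded into
  unsigned UseOpIdx; // operand slot to rewrite
  int64_t Imm;       // immediate to fold in
};

// Instead of mutating UseMI directly (the old ChangeToImmediate-style path),
// record the intended fold so one later pass over the list applies it.
bool tryFoldImm(Instr &UseMI, unsigned UseOpIdx, const Operand &OpToFold,
                std::vector<FoldCandidate> &FoldList) {
  if (!OpToFold.Imm)
    return false;
  FoldList.push_back({&UseMI, UseOpIdx, *OpToFold.Imm});
  return true;
}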
1 parent cd10c01 · commit fc37dcc

2 files changed (+67, -18 lines)
llvm/lib/Target/AMDGPU/SIFoldOperands.cpp

Lines changed: 4 additions & 18 deletions

@@ -114,7 +114,7 @@ class SIFoldOperandsImpl {
   bool
   getRegSeqInit(SmallVectorImpl<std::pair<MachineOperand *, unsigned>> &Defs,
                 Register UseReg, uint8_t OpTy) const;
-  bool tryToFoldACImm(const MachineOperand &OpToFold, MachineInstr *UseMI,
+  bool tryToFoldACImm(MachineOperand &OpToFold, MachineInstr *UseMI,
                       unsigned UseOpIdx,
                       SmallVectorImpl<FoldCandidate> &FoldList) const;
   void foldOperand(MachineOperand &OpToFold,
@@ -573,11 +573,6 @@ bool SIFoldOperandsImpl::updateOperand(FoldCandidate &Fold) const {
   return true;
 }
 
-static bool isUseMIInFoldList(ArrayRef<FoldCandidate> FoldList,
-                              const MachineInstr *MI) {
-  return any_of(FoldList, [&](const auto &C) { return C.UseMI == MI; });
-}
-
 static void appendFoldCandidate(SmallVectorImpl<FoldCandidate> &FoldList,
                                 MachineInstr *MI, unsigned OpNo,
                                 MachineOperand *FoldOp, bool Commuted = false,
@@ -678,12 +673,6 @@ bool SIFoldOperandsImpl::tryAddToFoldList(
     }
   }
 
-  // If we are already folding into another operand of MI, then
-  // we can't commute the instruction, otherwise we risk making the
-  // other fold illegal.
-  if (isUseMIInFoldList(FoldList, MI))
-    return false;
-
   // Operand is not legal, so try to commute the instruction to
   // see if this makes it possible to fold.
   unsigned CommuteOpNo = TargetInstrInfo::CommuteAnyOperandIndex;
@@ -814,7 +803,7 @@ bool SIFoldOperandsImpl::getRegSeqInit(
 }
 
 bool SIFoldOperandsImpl::tryToFoldACImm(
-    const MachineOperand &OpToFold, MachineInstr *UseMI, unsigned UseOpIdx,
+    MachineOperand &OpToFold, MachineInstr *UseMI, unsigned UseOpIdx,
     SmallVectorImpl<FoldCandidate> &FoldList) const {
   const MCInstrDesc &Desc = UseMI->getDesc();
   if (UseOpIdx >= Desc.getNumOperands())
@@ -825,7 +814,7 @@ bool SIFoldOperandsImpl::tryToFoldACImm(
 
   uint8_t OpTy = Desc.operands()[UseOpIdx].OperandType;
   if (OpToFold.isImm() && TII->isOperandLegal(*UseMI, UseOpIdx, &OpToFold)) {
-    UseMI->getOperand(UseOpIdx).ChangeToImmediate(OpToFold.getImm());
+    appendFoldCandidate(FoldList, UseMI, UseOpIdx, &OpToFold);
     return true;
   }
 
@@ -836,16 +825,13 @@ bool SIFoldOperandsImpl::tryToFoldACImm(
   if (!UseReg.isVirtual())
     return false;
 
-  if (isUseMIInFoldList(FoldList, UseMI))
-    return false;
-
   // Maybe it is just a COPY of an immediate itself.
   MachineInstr *Def = MRI->getVRegDef(UseReg);
   MachineOperand &UseOp = UseMI->getOperand(UseOpIdx);
   if (!UseOp.getSubReg() && Def && TII->isFoldableCopy(*Def)) {
     MachineOperand &DefOp = Def->getOperand(1);
     if (DefOp.isImm() && TII->isOperandLegal(*UseMI, UseOpIdx, &DefOp)) {
-      UseMI->getOperand(UseOpIdx).ChangeToImmediate(DefOp.getImm());
+      appendFoldCandidate(FoldList, UseMI, UseOpIdx, &DefOp);
       return true;
     }
   }
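Continuing the sketch from above (same hypothetical stand-in types, not the pass's real updateOperand), the point of queuing through the list is that a single loop applies every recorded fold, so later legality and commute decisions can consult one shared list instead of depending on operands that have already been mutated in place.

// Apply every queued fold in one place, then drop the entries.
void applyFolds(std::vector<FoldCandidate> &FoldList) {
  for (const FoldCandidate &Fold : FoldList)
    Fold.UseMI->Operands[Fold.UseOpIdx].Imm = Fold.Imm;
  FoldList.clear();
}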
Lines changed: 63 additions & 0 deletions

@@ -0,0 +1,63 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -run-pass=peephole-opt,si-fold-operands -o - %s | FileCheck %s
+
+# Check for assert when trying later folds after commuting an
+# instruction where both operands are the same register. This depended
+# on use list ordering, so we need to run peephole-opt first to
+# reproduce.
+
+---
+name: commute_add_same_inputs_assert
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+
+    ; CHECK-LABEL: name: commute_add_same_inputs_assert
+    ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; CHECK-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 0, killed [[COPY]], implicit-def $vcc, implicit $exec
+    ; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; CHECK-NEXT: [[V_ADDC_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADDC_U32_e32 0, [[V_MOV_B32_e32_]], implicit-def $vcc, implicit $vcc, implicit $exec
+    ; CHECK-NEXT: $vgpr4 = COPY [[V_ADDC_U32_e32_]]
+    ; CHECK-NEXT: SI_RETURN implicit $vgpr4
+    %0:vgpr_32 = COPY $vgpr0
+    %1:sreg_64 = S_MOV_B64 0
+    %2:sreg_32 = COPY %1.sub0
+    %3:vgpr_32 = V_ADD_CO_U32_e32 %2, killed %0, implicit-def $vcc, implicit $exec
+    %4:vgpr_32 = COPY %2
+    %5:vgpr_32 = COPY %2
+    %6:vgpr_32 = V_ADDC_U32_e32 %4, %5, implicit-def $vcc, implicit $vcc, implicit $exec
+    $vgpr4 = COPY %6
+    SI_RETURN implicit $vgpr4
+
+...
+
+---
+name: commute_sub_same_inputs_assert
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+
+    ; CHECK-LABEL: name: commute_sub_same_inputs_assert
+    ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; CHECK-NEXT: [[V_SUB_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_SUB_CO_U32_e32 0, killed [[COPY]], implicit-def $vcc, implicit $exec
+    ; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; CHECK-NEXT: [[V_SUBBREV_U32_e32_:%[0-9]+]]:vgpr_32 = V_SUBBREV_U32_e32 0, [[V_MOV_B32_e32_]], implicit-def $vcc, implicit $vcc, implicit $exec
+    ; CHECK-NEXT: $vgpr4 = COPY [[V_SUBBREV_U32_e32_]]
+    ; CHECK-NEXT: SI_RETURN implicit $vgpr4
+    %0:vgpr_32 = COPY $vgpr0
+    %1:sreg_64 = S_MOV_B64 0
+    %2:sreg_32 = COPY %1.sub0
+    %3:vgpr_32 = V_SUB_CO_U32_e32 %2, killed %0, implicit-def $vcc, implicit $exec
+    %4:vgpr_32 = COPY %2
+    %5:vgpr_32 = COPY %2
+    %6:vgpr_32 = V_SUBB_U32_e32 %5, %4, implicit-def $vcc, implicit $vcc, implicit $exec
+    $vgpr4 = COPY %6
+    SI_RETURN implicit $vgpr4
+
+...
