Skip to content

Commit ee340d6

Browse files
Ana Mihajlovic
authored and committed
update tests
1 parent 44c4fb0 commit ee340d6

13 files changed

+3297
-2896
lines changed

llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp

Lines changed: 55 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ class SIShrinkInstructions {
5353
unsigned SubReg) const;
5454
Register trySwapCndOperands(MachineInstr &MI) const;
5555
bool
56-
shouldSwapCndOperands(MachineInstr &MI, const SIInstrInfo &TII,
56+
shouldSwapCndOperands(MachineInstr &MI,
5757
SmallVector<MachineOperand *, 4> &UsesToProcess) const;
5858
unsigned getInverseCompareOpcode(MachineInstr &MI) const;
5959
TargetInstrInfo::RegSubRegPair getSubRegForIndex(Register Reg, unsigned Sub,
@@ -871,45 +871,76 @@ unsigned SIShrinkInstructions::getInverseCompareOpcode(MachineInstr &MI) const {
871871
return AMDGPU::V_CMP_EQ_F32_e64;
872872
case AMDGPU::V_CMP_GE_F32_e64:
873873
return AMDGPU::V_CMP_NGE_F32_e64;
874+
case AMDGPU::V_CMP_NGE_F32_e64:
875+
return AMDGPU::V_CMP_GE_F32_e64;
874876
case AMDGPU::V_CMP_LE_F32_e64:
875877
return AMDGPU::V_CMP_NLE_F32_e64;
878+
case AMDGPU::V_CMP_NLE_F32_e64:
879+
return AMDGPU::V_CMP_LE_F32_e64;
876880
case AMDGPU::V_CMP_GT_F32_e64:
877881
return AMDGPU::V_CMP_NGT_F32_e64;
882+
case AMDGPU::V_CMP_NGT_F32_e64:
883+
return AMDGPU::V_CMP_GT_F32_e64;
878884
case AMDGPU::V_CMP_LT_F32_e64:
879885
return AMDGPU::V_CMP_NLT_F32_e64;
886+
case AMDGPU::V_CMP_NLT_F32_e64:
887+
return AMDGPU::V_CMP_LT_F32_e64;
888+
case AMDGPU::V_CMP_LG_F32_e64:
889+
return AMDGPU::V_CMP_NLG_F32_e64;
890+
case AMDGPU::V_CMP_NLG_F32_e64:
891+
return AMDGPU::V_CMP_LG_F32_e64;
892+
case AMDGPU::V_CMP_O_F32_e64:
893+
return AMDGPU::V_CMP_U_F32_e64;
894+
case AMDGPU::V_CMP_U_F32_e64:
895+
return AMDGPU::V_CMP_O_F32_e64;
880896
// float 64
881897
case AMDGPU::V_CMP_EQ_F64_e64:
882898
return AMDGPU::V_CMP_NEQ_F64_e64;
883899
case AMDGPU::V_CMP_NEQ_F64_e64:
884900
return AMDGPU::V_CMP_EQ_F64_e64;
885901
case AMDGPU::V_CMP_GE_F64_e64:
886902
return AMDGPU::V_CMP_NGE_F64_e64;
903+
case AMDGPU::V_CMP_NGE_F64_e64:
904+
return AMDGPU::V_CMP_GE_F64_e64;
887905
case AMDGPU::V_CMP_LE_F64_e64:
888906
return AMDGPU::V_CMP_NLE_F64_e64;
907+
case AMDGPU::V_CMP_NLE_F64_e64:
908+
return AMDGPU::V_CMP_LE_F64_e64;
889909
case AMDGPU::V_CMP_GT_F64_e64:
890910
return AMDGPU::V_CMP_NGT_F64_e64;
911+
case AMDGPU::V_CMP_NGT_F64_e64:
912+
return AMDGPU::V_CMP_GT_F64_e64;
891913
case AMDGPU::V_CMP_LT_F64_e64:
892914
return AMDGPU::V_CMP_NLT_F64_e64;
915+
case AMDGPU::V_CMP_NLT_F64_e64:
916+
return AMDGPU::V_CMP_LT_F64_e64;
917+
case AMDGPU::V_CMP_LG_F64_e64:
918+
return AMDGPU::V_CMP_NLG_F64_e64;
919+
case AMDGPU::V_CMP_NLG_F64_e64:
920+
return AMDGPU::V_CMP_LG_F64_e64;
921+
case AMDGPU::V_CMP_O_F64_e64:
922+
return AMDGPU::V_CMP_U_F64_e64;
923+
case AMDGPU::V_CMP_U_F64_e64:
924+
return AMDGPU::V_CMP_O_F64_e64;
893925
default:
894926
return 0;
895927
}
896928
}
897929

898930
bool SIShrinkInstructions::shouldSwapCndOperands(
899-
MachineInstr &MI, const SIInstrInfo &TII,
900-
SmallVector<MachineOperand *, 4> &UsesToProcess) const {
931+
MachineInstr &MI, SmallVector<MachineOperand *, 4> &UsesToProcess) const {
901932
auto AllUses = MRI->use_nodbg_operands(MI.getOperand(0).getReg());
902933
bool ShouldSwap = false;
903934

904935
for (auto &Use : AllUses) {
905936
MachineInstr *UseInst = Use.getParent();
906937
if (UseInst->getOpcode() != AMDGPU::V_CNDMASK_B32_e64)
907938
return false;
908-
MachineOperand *Src0 = TII.getNamedOperand(*UseInst, AMDGPU::OpName::src0);
909-
MachineOperand *Src1 = TII.getNamedOperand(*UseInst, AMDGPU::OpName::src1);
939+
MachineOperand &Src0 = UseInst->getOperand(2);
940+
MachineOperand &Src1 = UseInst->getOperand(4);
910941

911-
auto Src0Imm = Src0->isImm();
912-
auto Src1Imm = Src1->isImm();
942+
bool Src0Imm = Src0.isImm();
943+
bool Src1Imm = Src1.isImm();
913944

914945
if (!Src1Imm && Src0Imm)
915946
return false;
@@ -922,32 +953,30 @@ bool SIShrinkInstructions::shouldSwapCndOperands(
922953
return ShouldSwap;
923954
}
924955

925-
void swapCndOperands(MachineInstr &MI) {
926-
MachineOperand Op2 = MI.getOperand(2);
956+
static void swapCndOperands(MachineInstr &MI) {
957+
MachineOperand &Op2 = MI.getOperand(2);
927958
MachineOperand Op4 = MI.getOperand(4);
928959

929960
if (Op2.isReg()) {
930961
MI.getOperand(4).ChangeToRegister(
931962
Op2.getReg(), Op2.isDef(), Op2.isImplicit(), Op2.isKill(), Op2.isDead(),
932963
Op2.isUndef(), Op2.isDebug());
933-
if (Op2.getSubReg() != AMDGPU::NoSubRegister)
934-
MI.getOperand(4).setSubReg(Op2.getSubReg());
964+
MI.getOperand(4).setSubReg(Op2.getSubReg());
935965
} else if (Op2.isImm()) {
936966
MI.getOperand(4).ChangeToImmediate(Op2.getImm());
937967
}
938968

939969
if (Op4.isReg()) {
940-
MI.getOperand(2).setReg(Op4.getReg());
941-
if (Op4.getSubReg() != AMDGPU::NoSubRegister)
942-
MI.getOperand(2).setSubReg(Op4.getSubReg());
970+
Op2.setReg(Op4.getReg());
971+
Op2.setSubReg(Op4.getSubReg());
943972
} else if (Op4.isImm()) {
944-
MI.getOperand(2).ChangeToImmediate(Op4.getImm());
973+
Op2.ChangeToImmediate(Op4.getImm());
945974
}
946975

947-
MachineOperand Op1 = MI.getOperand(1);
948-
MachineOperand Op3 = MI.getOperand(3);
949-
MI.getOperand(1).setImm(Op3.getImm());
950-
MI.getOperand(3).setImm(Op1.getImm());
976+
auto Op1Imm = MI.getOperand(1).getImm();
977+
auto Op3Imm = MI.getOperand(3).getImm();
978+
MI.getOperand(1).setImm(Op3Imm);
979+
MI.getOperand(3).setImm(Op1Imm);
951980
}
952981

953982
Register SIShrinkInstructions::trySwapCndOperands(MachineInstr &MI) const {
@@ -956,8 +985,8 @@ Register SIShrinkInstructions::trySwapCndOperands(MachineInstr &MI) const {
956985
unsigned Opcode = getInverseCompareOpcode(MI);
957986
SmallVector<MachineOperand *, 4> UsesToProcess;
958987
if (!Opcode ||
959-
!SIShrinkInstructions::shouldSwapCndOperands(MI, *TII, UsesToProcess))
960-
return AMDGPU::NoRegister;
988+
!SIShrinkInstructions::shouldSwapCndOperands(MI, UsesToProcess))
989+
return Reg;
961990

962991
auto DL = MI.getDebugLoc();
963992
Register NewVCC = MRI->createVirtualRegister(MRI->getRegClass(Reg));
@@ -967,11 +996,11 @@ Register SIShrinkInstructions::trySwapCndOperands(MachineInstr &MI) const {
967996
InverseCompare->setFlags(MI.getFlags());
968997

969998
unsigned OpNum = MI.getNumExplicitOperands();
970-
for (unsigned i = 1; i < OpNum; i++) {
971-
MachineOperand Op = MI.getOperand(i);
999+
for (unsigned Idx = 1; Idx < OpNum; Idx++) {
1000+
MachineOperand Op = MI.getOperand(Idx);
9721001
InverseCompare.add(Op);
9731002
if (Op.isReg() && Op.isKill())
974-
InverseCompare->getOperand(i).setIsKill(false);
1003+
InverseCompare->getOperand(Idx).setIsKill(false);
9751004
}
9761005

9771006
for (auto &Use : UsesToProcess) {
@@ -995,6 +1024,7 @@ bool SIShrinkInstructions::run(MachineFunction &MF) {
9951024
unsigned VCCReg = ST->isWave32() ? AMDGPU::VCC_LO : AMDGPU::VCC;
9961025

9971026
std::vector<unsigned> I1Defs;
1027+
9981028
for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
9991029
BI != BE; ++BI) {
10001030

@@ -1153,6 +1183,7 @@ bool SIShrinkInstructions::run(MachineFunction &MF) {
11531183
// dst.
11541184
Register DstReg = Op0.getReg();
11551185
if (DstReg.isVirtual()) {
1186+
DstReg = trySwapCndOperands(MI);
11561187
// VOPC instructions can only write to the VCC register. We can't
11571188
// force them to use VCC here, because this is only one register and
11581189
// cannot deal with sequences which would require multiple copies of
@@ -1162,9 +1193,6 @@ bool SIShrinkInstructions::run(MachineFunction &MF) {
11621193
// provide a hint to the register allocator to use VCC and then we
11631194
// will run this pass again after RA and shrink it if it outputs to
11641195
// VCC.
1165-
Register NewVCC = trySwapCndOperands(MI);
1166-
DstReg = NewVCC == AMDGPU::NoRegister ? DstReg : NewVCC;
1167-
11681196
MRI->setRegAllocationHint(DstReg, 0, VCCReg);
11691197
continue;
11701198
}

0 commit comments

Comments
 (0)