Skip to content

Commit 44c4fb0

Browse files
Ana Mihajlovic
authored and committed
covered 64-bit case, updated tests, added operand swapping instead of creating a new instruction
1 parent e85470a commit 44c4fb0

File tree

13 files changed

+626
-513
lines changed

13 files changed

+626
-513
lines changed

llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp

Lines changed: 111 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -51,10 +51,10 @@ class SIShrinkInstructions {
5151
unsigned SubReg) const;
5252
bool instModifiesReg(const MachineInstr *MI, unsigned Reg,
5353
unsigned SubReg) const;
54-
bool trySwitchOperands(MachineInstr &MI, Register *OldVCC,
55-
Register *NewVCC) const;
56-
bool shouldSwitchOperands(MachineRegisterInfo &MRI, MachineInstr &MI,
57-
const SIInstrInfo &TII) const;
54+
Register trySwapCndOperands(MachineInstr &MI) const;
55+
bool
56+
shouldSwapCndOperands(MachineInstr &MI, const SIInstrInfo &TII,
57+
SmallVector<MachineOperand *, 4> &UsesToProcess) const;
5858
unsigned getInverseCompareOpcode(MachineInstr &MI) const;
5959
TargetInstrInfo::RegSubRegPair getSubRegForIndex(Register Reg, unsigned Sub,
6060
unsigned I) const;
@@ -851,92 +851,137 @@ unsigned SIShrinkInstructions::getInverseCompareOpcode(MachineInstr &MI) const {
851851
return AMDGPU::V_CMP_LE_U32_e64;
852852
case AMDGPU::V_CMP_LT_U32_e64:
853853
return AMDGPU::V_CMP_GE_U32_e64;
854-
// float 32
854+
// unsigned 64
855+
case AMDGPU::V_CMP_EQ_U64_e64:
856+
return AMDGPU::V_CMP_NE_U64_e64;
857+
case AMDGPU::V_CMP_NE_U64_e64:
858+
return AMDGPU::V_CMP_EQ_U64_e64;
859+
case AMDGPU::V_CMP_GE_U64_e64:
860+
return AMDGPU::V_CMP_LT_U64_e64;
861+
case AMDGPU::V_CMP_LE_U64_e64:
862+
return AMDGPU::V_CMP_GT_U64_e64;
863+
case AMDGPU::V_CMP_GT_U64_e64:
864+
return AMDGPU::V_CMP_LE_U64_e64;
865+
case AMDGPU::V_CMP_LT_U64_e64:
866+
return AMDGPU::V_CMP_GE_U64_e64;
867+
// float 32
855868
case AMDGPU::V_CMP_EQ_F32_e64:
856869
return AMDGPU::V_CMP_NEQ_F32_e64;
857870
case AMDGPU::V_CMP_NEQ_F32_e64:
858871
return AMDGPU::V_CMP_EQ_F32_e64;
859872
case AMDGPU::V_CMP_GE_F32_e64:
860-
return AMDGPU::V_CMP_LT_F32_e64;
873+
return AMDGPU::V_CMP_NGE_F32_e64;
861874
case AMDGPU::V_CMP_LE_F32_e64:
862-
return AMDGPU::V_CMP_GT_F32_e64;
875+
return AMDGPU::V_CMP_NLE_F32_e64;
863876
case AMDGPU::V_CMP_GT_F32_e64:
864-
return AMDGPU::V_CMP_LE_F32_e64;
877+
return AMDGPU::V_CMP_NGT_F32_e64;
865878
case AMDGPU::V_CMP_LT_F32_e64:
866-
return AMDGPU::V_CMP_GE_F32_e64;
879+
return AMDGPU::V_CMP_NLT_F32_e64;
880+
// float 64
881+
case AMDGPU::V_CMP_EQ_F64_e64:
882+
return AMDGPU::V_CMP_NEQ_F64_e64;
883+
case AMDGPU::V_CMP_NEQ_F64_e64:
884+
return AMDGPU::V_CMP_EQ_F64_e64;
885+
case AMDGPU::V_CMP_GE_F64_e64:
886+
return AMDGPU::V_CMP_NGE_F64_e64;
887+
case AMDGPU::V_CMP_LE_F64_e64:
888+
return AMDGPU::V_CMP_NLE_F64_e64;
889+
case AMDGPU::V_CMP_GT_F64_e64:
890+
return AMDGPU::V_CMP_NGT_F64_e64;
891+
case AMDGPU::V_CMP_LT_F64_e64:
892+
return AMDGPU::V_CMP_NLT_F64_e64;
867893
default:
868894
return 0;
869895
}
870896
}
871897

872-
bool SIShrinkInstructions::shouldSwitchOperands(MachineRegisterInfo &MRI,
873-
MachineInstr &MI,
874-
const SIInstrInfo &TII) const {
875-
auto allUses = MRI.use_nodbg_operands(MI.getOperand(5).getReg());
876-
unsigned Count = 0;
898+
bool SIShrinkInstructions::shouldSwapCndOperands(
899+
MachineInstr &MI, const SIInstrInfo &TII,
900+
SmallVector<MachineOperand *, 4> &UsesToProcess) const {
901+
auto AllUses = MRI->use_nodbg_operands(MI.getOperand(0).getReg());
902+
bool ShouldSwap = false;
877903

878-
for (auto &Use : allUses) {
879-
if (Use.getParent()->getOpcode() != AMDGPU::V_CNDMASK_B32_e64)
904+
for (auto &Use : AllUses) {
905+
MachineInstr *UseInst = Use.getParent();
906+
if (UseInst->getOpcode() != AMDGPU::V_CNDMASK_B32_e64)
880907
return false;
881-
MachineOperand *Src0 =
882-
TII.getNamedOperand(*Use.getParent(), AMDGPU::OpName::src0);
883-
MachineOperand *Src1 =
884-
TII.getNamedOperand(*Use.getParent(), AMDGPU::OpName::src1);
908+
MachineOperand *Src0 = TII.getNamedOperand(*UseInst, AMDGPU::OpName::src0);
909+
MachineOperand *Src1 = TII.getNamedOperand(*UseInst, AMDGPU::OpName::src1);
885910

886911
auto Src0Imm = Src0->isImm();
887912
auto Src1Imm = Src1->isImm();
888913

889914
if (!Src1Imm && Src0Imm)
890915
return false;
891-
if (Src1Imm && !Src0Imm)
892-
Count++;
916+
917+
UsesToProcess.push_back(&Use);
918+
919+
if (Src1Imm && !Src0Imm && !UseInst->getOperand(1).getImm())
920+
ShouldSwap = true;
893921
}
894-
return (Count >= 1);
922+
return ShouldSwap;
895923
}
896924

897-
// OldVCC and NewVCC are used to remember VCC after inverting comparison
898-
bool SIShrinkInstructions::trySwitchOperands(MachineInstr &MI, Register *OldVCC,
899-
Register *NewVCC) const {
900-
const DebugLoc &DL = MI.getDebugLoc();
901-
auto Reg = MI.getOperand(5).getReg();
902-
if (!Reg.isVirtual())
903-
return false;
925+
void swapCndOperands(MachineInstr &MI) {
926+
MachineOperand Op2 = MI.getOperand(2);
927+
MachineOperand Op4 = MI.getOperand(4);
928+
929+
if (Op2.isReg()) {
930+
MI.getOperand(4).ChangeToRegister(
931+
Op2.getReg(), Op2.isDef(), Op2.isImplicit(), Op2.isKill(), Op2.isDead(),
932+
Op2.isUndef(), Op2.isDebug());
933+
if (Op2.getSubReg() != AMDGPU::NoSubRegister)
934+
MI.getOperand(4).setSubReg(Op2.getSubReg());
935+
} else if (Op2.isImm()) {
936+
MI.getOperand(4).ChangeToImmediate(Op2.getImm());
937+
}
904938

905-
if (*OldVCC != Reg) {
906-
MachineInstr *DefMI = MRI->getVRegDef(Reg);
907-
if (DefMI) {
908-
unsigned Opcode = getInverseCompareOpcode(*DefMI);
909-
if (Opcode &&
910-
SIShrinkInstructions::shouldSwitchOperands(*MRI, MI, *TII)) {
911-
auto cmpDL = DefMI->getDebugLoc();
912-
*NewVCC = MRI->createVirtualRegister(MRI->getRegClass(Reg));
913-
*OldVCC = Reg;
914-
MachineInstrBuilder InverseCompare = BuildMI(
915-
*DefMI->getParent(), DefMI, cmpDL, TII->get(Opcode), *NewVCC);
916-
InverseCompare->setFlags(DefMI->getFlags());
917-
918-
unsigned OpNum = DefMI->getNumExplicitOperands();
919-
for (unsigned i = 1; i < OpNum; i++) {
920-
MachineOperand Op = DefMI->getOperand(i);
921-
InverseCompare.add(Op);
922-
if (Op.isReg() && Op.isKill())
923-
InverseCompare->getOperand(i).setIsKill(false);
924-
}
925-
}
926-
}
939+
if (Op4.isReg()) {
940+
MI.getOperand(2).setReg(Op4.getReg());
941+
if (Op4.getSubReg() != AMDGPU::NoSubRegister)
942+
MI.getOperand(2).setSubReg(Op4.getSubReg());
943+
} else if (Op4.isImm()) {
944+
MI.getOperand(2).ChangeToImmediate(Op4.getImm());
927945
}
928-
if (*OldVCC == Reg) {
929-
BuildMI(*MI.getParent(), MI, DL, TII->get(AMDGPU::V_CNDMASK_B32_e64),
930-
MI.getOperand(0).getReg())
931-
.add(MI.getOperand(3))
932-
.add(MI.getOperand(4))
933-
.add(MI.getOperand(1))
934-
.add(MI.getOperand(2))
935-
.addReg(*NewVCC);
936-
MI.eraseFromParent();
937-
return true;
946+
947+
MachineOperand Op1 = MI.getOperand(1);
948+
MachineOperand Op3 = MI.getOperand(3);
949+
MI.getOperand(1).setImm(Op3.getImm());
950+
MI.getOperand(3).setImm(Op1.getImm());
951+
}
952+
953+
Register SIShrinkInstructions::trySwapCndOperands(MachineInstr &MI) const {
954+
Register Reg = MI.getOperand(0).getReg();
955+
956+
unsigned Opcode = getInverseCompareOpcode(MI);
957+
SmallVector<MachineOperand *, 4> UsesToProcess;
958+
if (!Opcode ||
959+
!SIShrinkInstructions::shouldSwapCndOperands(MI, *TII, UsesToProcess))
960+
return AMDGPU::NoRegister;
961+
962+
auto DL = MI.getDebugLoc();
963+
Register NewVCC = MRI->createVirtualRegister(MRI->getRegClass(Reg));
964+
965+
MachineInstrBuilder InverseCompare =
966+
BuildMI(*MI.getParent(), MI, DL, TII->get(Opcode), NewVCC);
967+
InverseCompare->setFlags(MI.getFlags());
968+
969+
unsigned OpNum = MI.getNumExplicitOperands();
970+
for (unsigned i = 1; i < OpNum; i++) {
971+
MachineOperand Op = MI.getOperand(i);
972+
InverseCompare.add(Op);
973+
if (Op.isReg() && Op.isKill())
974+
InverseCompare->getOperand(i).setIsKill(false);
938975
}
939-
return false;
976+
977+
for (auto &Use : UsesToProcess) {
978+
MachineInstr *Inst = Use->getParent();
979+
swapCndOperands(*Inst);
980+
}
981+
982+
MRI->replaceRegWith(Reg, NewVCC);
983+
MI.eraseFromParent();
984+
return NewVCC;
940985
}
941986

942987
bool SIShrinkInstructions::run(MachineFunction &MF) {
@@ -950,9 +995,6 @@ bool SIShrinkInstructions::run(MachineFunction &MF) {
950995
unsigned VCCReg = ST->isWave32() ? AMDGPU::VCC_LO : AMDGPU::VCC;
951996

952997
std::vector<unsigned> I1Defs;
953-
Register OldVCC = AMDGPU::NoRegister;
954-
Register NewVCC = AMDGPU::NoRegister;
955-
956998
for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
957999
BI != BE; ++BI) {
9581000

@@ -1083,10 +1125,6 @@ bool SIShrinkInstructions::run(MachineFunction &MF) {
10831125
continue;
10841126
}
10851127

1086-
if (MI.getOpcode() == AMDGPU::V_CNDMASK_B32_e64 &&
1087-
trySwitchOperands(MI, &OldVCC, &NewVCC))
1088-
MRI->setRegAllocationHint(NewVCC, 0, VCCReg);
1089-
10901128
// If there is no chance we will shrink it and use VCC as sdst to get
10911129
// a 32 bit form try to replace dead sdst with NULL.
10921130
if (TII->isVOP3(MI.getOpcode())) {
@@ -1124,6 +1162,9 @@ bool SIShrinkInstructions::run(MachineFunction &MF) {
11241162
// provide a hint to the register allocator to use VCC and then we
11251163
// will run this pass again after RA and shrink it if it outputs to
11261164
// VCC.
1165+
Register NewVCC = trySwapCndOperands(MI);
1166+
DstReg = NewVCC == AMDGPU::NoRegister ? DstReg : NewVCC;
1167+
11271168
MRI->setRegAllocationHint(DstReg, 0, VCCReg);
11281169
continue;
11291170
}

0 commit comments

Comments
 (0)