Skip to content

Commit 694fb6f

Browse files
Ana MihajlovicAna Mihajlovic
authored and committed
covered 64-bit case, updated tests, added operand swapping instead of creating a new instruction
1 parent a426f2a commit 694fb6f

File tree

13 files changed

+623
-524
lines changed

13 files changed

+623
-524
lines changed

llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp

Lines changed: 111 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -51,10 +51,10 @@ class SIShrinkInstructions {
5151
unsigned SubReg) const;
5252
bool instModifiesReg(const MachineInstr *MI, unsigned Reg,
5353
unsigned SubReg) const;
54-
bool trySwitchOperands(MachineInstr &MI, Register *OldVCC,
55-
Register *NewVCC) const;
56-
bool shouldSwitchOperands(MachineRegisterInfo &MRI, MachineInstr &MI,
57-
const SIInstrInfo &TII) const;
54+
Register trySwapCndOperands(MachineInstr &MI) const;
55+
bool
56+
shouldSwapCndOperands(MachineInstr &MI, const SIInstrInfo &TII,
57+
SmallVector<MachineOperand *, 4> &UsesToProcess) const;
5858
unsigned getInverseCompareOpcode(MachineInstr &MI) const;
5959
TargetInstrInfo::RegSubRegPair getSubRegForIndex(Register Reg, unsigned Sub,
6060
unsigned I) const;
@@ -850,92 +850,137 @@ unsigned SIShrinkInstructions::getInverseCompareOpcode(MachineInstr &MI) const {
850850
return AMDGPU::V_CMP_LE_U32_e64;
851851
case AMDGPU::V_CMP_LT_U32_e64:
852852
return AMDGPU::V_CMP_GE_U32_e64;
853-
// float 32
853+
// unsigned 64
854+
case AMDGPU::V_CMP_EQ_U64_e64:
855+
return AMDGPU::V_CMP_NE_U64_e64;
856+
case AMDGPU::V_CMP_NE_U64_e64:
857+
return AMDGPU::V_CMP_EQ_U64_e64;
858+
case AMDGPU::V_CMP_GE_U64_e64:
859+
return AMDGPU::V_CMP_LT_U64_e64;
860+
case AMDGPU::V_CMP_LE_U64_e64:
861+
return AMDGPU::V_CMP_GT_U64_e64;
862+
case AMDGPU::V_CMP_GT_U64_e64:
863+
return AMDGPU::V_CMP_LE_U64_e64;
864+
case AMDGPU::V_CMP_LT_U64_e64:
865+
return AMDGPU::V_CMP_GE_U64_e64;
866+
// float 32
854867
case AMDGPU::V_CMP_EQ_F32_e64:
855868
return AMDGPU::V_CMP_NEQ_F32_e64;
856869
case AMDGPU::V_CMP_NEQ_F32_e64:
857870
return AMDGPU::V_CMP_EQ_F32_e64;
858871
case AMDGPU::V_CMP_GE_F32_e64:
859-
return AMDGPU::V_CMP_LT_F32_e64;
872+
return AMDGPU::V_CMP_NGE_F32_e64;
860873
case AMDGPU::V_CMP_LE_F32_e64:
861-
return AMDGPU::V_CMP_GT_F32_e64;
874+
return AMDGPU::V_CMP_NLE_F32_e64;
862875
case AMDGPU::V_CMP_GT_F32_e64:
863-
return AMDGPU::V_CMP_LE_F32_e64;
876+
return AMDGPU::V_CMP_NGT_F32_e64;
864877
case AMDGPU::V_CMP_LT_F32_e64:
865-
return AMDGPU::V_CMP_GE_F32_e64;
878+
return AMDGPU::V_CMP_NLT_F32_e64;
879+
// float 64
880+
case AMDGPU::V_CMP_EQ_F64_e64:
881+
return AMDGPU::V_CMP_NEQ_F64_e64;
882+
case AMDGPU::V_CMP_NEQ_F64_e64:
883+
return AMDGPU::V_CMP_EQ_F64_e64;
884+
case AMDGPU::V_CMP_GE_F64_e64:
885+
return AMDGPU::V_CMP_NGE_F64_e64;
886+
case AMDGPU::V_CMP_LE_F64_e64:
887+
return AMDGPU::V_CMP_NLE_F64_e64;
888+
case AMDGPU::V_CMP_GT_F64_e64:
889+
return AMDGPU::V_CMP_NGT_F64_e64;
890+
case AMDGPU::V_CMP_LT_F64_e64:
891+
return AMDGPU::V_CMP_NLT_F64_e64;
866892
default:
867893
return 0;
868894
}
869895
}
870896

871-
bool SIShrinkInstructions::shouldSwitchOperands(MachineRegisterInfo &MRI,
872-
MachineInstr &MI,
873-
const SIInstrInfo &TII) const {
874-
auto allUses = MRI.use_nodbg_operands(MI.getOperand(5).getReg());
875-
unsigned Count = 0;
897+
bool SIShrinkInstructions::shouldSwapCndOperands(
898+
MachineInstr &MI, const SIInstrInfo &TII,
899+
SmallVector<MachineOperand *, 4> &UsesToProcess) const {
900+
auto AllUses = MRI->use_nodbg_operands(MI.getOperand(0).getReg());
901+
bool ShouldSwap = false;
876902

877-
for (auto &Use : allUses) {
878-
if (Use.getParent()->getOpcode() != AMDGPU::V_CNDMASK_B32_e64)
903+
for (auto &Use : AllUses) {
904+
MachineInstr *UseInst = Use.getParent();
905+
if (UseInst->getOpcode() != AMDGPU::V_CNDMASK_B32_e64)
879906
return false;
880-
MachineOperand *Src0 =
881-
TII.getNamedOperand(*Use.getParent(), AMDGPU::OpName::src0);
882-
MachineOperand *Src1 =
883-
TII.getNamedOperand(*Use.getParent(), AMDGPU::OpName::src1);
907+
MachineOperand *Src0 = TII.getNamedOperand(*UseInst, AMDGPU::OpName::src0);
908+
MachineOperand *Src1 = TII.getNamedOperand(*UseInst, AMDGPU::OpName::src1);
884909

885910
auto Src0Imm = Src0->isImm();
886911
auto Src1Imm = Src1->isImm();
887912

888913
if (!Src1Imm && Src0Imm)
889914
return false;
890-
if (Src1Imm && !Src0Imm)
891-
Count++;
915+
916+
UsesToProcess.push_back(&Use);
917+
918+
if (Src1Imm && !Src0Imm && !UseInst->getOperand(1).getImm())
919+
ShouldSwap = true;
892920
}
893-
return (Count >= 1);
921+
return ShouldSwap;
894922
}
895923

896-
// OldVCC and NewVCC are used to remember VCC after inverting comparison
897-
bool SIShrinkInstructions::trySwitchOperands(MachineInstr &MI, Register *OldVCC,
898-
Register *NewVCC) const {
899-
const DebugLoc &DL = MI.getDebugLoc();
900-
auto Reg = MI.getOperand(5).getReg();
901-
if (!Reg.isVirtual())
902-
return false;
924+
void swapCndOperands(MachineInstr &MI) {
925+
MachineOperand Op2 = MI.getOperand(2);
926+
MachineOperand Op4 = MI.getOperand(4);
927+
928+
if (Op2.isReg()) {
929+
MI.getOperand(4).ChangeToRegister(
930+
Op2.getReg(), Op2.isDef(), Op2.isImplicit(), Op2.isKill(), Op2.isDead(),
931+
Op2.isUndef(), Op2.isDebug());
932+
if (Op2.getSubReg() != AMDGPU::NoSubRegister)
933+
MI.getOperand(4).setSubReg(Op2.getSubReg());
934+
} else if (Op2.isImm()) {
935+
MI.getOperand(4).ChangeToImmediate(Op2.getImm());
936+
}
903937

904-
if (*OldVCC != Reg) {
905-
MachineInstr *DefMI = MRI->getVRegDef(Reg);
906-
if (DefMI) {
907-
unsigned Opcode = getInverseCompareOpcode(*DefMI);
908-
if (Opcode &&
909-
SIShrinkInstructions::shouldSwitchOperands(*MRI, MI, *TII)) {
910-
auto cmpDL = DefMI->getDebugLoc();
911-
*NewVCC = MRI->createVirtualRegister(MRI->getRegClass(Reg));
912-
*OldVCC = Reg;
913-
MachineInstrBuilder InverseCompare = BuildMI(
914-
*DefMI->getParent(), DefMI, cmpDL, TII->get(Opcode), *NewVCC);
915-
InverseCompare->setFlags(DefMI->getFlags());
916-
917-
unsigned OpNum = DefMI->getNumExplicitOperands();
918-
for (unsigned i = 1; i < OpNum; i++) {
919-
MachineOperand Op = DefMI->getOperand(i);
920-
InverseCompare.add(Op);
921-
if (Op.isReg() && Op.isKill())
922-
InverseCompare->getOperand(i).setIsKill(false);
923-
}
924-
}
925-
}
938+
if (Op4.isReg()) {
939+
MI.getOperand(2).setReg(Op4.getReg());
940+
if (Op4.getSubReg() != AMDGPU::NoSubRegister)
941+
MI.getOperand(2).setSubReg(Op4.getSubReg());
942+
} else if (Op4.isImm()) {
943+
MI.getOperand(2).ChangeToImmediate(Op4.getImm());
926944
}
927-
if (*OldVCC == Reg) {
928-
BuildMI(*MI.getParent(), MI, DL, TII->get(AMDGPU::V_CNDMASK_B32_e64),
929-
MI.getOperand(0).getReg())
930-
.add(MI.getOperand(3))
931-
.add(MI.getOperand(4))
932-
.add(MI.getOperand(1))
933-
.add(MI.getOperand(2))
934-
.addReg(*NewVCC);
935-
MI.eraseFromParent();
936-
return true;
945+
946+
MachineOperand Op1 = MI.getOperand(1);
947+
MachineOperand Op3 = MI.getOperand(3);
948+
MI.getOperand(1).setImm(Op3.getImm());
949+
MI.getOperand(3).setImm(Op1.getImm());
950+
}
951+
952+
/// Try to replace compare \p MI with its inverse and swap the operands of all
/// V_CNDMASK_B32 users of its result, so the selects can later be shrunk.
///
/// \returns the virtual register holding the inverted condition, or
/// AMDGPU::NoRegister if the transformation does not apply. On success \p MI
/// is erased and all uses of its old result are rewritten to the new register.
Register SIShrinkInstructions::trySwapCndOperands(MachineInstr &MI) const {
  Register OldCond = MI.getOperand(0).getReg();

  SmallVector<MachineOperand *, 4> UsesToProcess;
  unsigned InverseOpc = getInverseCompareOpcode(MI);
  if (!InverseOpc || !shouldSwapCndOperands(MI, *TII, UsesToProcess))
    return AMDGPU::NoRegister;

  Register NewCond = MRI->createVirtualRegister(MRI->getRegClass(OldCond));

  // Build the inverted compare right before the original, copying its flags
  // and every explicit source operand.
  MachineInstrBuilder Inverse = BuildMI(*MI.getParent(), MI, MI.getDebugLoc(),
                                        TII->get(InverseOpc), NewCond);
  Inverse->setFlags(MI.getFlags());

  for (unsigned I = 1, E = MI.getNumExplicitOperands(); I != E; ++I) {
    MachineOperand Op = MI.getOperand(I);
    Inverse.add(Op);
    // Drop kill flags on the copy: the source registers now have a later use.
    if (Op.isReg() && Op.isKill())
      Inverse->getOperand(I).setIsKill(false);
  }

  // Swap src0/src1 on every select that consumed the old condition.
  for (MachineOperand *Use : UsesToProcess)
    swapCndOperands(*Use->getParent());

  MRI->replaceRegWith(OldCond, NewCond);
  MI.eraseFromParent();
  return NewCond;
}
940985

941986
bool SIShrinkInstructions::run(MachineFunction &MF) {
@@ -1077,10 +1122,6 @@ bool SIShrinkInstructions::run(MachineFunction &MF) {
10771122
continue;
10781123
}
10791124

1080-
if (MI.getOpcode() == AMDGPU::V_CNDMASK_B32_e64 &&
1081-
trySwitchOperands(MI, &OldVCC, &NewVCC))
1082-
MRI->setRegAllocationHint(NewVCC, 0, VCCReg);
1083-
10841125
// If there is no chance we will shrink it and use VCC as sdst to get
10851126
// a 32 bit form try to replace dead sdst with NULL.
10861127
if (TII->isVOP3(MI.getOpcode())) {
@@ -1118,6 +1159,9 @@ bool SIShrinkInstructions::run(MachineFunction &MF) {
11181159
// provide a hint to the register allocator to use VCC and then we
11191160
// will run this pass again after RA and shrink it if it outputs to
11201161
// VCC.
1162+
Register NewVCC = trySwapCndOperands(MI);
1163+
DstReg = NewVCC == AMDGPU::NoRegister ? DstReg : NewVCC;
1164+
11211165
MRI->setRegAllocationHint(DstReg, 0, VCCReg);
11221166
continue;
11231167
}

0 commit comments

Comments
 (0)