@@ -53,7 +53,7 @@ class SIShrinkInstructions {
                     unsigned SubReg) const;
   Register trySwapCndOperands(MachineInstr &MI) const;
   bool
-  shouldSwapCndOperands(MachineInstr &MI, const SIInstrInfo &TII,
+  shouldSwapCndOperands(MachineInstr &MI,
                         SmallVector<MachineOperand *, 4> &UsesToProcess) const;
   unsigned getInverseCompareOpcode(MachineInstr &MI) const;
   TargetInstrInfo::RegSubRegPair getSubRegForIndex(Register Reg, unsigned Sub,
@@ -871,45 +871,76 @@ unsigned SIShrinkInstructions::getInverseCompareOpcode(MachineInstr &MI) const {
     return AMDGPU::V_CMP_EQ_F32_e64;
   case AMDGPU::V_CMP_GE_F32_e64:
     return AMDGPU::V_CMP_NGE_F32_e64;
+  case AMDGPU::V_CMP_NGE_F32_e64:
+    return AMDGPU::V_CMP_GE_F32_e64;
   case AMDGPU::V_CMP_LE_F32_e64:
     return AMDGPU::V_CMP_NLE_F32_e64;
+  case AMDGPU::V_CMP_NLE_F32_e64:
+    return AMDGPU::V_CMP_LE_F32_e64;
   case AMDGPU::V_CMP_GT_F32_e64:
     return AMDGPU::V_CMP_NGT_F32_e64;
+  case AMDGPU::V_CMP_NGT_F32_e64:
+    return AMDGPU::V_CMP_GT_F32_e64;
   case AMDGPU::V_CMP_LT_F32_e64:
     return AMDGPU::V_CMP_NLT_F32_e64;
+  case AMDGPU::V_CMP_NLT_F32_e64:
+    return AMDGPU::V_CMP_LT_F32_e64;
+  case AMDGPU::V_CMP_LG_F32_e64:
+    return AMDGPU::V_CMP_NLG_F32_e64;
+  case AMDGPU::V_CMP_NLG_F32_e64:
+    return AMDGPU::V_CMP_LG_F32_e64;
+  case AMDGPU::V_CMP_O_F32_e64:
+    return AMDGPU::V_CMP_U_F32_e64;
+  case AMDGPU::V_CMP_U_F32_e64:
+    return AMDGPU::V_CMP_O_F32_e64;
   // float 64
   case AMDGPU::V_CMP_EQ_F64_e64:
     return AMDGPU::V_CMP_NEQ_F64_e64;
   case AMDGPU::V_CMP_NEQ_F64_e64:
     return AMDGPU::V_CMP_EQ_F64_e64;
   case AMDGPU::V_CMP_GE_F64_e64:
     return AMDGPU::V_CMP_NGE_F64_e64;
+  case AMDGPU::V_CMP_NGE_F64_e64:
+    return AMDGPU::V_CMP_GE_F64_e64;
   case AMDGPU::V_CMP_LE_F64_e64:
     return AMDGPU::V_CMP_NLE_F64_e64;
+  case AMDGPU::V_CMP_NLE_F64_e64:
+    return AMDGPU::V_CMP_LE_F64_e64;
   case AMDGPU::V_CMP_GT_F64_e64:
     return AMDGPU::V_CMP_NGT_F64_e64;
+  case AMDGPU::V_CMP_NGT_F64_e64:
+    return AMDGPU::V_CMP_GT_F64_e64;
   case AMDGPU::V_CMP_LT_F64_e64:
     return AMDGPU::V_CMP_NLT_F64_e64;
+  case AMDGPU::V_CMP_NLT_F64_e64:
+    return AMDGPU::V_CMP_LT_F64_e64;
+  case AMDGPU::V_CMP_LG_F64_e64:
+    return AMDGPU::V_CMP_NLG_F64_e64;
+  case AMDGPU::V_CMP_NLG_F64_e64:
+    return AMDGPU::V_CMP_LG_F64_e64;
+  case AMDGPU::V_CMP_O_F64_e64:
+    return AMDGPU::V_CMP_U_F64_e64;
+  case AMDGPU::V_CMP_U_F64_e64:
+    return AMDGPU::V_CMP_O_F64_e64;
   default:
     return 0;
   }
 }
 
 bool SIShrinkInstructions::shouldSwapCndOperands(
-    MachineInstr &MI, const SIInstrInfo &TII,
-    SmallVector<MachineOperand *, 4> &UsesToProcess) const {
+    MachineInstr &MI, SmallVector<MachineOperand *, 4> &UsesToProcess) const {
   auto AllUses = MRI->use_nodbg_operands(MI.getOperand(0).getReg());
   bool ShouldSwap = false;
 
   for (auto &Use : AllUses) {
     MachineInstr *UseInst = Use.getParent();
     if (UseInst->getOpcode() != AMDGPU::V_CNDMASK_B32_e64)
       return false;
-    MachineOperand *Src0 = TII.getNamedOperand(*UseInst, AMDGPU::OpName::src0);
-    MachineOperand *Src1 = TII.getNamedOperand(*UseInst, AMDGPU::OpName::src1);
+    MachineOperand &Src0 = UseInst->getOperand(2);
+    MachineOperand &Src1 = UseInst->getOperand(4);
 
-    auto Src0Imm = Src0->isImm();
-    auto Src1Imm = Src1->isImm();
+    bool Src0Imm = Src0.isImm();
+    bool Src1Imm = Src1.isImm();
 
     if (!Src1Imm && Src0Imm)
       return false;
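
Note on the opcode map above: it leans on two facts, the select identity (cc ? s1 : s0) == (!cc ? s0 : s1), and the IEEE rule that the unordered "N" compares, not the ordered mirrors, are the true negations (NGE is "unordered or less", so GE inverts to NGE rather than LT). A standalone C++ sketch of both points; no LLVM APIs are involved and the helper name cndMask is invented here:

#include <cassert>
#include <cmath>

// V_CNDMASK-style select: dst = cc ? src1 : src0.
static float cndMask(bool cc, float src0, float src1) { return cc ? src1 : src0; }

int main() {
  const float vals[] = {-1.0f, 0.0f, 2.5f, std::nanf("")};
  for (float x : vals) {
    for (float y : vals) {
      bool ge = x >= y;     // V_CMP_GE_F32: ordered greater-or-equal
      bool nge = !(x >= y); // V_CMP_NGE_F32: unordered or less
      // Swapping the selected values compensates for inverting the compare.
      assert(cndMask(ge, 1.0f, 2.0f) == cndMask(nge, 2.0f, 1.0f));
      // NGE agrees with LT only on ordered inputs; they differ on NaNs,
      // which is why GE inverts to NGE (not LT) in the switch above.
      if (std::isnan(x) || std::isnan(y))
        assert(nge && !(x < y));
    }
  }
}
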
@@ -922,32 +953,30 @@ bool SIShrinkInstructions::shouldSwapCndOperands(
   return ShouldSwap;
 }
 
-void swapCndOperands(MachineInstr &MI) {
-  MachineOperand Op2 = MI.getOperand(2);
+static void swapCndOperands(MachineInstr &MI) {
+  MachineOperand &Op2 = MI.getOperand(2);
   MachineOperand Op4 = MI.getOperand(4);
 
   if (Op2.isReg()) {
     MI.getOperand(4).ChangeToRegister(
         Op2.getReg(), Op2.isDef(), Op2.isImplicit(), Op2.isKill(), Op2.isDead(),
         Op2.isUndef(), Op2.isDebug());
-    if (Op2.getSubReg() != AMDGPU::NoSubRegister)
-      MI.getOperand(4).setSubReg(Op2.getSubReg());
+    MI.getOperand(4).setSubReg(Op2.getSubReg());
   } else if (Op2.isImm()) {
     MI.getOperand(4).ChangeToImmediate(Op2.getImm());
   }
 
   if (Op4.isReg()) {
-    MI.getOperand(2).setReg(Op4.getReg());
-    if (Op4.getSubReg() != AMDGPU::NoSubRegister)
-      MI.getOperand(2).setSubReg(Op4.getSubReg());
+    Op2.setReg(Op4.getReg());
+    Op2.setSubReg(Op4.getSubReg());
   } else if (Op4.isImm()) {
-    MI.getOperand(2).ChangeToImmediate(Op4.getImm());
+    Op2.ChangeToImmediate(Op4.getImm());
   }
 
-  MachineOperand Op1 = MI.getOperand(1);
-  MachineOperand Op3 = MI.getOperand(3);
-  MI.getOperand(1).setImm(Op3.getImm());
-  MI.getOperand(3).setImm(Op1.getImm());
+  auto Op1Imm = MI.getOperand(1).getImm();
+  auto Op3Imm = MI.getOperand(3).getImm();
+  MI.getOperand(1).setImm(Op3Imm);
+  MI.getOperand(3).setImm(Op1Imm);
 }
 
 Register SIShrinkInstructions::trySwapCndOperands(MachineInstr &MI) const {
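
For orientation, swapCndOperands indexes V_CNDMASK_B32_e64's explicit operand list directly; the layout implied by the code above is [0]=dst, [1]=src0_modifiers, [2]=src0, [3]=src1_modifiers, [4]=src1, [5]=condition. A toy C++ sketch of the same permutation on plain data (layout inferred from the hunk, not quoted from the TableGen definition):

#include <array>
#include <cassert>
#include <utility>

// Stand-in for the VOP3 cndmask operand list described above.
using OperandList = std::array<int, 6>;

// Mirror swapCndOperands: exchange the value operands (2 <-> 4) and keep
// their source modifiers attached by exchanging those too (1 <-> 3).
static void swapValueOperands(OperandList &Ops) {
  std::swap(Ops[2], Ops[4]);
  std::swap(Ops[1], Ops[3]);
}

int main() {
  OperandList Ops = {0, 1, 2, 3, 4, 5}; // dst, mods0, src0, mods1, src1, cond
  swapValueOperands(Ops);
  assert(Ops[2] == 4 && Ops[4] == 2); // values swapped
  assert(Ops[1] == 3 && Ops[3] == 1); // modifiers traveled with their values
  assert(Ops[0] == 0 && Ops[5] == 5); // dst and condition untouched
}
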
@@ -956,8 +985,8 @@ Register SIShrinkInstructions::trySwapCndOperands(MachineInstr &MI) const {
   unsigned Opcode = getInverseCompareOpcode(MI);
   SmallVector<MachineOperand *, 4> UsesToProcess;
   if (!Opcode ||
-      !SIShrinkInstructions::shouldSwapCndOperands(MI, *TII, UsesToProcess))
-    return AMDGPU::NoRegister;
+      !SIShrinkInstructions::shouldSwapCndOperands(MI, UsesToProcess))
+    return Reg;
 
   auto DL = MI.getDebugLoc();
   Register NewVCC = MRI->createVirtualRegister(MRI->getRegClass(Reg));
@@ -967,11 +996,11 @@ Register SIShrinkInstructions::trySwapCndOperands(MachineInstr &MI) const {
   InverseCompare->setFlags(MI.getFlags());
 
   unsigned OpNum = MI.getNumExplicitOperands();
-  for (unsigned i = 1; i < OpNum; i++) {
-    MachineOperand Op = MI.getOperand(i);
+  for (unsigned Idx = 1; Idx < OpNum; Idx++) {
+    MachineOperand Op = MI.getOperand(Idx);
     InverseCompare.add(Op);
     if (Op.isReg() && Op.isKill())
-      InverseCompare->getOperand(i).setIsKill(false);
+      InverseCompare->getOperand(Idx).setIsKill(false);
   }
 
   for (auto &Use : UsesToProcess) {
@@ -995,6 +1024,7 @@ bool SIShrinkInstructions::run(MachineFunction &MF) {
   unsigned VCCReg = ST->isWave32() ? AMDGPU::VCC_LO : AMDGPU::VCC;
 
   std::vector<unsigned> I1Defs;
+
   for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
        BI != BE; ++BI) {
 
@@ -1153,6 +1183,7 @@ bool SIShrinkInstructions::run(MachineFunction &MF) {
       // dst.
       Register DstReg = Op0.getReg();
       if (DstReg.isVirtual()) {
+        DstReg = trySwapCndOperands(MI);
         // VOPC instructions can only write to the VCC register. We can't
         // force them to use VCC here, because this is only one register and
         // cannot deal with sequences which would require multiple copies of
@@ -1162,9 +1193,6 @@ bool SIShrinkInstructions::run(MachineFunction &MF) {
         // provide a hint to the register allocator to use VCC and then we
         // will run this pass again after RA and shrink it if it outputs to
         // VCC.
-        Register NewVCC = trySwapCndOperands(MI);
-        DstReg = NewVCC == AMDGPU::NoRegister ? DstReg : NewVCC;
-
         MRI->setRegAllocationHint(DstReg, 0, VCCReg);
         continue;
       }
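
A design note on the two run() hunks: trySwapCndOperands now returns the compare's original dst when it declines to rewrite (the return Reg; change above), so the call site assigns the result unconditionally instead of comparing against AMDGPU::NoRegister. A minimal C++ sketch of that calling convention, with hypothetical names:

#include <cassert>

// "Return the input on bailout": the caller can always use the result,
// with no sentinel check at the call site.
static unsigned tryRewriteReg(unsigned Reg, bool Profitable, unsigned NewReg) {
  return Profitable ? NewReg : Reg;
}

int main() {
  unsigned DstReg = 7;
  DstReg = tryRewriteReg(DstReg, /*Profitable=*/false, 42);
  assert(DstReg == 7); // bailout leaves the register unchanged
  DstReg = tryRewriteReg(DstReg, /*Profitable=*/true, 42);
  assert(DstReg == 42); // success installs the replacement
}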