@@ -53,7 +53,7 @@ class SIShrinkInstructions {
5353 unsigned SubReg) const ;
5454 Register trySwapCndOperands (MachineInstr &MI) const ;
5555 bool
56- shouldSwapCndOperands (MachineInstr &MI, const SIInstrInfo &TII,
56+ shouldSwapCndOperands (MachineInstr &MI,
5757 SmallVector<MachineOperand *, 4 > &UsesToProcess) const ;
5858 unsigned getInverseCompareOpcode (MachineInstr &MI) const ;
5959 TargetInstrInfo::RegSubRegPair getSubRegForIndex (Register Reg, unsigned Sub,
@@ -870,45 +870,76 @@ unsigned SIShrinkInstructions::getInverseCompareOpcode(MachineInstr &MI) const {
870870 return AMDGPU::V_CMP_EQ_F32_e64;
871871 case AMDGPU::V_CMP_GE_F32_e64:
872872 return AMDGPU::V_CMP_NGE_F32_e64;
873+ case AMDGPU::V_CMP_NGE_F32_e64:
874+ return AMDGPU::V_CMP_GE_F32_e64;
873875 case AMDGPU::V_CMP_LE_F32_e64:
874876 return AMDGPU::V_CMP_NLE_F32_e64;
877+ case AMDGPU::V_CMP_NLE_F32_e64:
878+ return AMDGPU::V_CMP_LE_F32_e64;
875879 case AMDGPU::V_CMP_GT_F32_e64:
876880 return AMDGPU::V_CMP_NGT_F32_e64;
881+ case AMDGPU::V_CMP_NGT_F32_e64:
882+ return AMDGPU::V_CMP_GT_F32_e64;
877883 case AMDGPU::V_CMP_LT_F32_e64:
878884 return AMDGPU::V_CMP_NLT_F32_e64;
885+ case AMDGPU::V_CMP_NLT_F32_e64:
886+ return AMDGPU::V_CMP_LT_F32_e64;
887+ case AMDGPU::V_CMP_LG_F32_e64:
888+ return AMDGPU::V_CMP_NLG_F32_e64;
889+ case AMDGPU::V_CMP_NLG_F32_e64:
890+ return AMDGPU::V_CMP_LG_F32_e64;
891+ case AMDGPU::V_CMP_O_F32_e64:
892+ return AMDGPU::V_CMP_U_F32_e64;
893+ case AMDGPU::V_CMP_U_F32_e64:
894+ return AMDGPU::V_CMP_O_F32_e64;
879895 // float 64
880896 case AMDGPU::V_CMP_EQ_F64_e64:
881897 return AMDGPU::V_CMP_NEQ_F64_e64;
882898 case AMDGPU::V_CMP_NEQ_F64_e64:
883899 return AMDGPU::V_CMP_EQ_F64_e64;
884900 case AMDGPU::V_CMP_GE_F64_e64:
885901 return AMDGPU::V_CMP_NGE_F64_e64;
902+ case AMDGPU::V_CMP_NGE_F64_e64:
903+ return AMDGPU::V_CMP_GE_F64_e64;
886904 case AMDGPU::V_CMP_LE_F64_e64:
887905 return AMDGPU::V_CMP_NLE_F64_e64;
906+ case AMDGPU::V_CMP_NLE_F64_e64:
907+ return AMDGPU::V_CMP_LE_F64_e64;
888908 case AMDGPU::V_CMP_GT_F64_e64:
889909 return AMDGPU::V_CMP_NGT_F64_e64;
910+ case AMDGPU::V_CMP_NGT_F64_e64:
911+ return AMDGPU::V_CMP_GT_F64_e64;
890912 case AMDGPU::V_CMP_LT_F64_e64:
891913 return AMDGPU::V_CMP_NLT_F64_e64;
914+ case AMDGPU::V_CMP_NLT_F64_e64:
915+ return AMDGPU::V_CMP_LT_F64_e64;
916+ case AMDGPU::V_CMP_LG_F64_e64:
917+ return AMDGPU::V_CMP_NLG_F64_e64;
918+ case AMDGPU::V_CMP_NLG_F64_e64:
919+ return AMDGPU::V_CMP_LG_F64_e64;
920+ case AMDGPU::V_CMP_O_F64_e64:
921+ return AMDGPU::V_CMP_U_F64_e64;
922+ case AMDGPU::V_CMP_U_F64_e64:
923+ return AMDGPU::V_CMP_O_F64_e64;
892924 default :
893925 return 0 ;
894926 }
895927}
896928
897929bool SIShrinkInstructions::shouldSwapCndOperands (
898- MachineInstr &MI, const SIInstrInfo &TII,
899- SmallVector<MachineOperand *, 4 > &UsesToProcess) const {
930+ MachineInstr &MI, SmallVector<MachineOperand *, 4 > &UsesToProcess) const {
900931 auto AllUses = MRI->use_nodbg_operands (MI.getOperand (0 ).getReg ());
901932 bool ShouldSwap = false ;
902933
903934 for (auto &Use : AllUses) {
904935 MachineInstr *UseInst = Use.getParent ();
905936 if (UseInst->getOpcode () != AMDGPU::V_CNDMASK_B32_e64)
906937 return false ;
907- MachineOperand * Src0 = TII. getNamedOperand (* UseInst, AMDGPU::OpName::src0 );
908- MachineOperand * Src1 = TII. getNamedOperand (* UseInst, AMDGPU::OpName::src1 );
938+ MachineOperand & Src0 = UseInst-> getOperand ( 2 );
939+ MachineOperand & Src1 = UseInst-> getOperand ( 4 );
909940
910- auto Src0Imm = Src0-> isImm ();
911- auto Src1Imm = Src1-> isImm ();
941+ bool Src0Imm = Src0. isImm ();
942+ bool Src1Imm = Src1. isImm ();
912943
913944 if (!Src1Imm && Src0Imm)
914945 return false ;
@@ -921,32 +952,30 @@ bool SIShrinkInstructions::shouldSwapCndOperands(
921952 return ShouldSwap;
922953}
923954
924- void swapCndOperands (MachineInstr &MI) {
925- MachineOperand Op2 = MI.getOperand (2 );
955+ static void swapCndOperands (MachineInstr &MI) {
956+ MachineOperand & Op2 = MI.getOperand (2 );
926957 MachineOperand Op4 = MI.getOperand (4 );
927958
928959 if (Op2.isReg ()) {
929960 MI.getOperand (4 ).ChangeToRegister (
930961 Op2.getReg (), Op2.isDef (), Op2.isImplicit (), Op2.isKill (), Op2.isDead (),
931962 Op2.isUndef (), Op2.isDebug ());
932- if (Op2.getSubReg () != AMDGPU::NoSubRegister)
933- MI.getOperand (4 ).setSubReg (Op2.getSubReg ());
963+ MI.getOperand (4 ).setSubReg (Op2.getSubReg ());
934964 } else if (Op2.isImm ()) {
935965 MI.getOperand (4 ).ChangeToImmediate (Op2.getImm ());
936966 }
937967
938968 if (Op4.isReg ()) {
939- MI.getOperand (2 ).setReg (Op4.getReg ());
940- if (Op4.getSubReg () != AMDGPU::NoSubRegister)
941- MI.getOperand (2 ).setSubReg (Op4.getSubReg ());
969+ Op2.setReg (Op4.getReg ());
970+ Op2.setSubReg (Op4.getSubReg ());
942971 } else if (Op4.isImm ()) {
943- MI. getOperand ( 2 ) .ChangeToImmediate (Op4.getImm ());
972+ Op2 .ChangeToImmediate (Op4.getImm ());
944973 }
945974
946- MachineOperand Op1 = MI.getOperand (1 );
947- MachineOperand Op3 = MI.getOperand (3 );
948- MI.getOperand (1 ).setImm (Op3. getImm () );
949- MI.getOperand (3 ).setImm (Op1. getImm () );
975+ auto Op1Imm = MI.getOperand (1 ). getImm ( );
976+ auto Op3Imm = MI.getOperand (3 ). getImm ( );
977+ MI.getOperand (1 ).setImm (Op3Imm );
978+ MI.getOperand (3 ).setImm (Op1Imm );
950979}
951980
952981Register SIShrinkInstructions::trySwapCndOperands (MachineInstr &MI) const {
@@ -955,8 +984,8 @@ Register SIShrinkInstructions::trySwapCndOperands(MachineInstr &MI) const {
955984 unsigned Opcode = getInverseCompareOpcode (MI);
956985 SmallVector<MachineOperand *, 4 > UsesToProcess;
957986 if (!Opcode ||
958- !SIShrinkInstructions::shouldSwapCndOperands (MI, *TII, UsesToProcess))
959- return AMDGPU::NoRegister ;
987+ !SIShrinkInstructions::shouldSwapCndOperands (MI, UsesToProcess))
988+ return Reg ;
960989
961990 auto DL = MI.getDebugLoc ();
962991 Register NewVCC = MRI->createVirtualRegister (MRI->getRegClass (Reg));
@@ -966,11 +995,11 @@ Register SIShrinkInstructions::trySwapCndOperands(MachineInstr &MI) const {
966995 InverseCompare->setFlags (MI.getFlags ());
967996
968997 unsigned OpNum = MI.getNumExplicitOperands ();
969- for (unsigned i = 1 ; i < OpNum; i ++) {
970- MachineOperand Op = MI.getOperand (i );
998+ for (unsigned Idx = 1 ; Idx < OpNum; Idx ++) {
999+ MachineOperand Op = MI.getOperand (Idx );
9711000 InverseCompare.add (Op);
9721001 if (Op.isReg () && Op.isKill ())
973- InverseCompare->getOperand (i ).setIsKill (false );
1002+ InverseCompare->getOperand (Idx ).setIsKill (false );
9741003 }
9751004
9761005 for (auto &Use : UsesToProcess) {
@@ -993,6 +1022,6 @@ bool SIShrinkInstructions::run(MachineFunction &MF) {
9931022
9941023 unsigned VCCReg = ST->isWave32 () ? AMDGPU::VCC_LO : AMDGPU::VCC;
9951024
9961030 for (MachineFunction::iterator BI = MF.begin (), BE = MF.end ();
9971031 BI != BE; ++BI) {
9981032
@@ -1150,6 +1184,7 @@ bool SIShrinkInstructions::run(MachineFunction &MF) {
11501184 // dst.
11511185 Register DstReg = Op0.getReg ();
11521186 if (DstReg.isVirtual ()) {
1187+ DstReg = trySwapCndOperands (MI);
11531188 // VOPC instructions can only write to the VCC register. We can't
11541189 // force them to use VCC here, because this is only one register and
11551190 // cannot deal with sequences which would require multiple copies of
@@ -1159,9 +1194,6 @@ bool SIShrinkInstructions::run(MachineFunction &MF) {
11591194 // provide a hint to the register allocator to use VCC and then we
11601195 // will run this pass again after RA and shrink it if it outputs to
11611196 // VCC.
1162- Register NewVCC = trySwapCndOperands (MI);
1163- DstReg = NewVCC == AMDGPU::NoRegister ? DstReg : NewVCC;
1164-
11651197 MRI->setRegAllocationHint (DstReg, 0 , VCCReg);
11661198 continue ;
11671199 }
0 commit comments