@@ -51,10 +51,10 @@ class SIShrinkInstructions {
5151 unsigned SubReg) const ;
5252 bool instModifiesReg (const MachineInstr *MI, unsigned Reg,
5353 unsigned SubReg) const ;
54- bool trySwitchOperands (MachineInstr &MI, Register *OldVCC,
55- Register *NewVCC) const ;
56- bool shouldSwitchOperands (MachineRegisterInfo &MRI, MachineInstr &MI ,
57- const SIInstrInfo &TII ) const ;
54+ Register trySwapCndOperands (MachineInstr &MI) const ;
55+ bool
56+ shouldSwapCndOperands (MachineInstr &MI, const SIInstrInfo &TII ,
57+ SmallVector<MachineOperand *, 4 > &UsesToProcess ) const ;
5858 unsigned getInverseCompareOpcode (MachineInstr &MI) const ;
5959 TargetInstrInfo::RegSubRegPair getSubRegForIndex (Register Reg, unsigned Sub,
6060 unsigned I) const ;
@@ -850,92 +850,137 @@ unsigned SIShrinkInstructions::getInverseCompareOpcode(MachineInstr &MI) const {
850850 return AMDGPU::V_CMP_LE_U32_e64;
851851 case AMDGPU::V_CMP_LT_U32_e64:
852852 return AMDGPU::V_CMP_GE_U32_e64;
853- // float 32
853+ // unsigned 64
854+ case AMDGPU::V_CMP_EQ_U64_e64:
855+ return AMDGPU::V_CMP_NE_U64_e64;
856+ case AMDGPU::V_CMP_NE_U64_e64:
857+ return AMDGPU::V_CMP_EQ_U64_e64;
858+ case AMDGPU::V_CMP_GE_U64_e64:
859+ return AMDGPU::V_CMP_LT_U64_e64;
860+ case AMDGPU::V_CMP_LE_U64_e64:
861+ return AMDGPU::V_CMP_GT_U64_e64;
862+ case AMDGPU::V_CMP_GT_U64_e64:
863+ return AMDGPU::V_CMP_LE_U64_e64;
864+ case AMDGPU::V_CMP_LT_U64_e64:
865+ return AMDGPU::V_CMP_GE_U64_e64;
866+ // float 32
854867 case AMDGPU::V_CMP_EQ_F32_e64:
855868 return AMDGPU::V_CMP_NEQ_F32_e64;
856869 case AMDGPU::V_CMP_NEQ_F32_e64:
857870 return AMDGPU::V_CMP_EQ_F32_e64;
858871 case AMDGPU::V_CMP_GE_F32_e64:
859- return AMDGPU::V_CMP_LT_F32_e64 ;
872+ return AMDGPU::V_CMP_NGE_F32_e64 ;
860873 case AMDGPU::V_CMP_LE_F32_e64:
861- return AMDGPU::V_CMP_GT_F32_e64 ;
874+ return AMDGPU::V_CMP_NLE_F32_e64 ;
862875 case AMDGPU::V_CMP_GT_F32_e64:
863- return AMDGPU::V_CMP_LE_F32_e64 ;
876+ return AMDGPU::V_CMP_NGT_F32_e64 ;
864877 case AMDGPU::V_CMP_LT_F32_e64:
865- return AMDGPU::V_CMP_GE_F32_e64;
878+ return AMDGPU::V_CMP_NLT_F32_e64;
879+ // float 64
880+ case AMDGPU::V_CMP_EQ_F64_e64:
881+ return AMDGPU::V_CMP_NEQ_F64_e64;
882+ case AMDGPU::V_CMP_NEQ_F64_e64:
883+ return AMDGPU::V_CMP_EQ_F64_e64;
884+ case AMDGPU::V_CMP_GE_F64_e64:
885+ return AMDGPU::V_CMP_NGE_F64_e64;
886+ case AMDGPU::V_CMP_LE_F64_e64:
887+ return AMDGPU::V_CMP_NLE_F64_e64;
888+ case AMDGPU::V_CMP_GT_F64_e64:
889+ return AMDGPU::V_CMP_NGT_F64_e64;
890+ case AMDGPU::V_CMP_LT_F64_e64:
891+ return AMDGPU::V_CMP_NLT_F64_e64;
866892 default :
867893 return 0 ;
868894 }
869895}
870896
871- bool SIShrinkInstructions::shouldSwitchOperands (MachineRegisterInfo &MRI,
872- MachineInstr &MI ,
873- const SIInstrInfo &TII ) const {
874- auto allUses = MRI. use_nodbg_operands (MI.getOperand (5 ).getReg ());
875- unsigned Count = 0 ;
897+ bool SIShrinkInstructions::shouldSwapCndOperands (
898+ MachineInstr &MI, const SIInstrInfo &TII ,
899+ SmallVector<MachineOperand *, 4 > &UsesToProcess ) const {
900+ auto AllUses = MRI-> use_nodbg_operands (MI.getOperand (0 ).getReg ());
901+ bool ShouldSwap = false ;
876902
877- for (auto &Use : allUses) {
878- if (Use.getParent ()->getOpcode () != AMDGPU::V_CNDMASK_B32_e64)
903+ for (auto &Use : AllUses) {
904+ MachineInstr *UseInst = Use.getParent ();
905+ if (UseInst->getOpcode () != AMDGPU::V_CNDMASK_B32_e64)
879906 return false ;
880- MachineOperand *Src0 =
881- TII.getNamedOperand (*Use.getParent (), AMDGPU::OpName::src0);
882- MachineOperand *Src1 =
883- TII.getNamedOperand (*Use.getParent (), AMDGPU::OpName::src1);
907+ MachineOperand *Src0 = TII.getNamedOperand (*UseInst, AMDGPU::OpName::src0);
908+ MachineOperand *Src1 = TII.getNamedOperand (*UseInst, AMDGPU::OpName::src1);
884909
885910 auto Src0Imm = Src0->isImm ();
886911 auto Src1Imm = Src1->isImm ();
887912
888913 if (!Src1Imm && Src0Imm)
889914 return false ;
890- if (Src1Imm && !Src0Imm)
891- Count++;
915+
916+ UsesToProcess.push_back (&Use);
917+
918+ if (Src1Imm && !Src0Imm && !UseInst->getOperand (1 ).getImm ())
919+ ShouldSwap = true ;
892920 }
893- return (Count >= 1 ) ;
921+ return ShouldSwap ;
894922}
895923
896- // OldVCC and NewVCC are used to remember VCC after inverting comparison
897- bool SIShrinkInstructions::trySwitchOperands (MachineInstr &MI, Register *OldVCC,
898- Register *NewVCC) const {
899- const DebugLoc &DL = MI.getDebugLoc ();
900- auto Reg = MI.getOperand (5 ).getReg ();
901- if (!Reg.isVirtual ())
902- return false ;
924+ static void swapCndOperands (MachineInstr &MI) {
925+ MachineOperand Op2 = MI.getOperand (2 );
926+ MachineOperand Op4 = MI.getOperand (4 );
927+
928+ if (Op2.isReg ()) {
929+ MI.getOperand (4 ).ChangeToRegister (
930+ Op2.getReg (), Op2.isDef (), Op2.isImplicit (), Op2.isKill (), Op2.isDead (),
931+ Op2.isUndef (), Op2.isDebug ());
932+ if (Op2.getSubReg () != AMDGPU::NoSubRegister)
933+ MI.getOperand (4 ).setSubReg (Op2.getSubReg ());
934+ } else if (Op2.isImm ()) {
935+ MI.getOperand (4 ).ChangeToImmediate (Op2.getImm ());
936+ }
903937
904- if (*OldVCC != Reg) {
905- MachineInstr *DefMI = MRI->getVRegDef (Reg);
906- if (DefMI) {
907- unsigned Opcode = getInverseCompareOpcode (*DefMI);
908- if (Opcode &&
909- SIShrinkInstructions::shouldSwitchOperands (*MRI, MI, *TII)) {
910- auto cmpDL = DefMI->getDebugLoc ();
911- *NewVCC = MRI->createVirtualRegister (MRI->getRegClass (Reg));
912- *OldVCC = Reg;
913- MachineInstrBuilder InverseCompare = BuildMI (
914- *DefMI->getParent (), DefMI, cmpDL, TII->get (Opcode), *NewVCC);
915- InverseCompare->setFlags (DefMI->getFlags ());
916-
917- unsigned OpNum = DefMI->getNumExplicitOperands ();
918- for (unsigned i = 1 ; i < OpNum; i++) {
919- MachineOperand Op = DefMI->getOperand (i);
920- InverseCompare.add (Op);
921- if (Op.isReg () && Op.isKill ())
922- InverseCompare->getOperand (i).setIsKill (false );
923- }
924- }
925- }
938+   if (Op4.isReg ()) {
939+     MI.getOperand (2 ).ChangeToRegister (Op4.getReg (), Op4.isDef (), Op4.isImplicit (), Op4.isKill (), Op4.isDead (), Op4.isUndef (), Op4.isDebug ());
940+     if (Op4.getSubReg () != AMDGPU::NoSubRegister)
941+       MI.getOperand (2 ).setSubReg (Op4.getSubReg ());
942+   } else if (Op4.isImm ()) {
943+     MI.getOperand (2 ).ChangeToImmediate (Op4.getImm ());
926944 }
927- if (*OldVCC == Reg) {
928- BuildMI (*MI.getParent (), MI, DL, TII->get (AMDGPU::V_CNDMASK_B32_e64),
929- MI.getOperand (0 ).getReg ())
930- .add (MI.getOperand (3 ))
931- .add (MI.getOperand (4 ))
932- .add (MI.getOperand (1 ))
933- .add (MI.getOperand (2 ))
934- .addReg (*NewVCC);
935- MI.eraseFromParent ();
936- return true ;
945+
946+ MachineOperand Op1 = MI.getOperand (1 );
947+ MachineOperand Op3 = MI.getOperand (3 );
948+ MI.getOperand (1 ).setImm (Op3.getImm ());
949+ MI.getOperand (3 ).setImm (Op1.getImm ());
950+ }
951+
952+ Register SIShrinkInstructions::trySwapCndOperands (MachineInstr &MI) const {
953+ Register Reg = MI.getOperand (0 ).getReg ();
954+
955+ unsigned Opcode = getInverseCompareOpcode (MI);
956+ SmallVector<MachineOperand *, 4 > UsesToProcess;
957+ if (!Opcode ||
958+ !SIShrinkInstructions::shouldSwapCndOperands (MI, *TII, UsesToProcess))
959+ return AMDGPU::NoRegister;
960+
961+ auto DL = MI.getDebugLoc ();
962+ Register NewVCC = MRI->createVirtualRegister (MRI->getRegClass (Reg));
963+
964+ MachineInstrBuilder InverseCompare =
965+ BuildMI (*MI.getParent (), MI, DL, TII->get (Opcode), NewVCC);
966+ InverseCompare->setFlags (MI.getFlags ());
967+
968+ unsigned OpNum = MI.getNumExplicitOperands ();
969+ for (unsigned i = 1 ; i < OpNum; i++) {
970+ MachineOperand Op = MI.getOperand (i);
971+ InverseCompare.add (Op);
972+ if (Op.isReg () && Op.isKill ())
973+ InverseCompare->getOperand (i).setIsKill (false );
937974 }
938- return false ;
975+
976+ for (auto &Use : UsesToProcess) {
977+ MachineInstr *Inst = Use->getParent ();
978+ swapCndOperands (*Inst);
979+ }
980+
981+ MRI->replaceRegWith (Reg, NewVCC);
982+ MI.eraseFromParent ();
983+ return NewVCC;
939984}
940985
941986bool SIShrinkInstructions::run (MachineFunction &MF) {
@@ -1077,10 +1122,6 @@ bool SIShrinkInstructions::run(MachineFunction &MF) {
10771122 continue ;
10781123 }
10791124
1080- if (MI.getOpcode () == AMDGPU::V_CNDMASK_B32_e64 &&
1081- trySwitchOperands (MI, &OldVCC, &NewVCC))
1082- MRI->setRegAllocationHint (NewVCC, 0 , VCCReg);
1083-
10841125 // If there is no chance we will shrink it and use VCC as sdst to get
10851126 // a 32 bit form try to replace dead sdst with NULL.
10861127 if (TII->isVOP3 (MI.getOpcode ())) {
@@ -1118,6 +1159,9 @@ bool SIShrinkInstructions::run(MachineFunction &MF) {
11181159 // provide a hint to the register allocator to use VCC and then we
11191160 // will run this pass again after RA and shrink it if it outputs to
11201161 // VCC.
1162+ Register NewVCC = trySwapCndOperands (MI);
1163+ DstReg = NewVCC == AMDGPU::NoRegister ? DstReg : NewVCC;
1164+
11211165 MRI->setRegAllocationHint (DstReg, 0 , VCCReg);
11221166 continue ;
11231167 }
0 commit comments