@@ -123,7 +123,7 @@ class SIFoldOperandsImpl {
123123 SmallVectorImpl<FoldCandidate> &FoldList,
124124 SmallVectorImpl<MachineInstr *> &CopiesToReplace) const;
125125
126- MachineOperand *getImmOrMaterializedImm(MachineOperand &Op) const;
126+ std::optional<int64_t> getImmOrMaterializedImm(MachineOperand &Op) const;
127127 bool tryConstantFoldOp(MachineInstr *MI) const;
128128 bool tryFoldCndMask(MachineInstr &MI) const;
129129 bool tryFoldZeroHighBits(MachineInstr &MI) const;
@@ -1296,21 +1296,22 @@ static void mutateCopyOp(MachineInstr &MI, const MCInstrDesc &NewDesc) {
12961296 MI.removeOperand(I);
12971297}
12981298
1299- MachineOperand *
1299+ std::optional<int64_t>
13001300SIFoldOperandsImpl::getImmOrMaterializedImm(MachineOperand &Op) const {
1301- // If this has a subregister, it obviously is a register source.
1302- if (!Op.isReg() || Op.getSubReg() != AMDGPU::NoSubRegister ||
1303- !Op.getReg().isVirtual())
1304- return &Op;
1301+ if (Op.isImm())
1302+ return Op.getImm();
13051303
1306- MachineInstr *Def = MRI->getVRegDef(Op.getReg());
1304+ if (!Op.isReg() || !Op.getReg().isVirtual())
1305+ return std::nullopt;
1306+
1307+ const MachineInstr *Def = MRI->getVRegDef(Op.getReg());
13071308 if (Def && Def->isMoveImmediate()) {
1308- MachineOperand &ImmSrc = Def->getOperand(1);
1309+ const MachineOperand &ImmSrc = Def->getOperand(1);
13091310 if (ImmSrc.isImm())
1310- return &ImmSrc;
1311+ return TII->extractSubregFromImm(ImmSrc.getImm(), Op.getSubReg());
13111312 }
13121313
1313- return &Op;
1314+ return std::nullopt;
13141315}
13151316
13161317// Try to simplify operations with a constant that may appear after instruction
@@ -1325,30 +1326,34 @@ bool SIFoldOperandsImpl::tryConstantFoldOp(MachineInstr *MI) const {
13251326 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
13261327 if (Src0Idx == -1)
13271328 return false;
1328- MachineOperand *Src0 = getImmOrMaterializedImm(MI->getOperand(Src0Idx));
1329+
1330+ MachineOperand *Src0 = &MI->getOperand(Src0Idx);
1331+ std::optional<int64_t> Src0Imm = getImmOrMaterializedImm(*Src0);
13291332
13301333 if ((Opc == AMDGPU::V_NOT_B32_e64 || Opc == AMDGPU::V_NOT_B32_e32 ||
13311334 Opc == AMDGPU::S_NOT_B32) &&
1332- Src0->isImm()) {
1333- MI->getOperand(1).ChangeToImmediate(~Src0->getImm());
1335+ Src0Imm) {
1336+ MI->getOperand(1).ChangeToImmediate(~*Src0Imm);
13341337 mutateCopyOp(*MI, TII->get(getMovOpc(Opc == AMDGPU::S_NOT_B32)));
13351338 return true;
13361339 }
13371340
13381341 int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
13391342 if (Src1Idx == -1)
13401343 return false;
1341- MachineOperand *Src1 = getImmOrMaterializedImm(MI->getOperand(Src1Idx));
13421344
1343- if (!Src0->isImm() && !Src1->isImm())
1345+ MachineOperand *Src1 = &MI->getOperand(Src1Idx);
1346+ std::optional<int64_t> Src1Imm = getImmOrMaterializedImm(*Src1);
1347+
1348+ if (!Src0Imm && !Src1Imm)
13441349 return false;
13451350
13461351 // and k0, k1 -> v_mov_b32 (k0 & k1)
13471352 // or k0, k1 -> v_mov_b32 (k0 | k1)
13481353 // xor k0, k1 -> v_mov_b32 (k0 ^ k1)
1349- if (Src0->isImm() && Src1->isImm()) {
1354+ if (Src0Imm && Src1Imm) {
13501355 int32_t NewImm;
1351- if (!evalBinaryInstruction(Opc, NewImm, Src0->getImm(), Src1->getImm()))
1356+ if (!evalBinaryInstruction(Opc, NewImm, *Src0Imm, *Src1Imm))
13521357 return false;
13531358
13541359 bool IsSGPR = TRI->isSGPRReg(*MRI, MI->getOperand(0).getReg());
@@ -1364,12 +1369,13 @@ bool SIFoldOperandsImpl::tryConstantFoldOp(MachineInstr *MI) const {
13641369 if (!MI->isCommutable())
13651370 return false;
13661371
1367- if (Src0->isImm() && !Src1->isImm()) {
1372+ if (Src0Imm && !Src1Imm) {
13681373 std::swap(Src0, Src1);
13691374 std::swap(Src0Idx, Src1Idx);
1375+ std::swap(Src0Imm, Src1Imm);
13701376 }
13711377
1372- int32_t Src1Val = static_cast<int32_t>(Src1->getImm());
1378+ int32_t Src1Val = static_cast<int32_t>(*Src1Imm);
13731379 if (Opc == AMDGPU::V_OR_B32_e64 ||
13741380 Opc == AMDGPU::V_OR_B32_e32 ||
13751381 Opc == AMDGPU::S_OR_B32) {
@@ -1426,9 +1432,12 @@ bool SIFoldOperandsImpl::tryFoldCndMask(MachineInstr &MI) const {
14261432 MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
14271433 MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
14281434 if (!Src1->isIdenticalTo(*Src0)) {
1429- auto *Src0Imm = getImmOrMaterializedImm(*Src0);
1430- auto *Src1Imm = getImmOrMaterializedImm(*Src1);
1431- if (!Src1Imm->isIdenticalTo(*Src0Imm))
1435+ std::optional<int64_t> Src1Imm = getImmOrMaterializedImm(*Src1);
1436+ if (!Src1Imm)
1437+ return false;
1438+
1439+ std::optional<int64_t> Src0Imm = getImmOrMaterializedImm(*Src0);
1440+ if (!Src0Imm || *Src0Imm != *Src1Imm)
14321441 return false;
14331442 }
14341443
@@ -1461,8 +1470,8 @@ bool SIFoldOperandsImpl::tryFoldZeroHighBits(MachineInstr &MI) const {
14611470 MI.getOpcode() != AMDGPU::V_AND_B32_e32)
14621471 return false;
14631472
1464- MachineOperand *Src0 = getImmOrMaterializedImm(MI.getOperand(1));
1465- if (!Src0->isImm() || Src0->getImm() != 0xffff)
1473+ std::optional<int64_t> Src0Imm = getImmOrMaterializedImm(MI.getOperand(1));
1474+ if (!Src0Imm || *Src0Imm != 0xffff)
14661475 return false;
14671476
14681477 Register Src1 = MI.getOperand(2).getReg();
0 commit comments