@@ -123,7 +123,7 @@ class SIFoldOperandsImpl {
123123 SmallVectorImpl<FoldCandidate> &FoldList,
124124 SmallVectorImpl<MachineInstr *> &CopiesToReplace) const ;
125125
126- MachineOperand * getImmOrMaterializedImm (MachineOperand &Op) const ;
126+ std::optional< int64_t > getImmOrMaterializedImm (MachineOperand &Op) const ;
127127 bool tryConstantFoldOp (MachineInstr *MI) const ;
128128 bool tryFoldCndMask (MachineInstr &MI) const ;
129129 bool tryFoldZeroHighBits (MachineInstr &MI) const ;
@@ -1298,21 +1298,22 @@ static void mutateCopyOp(MachineInstr &MI, const MCInstrDesc &NewDesc) {
12981298 MI.removeOperand (I);
12991299}
13001300
// Resolve \p Op to a known 64-bit immediate value when possible: either Op is
// itself an immediate operand, or it is a virtual register whose (SSA) def is
// a move-immediate, in which case the materialized value is extracted —
// honoring any subregister index on the use. Returns std::nullopt when no
// immediate can be determined.
std::optional<int64_t>
SIFoldOperandsImpl::getImmOrMaterializedImm(MachineOperand &Op) const {
  if (Op.isImm())
    return Op.getImm();

  // Only virtual registers have a single def we can look up through MRI;
  // physical registers (or non-register operands) cannot be traced here.
  if (!Op.isReg() || !Op.getReg().isVirtual())
    return std::nullopt;

  const MachineInstr *Def = MRI->getVRegDef(Op.getReg());
  if (Def && Def->isMoveImmediate()) {
    const MachineOperand &ImmSrc = Def->getOperand(1);
    if (ImmSrc.isImm())
      // A subregister use reads only part of the materialized value;
      // extractSubregFromImm narrows the immediate to the sub-range selected
      // by Op.getSubReg(). NOTE(review): presumably it yields std::nullopt
      // for subregister indices it cannot handle — confirm against
      // SIInstrInfo::extractSubregFromImm.
      return TII->extractSubregFromImm(ImmSrc.getImm(), Op.getSubReg());
  }

  return std::nullopt;
}
13171318
13181319// Try to simplify operations with a constant that may appear after instruction
@@ -1327,30 +1328,34 @@ bool SIFoldOperandsImpl::tryConstantFoldOp(MachineInstr *MI) const {
13271328 int Src0Idx = AMDGPU::getNamedOperandIdx (Opc, AMDGPU::OpName::src0);
13281329 if (Src0Idx == -1 )
13291330 return false ;
1330- MachineOperand *Src0 = getImmOrMaterializedImm (MI->getOperand (Src0Idx));
1331+
1332+ MachineOperand *Src0 = &MI->getOperand (Src0Idx);
1333+ std::optional<int64_t > Src0Imm = getImmOrMaterializedImm (*Src0);
13311334
13321335 if ((Opc == AMDGPU::V_NOT_B32_e64 || Opc == AMDGPU::V_NOT_B32_e32 ||
13331336 Opc == AMDGPU::S_NOT_B32) &&
1334- Src0-> isImm () ) {
1335- MI->getOperand (1 ).ChangeToImmediate (~Src0-> getImm () );
1337+ Src0Imm ) {
1338+ MI->getOperand (1 ).ChangeToImmediate (~*Src0Imm );
13361339 mutateCopyOp (*MI, TII->get (getMovOpc (Opc == AMDGPU::S_NOT_B32)));
13371340 return true ;
13381341 }
13391342
13401343 int Src1Idx = AMDGPU::getNamedOperandIdx (Opc, AMDGPU::OpName::src1);
13411344 if (Src1Idx == -1 )
13421345 return false ;
1343- MachineOperand *Src1 = getImmOrMaterializedImm (MI->getOperand (Src1Idx));
13441346
1345- if (!Src0->isImm () && !Src1->isImm ())
1347+ MachineOperand *Src1 = &MI->getOperand (Src1Idx);
1348+ std::optional<int64_t > Src1Imm = getImmOrMaterializedImm (*Src1);
1349+
1350+ if (!Src0Imm && !Src1Imm)
13461351 return false ;
13471352
13481353 // and k0, k1 -> v_mov_b32 (k0 & k1)
13491354 // or k0, k1 -> v_mov_b32 (k0 | k1)
13501355 // xor k0, k1 -> v_mov_b32 (k0 ^ k1)
1351- if (Src0-> isImm () && Src1-> isImm () ) {
1356+ if (Src0Imm && Src1Imm ) {
13521357 int32_t NewImm;
1353- if (!evalBinaryInstruction (Opc, NewImm, Src0-> getImm (), Src1-> getImm () ))
1358+ if (!evalBinaryInstruction (Opc, NewImm, *Src0Imm, *Src1Imm ))
13541359 return false ;
13551360
13561361 bool IsSGPR = TRI->isSGPRReg (*MRI, MI->getOperand (0 ).getReg ());
@@ -1366,12 +1371,13 @@ bool SIFoldOperandsImpl::tryConstantFoldOp(MachineInstr *MI) const {
13661371 if (!MI->isCommutable ())
13671372 return false ;
13681373
1369- if (Src0-> isImm () && !Src1-> isImm () ) {
1374+ if (Src0Imm && !Src1Imm ) {
13701375 std::swap (Src0, Src1);
13711376 std::swap (Src0Idx, Src1Idx);
1377+ std::swap (Src0Imm, Src1Imm);
13721378 }
13731379
1374- int32_t Src1Val = static_cast <int32_t >(Src1-> getImm () );
1380+ int32_t Src1Val = static_cast <int32_t >(*Src1Imm );
13751381 if (Opc == AMDGPU::V_OR_B32_e64 ||
13761382 Opc == AMDGPU::V_OR_B32_e32 ||
13771383 Opc == AMDGPU::S_OR_B32) {
@@ -1428,9 +1434,12 @@ bool SIFoldOperandsImpl::tryFoldCndMask(MachineInstr &MI) const {
14281434 MachineOperand *Src0 = TII->getNamedOperand (MI, AMDGPU::OpName::src0);
14291435 MachineOperand *Src1 = TII->getNamedOperand (MI, AMDGPU::OpName::src1);
14301436 if (!Src1->isIdenticalTo (*Src0)) {
1431- auto *Src0Imm = getImmOrMaterializedImm (*Src0);
1432- auto *Src1Imm = getImmOrMaterializedImm (*Src1);
1433- if (!Src1Imm->isIdenticalTo (*Src0Imm))
1437+ std::optional<int64_t > Src1Imm = getImmOrMaterializedImm (*Src1);
1438+ if (!Src1Imm)
1439+ return false ;
1440+
1441+ std::optional<int64_t > Src0Imm = getImmOrMaterializedImm (*Src0);
1442+ if (!Src0Imm || *Src0Imm != *Src1Imm)
14341443 return false ;
14351444 }
14361445
@@ -1463,8 +1472,8 @@ bool SIFoldOperandsImpl::tryFoldZeroHighBits(MachineInstr &MI) const {
14631472 MI.getOpcode () != AMDGPU::V_AND_B32_e32)
14641473 return false ;
14651474
1466- MachineOperand *Src0 = getImmOrMaterializedImm (MI.getOperand (1 ));
1467- if (!Src0-> isImm () || Src0-> getImm () != 0xffff )
1475+ std::optional< int64_t > Src0Imm = getImmOrMaterializedImm (MI.getOperand (1 ));
1476+ if (!Src0Imm || *Src0Imm != 0xffff )
14681477 return false ;
14691478
14701479 Register Src1 = MI.getOperand (2 ).getReg ();
0 commit comments