@@ -78,6 +78,12 @@ class SIFoldOperandsImpl {
7878 bool frameIndexMayFold (const MachineInstr &UseMI, int OpNo,
7979 const MachineOperand &OpToFold) const ;
8080
81+ /// Fold %vgpr = COPY (S_ADD_I32 x, frameindex)
82+ ///
83+ /// => %vgpr = V_ADD_U32 x, frameindex
/// \p DstReg is the destination of the copy \p MI and \p SrcReg is its source.
/// Returns true if the fold was performed; in that case both the instruction
/// defining \p SrcReg and \p MI itself have been erased. Returns false if
/// nothing was changed.
84+ bool foldCopyToVGPROfScalarAddOfFrameIndex (Register DstReg, Register SrcReg,
85+ MachineInstr &MI) const ;
86+
8187 bool updateOperand (FoldCandidate &Fold) const ;
8288
8389 bool canUseImmWithOpSel (FoldCandidate &Fold) const ;
@@ -224,6 +230,67 @@ bool SIFoldOperandsImpl::frameIndexMayFold(
224230 return OpNo == VIdx && SIdx == -1 ;
225231}
226232
233+ // / Fold %vgpr = COPY (S_ADD_I32 x, frameindex)
234+ // /
235+ // / => %vgpr = V_ADD_U32 x, frameindex
236+ bool SIFoldOperandsImpl::foldCopyToVGPROfScalarAddOfFrameIndex (
237+ Register DstReg, Register SrcReg, MachineInstr &MI) const {
238+ if (TRI->isVGPR (*MRI, DstReg) && TRI->isSGPRReg (*MRI, SrcReg) &&
239+ MRI->hasOneNonDBGUse (SrcReg)) {
240+ MachineInstr *Def = MRI->getVRegDef (SrcReg);
241+ if (Def && Def->getOpcode () == AMDGPU::S_ADD_I32 &&
242+ Def->getOperand (3 ).isDead ()) {
243+ MachineOperand *Src0 = &Def->getOperand (1 );
244+ MachineOperand *Src1 = &Def->getOperand (2 );
245+
246+ // TODO: This is profitable with more operand types, and for more
247+ // opcodes. But ultimately this is working around poor / nonexistent
248+ // regbankselect.
249+ if (!Src0->isFI () && !Src1->isFI ())
250+ return false ;
251+
252+ if (Src0->isFI ())
253+ std::swap (Src0, Src1);
254+
255+ MachineBasicBlock *MBB = Def->getParent ();
256+ const DebugLoc &DL = Def->getDebugLoc ();
257+ if (ST->hasAddNoCarry ()) {
258+ bool UseVOP3 = !Src0->isImm () || TII->isInlineConstant (*Src0);
259+ MachineInstrBuilder Add =
260+ BuildMI (*MBB, *Def, DL,
261+ TII->get (UseVOP3 ? AMDGPU::V_ADD_U32_e64
262+ : AMDGPU::V_ADD_U32_e32),
263+ DstReg)
264+ .add (*Src0)
265+ .add (*Src1)
266+ .setMIFlags (Def->getFlags ());
267+ if (UseVOP3)
268+ Add.addImm (0 );
269+
270+ Def->eraseFromParent ();
271+ MI.eraseFromParent ();
272+ return true ;
273+ }
274+
275+ MachineBasicBlock::LivenessQueryResult Liveness =
276+ MBB->computeRegisterLiveness (TRI, AMDGPU::VCC, *Def, 16 );
277+ if (Liveness == MachineBasicBlock::LQR_Dead) {
278+ // TODO: If src1 satisfies operand constraints, use vop3 version.
279+ BuildMI (*MBB, *Def, DL, TII->get (AMDGPU::V_ADD_CO_U32_e32), DstReg)
280+ .add (*Src0)
281+ .add (*Src1)
282+ .setOperandDead (3 ) // implicit-def $vcc
283+ .setMIFlags (Def->getFlags ());
284+ Def->eraseFromParent ();
285+ MI.eraseFromParent ();
286+ return true ;
287+ }
288+ }
289+ }
290+
291+ return false ;
292+ }
293+
227294FunctionPass *llvm::createSIFoldOperandsLegacyPass () {
228295 return new SIFoldOperandsLegacy ();
229296}
@@ -1470,9 +1537,10 @@ bool SIFoldOperandsImpl::foldInstOperand(MachineInstr &MI,
14701537
14711538bool SIFoldOperandsImpl::tryFoldFoldableCopy (
14721539 MachineInstr &MI, MachineOperand *&CurrentKnownM0Val) const {
1540+ Register DstReg = MI.getOperand (0 ).getReg ();
14731541 // Specially track simple redefs of m0 to the same value in a block, so we
14741542 // can erase the later ones.
1475- if (MI. getOperand ( 0 ). getReg () == AMDGPU::M0) {
1543+ if (DstReg == AMDGPU::M0) {
14761544 MachineOperand &NewM0Val = MI.getOperand (1 );
14771545 if (CurrentKnownM0Val && CurrentKnownM0Val->isIdenticalTo (NewM0Val)) {
14781546 MI.eraseFromParent ();
@@ -1504,13 +1572,17 @@ bool SIFoldOperandsImpl::tryFoldFoldableCopy(
15041572 if (OpToFold.isReg () && !OpToFold.getReg ().isVirtual ())
15051573 return false ;
15061574
1575+ if (OpToFold.isReg () &&
1576+ foldCopyToVGPROfScalarAddOfFrameIndex (DstReg, OpToFold.getReg (), MI))
1577+ return true ;
1578+
15071579 // Prevent folding operands backwards in the function. For example,
15081580 // the COPY opcode must not be replaced by 1 in this example:
15091581 //
15101582 // %3 = COPY %vgpr0; VGPR_32:%3
15111583 // ...
15121584 // %vgpr0 = V_MOV_B32_e32 1, implicit %exec
1513- if (!MI. getOperand ( 0 ). getReg () .isVirtual ())
1585+ if (!DstReg .isVirtual ())
15141586 return false ;
15151587
15161588 bool Changed = foldInstOperand (MI, OpToFold);
0 commit comments