@@ -2268,7 +2268,70 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
22682268 MI->eraseFromParent ();
22692269 return true ;
22702270 }
2271+ case AMDGPU::S_ADD_I32: {
2272+ // TODO: Handle s_or_b32, s_and_b32.
2273+ MachineOperand &OtherOp = MI->getOperand (FIOperandNum == 1 ? 2 : 1 );
22712274
2275+ assert (FrameReg || MFI->isBottomOfStack ());
2276+
2277+ MachineOperand &DstOp = MI->getOperand (0 );
2278+ const DebugLoc &DL = MI->getDebugLoc ();
2279+ Register MaterializedReg = FrameReg;
2280+
2281+ // Defend against live scc, which should never happen in practice.
2282+ bool DeadSCC = MI->getOperand (3 ).isDead ();
2283+
2284+ // Do an in-place scale of the wave offset to the lane offset.
2285+ if (FrameReg && !ST.enableFlatScratch ()) {
2286+ BuildMI (*MBB, *MI, DL, TII->get (AMDGPU::S_LSHR_B32))
2287+ .addDef (DstOp.getReg (), RegState::Renamable)
2288+ .addReg (FrameReg)
2289+ .addImm (ST.getWavefrontSizeLog2 ())
2290+ .setOperandDead (3 ); // Set SCC dead
2291+ MaterializedReg = DstOp.getReg ();
2292+ }
2293+
2294+ // If we can't fold the other operand, do another increment.
2295+ if (!OtherOp.isImm () && MaterializedReg) {
2296+ auto AddI32 = BuildMI (*MBB, *MI, DL, TII->get (AMDGPU::S_ADD_I32))
2297+ .addDef (DstOp.getReg (), RegState::Renamable)
2298+ .addReg (MaterializedReg)
2299+ .add (OtherOp);
2300+ if (DeadSCC)
2301+ AddI32.setOperandDead (3 );
2302+ MaterializedReg = DstOp.getReg ();
2303+ }
2304+
2305+ int64_t NewOffset = FrameInfo.getObjectOffset (Index);
2306+
2307+ // For the non-immediate case, we could fall through to the default
2308+ // handling, but we do an in-place update of the result register here to
2309+ // avoid scavenging another register.
2310+ if (OtherOp.isImm ())
2311+ NewOffset += OtherOp.getImm ();
2312+
2313+ if (NewOffset == 0 && DeadSCC) {
2314+ MI->eraseFromParent ();
2315+ } else if (!MaterializedReg && OtherOp.isImm ()) {
2316+ // In a kernel, the address should just be an immediate.
2317+ // SCC should really be dead, but preserve the def just in case it
2318+ // isn't.
2319+ if (DeadSCC)
2320+ MI->removeOperand (3 );
2321+ else
2322+ MI->getOperand (3 ).setIsDef (true );
2323+
2324+ MI->removeOperand (2 );
2325+ MI->getOperand (1 ).ChangeToImmediate (NewOffset);
2326+ MI->setDesc (TII->get (AMDGPU::S_MOV_B32));
2327+ } else {
2328+ if (MaterializedReg)
2329+ OtherOp.ChangeToRegister (MaterializedReg, false );
2330+ FIOp.ChangeToImmediate (NewOffset);
2331+ }
2332+
2333+ return true ;
2334+ }
22722335 default : {
22732336 // Other access to frame index
22742337 const DebugLoc &DL = MI->getDebugLoc ();
0 commit comments