@@ -2445,299 +2445,6 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
     MI->eraseFromParent();
     return true;
   }
-  case AMDGPU::V_ADD_U32_e32:
-  case AMDGPU::V_ADD_U32_e64:
-  case AMDGPU::V_ADD_CO_U32_e32:
-  case AMDGPU::V_ADD_CO_U32_e64: {
-    // TODO: Handle sub, and, or.
-    unsigned NumDefs = MI->getNumExplicitDefs();
-    unsigned Src0Idx = NumDefs;
-
-    bool HasClamp = false;
-    MachineOperand *VCCOp = nullptr;
-
-    switch (MI->getOpcode()) {
-    case AMDGPU::V_ADD_U32_e32:
-      break;
-    case AMDGPU::V_ADD_U32_e64:
-      HasClamp = MI->getOperand(3).getImm();
-      break;
-    case AMDGPU::V_ADD_CO_U32_e32:
-      VCCOp = &MI->getOperand(3);
-      break;
-    case AMDGPU::V_ADD_CO_U32_e64:
-      VCCOp = &MI->getOperand(1);
-      HasClamp = MI->getOperand(4).getImm();
-      break;
-    default:
-      break;
-    }
-    bool DeadVCC = !VCCOp || VCCOp->isDead();
-    MachineOperand &DstOp = MI->getOperand(0);
-    Register DstReg = DstOp.getReg();
-
-    unsigned OtherOpIdx =
-        FIOperandNum == Src0Idx ? FIOperandNum + 1 : Src0Idx;
-    MachineOperand *OtherOp = &MI->getOperand(OtherOpIdx);
-
-    unsigned Src1Idx = Src0Idx + 1;
-    Register MaterializedReg = FrameReg;
-    Register ScavengedVGPR;
-
-    if (FrameReg && !ST.enableFlatScratch()) {
-      // We should just do an in-place update of the result register. However,
-      // the value there may also be used by the add, in which case we need a
-      // temporary register.
-      //
-      // FIXME: The scavenger is not finding the result register in the
-      // common case where the add does not read the register.
-
-      ScavengedVGPR = RS->scavengeRegisterBackwards(
-          AMDGPU::VGPR_32RegClass, MI, /*RestoreAfter=*/false, /*SPAdj=*/0);
-
-      // TODO: If we have a free SGPR, it's sometimes better to use a scalar
-      // shift.
-      BuildMI(*MBB, *MI, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64))
-          .addDef(ScavengedVGPR, RegState::Renamable)
-          .addImm(ST.getWavefrontSizeLog2())
-          .addReg(FrameReg);
-      MaterializedReg = ScavengedVGPR;
-    }
-
-    int64_t Offset = FrameInfo.getObjectOffset(Index);
-    // For the non-immediate case, we could fall through to the default
-    // handling, but we do an in-place update of the result register here to
-    // avoid scavenging another register.
-    if (OtherOp->isImm()) {
-      OtherOp->setImm(OtherOp->getImm() + Offset);
-      Offset = 0;
-    }
-
-    if ((!OtherOp->isImm() || OtherOp->getImm() != 0) && MaterializedReg) {
-      if (ST.enableFlatScratch() &&
-          !TII->isOperandLegal(*MI, Src1Idx, OtherOp)) {
-        // We didn't need the shift above, so we have an SGPR for the frame
-        // register, but may have a VGPR only operand.
-        //
-        // TODO: On gfx10+, we can easily change the opcode to the e64 version
-        // and use the higher constant bus restriction to avoid this copy.
-
-        if (!ScavengedVGPR) {
-          ScavengedVGPR = RS->scavengeRegisterBackwards(
-              AMDGPU::VGPR_32RegClass, MI, /*RestoreAfter=*/false,
-              /*SPAdj=*/0);
-        }
-
-        assert(ScavengedVGPR != DstReg);
-
-        BuildMI(*MBB, *MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), ScavengedVGPR)
-            .addReg(MaterializedReg,
-                    MaterializedReg != FrameReg ? RegState::Kill : 0);
-        MaterializedReg = ScavengedVGPR;
-      }
-
-      // TODO: In the flat scratch case, if this is an add of an SGPR, and SCC
-      // is not live, we could use a scalar add + vector add instead of 2
-      // vector adds.
-      auto AddI32 = BuildMI(*MBB, *MI, DL, TII->get(MI->getOpcode()))
-                        .addDef(DstReg, RegState::Renamable);
-      if (NumDefs == 2)
-        AddI32.add(MI->getOperand(1));
-
-      unsigned MaterializedRegFlags =
-          MaterializedReg != FrameReg ? RegState::Kill : 0;
-
-      if (isVGPRClass(getPhysRegBaseClass(MaterializedReg))) {
-        // If we know we have a VGPR already, it's more likely the other
-        // operand is a legal vsrc0.
-        AddI32
-            .add(*OtherOp)
-            .addReg(MaterializedReg, MaterializedRegFlags);
-      } else {
-        // Commute operands to avoid violating VOP2 restrictions. This will
-        // typically happen when using scratch.
-        AddI32
-            .addReg(MaterializedReg, MaterializedRegFlags)
-            .add(*OtherOp);
-      }
-
-      if (MI->getOpcode() == AMDGPU::V_ADD_CO_U32_e64 ||
-          MI->getOpcode() == AMDGPU::V_ADD_U32_e64)
-        AddI32.addImm(0); // clamp
-
-      if (MI->getOpcode() == AMDGPU::V_ADD_CO_U32_e32)
-        AddI32.setOperandDead(3); // Dead vcc
-
-      MaterializedReg = DstReg;
-
-      OtherOp->ChangeToRegister(MaterializedReg, false);
-      OtherOp->setIsKill(true);
-      FIOp->ChangeToImmediate(Offset);
-      Offset = 0;
-    } else if (Offset != 0) {
-      assert(!MaterializedReg);
-      FIOp->ChangeToImmediate(Offset);
-      Offset = 0;
-    } else {
-      if (DeadVCC && !HasClamp) {
-        assert(Offset == 0);
-
-        // TODO: Losing kills and implicit operands. Just mutate to copy and
-        // let lowerCopy deal with it?
-        if (OtherOp->isReg() && OtherOp->getReg() == DstReg) {
-          // Folded to an identity copy.
-          MI->eraseFromParent();
-          return true;
-        }
-
-        // The immediate value should be in OtherOp
-        MI->setDesc(TII->get(AMDGPU::V_MOV_B32_e32));
-        MI->removeOperand(FIOperandNum);
-
-        unsigned NumOps = MI->getNumOperands();
-        for (unsigned I = NumOps - 2; I >= 2; --I)
-          MI->removeOperand(I);
-
-        if (NumDefs == 2)
-          MI->removeOperand(1);
-
-        // The code below can't deal with a mov.
-        return true;
-      }
-
-      // This folded to a constant, but we have to keep the add around for
-      // pointless implicit defs or clamp modifier.
-      FIOp->ChangeToImmediate(0);
-    }
-
-    // Try to improve legality by commuting.
-    if (!TII->isOperandLegal(*MI, Src1Idx) && TII->commuteInstruction(*MI)) {
-      std::swap(FIOp, OtherOp);
-      std::swap(FIOperandNum, OtherOpIdx);
-    }
-
-    for (unsigned SrcIdx : {Src1Idx, Src0Idx}) {
-      // Depending on operand constraints we may need to insert another copy.
-      if (!TII->isOperandLegal(*MI, SrcIdx)) {
-        // If commuting didn't make the operands legal, we need to materialize
-        // in a register.
-        // TODO: Can use SGPR on gfx10+ in some cases.
-        if (!ScavengedVGPR) {
-          ScavengedVGPR = RS->scavengeRegisterBackwards(
-              AMDGPU::VGPR_32RegClass, MI, /*RestoreAfter=*/false,
-              /*SPAdj=*/0);
-        }
-
-        assert(ScavengedVGPR != DstReg);
-
-        MachineOperand &Src = MI->getOperand(SrcIdx);
-        BuildMI(*MBB, *MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), ScavengedVGPR)
-            .add(Src);
-
-        Src.ChangeToRegister(ScavengedVGPR, false);
-        Src.setIsKill(true);
-      }
-    }
-
-    // Fold out add of 0 case that can appear in kernels.
-    if (FIOp->isImm() && FIOp->getImm() == 0 && DeadVCC && !HasClamp) {
-      if (OtherOp->isReg() && OtherOp->getReg() != DstReg) {
-        BuildMI(*MBB, *MI, DL, TII->get(AMDGPU::COPY), DstReg).add(*OtherOp);
-      }
-
-      MI->eraseFromParent();
-    }
-
-    return true;
-  }
-  case AMDGPU::S_ADD_I32: {
-    // TODO: Handle s_or_b32, s_and_b32.
-    unsigned OtherOpIdx = FIOperandNum == 1 ? 2 : 1;
-    MachineOperand &OtherOp = MI->getOperand(OtherOpIdx);
-
-    assert(FrameReg || MFI->isBottomOfStack());
-
-    MachineOperand &DstOp = MI->getOperand(0);
-    const DebugLoc &DL = MI->getDebugLoc();
-    Register MaterializedReg = FrameReg;
-
-    // Defend against live scc, which should never happen in practice.
-    bool DeadSCC = MI->getOperand(3).isDead();
-
-    Register TmpReg;
-
-    if (FrameReg && !ST.enableFlatScratch()) {
-      // FIXME: In the common case where the add does not also read its result
-      // (i.e. this isn't a reg += fi), it's not finding the dest reg as
-      // available.
-      TmpReg = RS->scavengeRegisterBackwards(AMDGPU::SReg_32_XM0RegClass, MI,
-                                             false, 0);
-      BuildMI(*MBB, *MI, DL, TII->get(AMDGPU::S_LSHR_B32))
-          .addDef(TmpReg, RegState::Renamable)
-          .addReg(FrameReg)
-          .addImm(ST.getWavefrontSizeLog2())
-          .setOperandDead(3); // Set SCC dead
-      MaterializedReg = TmpReg;
-    }
-
-    int64_t Offset = FrameInfo.getObjectOffset(Index);
-
-    // For the non-immediate case, we could fall through to the default
-    // handling, but we do an in-place update of the result register here to
-    // avoid scavenging another register.
-    if (OtherOp.isImm()) {
-      OtherOp.setImm(OtherOp.getImm() + Offset);
-      Offset = 0;
-
-      if (MaterializedReg)
-        FIOp->ChangeToRegister(MaterializedReg, false);
-      else
-        FIOp->ChangeToImmediate(0);
-    } else if (MaterializedReg) {
-      // If we can't fold the other operand, do another increment.
-      Register DstReg = DstOp.getReg();
-
-      if (!TmpReg && MaterializedReg == FrameReg) {
-        TmpReg = RS->scavengeRegisterBackwards(AMDGPU::SReg_32_XM0RegClass,
-                                               MI, false, 0);
-        DstReg = TmpReg;
-      }
-
-      auto AddI32 = BuildMI(*MBB, *MI, DL, TII->get(AMDGPU::S_ADD_I32))
-                        .addDef(DstReg, RegState::Renamable)
-                        .addReg(MaterializedReg, RegState::Kill)
-                        .add(OtherOp);
-      if (DeadSCC)
-        AddI32.setOperandDead(3);
-
-      MaterializedReg = DstReg;
-
-      OtherOp.ChangeToRegister(MaterializedReg, false);
-      OtherOp.setIsKill(true);
-      OtherOp.setIsRenamable(true);
-      FIOp->ChangeToImmediate(Offset);
-    } else {
-      // If we don't have any other offset to apply, we can just directly
-      // interpret the frame index as the offset.
-      FIOp->ChangeToImmediate(Offset);
-    }
-
-    if (DeadSCC && OtherOp.isImm() && OtherOp.getImm() == 0) {
-      assert(Offset == 0);
-      MI->removeOperand(3);
-      MI->removeOperand(OtherOpIdx);
-      MI->setDesc(TII->get(FIOp->isReg() ? AMDGPU::COPY : AMDGPU::S_MOV_B32));
-    } else if (DeadSCC && FIOp->isImm() && FIOp->getImm() == 0) {
-      assert(Offset == 0);
-      MI->removeOperand(3);
-      MI->removeOperand(FIOperandNum);
-      MI->setDesc(
-          TII->get(OtherOp.isReg() ? AMDGPU::COPY : AMDGPU::S_MOV_B32));
-    }
-
-    assert(!FIOp->isFI());
-    return true;
-  }
   default: {
     // Other access to frame index
     const DebugLoc &DL = MI->getDebugLoc();
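
For readers skimming the diff: the two deleted cases (the V_ADD_U32/V_ADD_CO_U32 family and S_ADD_I32) implement one folding scheme. Shift the frame register, which without flat scratch holds a byte offset scaled by the wave size, right by log2(wavefront size) to recover a per-lane byte offset; fold the frame object's static offset into the add's other operand when that operand is an immediate; and let an add whose immediate folds to zero degenerate into a copy, a mov, or nothing. What follows is a minimal standalone sketch of that arithmetic only. The Operand type and both function names are illustrative stand-ins invented for this note, not LLVM's MachineOperand API.

// Standalone sketch of the folding arithmetic in the removed cases.
// Compile with any C++17 compiler; everything here is a stand-in.
#include <cassert>
#include <cstdint>
#include <iostream>
#include <optional>

struct Operand {
  std::optional<int64_t> imm; // engaged when the operand is an immediate
  unsigned reg = 0;           // stand-in register id otherwise
};

// The V_LSHRREV_B32 / S_LSHR_B32 step: the frame register's wave-scaled
// byte offset divided by the wavefront size gives the per-lane offset.
uint32_t perLaneOffset(uint32_t frameRegVal, unsigned wavefrontSizeLog2) {
  return frameRegVal >> wavefrontSizeLog2;
}

// Fold the frame object's static offset into the add's other operand when
// it is already an immediate; return whatever offset is left over and
// would still need a second add to apply.
int64_t foldObjectOffset(Operand &otherOp, int64_t objectOffset) {
  if (otherOp.imm) {
    *otherOp.imm += objectOffset;
    return 0;
  }
  return objectOffset;
}

int main() {
  // Wave64 with a per-lane allocation of 16 bytes.
  assert(perLaneOffset(64 * 16, /*wavefrontSizeLog2=*/6) == 16);

  // "dst = add fi, 8" with the object at offset 24 folds to "dst = mov 32".
  Operand imm{8};
  assert(foldObjectOffset(imm, 24) == 0 && *imm.imm == 32);

  // A register operand cannot absorb the offset, so it is left over.
  Operand reg{std::nullopt, 5};
  assert(foldObjectOffset(reg, 24) == 24);

  std::cout << "folded immediate: " << *imm.imm << '\n';
  return 0;
}

The rest of the removed code is operand-legality plumbing around this fold: a leftover offset, or an operand that is not legal for the VOP2 encoding, forces a scavenged VGPR and a second add, and the commute and copy-insertion logic exists only to make those operands legal.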