@@ -2612,299 +2612,6 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
     MI->eraseFromParent();
     return true;
   }
-  case AMDGPU::V_ADD_U32_e32:
-  case AMDGPU::V_ADD_U32_e64:
-  case AMDGPU::V_ADD_CO_U32_e32:
-  case AMDGPU::V_ADD_CO_U32_e64: {
-    // TODO: Handle sub, and, or.
-    unsigned NumDefs = MI->getNumExplicitDefs();
-    unsigned Src0Idx = NumDefs;
-
-    bool HasClamp = false;
-    MachineOperand *VCCOp = nullptr;
-
-    switch (MI->getOpcode()) {
-    case AMDGPU::V_ADD_U32_e32:
-      break;
-    case AMDGPU::V_ADD_U32_e64:
-      HasClamp = MI->getOperand(3).getImm();
-      break;
-    case AMDGPU::V_ADD_CO_U32_e32:
-      VCCOp = &MI->getOperand(3);
-      break;
-    case AMDGPU::V_ADD_CO_U32_e64:
-      VCCOp = &MI->getOperand(1);
-      HasClamp = MI->getOperand(4).getImm();
-      break;
-    default:
-      break;
-    }
-    bool DeadVCC = !VCCOp || VCCOp->isDead();
-    MachineOperand &DstOp = MI->getOperand(0);
-    Register DstReg = DstOp.getReg();
-
-    unsigned OtherOpIdx =
-        FIOperandNum == Src0Idx ? FIOperandNum + 1 : Src0Idx;
-    MachineOperand *OtherOp = &MI->getOperand(OtherOpIdx);
-
-    unsigned Src1Idx = Src0Idx + 1;
-    Register MaterializedReg = FrameReg;
-    Register ScavengedVGPR;
-
-    if (FrameReg && !ST.enableFlatScratch()) {
-      // We should just do an in-place update of the result register. However,
-      // the value there may also be used by the add, in which case we need a
-      // temporary register.
-      //
-      // FIXME: The scavenger is not finding the result register in the
-      // common case where the add does not read the register.
-
-      ScavengedVGPR = RS->scavengeRegisterBackwards(
-          AMDGPU::VGPR_32RegClass, MI, /*RestoreAfter=*/false, /*SPAdj=*/0);
-
-      // TODO: If we have a free SGPR, it's sometimes better to use a scalar
-      // shift.
-      BuildMI(*MBB, *MI, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64))
-          .addDef(ScavengedVGPR, RegState::Renamable)
-          .addImm(ST.getWavefrontSizeLog2())
-          .addReg(FrameReg);
-      MaterializedReg = ScavengedVGPR;
-    }
-
-    int64_t Offset = FrameInfo.getObjectOffset(Index);
-    // For the non-immediate case, we could fall through to the default
-    // handling, but we do an in-place update of the result register here to
-    // avoid scavenging another register.
-    if (OtherOp->isImm()) {
-      OtherOp->setImm(OtherOp->getImm() + Offset);
-      Offset = 0;
-    }
-
-    if ((!OtherOp->isImm() || OtherOp->getImm() != 0) && MaterializedReg) {
-      if (ST.enableFlatScratch() &&
-          !TII->isOperandLegal(*MI, Src1Idx, OtherOp)) {
-        // We didn't need the shift above, so we have an SGPR for the frame
-        // register, but may have a VGPR only operand.
-        //
-        // TODO: On gfx10+, we can easily change the opcode to the e64 version
-        // and use the higher constant bus restriction to avoid this copy.
-
-        if (!ScavengedVGPR) {
-          ScavengedVGPR = RS->scavengeRegisterBackwards(
-              AMDGPU::VGPR_32RegClass, MI, /*RestoreAfter=*/false,
-              /*SPAdj=*/0);
-        }
-
-        assert(ScavengedVGPR != DstReg);
-
-        BuildMI(*MBB, *MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), ScavengedVGPR)
-            .addReg(MaterializedReg,
-                    MaterializedReg != FrameReg ? RegState::Kill : 0);
-        MaterializedReg = ScavengedVGPR;
-      }
-
-      // TODO: In the flat scratch case, if this is an add of an SGPR, and SCC
-      // is not live, we could use a scalar add + vector add instead of 2
-      // vector adds.
-      auto AddI32 = BuildMI(*MBB, *MI, DL, TII->get(MI->getOpcode()))
-                        .addDef(DstReg, RegState::Renamable);
-      if (NumDefs == 2)
-        AddI32.add(MI->getOperand(1));
-
-      unsigned MaterializedRegFlags =
-          MaterializedReg != FrameReg ? RegState::Kill : 0;
-
-      if (isVGPRClass(getPhysRegBaseClass(MaterializedReg))) {
-        // If we know we have a VGPR already, it's more likely the other
-        // operand is a legal vsrc0.
-        AddI32
-            .add(*OtherOp)
-            .addReg(MaterializedReg, MaterializedRegFlags);
-      } else {
-        // Commute operands to avoid violating VOP2 restrictions. This will
-        // typically happen when using scratch.
-        AddI32
-            .addReg(MaterializedReg, MaterializedRegFlags)
-            .add(*OtherOp);
-      }
-
-      if (MI->getOpcode() == AMDGPU::V_ADD_CO_U32_e64 ||
-          MI->getOpcode() == AMDGPU::V_ADD_U32_e64)
-        AddI32.addImm(0); // clamp
-
-      if (MI->getOpcode() == AMDGPU::V_ADD_CO_U32_e32)
-        AddI32.setOperandDead(3); // Dead vcc
-
-      MaterializedReg = DstReg;
-
-      OtherOp->ChangeToRegister(MaterializedReg, false);
-      OtherOp->setIsKill(true);
-      FIOp->ChangeToImmediate(Offset);
-      Offset = 0;
-    } else if (Offset != 0) {
-      assert(!MaterializedReg);
-      FIOp->ChangeToImmediate(Offset);
-      Offset = 0;
-    } else {
-      if (DeadVCC && !HasClamp) {
-        assert(Offset == 0);
-
-        // TODO: Losing kills and implicit operands. Just mutate to copy and
-        // let lowerCopy deal with it?
-        if (OtherOp->isReg() && OtherOp->getReg() == DstReg) {
-          // Folded to an identity copy.
-          MI->eraseFromParent();
-          return true;
-        }
-
-        // The immediate value should be in OtherOp
-        MI->setDesc(TII->get(AMDGPU::V_MOV_B32_e32));
-        MI->removeOperand(FIOperandNum);
-
-        unsigned NumOps = MI->getNumOperands();
-        for (unsigned I = NumOps - 2; I >= 2; --I)
-          MI->removeOperand(I);
-
-        if (NumDefs == 2)
-          MI->removeOperand(1);
-
-        // The code below can't deal with a mov.
-        return true;
-      }
-
-      // This folded to a constant, but we have to keep the add around for
-      // pointless implicit defs or clamp modifier.
-      FIOp->ChangeToImmediate(0);
-    }
-
-    // Try to improve legality by commuting.
-    if (!TII->isOperandLegal(*MI, Src1Idx) && TII->commuteInstruction(*MI)) {
-      std::swap(FIOp, OtherOp);
-      std::swap(FIOperandNum, OtherOpIdx);
-    }
-
-    for (unsigned SrcIdx : {Src1Idx, Src0Idx}) {
-      // Depending on operand constraints we may need to insert another copy.
-      if (!TII->isOperandLegal(*MI, SrcIdx)) {
-        // If commuting didn't make the operands legal, we need to materialize
-        // in a register.
-        // TODO: Can use SGPR on gfx10+ in some cases.
-        if (!ScavengedVGPR) {
-          ScavengedVGPR = RS->scavengeRegisterBackwards(
-              AMDGPU::VGPR_32RegClass, MI, /*RestoreAfter=*/false,
-              /*SPAdj=*/0);
-        }
-
-        assert(ScavengedVGPR != DstReg);
-
-        MachineOperand &Src = MI->getOperand(SrcIdx);
-        BuildMI(*MBB, *MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), ScavengedVGPR)
-            .add(Src);
-
-        Src.ChangeToRegister(ScavengedVGPR, false);
-        Src.setIsKill(true);
-      }
-    }
-
-    // Fold out add of 0 case that can appear in kernels.
-    if (FIOp->isImm() && FIOp->getImm() == 0 && DeadVCC && !HasClamp) {
-      if (OtherOp->isReg() && OtherOp->getReg() != DstReg) {
-        BuildMI(*MBB, *MI, DL, TII->get(AMDGPU::COPY), DstReg).add(*OtherOp);
-      }
-
-      MI->eraseFromParent();
-    }
-
-    return true;
-  }
-  case AMDGPU::S_ADD_I32: {
-    // TODO: Handle s_or_b32, s_and_b32.
-    unsigned OtherOpIdx = FIOperandNum == 1 ? 2 : 1;
-    MachineOperand &OtherOp = MI->getOperand(OtherOpIdx);
-
-    assert(FrameReg || MFI->isBottomOfStack());
-
-    MachineOperand &DstOp = MI->getOperand(0);
-    const DebugLoc &DL = MI->getDebugLoc();
-    Register MaterializedReg = FrameReg;
-
-    // Defend against live scc, which should never happen in practice.
-    bool DeadSCC = MI->getOperand(3).isDead();
-
-    Register TmpReg;
-
-    if (FrameReg && !ST.enableFlatScratch()) {
-      // FIXME: In the common case where the add does not also read its result
-      // (i.e. this isn't a reg += fi), it's not finding the dest reg as
-      // available.
-      TmpReg = RS->scavengeRegisterBackwards(AMDGPU::SReg_32_XM0RegClass, MI,
-                                             false, 0);
-      BuildMI(*MBB, *MI, DL, TII->get(AMDGPU::S_LSHR_B32))
-          .addDef(TmpReg, RegState::Renamable)
-          .addReg(FrameReg)
-          .addImm(ST.getWavefrontSizeLog2())
-          .setOperandDead(3); // Set SCC dead
-      MaterializedReg = TmpReg;
-    }
-
-    int64_t Offset = FrameInfo.getObjectOffset(Index);
-
-    // For the non-immediate case, we could fall through to the default
-    // handling, but we do an in-place update of the result register here to
-    // avoid scavenging another register.
-    if (OtherOp.isImm()) {
-      OtherOp.setImm(OtherOp.getImm() + Offset);
-      Offset = 0;
-
-      if (MaterializedReg)
-        FIOp->ChangeToRegister(MaterializedReg, false);
-      else
-        FIOp->ChangeToImmediate(0);
-    } else if (MaterializedReg) {
-      // If we can't fold the other operand, do another increment.
-      Register DstReg = DstOp.getReg();
-
-      if (!TmpReg && MaterializedReg == FrameReg) {
-        TmpReg = RS->scavengeRegisterBackwards(AMDGPU::SReg_32_XM0RegClass,
-                                               MI, false, 0);
-        DstReg = TmpReg;
-      }
-
-      auto AddI32 = BuildMI(*MBB, *MI, DL, TII->get(AMDGPU::S_ADD_I32))
-                        .addDef(DstReg, RegState::Renamable)
-                        .addReg(MaterializedReg, RegState::Kill)
-                        .add(OtherOp);
-      if (DeadSCC)
-        AddI32.setOperandDead(3);
-
-      MaterializedReg = DstReg;
-
-      OtherOp.ChangeToRegister(MaterializedReg, false);
-      OtherOp.setIsKill(true);
-      OtherOp.setIsRenamable(true);
-      FIOp->ChangeToImmediate(Offset);
-    } else {
-      // If we don't have any other offset to apply, we can just directly
-      // interpret the frame index as the offset.
-      FIOp->ChangeToImmediate(Offset);
-    }
-
-    if (DeadSCC && OtherOp.isImm() && OtherOp.getImm() == 0) {
-      assert(Offset == 0);
-      MI->removeOperand(3);
-      MI->removeOperand(OtherOpIdx);
-      MI->setDesc(TII->get(FIOp->isReg() ? AMDGPU::COPY : AMDGPU::S_MOV_B32));
-    } else if (DeadSCC && FIOp->isImm() && FIOp->getImm() == 0) {
-      assert(Offset == 0);
-      MI->removeOperand(3);
-      MI->removeOperand(FIOperandNum);
-      MI->setDesc(
-          TII->get(OtherOp.isReg() ? AMDGPU::COPY : AMDGPU::S_MOV_B32));
-    }
-
-    assert(!FIOp->isFI());
-    return true;
-  }
   default: {
     // Other access to frame index
     const DebugLoc &DL = MI->getDebugLoc();
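
Note on the removed vector-add handling: when flat scratch is disabled, the frame register holds a wave-scaled scratch offset, so the code first shifts it right by log2 of the wavefront size to form a per-lane base, and any constant frame-object offset is folded straight into the add's immediate operand. A minimal standalone sketch of that arithmetic follows; the names perLaneFrameBase and foldObjectOffset are invented for illustration, and plain C++ integers stand in for the LLVM MachineInstr API.

// Standalone sketch (hypothetical names, not the LLVM API).
#include <cstdint>
#include <cstdio>

// Non-flat-scratch targets keep a wave-scaled scratch offset in the frame
// register; a per-lane base is FrameReg >> log2(WavefrontSize).
static uint32_t perLaneFrameBase(uint32_t FrameReg, unsigned WaveSizeLog2) {
  return FrameReg >> WaveSizeLog2;
}

// When the add's other operand is an immediate, the frame object's constant
// offset folds into it and the frame-index operand collapses to 0.
static int64_t foldObjectOffset(int64_t OtherImm, int64_t ObjectOffset) {
  return OtherImm + ObjectOffset;
}

int main() {
  // v_add_u32 v0, 16, fi#0 with fi#0 at byte offset 48 ends up as
  // v_add_u32 v0, 64, <per-lane base>.
  printf("folded imm: %lld\n", (long long)foldObjectOffset(16, 48));
  printf("per-lane base: %u\n", perLaneFrameBase(4096, 6)); // wave64
  return 0;
}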
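The removed S_ADD_I32 case applies the same folding scheme on the scalar side and, once SCC is dead and one operand folds to zero, rewrites the instruction as a COPY or S_MOV_B32. A sketch of that reduction decision, again with invented names (Opc, reduceAdd) and plain C++ standing in for the MachineInstr rewrites:

// Standalone sketch (hypothetical types, not the LLVM API).
#include <cassert>
#include <cstdio>

enum class Opc { SAddI32, Copy, SMovB32 };

struct Operand {
  bool IsReg;      // true: register operand, false: immediate
  long long Value; // register id or immediate value
};

// Once folding reduces one source of s_add_i32 to 0 and SCC is dead, the
// add can degrade to a copy (register source) or s_mov_b32 (immediate).
static Opc reduceAdd(bool DeadSCC, const Operand &Remaining) {
  if (!DeadSCC)
    return Opc::SAddI32; // live SCC forces keeping the add
  return Remaining.IsReg ? Opc::Copy : Opc::SMovB32;
}

int main() {
  assert(reduceAdd(true, {true, 1}) == Opc::Copy);      // s_add d, s1, 0
  assert(reduceAdd(true, {false, 64}) == Opc::SMovB32); // s_add d, 64, 0
  assert(reduceAdd(false, {true, 1}) == Opc::SAddI32);  // SCC live: keep add
  puts("ok");
  return 0;
}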