
Commit 9548705

Revert "AMDGPU: Handle folding frame indexes into s_add_i32 (llvm#101694)"
This reverts commit 8039886; it breaks the hip_on_rocclr build.

Change-Id: I37907f36632c22cecbd1d12efe9758e4f23b7ebf
1 parent 90c5c16 commit 9548705
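For context, the reverted patch (llvm#101694) taught `SIRegisterInfo::eliminateFrameIndex` to fold frame indexes directly into `s_add_i32` (and the `v_add` variants) instead of materializing the frame address in a separate register first. Below is a minimal sketch of the core immediate-folding idea, reduced from the deleted `S_ADD_I32` case in the diff; the helper name and trimmed parameter list are illustrative, not the committed API.

// Sketch only: the real code lives inside eliminateFrameIndex and also
// handles the non-immediate operand, SCC liveness, and wave-offset cases.
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstr.h"

using namespace llvm;

static void foldFrameIndexIntoSAdd(MachineInstr &MI, unsigned FIOperandNum,
                                   const MachineFrameInfo &FrameInfo,
                                   Register FrameReg) {
  // S_ADD_I32 operands: (0) dst, (1) src0, (2) src1, (3) implicit scc def.
  unsigned OtherOpIdx = FIOperandNum == 1 ? 2 : 1;
  MachineOperand &OtherOp = MI.getOperand(OtherOpIdx);
  MachineOperand &FIOp = MI.getOperand(FIOperandNum);

  int64_t Offset = FrameInfo.getObjectOffset(FIOp.getIndex());
  if (OtherOp.isImm()) {
    // Fold the frame object's stack offset into the existing immediate...
    OtherOp.setImm(OtherOp.getImm() + Offset);
    // ...and collapse the frame index to the frame register (or to 0 when
    // no frame register is needed, e.g. at the bottom of the stack).
    if (FrameReg)
      FIOp.ChangeToRegister(FrameReg, /*isDef=*/false);
    else
      FIOp.ChangeToImmediate(0);
  }
}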

17 files changed: +1337, -1152 lines

llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp

Lines changed: 0 additions & 293 deletions
@@ -2612,299 +2612,6 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
     MI->eraseFromParent();
     return true;
   }
-  case AMDGPU::V_ADD_U32_e32:
-  case AMDGPU::V_ADD_U32_e64:
-  case AMDGPU::V_ADD_CO_U32_e32:
-  case AMDGPU::V_ADD_CO_U32_e64: {
-    // TODO: Handle sub, and, or.
-    unsigned NumDefs = MI->getNumExplicitDefs();
-    unsigned Src0Idx = NumDefs;
-
-    bool HasClamp = false;
-    MachineOperand *VCCOp = nullptr;
-
-    switch (MI->getOpcode()) {
-    case AMDGPU::V_ADD_U32_e32:
-      break;
-    case AMDGPU::V_ADD_U32_e64:
-      HasClamp = MI->getOperand(3).getImm();
-      break;
-    case AMDGPU::V_ADD_CO_U32_e32:
-      VCCOp = &MI->getOperand(3);
-      break;
-    case AMDGPU::V_ADD_CO_U32_e64:
-      VCCOp = &MI->getOperand(1);
-      HasClamp = MI->getOperand(4).getImm();
-      break;
-    default:
-      break;
-    }
-    bool DeadVCC = !VCCOp || VCCOp->isDead();
-    MachineOperand &DstOp = MI->getOperand(0);
-    Register DstReg = DstOp.getReg();
-
-    unsigned OtherOpIdx =
-        FIOperandNum == Src0Idx ? FIOperandNum + 1 : Src0Idx;
-    MachineOperand *OtherOp = &MI->getOperand(OtherOpIdx);
-
-    unsigned Src1Idx = Src0Idx + 1;
-    Register MaterializedReg = FrameReg;
-    Register ScavengedVGPR;
-
-    if (FrameReg && !ST.enableFlatScratch()) {
-      // We should just do an in-place update of the result register. However,
-      // the value there may also be used by the add, in which case we need a
-      // temporary register.
-      //
-      // FIXME: The scavenger is not finding the result register in the
-      // common case where the add does not read the register.
-
-      ScavengedVGPR = RS->scavengeRegisterBackwards(
-          AMDGPU::VGPR_32RegClass, MI, /*RestoreAfter=*/false, /*SPAdj=*/0);
-
-      // TODO: If we have a free SGPR, it's sometimes better to use a scalar
-      // shift.
-      BuildMI(*MBB, *MI, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64))
-          .addDef(ScavengedVGPR, RegState::Renamable)
-          .addImm(ST.getWavefrontSizeLog2())
-          .addReg(FrameReg);
-      MaterializedReg = ScavengedVGPR;
-    }
-
-    int64_t Offset = FrameInfo.getObjectOffset(Index);
-    // For the non-immediate case, we could fall through to the default
-    // handling, but we do an in-place update of the result register here to
-    // avoid scavenging another register.
-    if (OtherOp->isImm()) {
-      OtherOp->setImm(OtherOp->getImm() + Offset);
-      Offset = 0;
-    }
-
-    if ((!OtherOp->isImm() || OtherOp->getImm() != 0) && MaterializedReg) {
-      if (ST.enableFlatScratch() &&
-          !TII->isOperandLegal(*MI, Src1Idx, OtherOp)) {
-        // We didn't need the shift above, so we have an SGPR for the frame
-        // register, but may have a VGPR only operand.
-        //
-        // TODO: On gfx10+, we can easily change the opcode to the e64 version
-        // and use the higher constant bus restriction to avoid this copy.
-
-        if (!ScavengedVGPR) {
-          ScavengedVGPR = RS->scavengeRegisterBackwards(
-              AMDGPU::VGPR_32RegClass, MI, /*RestoreAfter=*/false,
-              /*SPAdj=*/0);
-        }
-
-        assert(ScavengedVGPR != DstReg);
-
-        BuildMI(*MBB, *MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), ScavengedVGPR)
-            .addReg(MaterializedReg,
-                    MaterializedReg != FrameReg ? RegState::Kill : 0);
-        MaterializedReg = ScavengedVGPR;
-      }
-
-      // TODO: In the flat scratch case, if this is an add of an SGPR, and SCC
-      // is not live, we could use a scalar add + vector add instead of 2
-      // vector adds.
-      auto AddI32 = BuildMI(*MBB, *MI, DL, TII->get(MI->getOpcode()))
-                        .addDef(DstReg, RegState::Renamable);
-      if (NumDefs == 2)
-        AddI32.add(MI->getOperand(1));
-
-      unsigned MaterializedRegFlags =
-          MaterializedReg != FrameReg ? RegState::Kill : 0;
-
-      if (isVGPRClass(getPhysRegBaseClass(MaterializedReg))) {
-        // If we know we have a VGPR already, it's more likely the other
-        // operand is a legal vsrc0.
-        AddI32
-          .add(*OtherOp)
-          .addReg(MaterializedReg, MaterializedRegFlags);
-      } else {
-        // Commute operands to avoid violating VOP2 restrictions. This will
-        // typically happen when using scratch.
-        AddI32
-          .addReg(MaterializedReg, MaterializedRegFlags)
-          .add(*OtherOp);
-      }
-
-      if (MI->getOpcode() == AMDGPU::V_ADD_CO_U32_e64 ||
-          MI->getOpcode() == AMDGPU::V_ADD_U32_e64)
-        AddI32.addImm(0); // clamp
-
-      if (MI->getOpcode() == AMDGPU::V_ADD_CO_U32_e32)
-        AddI32.setOperandDead(3); // Dead vcc
-
-      MaterializedReg = DstReg;
-
-      OtherOp->ChangeToRegister(MaterializedReg, false);
-      OtherOp->setIsKill(true);
-      FIOp->ChangeToImmediate(Offset);
-      Offset = 0;
-    } else if (Offset != 0) {
-      assert(!MaterializedReg);
-      FIOp->ChangeToImmediate(Offset);
-      Offset = 0;
-    } else {
-      if (DeadVCC && !HasClamp) {
-        assert(Offset == 0);
-
-        // TODO: Losing kills and implicit operands. Just mutate to copy and
-        // let lowerCopy deal with it?
-        if (OtherOp->isReg() && OtherOp->getReg() == DstReg) {
-          // Folded to an identity copy.
-          MI->eraseFromParent();
-          return true;
-        }
-
-        // The immediate value should be in OtherOp
-        MI->setDesc(TII->get(AMDGPU::V_MOV_B32_e32));
-        MI->removeOperand(FIOperandNum);
-
-        unsigned NumOps = MI->getNumOperands();
-        for (unsigned I = NumOps - 2; I >= 2; --I)
-          MI->removeOperand(I);
-
-        if (NumDefs == 2)
-          MI->removeOperand(1);
-
-        // The code below can't deal with a mov.
-        return true;
-      }
-
-      // This folded to a constant, but we have to keep the add around for
-      // pointless implicit defs or clamp modifier.
-      FIOp->ChangeToImmediate(0);
-    }
-
-    // Try to improve legality by commuting.
-    if (!TII->isOperandLegal(*MI, Src1Idx) && TII->commuteInstruction(*MI)) {
-      std::swap(FIOp, OtherOp);
-      std::swap(FIOperandNum, OtherOpIdx);
-    }
-
-    for (unsigned SrcIdx : {Src1Idx, Src0Idx}) {
-      // Depending on operand constraints we may need to insert another copy.
-      if (!TII->isOperandLegal(*MI, SrcIdx)) {
-        // If commuting didn't make the operands legal, we need to materialize
-        // in a register.
-        // TODO: Can use SGPR on gfx10+ in some cases.
-        if (!ScavengedVGPR) {
-          ScavengedVGPR = RS->scavengeRegisterBackwards(
-              AMDGPU::VGPR_32RegClass, MI, /*RestoreAfter=*/false,
-              /*SPAdj=*/0);
-        }
-
-        assert(ScavengedVGPR != DstReg);
-
-        MachineOperand &Src = MI->getOperand(SrcIdx);
-        BuildMI(*MBB, *MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), ScavengedVGPR)
-            .add(Src);
-
-        Src.ChangeToRegister(ScavengedVGPR, false);
-        Src.setIsKill(true);
-      }
-    }
-
-    // Fold out add of 0 case that can appear in kernels.
-    if (FIOp->isImm() && FIOp->getImm() == 0 && DeadVCC && !HasClamp) {
-      if (OtherOp->isReg() && OtherOp->getReg() != DstReg) {
-        BuildMI(*MBB, *MI, DL, TII->get(AMDGPU::COPY), DstReg).add(*OtherOp);
-      }
-
-      MI->eraseFromParent();
-    }
-
-    return true;
-  }
-  case AMDGPU::S_ADD_I32: {
-    // TODO: Handle s_or_b32, s_and_b32.
-    unsigned OtherOpIdx = FIOperandNum == 1 ? 2 : 1;
-    MachineOperand &OtherOp = MI->getOperand(OtherOpIdx);
-
-    assert(FrameReg || MFI->isBottomOfStack());
-
-    MachineOperand &DstOp = MI->getOperand(0);
-    const DebugLoc &DL = MI->getDebugLoc();
-    Register MaterializedReg = FrameReg;
-
-    // Defend against live scc, which should never happen in practice.
-    bool DeadSCC = MI->getOperand(3).isDead();
-
-    Register TmpReg;
-
-    if (FrameReg && !ST.enableFlatScratch()) {
-      // FIXME: In the common case where the add does not also read its result
-      // (i.e. this isn't a reg += fi), it's not finding the dest reg as
-      // available.
-      TmpReg = RS->scavengeRegisterBackwards(AMDGPU::SReg_32_XM0RegClass, MI,
-                                             false, 0);
-      BuildMI(*MBB, *MI, DL, TII->get(AMDGPU::S_LSHR_B32))
-          .addDef(TmpReg, RegState::Renamable)
-          .addReg(FrameReg)
-          .addImm(ST.getWavefrontSizeLog2())
-          .setOperandDead(3); // Set SCC dead
-      MaterializedReg = TmpReg;
-    }
-
-    int64_t Offset = FrameInfo.getObjectOffset(Index);
-
-    // For the non-immediate case, we could fall through to the default
-    // handling, but we do an in-place update of the result register here to
-    // avoid scavenging another register.
-    if (OtherOp.isImm()) {
-      OtherOp.setImm(OtherOp.getImm() + Offset);
-      Offset = 0;
-
-      if (MaterializedReg)
-        FIOp->ChangeToRegister(MaterializedReg, false);
-      else
-        FIOp->ChangeToImmediate(0);
-    } else if (MaterializedReg) {
-      // If we can't fold the other operand, do another increment.
-      Register DstReg = DstOp.getReg();
-
-      if (!TmpReg && MaterializedReg == FrameReg) {
-        TmpReg = RS->scavengeRegisterBackwards(AMDGPU::SReg_32_XM0RegClass,
-                                               MI, false, 0);
-        DstReg = TmpReg;
-      }
-
-      auto AddI32 = BuildMI(*MBB, *MI, DL, TII->get(AMDGPU::S_ADD_I32))
-                        .addDef(DstReg, RegState::Renamable)
-                        .addReg(MaterializedReg, RegState::Kill)
-                        .add(OtherOp);
-      if (DeadSCC)
-        AddI32.setOperandDead(3);
-
-      MaterializedReg = DstReg;
-
-      OtherOp.ChangeToRegister(MaterializedReg, false);
-      OtherOp.setIsKill(true);
-      OtherOp.setIsRenamable(true);
-      FIOp->ChangeToImmediate(Offset);
-    } else {
-      // If we don't have any other offset to apply, we can just directly
-      // interpret the frame index as the offset.
-      FIOp->ChangeToImmediate(Offset);
-    }
-
-    if (DeadSCC && OtherOp.isImm() && OtherOp.getImm() == 0) {
-      assert(Offset == 0);
-      MI->removeOperand(3);
-      MI->removeOperand(OtherOpIdx);
-      MI->setDesc(TII->get(FIOp->isReg() ? AMDGPU::COPY : AMDGPU::S_MOV_B32));
-    } else if (DeadSCC && FIOp->isImm() && FIOp->getImm() == 0) {
-      assert(Offset == 0);
-      MI->removeOperand(3);
-      MI->removeOperand(FIOperandNum);
-      MI->setDesc(
-          TII->get(OtherOp.isReg() ? AMDGPU::COPY : AMDGPU::S_MOV_B32));
-    }
-
-    assert(!FIOp->isFI());
-    return true;
-  }
   default: {
     // Other access to frame index
    const DebugLoc &DL = MI->getDebugLoc();

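One prerequisite both deleted paths share: when flat scratch is disabled, the SGPR frame register holds a wave-scaled ("swizzled") scratch offset, so it must first be shifted right by log2(wavefront size) (the `S_LSHR_B32` and `V_LSHRREV_B32_e64` uses above) before it can be added to a frame object's byte offset. A reduced fragment of that materialization step, assuming the `RS`, `MBB`, `MI`, `DL`, `TII`, `ST`, and `FrameReg` values that `eliminateFrameIndex` has in scope:

// Reduced from the deleted S_ADD_I32 path; this is a fragment, not a
// standalone function. SReg_32_XM0RegClass keeps the scavenger from
// handing back m0, which has special uses.
Register TmpReg = RS->scavengeRegisterBackwards(AMDGPU::SReg_32_XM0RegClass,
                                                MI, /*RestoreAfter=*/false,
                                                /*SPAdj=*/0);
BuildMI(*MBB, *MI, DL, TII->get(AMDGPU::S_LSHR_B32))
    .addDef(TmpReg, RegState::Renamable)
    .addReg(FrameReg)
    .addImm(ST.getWavefrontSizeLog2())
    .setOperandDead(3); // SCC is unused here, so its def is marked dead.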