Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
93 changes: 62 additions & 31 deletions llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1863,15 +1863,6 @@ bool AMDGPUDAGToDAGISel::SelectScratchOffset(SDNode *N, SDValue Addr,
SIInstrFlags::FlatScratch);
}

// Peel a zero-extension off Op: when Op is (zero_extend i32:x), hand back x.
// For any other opcode, or a zero_extend whose source is not i32, hand back
// an empty SDValue.
static SDValue matchZExtFromI32(SDValue Op) {
  if (Op.getOpcode() == ISD::ZERO_EXTEND) {
    SDValue Src = Op.getOperand(0);
    if (Src.getValueType() == MVT::i32)
      return Src;
  }

  return SDValue();
}

// If this matches *_extend i32:x, return x.
// Otherwise, if the value is already i32, return the value itself.
static SDValue matchExtFromI32orI32(SDValue Op, bool IsSigned,
Expand All @@ -1890,12 +1881,13 @@ static SDValue matchExtFromI32orI32(SDValue Op, bool IsSigned,
}

// Match (64-bit SGPR base) + (zext vgpr offset) + sext(imm offset)
bool AMDGPUDAGToDAGISel::SelectGlobalSAddr(SDNode *N,
SDValue Addr,
SDValue &SAddr,
SDValue &VOffset,
SDValue &Offset) const {
// or (64-bit SGPR base) + (sext vgpr offset) + sext(imm offset)
bool AMDGPUDAGToDAGISel::SelectGlobalSAddr(SDNode *N, SDValue Addr,
SDValue &SAddr, SDValue &VOffset,
SDValue &Offset, bool &ScaleOffset,
bool NeedIOffset) const {
int64_t ImmOffset = 0;
ScaleOffset = false;

// Match the immediate offset first, which canonically is moved as low as
// possible.
Expand All @@ -1905,7 +1897,8 @@ bool AMDGPUDAGToDAGISel::SelectGlobalSAddr(SDNode *N,
int64_t COffsetVal = cast<ConstantSDNode>(RHS)->getSExtValue();
const SIInstrInfo *TII = Subtarget->getInstrInfo();

if (TII->isLegalFLATOffset(COffsetVal, AMDGPUAS::GLOBAL_ADDRESS,
if (NeedIOffset &&
TII->isLegalFLATOffset(COffsetVal, AMDGPUAS::GLOBAL_ADDRESS,
SIInstrFlags::FlatGlobal)) {
Addr = LHS;
ImmOffset = COffsetVal;
Expand All @@ -1915,11 +1908,14 @@ bool AMDGPUDAGToDAGISel::SelectGlobalSAddr(SDNode *N,
// saddr + large_offset -> saddr +
// (voffset = large_offset & ~MaxOffset) +
// (large_offset & MaxOffset);
int64_t SplitImmOffset, RemainderOffset;
std::tie(SplitImmOffset, RemainderOffset) = TII->splitFlatOffset(
COffsetVal, AMDGPUAS::GLOBAL_ADDRESS, SIInstrFlags::FlatGlobal);
int64_t SplitImmOffset = 0, RemainderOffset = COffsetVal;
if (NeedIOffset) {
std::tie(SplitImmOffset, RemainderOffset) = TII->splitFlatOffset(
COffsetVal, AMDGPUAS::GLOBAL_ADDRESS, SIInstrFlags::FlatGlobal);
}

if (isUInt<32>(RemainderOffset)) {
if (Subtarget->hasSignedGVSOffset() ? isInt<32>(RemainderOffset)
: isUInt<32>(RemainderOffset)) {
SDNode *VMov = CurDAG->getMachineNode(
AMDGPU::V_MOV_B32_e32, SL, MVT::i32,
CurDAG->getTargetConstant(RemainderOffset, SDLoc(), MVT::i32));
Expand All @@ -1946,21 +1942,26 @@ bool AMDGPUDAGToDAGISel::SelectGlobalSAddr(SDNode *N,
// Match the variable offset.
if (Addr.getOpcode() == ISD::ADD) {
LHS = Addr.getOperand(0);
RHS = Addr.getOperand(1);

if (!LHS->isDivergent()) {
// add (i64 sgpr), (zero_extend (i32 vgpr))
if (SDValue ZextRHS = matchZExtFromI32(RHS)) {
// add (i64 sgpr), (*_extend (i32 vgpr))
RHS = Addr.getOperand(1);
ScaleOffset = SelectScaleOffset(N, RHS, Subtarget->hasSignedGVSOffset());
if (SDValue ExtRHS = matchExtFromI32orI32(
RHS, Subtarget->hasSignedGVSOffset(), CurDAG)) {
SAddr = LHS;
VOffset = ZextRHS;
VOffset = ExtRHS;
}
}

RHS = Addr.getOperand(1);
if (!SAddr && !RHS->isDivergent()) {
// add (zero_extend (i32 vgpr)), (i64 sgpr)
if (SDValue ZextLHS = matchZExtFromI32(LHS)) {
// add (*_extend (i32 vgpr)), (i64 sgpr)
ScaleOffset = SelectScaleOffset(N, LHS, Subtarget->hasSignedGVSOffset());
if (SDValue ExtLHS = matchExtFromI32orI32(
LHS, Subtarget->hasSignedGVSOffset(), CurDAG)) {
SAddr = RHS;
VOffset = ZextLHS;
VOffset = ExtLHS;
}
}

Expand All @@ -1970,6 +1971,27 @@ bool AMDGPUDAGToDAGISel::SelectGlobalSAddr(SDNode *N,
}
}

if (Subtarget->hasScaleOffset() &&
(Addr.getOpcode() == (Subtarget->hasSignedGVSOffset()
? AMDGPUISD::MAD_I64_I32
: AMDGPUISD::MAD_U64_U32) ||
(Addr.getOpcode() == AMDGPUISD::MAD_U64_U32 &&
CurDAG->SignBitIsZero(Addr.getOperand(0)))) &&
Addr.getOperand(0)->isDivergent() &&
isa<ConstantSDNode>(Addr.getOperand(1)) &&
!Addr.getOperand(2)->isDivergent()) {
// mad_u64_u32 / mad_i64_i32 (i32 vgpr), (i32 c), (i64 sgpr)
unsigned Size =
(unsigned)cast<MemSDNode>(N)->getMemoryVT().getFixedSizeInBits() / 8;
ScaleOffset = Addr.getConstantOperandVal(1) == Size;
if (ScaleOffset) {
SAddr = Addr.getOperand(2);
VOffset = Addr.getOperand(0);
Offset = CurDAG->getTargetConstant(ImmOffset, SDLoc(), MVT::i32);
return true;
}
}

if (Addr->isDivergent() || Addr.getOpcode() == ISD::UNDEF ||
isa<ConstantSDNode>(Addr))
return false;
Expand All @@ -1989,21 +2011,24 @@ bool AMDGPUDAGToDAGISel::SelectGlobalSAddr(SDNode *N, SDValue Addr,
SDValue &SAddr, SDValue &VOffset,
SDValue &Offset,
SDValue &CPol) const {
if (!SelectGlobalSAddr(N, Addr, SAddr, VOffset, Offset))
bool ScaleOffset;
if (!SelectGlobalSAddr(N, Addr, SAddr, VOffset, Offset, ScaleOffset))
return false;

CPol = CurDAG->getTargetConstant(0, SDLoc(), MVT::i32);
CPol = CurDAG->getTargetConstant(ScaleOffset ? AMDGPU::CPol::SCAL : 0,
SDLoc(), MVT::i32);
return true;
}

bool AMDGPUDAGToDAGISel::SelectGlobalSAddrGLC(SDNode *N, SDValue Addr,
SDValue &SAddr, SDValue &VOffset,
SDValue &Offset,
SDValue &CPol) const {
if (!SelectGlobalSAddr(N, Addr, SAddr, VOffset, Offset))
bool ScaleOffset;
if (!SelectGlobalSAddr(N, Addr, SAddr, VOffset, Offset, ScaleOffset))
return false;

unsigned CPolVal = AMDGPU::CPol::GLC;
unsigned CPolVal = (ScaleOffset ? AMDGPU::CPol::SCAL : 0) | AMDGPU::CPol::GLC;
CPol = CurDAG->getTargetConstant(CPolVal, SDLoc(), MVT::i32);
return true;
}
Expand Down Expand Up @@ -2091,7 +2116,8 @@ bool AMDGPUDAGToDAGISel::checkFlatScratchSVSSwizzleBug(

bool AMDGPUDAGToDAGISel::SelectScratchSVAddr(SDNode *N, SDValue Addr,
SDValue &VAddr, SDValue &SAddr,
SDValue &Offset) const {
SDValue &Offset,
SDValue &CPol) const {
int64_t ImmOffset = 0;

SDValue LHS, RHS;
Expand Down Expand Up @@ -2123,6 +2149,7 @@ bool AMDGPUDAGToDAGISel::SelectScratchSVAddr(SDNode *N, SDValue Addr,
if (checkFlatScratchSVSSwizzleBug(VAddr, SAddr, SplitImmOffset))
return false;
Offset = CurDAG->getTargetConstant(SplitImmOffset, SDLoc(), MVT::i32);
CPol = CurDAG->getTargetConstant(0, SDLoc(), MVT::i32);
return true;
}
}
Expand Down Expand Up @@ -2156,6 +2183,10 @@ bool AMDGPUDAGToDAGISel::SelectScratchSVAddr(SDNode *N, SDValue Addr,
return false;
SAddr = SelectSAddrFI(CurDAG, SAddr);
Offset = CurDAG->getSignedTargetConstant(ImmOffset, SDLoc(), MVT::i32);

bool ScaleOffset = SelectScaleOffset(N, VAddr, true /* IsSigned */);
CPol = CurDAG->getTargetConstant(ScaleOffset ? AMDGPU::CPol::SCAL : 0,
SDLoc(), MVT::i32);
return true;
}

Expand Down
6 changes: 4 additions & 2 deletions llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,8 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel {
bool SelectScratchOffset(SDNode *N, SDValue Addr, SDValue &VAddr,
SDValue &Offset) const;
bool SelectGlobalSAddr(SDNode *N, SDValue Addr, SDValue &SAddr,
SDValue &VOffset, SDValue &Offset) const;
SDValue &VOffset, SDValue &Offset, bool &ScaleOffset,
bool NeedIOffset = true) const;
bool SelectGlobalSAddr(SDNode *N, SDValue Addr, SDValue &SAddr,
SDValue &VOffset, SDValue &Offset,
SDValue &CPol) const;
Expand All @@ -174,7 +175,8 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel {
bool checkFlatScratchSVSSwizzleBug(SDValue VAddr, SDValue SAddr,
uint64_t ImmOffset) const;
bool SelectScratchSVAddr(SDNode *N, SDValue Addr, SDValue &VAddr,
SDValue &SAddr, SDValue &Offset) const;
SDValue &SAddr, SDValue &Offset,
SDValue &CPol) const;

bool SelectSMRDOffset(SDNode *N, SDValue ByteOffsetNode, SDValue *SOffset,
SDValue *Offset, bool Imm32Only = false,
Expand Down
83 changes: 65 additions & 18 deletions llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5616,7 +5616,8 @@ AMDGPUInstructionSelector::selectScratchOffset(MachineOperand &Root) const {
// Match (64-bit SGPR base) + (zext vgpr offset) + sext(imm offset)
// or (64-bit SGPR base) + (sext vgpr offset) + sext(imm offset)
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectGlobalSAddr(MachineOperand &Root,
unsigned CPolBits) const {
unsigned CPolBits,
bool NeedIOffset) const {
Register Addr = Root.getReg();
Register PtrBase;
int64_t ConstOffset;
Expand All @@ -5627,7 +5628,8 @@ AMDGPUInstructionSelector::selectGlobalSAddr(MachineOperand &Root,
std::tie(PtrBase, ConstOffset) = getPtrBaseWithConstantOffset(Addr, *MRI);

if (ConstOffset != 0) {
if (TII.isLegalFLATOffset(ConstOffset, AMDGPUAS::GLOBAL_ADDRESS,
if (NeedIOffset &&
TII.isLegalFLATOffset(ConstOffset, AMDGPUAS::GLOBAL_ADDRESS,
SIInstrFlags::FlatGlobal)) {
Addr = PtrBase;
ImmOffset = ConstOffset;
Expand All @@ -5640,11 +5642,15 @@ AMDGPUInstructionSelector::selectGlobalSAddr(MachineOperand &Root,
// saddr + large_offset -> saddr +
// (voffset = large_offset & ~MaxOffset) +
// (large_offset & MaxOffset);
int64_t SplitImmOffset, RemainderOffset;
std::tie(SplitImmOffset, RemainderOffset) = TII.splitFlatOffset(
ConstOffset, AMDGPUAS::GLOBAL_ADDRESS, SIInstrFlags::FlatGlobal);
int64_t SplitImmOffset = 0, RemainderOffset = ConstOffset;
if (NeedIOffset) {
std::tie(SplitImmOffset, RemainderOffset) =
TII.splitFlatOffset(ConstOffset, AMDGPUAS::GLOBAL_ADDRESS,
SIInstrFlags::FlatGlobal);
}

if (isUInt<32>(RemainderOffset)) {
if (Subtarget->hasSignedGVSOffset() ? isInt<32>(RemainderOffset)
: isUInt<32>(RemainderOffset)) {
MachineInstr *MI = Root.getParent();
MachineBasicBlock *MBB = MI->getParent();
Register HighBits =
Expand All @@ -5654,12 +5660,22 @@ AMDGPUInstructionSelector::selectGlobalSAddr(MachineOperand &Root,
HighBits)
.addImm(RemainderOffset);

if (NeedIOffset)
return {{
[=](MachineInstrBuilder &MIB) {
MIB.addReg(PtrBase);
}, // saddr
[=](MachineInstrBuilder &MIB) {
MIB.addReg(HighBits);
}, // voffset
[=](MachineInstrBuilder &MIB) { MIB.addImm(SplitImmOffset); },
[=](MachineInstrBuilder &MIB) { MIB.addImm(CPolBits); },
}};
return {{
[=](MachineInstrBuilder &MIB) { MIB.addReg(PtrBase); }, // saddr
[=](MachineInstrBuilder &MIB) {
MIB.addReg(HighBits);
}, // voffset
[=](MachineInstrBuilder &MIB) { MIB.addImm(SplitImmOffset); },
[=](MachineInstrBuilder &MIB) { MIB.addImm(CPolBits); },
}};
}
Expand Down Expand Up @@ -5691,18 +5707,33 @@ AMDGPUInstructionSelector::selectGlobalSAddr(MachineOperand &Root,

// It's possible voffset is an SGPR here, but the copy to VGPR will be
// inserted later.
if (Register VOffset = matchZeroExtendFromS32(PtrBaseOffset)) {
bool ScaleOffset = selectScaleOffset(Root, PtrBaseOffset,
Subtarget->hasSignedGVSOffset());
if (Register VOffset = matchExtendFromS32OrS32(
PtrBaseOffset, Subtarget->hasSignedGVSOffset())) {
if (NeedIOffset)
return {{[=](MachineInstrBuilder &MIB) { // saddr
MIB.addReg(SAddr);
},
[=](MachineInstrBuilder &MIB) { // voffset
MIB.addReg(VOffset);
},
[=](MachineInstrBuilder &MIB) { // offset
MIB.addImm(ImmOffset);
},
[=](MachineInstrBuilder &MIB) { // cpol
MIB.addImm(CPolBits |
(ScaleOffset ? AMDGPU::CPol::SCAL : 0));
}}};
return {{[=](MachineInstrBuilder &MIB) { // saddr
MIB.addReg(SAddr);
},
[=](MachineInstrBuilder &MIB) { // voffset
MIB.addReg(VOffset);
},
[=](MachineInstrBuilder &MIB) { // offset
MIB.addImm(ImmOffset);
},
[=](MachineInstrBuilder &MIB) { // cpol
MIB.addImm(CPolBits);
MIB.addImm(CPolBits |
(ScaleOffset ? AMDGPU::CPol::SCAL : 0));
}}};
}
}
Expand All @@ -5723,10 +5754,16 @@ AMDGPUInstructionSelector::selectGlobalSAddr(MachineOperand &Root,
BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32), VOffset)
.addImm(0);

if (NeedIOffset)
return {{
[=](MachineInstrBuilder &MIB) { MIB.addReg(AddrDef->Reg); }, // saddr
[=](MachineInstrBuilder &MIB) { MIB.addReg(VOffset); }, // voffset
[=](MachineInstrBuilder &MIB) { MIB.addImm(ImmOffset); }, // offset
[=](MachineInstrBuilder &MIB) { MIB.addImm(CPolBits); } // cpol
}};
return {{
[=](MachineInstrBuilder &MIB) { MIB.addReg(AddrDef->Reg); }, // saddr
[=](MachineInstrBuilder &MIB) { MIB.addReg(VOffset); }, // voffset
[=](MachineInstrBuilder &MIB) { MIB.addImm(ImmOffset); }, // offset
[=](MachineInstrBuilder &MIB) { MIB.addImm(CPolBits); } // cpol
}};
}
Expand Down Expand Up @@ -5858,22 +5895,32 @@ AMDGPUInstructionSelector::selectScratchSVAddr(MachineOperand &Root) const {
if (checkFlatScratchSVSSwizzleBug(RHS, LHS, ImmOffset))
return std::nullopt;

unsigned CPol = selectScaleOffset(Root, RHS, true /* IsSigned */)
? AMDGPU::CPol::SCAL
: 0;

if (LHSDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX) {
int FI = LHSDef->MI->getOperand(1).getIndex();
return {{
[=](MachineInstrBuilder &MIB) { MIB.addReg(RHS); }, // vaddr
[=](MachineInstrBuilder &MIB) { MIB.addReg(RHS); }, // vaddr
[=](MachineInstrBuilder &MIB) { MIB.addFrameIndex(FI); }, // saddr
[=](MachineInstrBuilder &MIB) { MIB.addImm(ImmOffset); } // offset
[=](MachineInstrBuilder &MIB) { MIB.addImm(ImmOffset); }, // offset
[=](MachineInstrBuilder &MIB) { MIB.addImm(CPol); } // cpol
}};
}

if (!isSGPR(LHS))
if (auto Def = getDefSrcRegIgnoringCopies(LHS, *MRI))
LHS = Def->Reg;

if (!isSGPR(LHS))
return std::nullopt;

return {{
[=](MachineInstrBuilder &MIB) { MIB.addReg(RHS); }, // vaddr
[=](MachineInstrBuilder &MIB) { MIB.addReg(LHS); }, // saddr
[=](MachineInstrBuilder &MIB) { MIB.addImm(ImmOffset); } // offset
[=](MachineInstrBuilder &MIB) { MIB.addReg(RHS); }, // vaddr
[=](MachineInstrBuilder &MIB) { MIB.addReg(LHS); }, // saddr
[=](MachineInstrBuilder &MIB) { MIB.addImm(ImmOffset); }, // offset
[=](MachineInstrBuilder &MIB) { MIB.addImm(CPol); } // cpol
}};
}

Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
Original file line number Diff line number Diff line change
Expand Up @@ -256,7 +256,8 @@ class AMDGPUInstructionSelector final : public InstructionSelector {
selectScratchOffset(MachineOperand &Root) const;

InstructionSelector::ComplexRendererFns
selectGlobalSAddr(MachineOperand &Root, unsigned CPolBits) const;
selectGlobalSAddr(MachineOperand &Root, unsigned CPolBits,
bool NeedIOffset = true) const;
InstructionSelector::ComplexRendererFns
selectGlobalSAddr(MachineOperand &Root) const;
InstructionSelector::ComplexRendererFns
Expand Down
Loading
Loading