Skip to content

Commit d6ff70e

Browse files
committed
[AMDGPU] Fix buffer addressing mode matching
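Starting with gfx1250, voffset and immoffset are each zero-extended from 32 bits to 45 bits before being added together, so a constant can only be split out of a 32-bit offset addition when that addition has no unsigned overflow.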
1 parent d09dbda commit d6ff70e

30 files changed

+9513
-4399
lines changed

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5879,8 +5879,12 @@ AMDGPULegalizerInfo::splitBufferOffsets(MachineIRBuilder &B,
58795879
const LLT S32 = LLT::scalar(32);
58805880
MachineRegisterInfo &MRI = *B.getMRI();
58815881

5882-
std::tie(BaseReg, ImmOffset) =
5883-
AMDGPU::getBaseWithConstantOffset(MRI, OrigOffset);
5882+
// On GFX1250+, voffset and immoffset are zero-extended from 32 bits before
5883+
// being added, so we can only safely match a 32-bit addition with no unsigned
5884+
// overflow.
5885+
bool CheckNUW = AMDGPU::isGFX1250(ST);
5886+
std::tie(BaseReg, ImmOffset) = AMDGPU::getBaseWithConstantOffset(
5887+
MRI, OrigOffset, /*KnownBits=*/nullptr, CheckNUW);
58845888

58855889
// If BaseReg is a pointer, convert it to int.
58865890
if (MRI.getType(BaseReg).isPointer())

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10877,6 +10877,13 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
1087710877
}
1087810878
}
1087910879

10880+
// Return whether Addr is treated as an addition that cannot wrap in the
// unsigned sense: either an ISD::ADD carrying the no-unsigned-wrap flag, or
// any ISD::OR.
// NOTE(review): the OR case does not inspect its operands here; presumably
// callers have already established that the OR behaves like an add (the use
// in splitBufferOffsets is guarded by DAG.isBaseWithConstantOffset) - confirm
// before reusing this helper elsewhere.
10881+
static bool isNoUnsignedWrap(SDValue Addr) {
10882+
return (Addr.getOpcode() == ISD::ADD &&
10883+
Addr->getFlags().hasNoUnsignedWrap()) ||
10884+
Addr->getOpcode() == ISD::OR;
10885+
}
10886+
1088010887
bool SITargetLowering::shouldPreservePtrArith(const Function &F,
1088110888
EVT PtrVT) const {
1088210889
return UseSelectionDAGPTRADD && PtrVT == MVT::i64;
@@ -10898,8 +10905,14 @@ SITargetLowering::splitBufferOffsets(SDValue Offset, SelectionDAG &DAG) const {
1089810905
if ((C1 = dyn_cast<ConstantSDNode>(N0)))
1089910906
N0 = SDValue();
1090010907
else if (DAG.isBaseWithConstantOffset(N0)) {
10901-
C1 = cast<ConstantSDNode>(N0.getOperand(1));
10902-
N0 = N0.getOperand(0);
10908+
// On GFX1250+, voffset and immoffset are zero-extended from 32 bits before
10909+
// being added, so we can only safely match a 32-bit addition with no
10910+
// unsigned overflow.
10911+
bool CheckNUW = AMDGPU::isGFX1250(*Subtarget);
10912+
if (!CheckNUW || isNoUnsignedWrap(N0)) {
10913+
C1 = cast<ConstantSDNode>(N0.getOperand(1));
10914+
N0 = N0.getOperand(0);
10915+
}
1090310916
}
1090410917

1090510918
if (C1) {

llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.add.ll

Lines changed: 318 additions & 155 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.cmpswap.ll

Lines changed: 736 additions & 360 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.ll

Lines changed: 821 additions & 398 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.tfe.ll

Lines changed: 468 additions & 229 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.ll

Lines changed: 808 additions & 386 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.add.ll

Lines changed: 409 additions & 201 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.cmpswap.ll

Lines changed: 784 additions & 384 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.ll

Lines changed: 715 additions & 347 deletions
Large diffs are not rendered by default.

0 commit comments

Comments
 (0)