Skip to content

Commit 9ad8e12

Browse files
authored
[AMDGPU] Expand scratch atomics to flat atomics if GAS is enabled (#154710)
1 parent 0a193cb commit 9ad8e12

File tree

9 files changed: +14,009 additions, −2,615 deletions

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 46 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17773,11 +17773,19 @@ static bool flatInstrMayAccessPrivate(const Instruction *I) {
1777317773
!AMDGPU::hasValueInRangeLikeMetadata(*MD, AMDGPUAS::PRIVATE_ADDRESS);
1777417774
}
1777517775

17776+
// Decide how an atomic on a private (scratch) pointer should be expanded.
// When the subtarget has globally addressable scratch (GAS), scratch memory
// is reachable through the flat aperture, so the operation can be custom
// expanded into a flat atomic; otherwise private memory is thread-local and
// the operation needs no atomicity at all.
static TargetLowering::AtomicExpansionKind
getPrivateAtomicExpansionKind(const GCNSubtarget &STI) {
  if (STI.hasGloballyAddressableScratch())
    return TargetLowering::AtomicExpansionKind::CustomExpand;
  return TargetLowering::AtomicExpansionKind::NotAtomic;
}
17783+
1777617784
TargetLowering::AtomicExpansionKind
1777717785
SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
1777817786
unsigned AS = RMW->getPointerAddressSpace();
1777917787
if (AS == AMDGPUAS::PRIVATE_ADDRESS)
17780-
return AtomicExpansionKind::NotAtomic;
17788+
return getPrivateAtomicExpansionKind(*getSubtarget());
1778117789

1778217790
// 64-bit flat atomics that dynamically reside in private memory will silently
1778317791
// be dropped.
@@ -18048,22 +18056,22 @@ SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
1804818056
TargetLowering::AtomicExpansionKind
1804918057
SITargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
1805018058
return LI->getPointerAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS
18051-
? AtomicExpansionKind::NotAtomic
18059+
? getPrivateAtomicExpansionKind(*getSubtarget())
1805218060
: AtomicExpansionKind::None;
1805318061
}
1805418062

1805518063
TargetLowering::AtomicExpansionKind
1805618064
SITargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
1805718065
return SI->getPointerAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS
18058-
? AtomicExpansionKind::NotAtomic
18066+
? getPrivateAtomicExpansionKind(*getSubtarget())
1805918067
: AtomicExpansionKind::None;
1806018068
}
1806118069

1806218070
TargetLowering::AtomicExpansionKind
1806318071
SITargetLowering::shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *CmpX) const {
1806418072
unsigned AddrSpace = CmpX->getPointerAddressSpace();
1806518073
if (AddrSpace == AMDGPUAS::PRIVATE_ADDRESS)
18066-
return AtomicExpansionKind::NotAtomic;
18074+
return getPrivateAtomicExpansionKind(*getSubtarget());
1806718075

1806818076
if (AddrSpace != AMDGPUAS::FLAT_ADDRESS || !flatInstrMayAccessPrivate(CmpX))
1806918077
return AtomicExpansionKind::None;
@@ -18433,9 +18441,24 @@ void SITargetLowering::emitExpandAtomicAddrSpacePredicate(
1843318441
Builder.CreateBr(ExitBB);
1843418442
}
1843518443

18444+
// Rewrite a scratch (private address-space) atomic so it operates through the
// flat address space: insert an addrspacecast of the pointer operand just
// before the instruction and splice the cast in as the new pointer operand.
// The atomic instruction itself is left in place.
static void convertScratchAtomicToFlatAtomic(Instruction *I,
                                             unsigned PtrOpIdx) {
  Value *Ptr = I->getOperand(PtrOpIdx);
  assert(Ptr->getType()->getPointerAddressSpace() ==
         AMDGPUAS::PRIVATE_ADDRESS);

  Type *FlatPtrTy =
      PointerType::get(I->getContext(), AMDGPUAS::FLAT_ADDRESS);
  Value *Cast = CastInst::CreatePointerCast(Ptr, FlatPtrTy, "scratch.ascast",
                                            I->getIterator());
  I->setOperand(PtrOpIdx, Cast);
}
18455+
1843618456
void SITargetLowering::emitExpandAtomicRMW(AtomicRMWInst *AI) const {
1843718457
AtomicRMWInst::BinOp Op = AI->getOperation();
1843818458

18459+
if (AI->getPointerAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS)
18460+
return convertScratchAtomicToFlatAtomic(AI, AI->getPointerOperandIndex());
18461+
1843918462
if (Op == AtomicRMWInst::Sub || Op == AtomicRMWInst::Or ||
1844018463
Op == AtomicRMWInst::Xor) {
1844118464
if (const auto *ConstVal = dyn_cast<Constant>(AI->getValOperand());
@@ -18458,9 +18481,28 @@ void SITargetLowering::emitExpandAtomicRMW(AtomicRMWInst *AI) const {
1845818481
}
1845918482

1846018483
// Custom expansion for cmpxchg. A cmpxchg on a scratch pointer is rewritten
// as a flat atomic via an addrspacecast; any other case routed here (flat
// pointers that may dynamically alias private memory) goes through the
// address-space-predicated expansion.
void SITargetLowering::emitExpandAtomicCmpXchg(AtomicCmpXchgInst *CI) const {
  if (CI->getPointerAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
    emitExpandAtomicAddrSpacePredicate(CI);
    return;
  }
  convertScratchAtomicToFlatAtomic(CI, CI->getPointerOperandIndex());
}
1846318489

18490+
// Custom expansion for atomic loads. The only load that is routed here is a
// scratch (private address-space) atomic load, which is converted into a
// flat atomic load by addrspacecasting its pointer operand.
void SITargetLowering::emitExpandAtomicLoad(LoadInst *LI) const {
  if (LI->getPointerAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS)
    llvm_unreachable(
        "Expand Atomic Load only handles SCRATCH -> FLAT conversion");
  convertScratchAtomicToFlatAtomic(LI, LI->getPointerOperandIndex());
}
18497+
18498+
// Custom expansion for atomic stores. Mirrors emitExpandAtomicLoad: only a
// scratch (private address-space) atomic store reaches this hook, and it is
// converted into a flat atomic store via an addrspacecast of the pointer.
void SITargetLowering::emitExpandAtomicStore(StoreInst *SI) const {
  if (SI->getPointerAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS)
    llvm_unreachable(
        "Expand Atomic Store only handles SCRATCH -> FLAT conversion");
  convertScratchAtomicToFlatAtomic(SI, SI->getPointerOperandIndex());
}
18505+
1846418506
LoadInst *
1846518507
SITargetLowering::lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const {
1846618508
IRBuilder<> Builder(AI);

llvm/lib/Target/AMDGPU/SIISelLowering.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -562,6 +562,8 @@ class SITargetLowering final : public AMDGPUTargetLowering {
562562
void emitExpandAtomicAddrSpacePredicate(Instruction *AI) const;
563563
void emitExpandAtomicRMW(AtomicRMWInst *AI) const override;
564564
void emitExpandAtomicCmpXchg(AtomicCmpXchgInst *CI) const override;
565+
void emitExpandAtomicLoad(LoadInst *LI) const override;
566+
void emitExpandAtomicStore(StoreInst *SI) const override;
565567

566568
LoadInst *
567569
lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const override;

llvm/test/CodeGen/AMDGPU/gfx1250-no-scope-cu-stores.ll

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -86,15 +86,3 @@ entry:
8686
store atomic i32 %val, ptr addrspace(3) %dst syncscope("wavefront") unordered, align 4
8787
ret void
8888
}
89-
90-
; GCN: scratch_atomic_store:
91-
; CU: scratch_store_b32 off, v{{.*}}, s{{.*}} scope:SCOPE_SE
92-
; NOCU: scratch_store_b32 off, v{{.*}}, s{{.*}} scope:SCOPE_SE
93-
; GCN: .amdhsa_kernel scratch_atomic_store
94-
; CU: .amdhsa_uses_cu_stores 1
95-
; NOCU: .amdhsa_uses_cu_stores 0
96-
define amdgpu_kernel void @scratch_atomic_store(ptr addrspace(5) %dst, i32 %val) {
97-
entry:
98-
store atomic i32 %val, ptr addrspace(5) %dst syncscope("wavefront") unordered, align 4
99-
ret void
100-
}

0 commit comments

Comments (0)