Skip to content

Commit fec54a7

Browse files
committed
AMDGPU: Start considering new atomicrmw metadata on integer operations
Start considering the !amdgpu.no.remote.memory and !amdgpu.no.fine.grained.memory metadata when deciding whether to expand integer atomic operations. This does not yet attempt to accurately handle fadd/fmin/fmax, which are trickier and require migrating the old "amdgpu-unsafe-fp-atomics" attribute.
1 parent 654c9b6 commit fec54a7

28 files changed

+9303
-11739
lines changed

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 56 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -17864,19 +17864,63 @@ SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
1786417864
case AtomicRMWInst::UDecWrap: {
1786517865
if (AMDGPU::isFlatGlobalAddrSpace(AS) ||
1786617866
AS == AMDGPUAS::BUFFER_FAT_POINTER) {
17867-
// Always expand system scope atomics.
17868-
if (HasSystemScope && !Subtarget->hasEmulatedSystemScopeAtomics()) {
17869-
if (Op == AtomicRMWInst::Sub || Op == AtomicRMWInst::Or ||
17870-
Op == AtomicRMWInst::Xor) {
17871-
// Atomic sub/or/xor do not work over PCI express, but atomic add
17872-
// does. InstCombine transforms these with 0 to or, so undo that.
17873-
if (Constant *ConstVal = dyn_cast<Constant>(RMW->getValOperand());
17874-
ConstVal && ConstVal->isNullValue())
17875-
return AtomicExpansionKind::Expand;
17876-
}
17877-
17878-
return AtomicExpansionKind::CmpXChg;
17867+
if (Subtarget->hasEmulatedSystemScopeAtomics())
17868+
return atomicSupportedIfLegalIntType(RMW);
17869+
17870+
// On most subtargets, for atomicrmw operations other than add/xchg,
17871+
// whether or not the instructions will behave correctly depends on where
17872+
// the address physically resides and what interconnect is used in the
17873+
// system configuration. On some some targets the instruction will nop,
17874+
// and in others synchronization will only occur at degraded device scope.
17875+
//
17876+
// If the allocation is known local to the device, the instructions should
17877+
// work correctly.
17878+
if (RMW->hasMetadata("amdgpu.no.remote.memory"))
17879+
return atomicSupportedIfLegalIntType(RMW);
17880+
17881+
// If fine-grained remote memory works at device scope, we don't need to
17882+
// do anything.
17883+
if (!HasSystemScope &&
17884+
Subtarget->supportsAgentScopeFineGrainedRemoteMemoryAtomics())
17885+
return atomicSupportedIfLegalIntType(RMW);
17886+
17887+
// If we are targeting a remote allocated address, it depends what kind of
17888+
// allocation the address belongs to.
17889+
//
17890+
// If the allocation is fine-grained (in host memory, or in PCIe peer
17891+
// device memory), the operation will fail depending on the target.
17892+
//
17893+
// Note fine-grained host memory access does work on APUs or if XGMI is
17894+
// used, but we do not know if we are targeting an APU or the system
17895+
// configuration from the ISA version/target-cpu.
17896+
if (RMW->hasMetadata("amdgpu.no.fine.grained.memory"))
17897+
return atomicSupportedIfLegalIntType(RMW);
17898+
17899+
if (Op == AtomicRMWInst::Sub || Op == AtomicRMWInst::Or ||
17900+
Op == AtomicRMWInst::Xor) {
17901+
// Atomic sub/or/xor do not work over PCI express, but atomic add
17902+
// does. InstCombine transforms these with 0 to or, so undo that.
17903+
if (Constant *ConstVal = dyn_cast<Constant>(RMW->getValOperand());
17904+
ConstVal && ConstVal->isNullValue())
17905+
return AtomicExpansionKind::Expand;
1787917906
}
17907+
17908+
// If the allocation could be in remote, fine-grained memory, the rmw
17909+
// instructions may fail. cmpxchg should work, so emit that. On some
17910+
// system configurations, PCIe atomics aren't supported so cmpxchg won't
17911+
// even work, so you're out of luck anyway.
17912+
17913+
// In summary:
17914+
//
17915+
// Cases that may fail:
17916+
// - fine-grained pinned host memory
17917+
// - fine-grained migratable host memory
17918+
// - fine-grained PCIe peer device
17919+
//
17920+
// Cases that should work, but may be treated overly conservatively.
17921+
// - fine-grained host memory on an APU
17922+
// - fine-grained XGMI peer device
17923+
return AtomicExpansionKind::CmpXChg;
1788017924
}
1788117925

1788217926
return atomicSupportedIfLegalIntType(RMW);

0 commit comments

Comments
 (0)