Skip to content

Commit b1b5102

Browse files
authored
AMDGPU: Start considering new atomicrmw metadata on integer operations (#122138)
Start considering !amdgpu.no.remote.memory and !amdgpu.no.fine.grained.memory metadata when deciding to expand integer atomic operations. This does not yet attempt to accurately handle fadd/fmin/fmax, which are trickier and require migrating the old "amdgpu-unsafe-fp-atomics" attribute.
1 parent c1625fa commit b1b5102

28 files changed

+9301
-11737
lines changed

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 56 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -17860,19 +17860,63 @@ SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
1786017860
case AtomicRMWInst::UDecWrap: {
1786117861
if (AMDGPU::isFlatGlobalAddrSpace(AS) ||
1786217862
AS == AMDGPUAS::BUFFER_FAT_POINTER) {
17863-
// Always expand system scope atomics.
17864-
if (HasSystemScope && !Subtarget->hasEmulatedSystemScopeAtomics()) {
17865-
if (Op == AtomicRMWInst::Sub || Op == AtomicRMWInst::Or ||
17866-
Op == AtomicRMWInst::Xor) {
17867-
// Atomic sub/or/xor do not work over PCI express, but atomic add
17868-
// does. InstCombine transforms these with 0 to or, so undo that.
17869-
if (Constant *ConstVal = dyn_cast<Constant>(RMW->getValOperand());
17870-
ConstVal && ConstVal->isNullValue())
17871-
return AtomicExpansionKind::Expand;
17872-
}
17873-
17874-
return AtomicExpansionKind::CmpXChg;
17863+
if (Subtarget->hasEmulatedSystemScopeAtomics())
17864+
return atomicSupportedIfLegalIntType(RMW);
17865+
17866+
// On most subtargets, for atomicrmw operations other than add/xchg,
17867+
// whether or not the instructions will behave correctly depends on where
17868+
// the address physically resides and what interconnect is used in the
17869+
// system configuration. On some targets the instruction will nop,
17870+
// and in others synchronization will only occur at degraded device scope.
17871+
//
17872+
// If the allocation is known local to the device, the instructions should
17873+
// work correctly.
17874+
if (RMW->hasMetadata("amdgpu.no.remote.memory"))
17875+
return atomicSupportedIfLegalIntType(RMW);
17876+
17877+
// If fine-grained remote memory works at device scope, we don't need to
17878+
// do anything.
17879+
if (!HasSystemScope &&
17880+
Subtarget->supportsAgentScopeFineGrainedRemoteMemoryAtomics())
17881+
return atomicSupportedIfLegalIntType(RMW);
17882+
17883+
// If we are targeting a remote allocated address, it depends what kind of
17884+
// allocation the address belongs to.
17885+
//
17886+
// If the allocation is fine-grained (in host memory, or in PCIe peer
17887+
// device memory), the operation will fail depending on the target.
17888+
//
17889+
// Note fine-grained host memory access does work on APUs or if XGMI is
17890+
// used, but we do not know if we are targeting an APU or the system
17891+
// configuration from the ISA version/target-cpu.
17892+
if (RMW->hasMetadata("amdgpu.no.fine.grained.memory"))
17893+
return atomicSupportedIfLegalIntType(RMW);
17894+
17895+
if (Op == AtomicRMWInst::Sub || Op == AtomicRMWInst::Or ||
17896+
Op == AtomicRMWInst::Xor) {
17897+
// Atomic sub/or/xor do not work over PCI express, but atomic add
17898+
// does. InstCombine transforms these with 0 to or, so undo that.
17899+
if (Constant *ConstVal = dyn_cast<Constant>(RMW->getValOperand());
17900+
ConstVal && ConstVal->isNullValue())
17901+
return AtomicExpansionKind::Expand;
1787517902
}
17903+
17904+
// If the allocation could be in remote, fine-grained memory, the rmw
17905+
// instructions may fail. cmpxchg should work, so emit that. On some
17906+
// system configurations, PCIe atomics aren't supported so cmpxchg won't
17907+
// even work, so you're out of luck anyway.
17908+
17909+
// In summary:
17910+
//
17911+
// Cases that may fail:
17912+
// - fine-grained pinned host memory
17913+
// - fine-grained migratable host memory
17914+
// - fine-grained PCIe peer device
17915+
//
17916+
// Cases that should work, but may be treated overly conservatively.
17917+
// - fine-grained host memory on an APU
17918+
// - fine-grained XGMI peer device
17919+
return AtomicExpansionKind::CmpXChg;
1787617920
}
1787717921

1787817922
return atomicSupportedIfLegalIntType(RMW);

0 commit comments

Comments
 (0)