Skip to content

Commit 5030fc1

Browse files
author
Pasham, Sai-poorna
authored
Apply atomicrmw metadata lowering patches (llvm#3783)
Apply atomicrmw metadata lowering patches (llvm#3783) Implements SWDEV-516488: clang atomic control options and statement attributes
2 parents 1e6a516 + 1c47058 commit 5030fc1

30 files changed

+25410
-7840
lines changed

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 66 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -16854,26 +16854,80 @@ SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
1685416854

1685516855
auto Op = RMW->getOperation();
1685616856
switch (Op) {
16857-
case AtomicRMWInst::Xchg: {
16857+
case AtomicRMWInst::Xchg:
1685816858
// PCIe supports add and xchg for system atomics.
1685916859
return isAtomicRMWLegalXChgTy(RMW)
1686016860
? TargetLowering::AtomicExpansionKind::None
1686116861
: TargetLowering::AtomicExpansionKind::CmpXChg;
16862-
}
1686316862
case AtomicRMWInst::Add:
16864-
case AtomicRMWInst::And:
16865-
case AtomicRMWInst::UIncWrap:
16866-
case AtomicRMWInst::UDecWrap:
16863+
// PCIe supports add and xchg for system atomics.
1686716864
return atomicSupportedIfLegalIntType(RMW);
1686816865
case AtomicRMWInst::Sub:
16866+
case AtomicRMWInst::And:
1686916867
case AtomicRMWInst::Or:
16870-
case AtomicRMWInst::Xor: {
16871-
// Atomic sub/or/xor do not work over PCI express, but atomic add
16872-
// does. InstCombine transforms these with 0 to or, so undo that.
16873-
if (HasSystemScope && AMDGPU::isFlatGlobalAddrSpace(AS)) {
16874-
if (Constant *ConstVal = dyn_cast<Constant>(RMW->getValOperand());
16875-
ConstVal && ConstVal->isNullValue())
16876-
return AtomicExpansionKind::Expand;
16868+
case AtomicRMWInst::Xor:
16869+
case AtomicRMWInst::Max:
16870+
case AtomicRMWInst::Min:
16871+
case AtomicRMWInst::UMax:
16872+
case AtomicRMWInst::UMin:
16873+
case AtomicRMWInst::UIncWrap:
16874+
case AtomicRMWInst::UDecWrap: {
16875+
if (AMDGPU::isFlatGlobalAddrSpace(AS) ||
16876+
AS == AMDGPUAS::BUFFER_FAT_POINTER) {
16877+
// On most subtargets, for atomicrmw operations other than add/xchg,
16878+
// whether or not the instructions will behave correctly depends on where
16879+
// the address physically resides and what interconnect is used in the
16880+
// system configuration. On some some targets the instruction will nop,
16881+
// and in others synchronization will only occur at degraded device scope.
16882+
//
16883+
// If the allocation is known local to the device, the instructions should
16884+
// work correctly.
16885+
if (RMW->hasMetadata("amdgpu.no.remote.memory"))
16886+
return atomicSupportedIfLegalIntType(RMW);
16887+
16888+
// If fine-grained remote memory works at device scope, we don't need to
16889+
// do anything.
16890+
if (!HasSystemScope &&
16891+
Subtarget->supportsAgentScopeFineGrainedRemoteMemoryAtomics())
16892+
return atomicSupportedIfLegalIntType(RMW);
16893+
16894+
// If we are targeting a remote allocated address, it depends what kind of
16895+
// allocation the address belongs to.
16896+
//
16897+
// If the allocation is fine-grained (in host memory, or in PCIe peer
16898+
// device memory), the operation will fail depending on the target.
16899+
//
16900+
// Note fine-grained host memory access does work on APUs or if XGMI is
16901+
// used, but we do not know if we are targeting an APU or the system
16902+
// configuration from the ISA version/target-cpu.
16903+
if (RMW->hasMetadata("amdgpu.no.fine.grained.memory"))
16904+
return atomicSupportedIfLegalIntType(RMW);
16905+
16906+
if (Op == AtomicRMWInst::Sub || Op == AtomicRMWInst::Or ||
16907+
Op == AtomicRMWInst::Xor) {
16908+
// Atomic sub/or/xor do not work over PCI express, but atomic add
16909+
// does. InstCombine transforms these with 0 to or, so undo that.
16910+
if (Constant *ConstVal = dyn_cast<Constant>(RMW->getValOperand());
16911+
ConstVal && ConstVal->isNullValue())
16912+
return AtomicExpansionKind::Expand;
16913+
}
16914+
16915+
// If the allocation could be in remote, fine-grained memory, the rmw
16916+
// instructions may fail. cmpxchg should work, so emit that. On some
16917+
// system configurations, PCIe atomics aren't supported so cmpxchg won't
16918+
// even work, so you're out of luck anyway.
16919+
16920+
// In summary:
16921+
//
16922+
// Cases that may fail:
16923+
// - fine-grained pinned host memory
16924+
// - fine-grained migratable host memory
16925+
// - fine-grained PCIe peer device
16926+
//
16927+
// Cases that should work, but may be treated overly conservatively:
16928+
// - fine-grained host memory on an APU
16929+
// - fine-grained XGMI peer device
16930+
return AtomicExpansionKind::CmpXChg;
1687716931
}
1687816932

1687916933
return atomicSupportedIfLegalIntType(RMW);
@@ -17028,19 +17082,6 @@ SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
1702817082

1702917083
return AtomicExpansionKind::CmpXChg;
1703017084
}
17031-
case AtomicRMWInst::Min:
17032-
case AtomicRMWInst::Max:
17033-
case AtomicRMWInst::UMin:
17034-
case AtomicRMWInst::UMax: {
17035-
if (AMDGPU::isFlatGlobalAddrSpace(AS) ||
17036-
AS == AMDGPUAS::BUFFER_FAT_POINTER) {
17037-
// Always expand system scope min/max atomics.
17038-
if (HasSystemScope)
17039-
return AtomicExpansionKind::CmpXChg;
17040-
}
17041-
17042-
return atomicSupportedIfLegalIntType(RMW);
17043-
}
1704417085
case AtomicRMWInst::Nand:
1704517086
case AtomicRMWInst::FSub:
1704617087
default:

0 commit comments

Comments
 (0)