Skip to content

Commit 10996a5

Browse files
author
Salinas, David
authored
Apply atomicrmw metadata lowering patches (llvm#3783) (llvm#4345)
2 parents 9046f1e + 9e6aaa6 commit 10996a5

30 files changed

+23354
-11175
lines changed

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 66 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -17300,26 +17300,80 @@ SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
1730017300

1730117301
auto Op = RMW->getOperation();
1730217302
switch (Op) {
17303-
case AtomicRMWInst::Xchg: {
17303+
case AtomicRMWInst::Xchg:
1730417304
// PCIe supports add and xchg for system atomics.
1730517305
return isAtomicRMWLegalXChgTy(RMW)
1730617306
? TargetLowering::AtomicExpansionKind::None
1730717307
: TargetLowering::AtomicExpansionKind::CmpXChg;
17308-
}
1730917308
case AtomicRMWInst::Add:
17310-
case AtomicRMWInst::And:
17311-
case AtomicRMWInst::UIncWrap:
17312-
case AtomicRMWInst::UDecWrap:
17309+
// PCIe supports add and xchg for system atomics.
1731317310
return atomicSupportedIfLegalIntType(RMW);
1731417311
case AtomicRMWInst::Sub:
17312+
case AtomicRMWInst::And:
1731517313
case AtomicRMWInst::Or:
17316-
case AtomicRMWInst::Xor: {
17317-
// Atomic sub/or/xor do not work over PCI express, but atomic add
17318-
// does. InstCombine transforms these with 0 to or, so undo that.
17319-
if (HasSystemScope && AMDGPU::isFlatGlobalAddrSpace(AS)) {
17320-
if (Constant *ConstVal = dyn_cast<Constant>(RMW->getValOperand());
17321-
ConstVal && ConstVal->isNullValue())
17322-
return AtomicExpansionKind::Expand;
17314+
case AtomicRMWInst::Xor:
17315+
case AtomicRMWInst::Max:
17316+
case AtomicRMWInst::Min:
17317+
case AtomicRMWInst::UMax:
17318+
case AtomicRMWInst::UMin:
17319+
case AtomicRMWInst::UIncWrap:
17320+
case AtomicRMWInst::UDecWrap: {
17321+
if (AMDGPU::isFlatGlobalAddrSpace(AS) ||
17322+
AS == AMDGPUAS::BUFFER_FAT_POINTER) {
17323+
// On most subtargets, for atomicrmw operations other than add/xchg,
17324+
// whether or not the instructions will behave correctly depends on where
17325+
// the address physically resides and what interconnect is used in the
17326+
// system configuration. On some targets the instruction will nop,
17327+
// and in others synchronization will only occur at degraded device scope.
17328+
//
17329+
// If the allocation is known local to the device, the instructions should
17330+
// work correctly.
17331+
if (RMW->hasMetadata("amdgpu.no.remote.memory"))
17332+
return atomicSupportedIfLegalIntType(RMW);
17333+
17334+
// If fine-grained remote memory works at device scope, we don't need to
17335+
// do anything.
17336+
if (!HasSystemScope &&
17337+
Subtarget->supportsAgentScopeFineGrainedRemoteMemoryAtomics())
17338+
return atomicSupportedIfLegalIntType(RMW);
17339+
17340+
// If we are targeting a remote allocated address, it depends what kind of
17341+
// allocation the address belongs to.
17342+
//
17343+
// If the allocation is fine-grained (in host memory, or in PCIe peer
17344+
// device memory), the operation will fail depending on the target.
17345+
//
17346+
// Note fine-grained host memory access does work on APUs or if XGMI is
17347+
// used, but we do not know if we are targeting an APU or the system
17348+
// configuration from the ISA version/target-cpu.
17349+
if (RMW->hasMetadata("amdgpu.no.fine.grained.memory"))
17350+
return atomicSupportedIfLegalIntType(RMW);
17351+
17352+
if (Op == AtomicRMWInst::Sub || Op == AtomicRMWInst::Or ||
17353+
Op == AtomicRMWInst::Xor) {
17354+
// Atomic sub/or/xor do not work over PCI express, but atomic add
17355+
// does. InstCombine transforms these with a 0 operand into `or`, so undo that.
17356+
if (Constant *ConstVal = dyn_cast<Constant>(RMW->getValOperand());
17357+
ConstVal && ConstVal->isNullValue())
17358+
return AtomicExpansionKind::Expand;
17359+
}
17360+
17361+
// If the allocation could be in remote, fine-grained memory, the rmw
17362+
// instructions may fail. cmpxchg should work, so emit that. On some
17363+
// system configurations, PCIe atomics aren't supported so cmpxchg won't
17364+
// even work, so you're out of luck anyway.
17365+
17366+
// In summary:
17367+
//
17368+
// Cases that may fail:
17369+
// - fine-grained pinned host memory
17370+
// - fine-grained migratable host memory
17371+
// - fine-grained PCIe peer device
17372+
//
17373+
// Cases that should work, but may be treated overly conservatively.
17374+
// - fine-grained host memory on an APU
17375+
// - fine-grained XGMI peer device
17376+
return AtomicExpansionKind::CmpXChg;
1732317377
}
1732417378

1732517379
return atomicSupportedIfLegalIntType(RMW);
@@ -17474,19 +17528,6 @@ SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
1747417528

1747517529
return AtomicExpansionKind::CmpXChg;
1747617530
}
17477-
case AtomicRMWInst::Min:
17478-
case AtomicRMWInst::Max:
17479-
case AtomicRMWInst::UMin:
17480-
case AtomicRMWInst::UMax: {
17481-
if (AMDGPU::isFlatGlobalAddrSpace(AS) ||
17482-
AS == AMDGPUAS::BUFFER_FAT_POINTER) {
17483-
// Always expand system scope min/max atomics.
17484-
if (HasSystemScope)
17485-
return AtomicExpansionKind::CmpXChg;
17486-
}
17487-
17488-
return atomicSupportedIfLegalIntType(RMW);
17489-
}
1749017531
case AtomicRMWInst::Nand:
1749117532
case AtomicRMWInst::FSub:
1749217533
default:

0 commit comments

Comments
 (0)