Skip to content

Commit c8c1aa7

Browse files
committed
AMDGPU: Expand remaining system atomic operations
System scope atomics need to use cmpxchg loops if we know nothing about the allocation the address is from. aea5980 started this; this commit expands the set to cover the remaining integer operations. Don't expand xchg and add, since those should theoretically work over PCIe. This is a pre-commit which will introduce performance regressions. Subsequent changes will add handling of new atomicrmw metadata, which will avoid the expansion. Note that this still isn't conservative enough; we do need to expand some device scope atomics if the memory is in fine-grained remote memory.
1 parent f8a2ed7 commit c8c1aa7

14 files changed

+22171
-4539
lines changed

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 25 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -17702,26 +17702,39 @@ SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
1770217702

1770317703
auto Op = RMW->getOperation();
1770417704
switch (Op) {
17705-
case AtomicRMWInst::Xchg: {
17705+
case AtomicRMWInst::Xchg:
1770617706
// PCIe supports add and xchg for system atomics.
1770717707
return isAtomicRMWLegalXChgTy(RMW)
1770817708
? TargetLowering::AtomicExpansionKind::None
1770917709
: TargetLowering::AtomicExpansionKind::CmpXChg;
17710-
}
1771117710
case AtomicRMWInst::Add:
17712-
case AtomicRMWInst::And:
17713-
case AtomicRMWInst::UIncWrap:
17714-
case AtomicRMWInst::UDecWrap:
17711+
// PCIe supports add and xchg for system atomics.
1771517712
return atomicSupportedIfLegalIntType(RMW);
1771617713
case AtomicRMWInst::Sub:
17714+
case AtomicRMWInst::And:
1771717715
case AtomicRMWInst::Or:
17718-
case AtomicRMWInst::Xor: {
17719-
// Atomic sub/or/xor do not work over PCI express, but atomic add
17720-
// does. InstCombine transforms these with 0 to or, so undo that.
17721-
if (HasSystemScope && AMDGPU::isFlatGlobalAddrSpace(AS)) {
17722-
if (Constant *ConstVal = dyn_cast<Constant>(RMW->getValOperand());
17723-
ConstVal && ConstVal->isNullValue())
17724-
return AtomicExpansionKind::Expand;
17716+
case AtomicRMWInst::Xor:
17717+
case AtomicRMWInst::Max:
17718+
case AtomicRMWInst::Min:
17719+
case AtomicRMWInst::UMax:
17720+
case AtomicRMWInst::UMin:
17721+
case AtomicRMWInst::UIncWrap:
17722+
case AtomicRMWInst::UDecWrap: {
17723+
if (AMDGPU::isFlatGlobalAddrSpace(AS) ||
17724+
AS == AMDGPUAS::BUFFER_FAT_POINTER) {
17725+
// Always expand system scope atomics.
17726+
if (HasSystemScope) {
17727+
if (Op == AtomicRMWInst::Sub || Op == AtomicRMWInst::Or ||
17728+
Op == AtomicRMWInst::Xor) {
17729+
// Atomic sub/or/xor do not work over PCI express, but atomic add
17730+
// does. InstCombine transforms these with 0 to or, so undo that.
17731+
if (Constant *ConstVal = dyn_cast<Constant>(RMW->getValOperand());
17732+
ConstVal && ConstVal->isNullValue())
17733+
return AtomicExpansionKind::Expand;
17734+
}
17735+
17736+
return AtomicExpansionKind::CmpXChg;
17737+
}
1772517738
}
1772617739

1772717740
return atomicSupportedIfLegalIntType(RMW);
@@ -17876,19 +17889,6 @@ SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
1787617889

1787717890
return AtomicExpansionKind::CmpXChg;
1787817891
}
17879-
case AtomicRMWInst::Min:
17880-
case AtomicRMWInst::Max:
17881-
case AtomicRMWInst::UMin:
17882-
case AtomicRMWInst::UMax: {
17883-
if (AMDGPU::isFlatGlobalAddrSpace(AS) ||
17884-
AS == AMDGPUAS::BUFFER_FAT_POINTER) {
17885-
// Always expand system scope min/max atomics.
17886-
if (HasSystemScope)
17887-
return AtomicExpansionKind::CmpXChg;
17888-
}
17889-
17890-
return atomicSupportedIfLegalIntType(RMW);
17891-
}
1789217892
case AtomicRMWInst::Nand:
1789317893
case AtomicRMWInst::FSub:
1789417894
default:

0 commit comments

Comments
 (0)