Skip to content

Commit 26fd4ea

Browse files
committed
AMDGPU: Expand remaining system atomic operations (llvm#122137)
System scope atomics need to use cmpxchg loops if we know nothing about the allocation the address comes from. Commit aea5980 started this work; this change expands the set to cover the remaining integer operations. Xchg and add are not expanded, because those should theoretically work over PCIe. This is a pre-commit which will introduce performance regressions. Subsequent changes will add handling of new atomicrmw metadata, which will avoid the expansion. Note that this still isn't conservative enough; we also need to expand some device scope atomics if the memory is in fine-grained remote memory. (cherry picked from commit 01f785c)
1 parent 9461df3 commit 26fd4ea

13 files changed

+21176
-3983
lines changed

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 25 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -16854,26 +16854,39 @@ SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
1685416854

1685516855
auto Op = RMW->getOperation();
1685616856
switch (Op) {
16857-
case AtomicRMWInst::Xchg: {
16857+
case AtomicRMWInst::Xchg:
1685816858
// PCIe supports add and xchg for system atomics.
1685916859
return isAtomicRMWLegalXChgTy(RMW)
1686016860
? TargetLowering::AtomicExpansionKind::None
1686116861
: TargetLowering::AtomicExpansionKind::CmpXChg;
16862-
}
1686316862
case AtomicRMWInst::Add:
16864-
case AtomicRMWInst::And:
16865-
case AtomicRMWInst::UIncWrap:
16866-
case AtomicRMWInst::UDecWrap:
16863+
// PCIe supports add and xchg for system atomics.
1686716864
return atomicSupportedIfLegalIntType(RMW);
1686816865
case AtomicRMWInst::Sub:
16866+
case AtomicRMWInst::And:
1686916867
case AtomicRMWInst::Or:
16870-
case AtomicRMWInst::Xor: {
16871-
// Atomic sub/or/xor do not work over PCI express, but atomic add
16872-
// does. InstCombine transforms these with 0 to or, so undo that.
16873-
if (HasSystemScope && AMDGPU::isFlatGlobalAddrSpace(AS)) {
16874-
if (Constant *ConstVal = dyn_cast<Constant>(RMW->getValOperand());
16875-
ConstVal && ConstVal->isNullValue())
16876-
return AtomicExpansionKind::Expand;
16868+
case AtomicRMWInst::Xor:
16869+
case AtomicRMWInst::Max:
16870+
case AtomicRMWInst::Min:
16871+
case AtomicRMWInst::UMax:
16872+
case AtomicRMWInst::UMin:
16873+
case AtomicRMWInst::UIncWrap:
16874+
case AtomicRMWInst::UDecWrap: {
16875+
if (AMDGPU::isFlatGlobalAddrSpace(AS) ||
16876+
AS == AMDGPUAS::BUFFER_FAT_POINTER) {
16877+
// Always expand system scope atomics.
16878+
if (HasSystemScope) {
16879+
if (Op == AtomicRMWInst::Sub || Op == AtomicRMWInst::Or ||
16880+
Op == AtomicRMWInst::Xor) {
16881+
// Atomic sub/or/xor do not work over PCI express, but atomic add
16882+
// does. InstCombine transforms these with 0 to or, so undo that.
16883+
if (Constant *ConstVal = dyn_cast<Constant>(RMW->getValOperand());
16884+
ConstVal && ConstVal->isNullValue())
16885+
return AtomicExpansionKind::Expand;
16886+
}
16887+
16888+
return AtomicExpansionKind::CmpXChg;
16889+
}
1687716890
}
1687816891

1687916892
return atomicSupportedIfLegalIntType(RMW);
@@ -17028,19 +17041,6 @@ SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
1702817041

1702917042
return AtomicExpansionKind::CmpXChg;
1703017043
}
17031-
case AtomicRMWInst::Min:
17032-
case AtomicRMWInst::Max:
17033-
case AtomicRMWInst::UMin:
17034-
case AtomicRMWInst::UMax: {
17035-
if (AMDGPU::isFlatGlobalAddrSpace(AS) ||
17036-
AS == AMDGPUAS::BUFFER_FAT_POINTER) {
17037-
// Always expand system scope min/max atomics.
17038-
if (HasSystemScope)
17039-
return AtomicExpansionKind::CmpXChg;
17040-
}
17041-
17042-
return atomicSupportedIfLegalIntType(RMW);
17043-
}
1704417044
case AtomicRMWInst::Nand:
1704517045
case AtomicRMWInst::FSub:
1704617046
default:

0 commit comments

Comments
 (0)