Skip to content

Commit 976f7b9

Browse files
committed
AMDGPU: Expand remaining system atomic operations
System-scope atomics need to use cmpxchg loops if we know nothing about the allocation the address comes from. aea5980 started this; this commit expands the set to cover the remaining integer operations. Don't expand xchg and add, since those should theoretically work over PCIe. This is a pre-commit that will introduce performance regressions. Subsequent changes will add handling of new atomicrmw metadata, which will avoid the expansion. Note that this still isn't conservative enough; we also need to expand some device-scope atomics if the memory is in fine-grained remote memory.
1 parent 4fde8c3 commit 976f7b9

14 files changed

+21832
-4343
lines changed

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 25 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -16743,26 +16743,39 @@ SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
1674316743

1674416744
auto Op = RMW->getOperation();
1674516745
switch (Op) {
16746-
case AtomicRMWInst::Xchg: {
16746+
case AtomicRMWInst::Xchg:
1674716747
// PCIe supports add and xchg for system atomics.
1674816748
return isAtomicRMWLegalXChgTy(RMW)
1674916749
? TargetLowering::AtomicExpansionKind::None
1675016750
: TargetLowering::AtomicExpansionKind::CmpXChg;
16751-
}
1675216751
case AtomicRMWInst::Add:
16753-
case AtomicRMWInst::And:
16754-
case AtomicRMWInst::UIncWrap:
16755-
case AtomicRMWInst::UDecWrap:
16752+
// PCIe supports add and xchg for system atomics.
1675616753
return atomicSupportedIfLegalIntType(RMW);
1675716754
case AtomicRMWInst::Sub:
16755+
case AtomicRMWInst::And:
1675816756
case AtomicRMWInst::Or:
16759-
case AtomicRMWInst::Xor: {
16760-
// Atomic sub/or/xor do not work over PCI express, but atomic add
16761-
// does. InstCombine transforms these with 0 to or, so undo that.
16762-
if (HasSystemScope && AMDGPU::isFlatGlobalAddrSpace(AS)) {
16763-
if (Constant *ConstVal = dyn_cast<Constant>(RMW->getValOperand());
16764-
ConstVal && ConstVal->isNullValue())
16765-
return AtomicExpansionKind::Expand;
16757+
case AtomicRMWInst::Xor:
16758+
case AtomicRMWInst::Max:
16759+
case AtomicRMWInst::Min:
16760+
case AtomicRMWInst::UMax:
16761+
case AtomicRMWInst::UMin:
16762+
case AtomicRMWInst::UIncWrap:
16763+
case AtomicRMWInst::UDecWrap: {
16764+
if (AMDGPU::isFlatGlobalAddrSpace(AS) ||
16765+
AS == AMDGPUAS::BUFFER_FAT_POINTER) {
16766+
// Always expand system scope atomics.
16767+
if (HasSystemScope) {
16768+
if (Op == AtomicRMWInst::Sub || Op == AtomicRMWInst::Or ||
16769+
Op == AtomicRMWInst::Xor) {
16770+
// Atomic sub/or/xor do not work over PCI express, but atomic add
16771+
// does. InstCombine transforms these with 0 to or, so undo that.
16772+
if (Constant *ConstVal = dyn_cast<Constant>(RMW->getValOperand());
16773+
ConstVal && ConstVal->isNullValue())
16774+
return AtomicExpansionKind::Expand;
16775+
}
16776+
16777+
return AtomicExpansionKind::CmpXChg;
16778+
}
1676616779
}
1676716780

1676816781
return atomicSupportedIfLegalIntType(RMW);
@@ -16917,19 +16930,6 @@ SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
1691716930

1691816931
return AtomicExpansionKind::CmpXChg;
1691916932
}
16920-
case AtomicRMWInst::Min:
16921-
case AtomicRMWInst::Max:
16922-
case AtomicRMWInst::UMin:
16923-
case AtomicRMWInst::UMax: {
16924-
if (AMDGPU::isFlatGlobalAddrSpace(AS) ||
16925-
AS == AMDGPUAS::BUFFER_FAT_POINTER) {
16926-
// Always expand system scope min/max atomics.
16927-
if (HasSystemScope)
16928-
return AtomicExpansionKind::CmpXChg;
16929-
}
16930-
16931-
return atomicSupportedIfLegalIntType(RMW);
16932-
}
1693316933
case AtomicRMWInst::Nand:
1693416934
case AtomicRMWInst::FSub:
1693516935
default:

0 commit comments

Comments
 (0)