Skip to content

Commit 01f785c

Browse files
authored
AMDGPU: Expand remaining system atomic operations (llvm#122137)
System-scope atomics need to use cmpxchg loops if we know nothing about the allocation the address comes from. Commit aea5980 started this; this change expands the set to cover the remaining integer operations. It does not expand xchg and add, since those should theoretically work over PCIe. This is a pre-commit that will introduce performance regressions. Subsequent changes will add handling of new atomicrmw metadata, which will avoid the expansion. Note that this still isn't conservative enough; we also need to expand some device-scope atomics if the memory is in fine-grained remote memory.
1 parent 6a7ade0 commit 01f785c

14 files changed

+22169
-4536
lines changed

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 25 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -17840,26 +17840,39 @@ SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
1784017840

1784117841
auto Op = RMW->getOperation();
1784217842
switch (Op) {
17843-
case AtomicRMWInst::Xchg: {
17843+
case AtomicRMWInst::Xchg:
1784417844
// PCIe supports add and xchg for system atomics.
1784517845
return isAtomicRMWLegalXChgTy(RMW)
1784617846
? TargetLowering::AtomicExpansionKind::None
1784717847
: TargetLowering::AtomicExpansionKind::CmpXChg;
17848-
}
1784917848
case AtomicRMWInst::Add:
17850-
case AtomicRMWInst::And:
17851-
case AtomicRMWInst::UIncWrap:
17852-
case AtomicRMWInst::UDecWrap:
17849+
// PCIe supports add and xchg for system atomics.
1785317850
return atomicSupportedIfLegalIntType(RMW);
1785417851
case AtomicRMWInst::Sub:
17852+
case AtomicRMWInst::And:
1785517853
case AtomicRMWInst::Or:
17856-
case AtomicRMWInst::Xor: {
17857-
// Atomic sub/or/xor do not work over PCI express, but atomic add
17858-
// does. InstCombine transforms these with 0 to or, so undo that.
17859-
if (HasSystemScope && AMDGPU::isFlatGlobalAddrSpace(AS)) {
17860-
if (Constant *ConstVal = dyn_cast<Constant>(RMW->getValOperand());
17861-
ConstVal && ConstVal->isNullValue())
17862-
return AtomicExpansionKind::Expand;
17854+
case AtomicRMWInst::Xor:
17855+
case AtomicRMWInst::Max:
17856+
case AtomicRMWInst::Min:
17857+
case AtomicRMWInst::UMax:
17858+
case AtomicRMWInst::UMin:
17859+
case AtomicRMWInst::UIncWrap:
17860+
case AtomicRMWInst::UDecWrap: {
17861+
if (AMDGPU::isFlatGlobalAddrSpace(AS) ||
17862+
AS == AMDGPUAS::BUFFER_FAT_POINTER) {
17863+
// Always expand system scope atomics.
17864+
if (HasSystemScope && !Subtarget->hasEmulatedSystemScopeAtomics()) {
17865+
if (Op == AtomicRMWInst::Sub || Op == AtomicRMWInst::Or ||
17866+
Op == AtomicRMWInst::Xor) {
17867+
// Atomic sub/or/xor do not work over PCI express, but atomic add
17868+
// does. InstCombine transforms these with 0 to or, so undo that.
17869+
if (Constant *ConstVal = dyn_cast<Constant>(RMW->getValOperand());
17870+
ConstVal && ConstVal->isNullValue())
17871+
return AtomicExpansionKind::Expand;
17872+
}
17873+
17874+
return AtomicExpansionKind::CmpXChg;
17875+
}
1786317876
}
1786417877

1786517878
return atomicSupportedIfLegalIntType(RMW);
@@ -18014,18 +18027,6 @@ SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
1801418027

1801518028
return AtomicExpansionKind::CmpXChg;
1801618029
}
18017-
case AtomicRMWInst::Min:
18018-
case AtomicRMWInst::Max:
18019-
case AtomicRMWInst::UMin:
18020-
case AtomicRMWInst::UMax: {
18021-
if (AMDGPU::isFlatGlobalAddrSpace(AS) ||
18022-
AS == AMDGPUAS::BUFFER_FAT_POINTER) {
18023-
if (HasSystemScope && !Subtarget->hasEmulatedSystemScopeAtomics())
18024-
return AtomicExpansionKind::CmpXChg;
18025-
}
18026-
18027-
return atomicSupportedIfLegalIntType(RMW);
18028-
}
1802918030
case AtomicRMWInst::Nand:
1803018031
case AtomicRMWInst::FSub:
1803118032
default:

0 commit comments

Comments
 (0)