Skip to content

Commit 7f18eb6

Browse files
author
git apple-llvm automerger
committed
Merge commit 'c2eddec4ff42' from llvm.org/main into next
2 parents ef85732 + c2eddec commit 7f18eb6

File tree

5 files changed

+1507
-20
lines changed

5 files changed

+1507
-20
lines changed

llvm/lib/Target/AMDGPU/AMDGPU.td

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1013,6 +1013,14 @@ def FeatureAgentScopeFineGrainedRemoteMemoryAtomics
10131013
"device memory."
10141014
>;
10151015

1016+
def FeatureEmulatedSystemScopeAtomics
1017+
: SubtargetFeature<"emulated-system-scope-atomics",
1018+
"HasEmulatedSystemScopeAtomics",
1019+
"true",
1020+
"System scope atomics unsupported by the PCI-e are emulated in HW via CAS "
1021+
"loop and functional."
1022+
>;
1023+
10161024
def FeatureDefaultComponentZero : SubtargetFeature<"default-component-zero",
10171025
"HasDefaultComponentZero",
10181026
"true",
@@ -2062,6 +2070,7 @@ def FeatureISAVersion12_50 : FeatureSet<
20622070
FeatureAtomicFMinFMaxF64FlatInsts,
20632071
FeatureFlatBufferGlobalAtomicFaddF64Inst,
20642072
FeatureMemoryAtomicFAddF32DenormalSupport,
2073+
FeatureEmulatedSystemScopeAtomics,
20652074
FeatureGloballyAddressableScratch,
20662075
FeatureKernargPreload,
20672076
FeatureVmemPrefInsts,

llvm/lib/Target/AMDGPU/GCNSubtarget.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -187,6 +187,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
187187
bool HasFlatBufferGlobalAtomicFaddF64Inst = false;
188188
bool HasDefaultComponentZero = false;
189189
bool HasAgentScopeFineGrainedRemoteMemoryAtomics = false;
190+
bool HasEmulatedSystemScopeAtomics = false;
190191
bool HasDefaultComponentBroadcast = false;
191192
bool HasXF32Insts = false;
192193
/// The maximum number of instructions that may be placed within an S_CLAUSE,
@@ -950,6 +951,12 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
950951
return HasAgentScopeFineGrainedRemoteMemoryAtomics;
951952
}
952953

954+
/// \return true if HW emulates system scope atomics unsupported by the PCI-e
955+
/// via CAS loop.
956+
bool hasEmulatedSystemScopeAtomics() const {
957+
return HasEmulatedSystemScopeAtomics;
958+
}
959+
953960
bool hasDefaultComponentZero() const { return HasDefaultComponentZero; }
954961

955962
bool hasDefaultComponentBroadcast() const {

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17695,6 +17695,8 @@ static bool globalMemoryFPAtomicIsLegal(const GCNSubtarget &Subtarget,
1769517695
if (Subtarget.supportsAgentScopeFineGrainedRemoteMemoryAtomics() &&
1769617696
RMW->hasMetadata("amdgpu.no.remote.memory"))
1769717697
return true;
17698+
if (Subtarget.hasEmulatedSystemScopeAtomics())
17699+
return true;
1769817700
} else if (Subtarget.supportsAgentScopeFineGrainedRemoteMemoryAtomics())
1769917701
return true;
1770017702

@@ -17942,8 +17944,7 @@ SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
1794217944
case AtomicRMWInst::UMax: {
1794317945
if (AMDGPU::isFlatGlobalAddrSpace(AS) ||
1794417946
AS == AMDGPUAS::BUFFER_FAT_POINTER) {
17945-
// Always expand system scope min/max atomics.
17946-
if (HasSystemScope)
17947+
if (HasSystemScope && !Subtarget->hasEmulatedSystemScopeAtomics())
1794717948
return AtomicExpansionKind::CmpXChg;
1794817949
}
1794917950

0 commit comments

Comments
 (0)