@@ -16123,6 +16123,34 @@ static bool isBFloat2(Type *Ty) {
1612316123 return VT && VT->getNumElements() == 2 && VT->getElementType()->isBFloatTy();
1612416124}
1612516125
16126+ /// \returns true if it's valid to emit a native instruction for \p RMW, based
16127+ /// on the properties of the target memory.
16128+ static bool globalMemoryFPAtomicIsLegal(const GCNSubtarget &Subtarget,
16129+ const AtomicRMWInst *RMW,
16130+ bool HasSystemScope) {
16131+ // The remote/fine-grained access logic is different from the integer
16132+ // atomics. Without AgentScopeFineGrainedRemoteMemoryAtomics support,
16133+ // fine-grained access does not work, even for a device local allocation.
16134+ //
16135+ // With AgentScopeFineGrainedRemoteMemoryAtomics, system scoped device local
16136+ // allocations work.
16137+ if (HasSystemScope) {
16138+ if (Subtarget.supportsAgentScopeFineGrainedRemoteMemoryAtomics() &&
16139+ RMW->hasMetadata("amdgpu.no.remote.memory"))
16140+ return true;
16141+ } else if (Subtarget.supportsAgentScopeFineGrainedRemoteMemoryAtomics())
16142+ return true;
16143+
16144+ if (RMW->hasMetadata("amdgpu.no.fine.grained.memory"))
16145+ return true;
16146+
16147+ // TODO: Auto-upgrade this attribute to the metadata in function body and stop
16148+ // checking it.
16149+ return RMW->getFunction()
16150+ ->getFnAttribute("amdgpu-unsafe-fp-atomics")
16151+ .getValueAsBool();
16152+ }
16153+
1612616154TargetLowering::AtomicExpansionKind
1612716155SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
1612816156 unsigned AS = RMW->getPointerAddressSpace();
@@ -16266,37 +16294,32 @@ SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
1626616294 Type *Ty = RMW->getType();
1626716295
1626816296 // LDS float and double fmin/fmax were always supported.
16269- if (AS == AMDGPUAS::LOCAL_ADDRESS && (Ty->isFloatTy() || Ty->isDoubleTy()))
16270- return AtomicExpansionKind::None;
16271-
16272- if (unsafeFPAtomicsDisabled(RMW->getFunction()))
16273- return AtomicExpansionKind::CmpXChg;
16274-
16275- // Always expand system scope fp atomics.
16276- if (HasSystemScope)
16277- return AtomicExpansionKind::CmpXChg;
16297+ if (AS == AMDGPUAS::LOCAL_ADDRESS) {
16298+ return Ty->isFloatTy() || Ty->isDoubleTy() ? AtomicExpansionKind::None
16299+ : AtomicExpansionKind::CmpXChg;
16300+ }
1627816301
16279- // For flat and global cases:
16280- // float, double in gfx7. Manual claims denormal support.
16281- // Removed in gfx8 .
16282- // float, double restored in gfx10 .
16283- // double removed again in gfx11, so only f32 for gfx11/gfx12 .
16284- //
16285- // For gfx9, gfx90a and gfx940 support f64 for global (same as fadd), but no
16286- // f32.
16287- //
16288- // FIXME: Check scope and fine grained memory
16289- if (AS == AMDGPUAS::FLAT_ADDRESS) {
16290- if (Subtarget->hasAtomicFMinFMaxF32FlatInsts() && Ty->isFloatTy())
16291- return ReportUnsafeHWInst(AtomicExpansionKind::None);
16292- if (Subtarget->hasAtomicFMinFMaxF64FlatInsts() && Ty->isDoubleTy())
16293- return ReportUnsafeHWInst(AtomicExpansionKind::None);
16294- } else if (AMDGPU::isExtendedGlobalAddrSpace(AS) ||
16295- AS == AMDGPUAS::BUFFER_FAT_POINTER) {
16296- if (Subtarget->hasAtomicFMinFMaxF32GlobalInsts() && Ty->isFloatTy())
16297- return ReportUnsafeHWInst(AtomicExpansionKind::None);
16298- if (Subtarget->hasAtomicFMinFMaxF64GlobalInsts() && Ty->isDoubleTy())
16299- return ReportUnsafeHWInst(AtomicExpansionKind::None);
16302+ if (globalMemoryFPAtomicIsLegal(*Subtarget, RMW, HasSystemScope)) {
16303+ // For flat and global cases:
16304+ // float, double in gfx7. Manual claims denormal support .
16305+ // Removed in gfx8 .
16306+ // float, double restored in gfx10 .
16307+ // double removed again in gfx11, so only f32 for gfx11/gfx12.
16308+ //
16309+ // For gfx9, gfx90a and gfx940 support f64 for global (same as fadd), but
16310+ // no f32.
16311+ if (AS == AMDGPUAS::FLAT_ADDRESS) {
16312+ if (Subtarget->hasAtomicFMinFMaxF32FlatInsts() && Ty->isFloatTy())
16313+ return ReportUnsafeHWInst(AtomicExpansionKind::None);
16314+ if (Subtarget->hasAtomicFMinFMaxF64FlatInsts() && Ty->isDoubleTy())
16315+ return ReportUnsafeHWInst(AtomicExpansionKind::None);
16316+ } else if (AMDGPU::isExtendedGlobalAddrSpace(AS) ||
16317+ AS == AMDGPUAS::BUFFER_FAT_POINTER) {
16318+ if (Subtarget->hasAtomicFMinFMaxF32GlobalInsts() && Ty->isFloatTy())
16319+ return ReportUnsafeHWInst(AtomicExpansionKind::None);
16320+ if (Subtarget->hasAtomicFMinFMaxF64GlobalInsts() && Ty->isDoubleTy())
16321+ return ReportUnsafeHWInst(AtomicExpansionKind::None);
16322+ }
1630016323 }
1630116324
1630216325 return AtomicExpansionKind::CmpXChg;
0 commit comments