@@ -16114,6 +16114,34 @@ static bool isBFloat2(Type *Ty) {
1611416114 return VT && VT->getNumElements() == 2 && VT->getElementType()->isBFloatTy();
1611516115}
1611616116
16117+ /// \returns true if it's valid to emit a native instruction for \p RMW, based
16118+ /// on the properties of the target memory.
16119+ static bool globalMemoryFPAtomicIsLegal(const GCNSubtarget &Subtarget,
16120+ const AtomicRMWInst *RMW,
16121+ bool HasSystemScope) {
16122+ // The remote/fine-grained access logic is different from the integer
16123+ // atomics. Without AgentScopeFineGrainedRemoteMemoryAtomics support,
16124+ // fine-grained access does not work, even for a device local allocation.
16125+ //
16126+ // With AgentScopeFineGrainedRemoteMemoryAtomics, system scoped device local
16127+ // allocations work.
16128+ if (HasSystemScope) {
16129+ if (Subtarget.supportsAgentScopeFineGrainedRemoteMemoryAtomics() &&
16130+ RMW->hasMetadata("amdgpu.no.remote.memory"))
16131+ return true;
16132+ } else if (Subtarget.supportsAgentScopeFineGrainedRemoteMemoryAtomics())
16133+ return true;
16134+
16135+ if (RMW->hasMetadata("amdgpu.no.fine.grained.memory"))
16136+ return true;
16137+
16138+ // TODO: Auto-upgrade this attribute to the metadata in function body and stop
16139+ // checking it.
16140+ return RMW->getFunction()
16141+ ->getFnAttribute("amdgpu-unsafe-fp-atomics")
16142+ .getValueAsBool();
16143+ }
16144+
1611716145TargetLowering::AtomicExpansionKind
1611816146SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
1611916147 unsigned AS = RMW->getPointerAddressSpace();
@@ -16264,37 +16292,32 @@ SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
1626416292 Type *Ty = RMW->getType();
1626516293
1626616294 // LDS float and double fmin/fmax were always supported.
16267- if (AS == AMDGPUAS::LOCAL_ADDRESS && (Ty->isFloatTy() || Ty->isDoubleTy()))
16268- return AtomicExpansionKind::None;
16269-
16270- if (unsafeFPAtomicsDisabled(RMW->getFunction()))
16271- return AtomicExpansionKind::CmpXChg;
16272-
16273- // Always expand system scope fp atomics.
16274- if (HasSystemScope)
16275- return AtomicExpansionKind::CmpXChg;
16295+ if (AS == AMDGPUAS::LOCAL_ADDRESS) {
16296+ return Ty->isFloatTy() || Ty->isDoubleTy() ? AtomicExpansionKind::None
16297+ : AtomicExpansionKind::CmpXChg;
16298+ }
1627616299
16277- // For flat and global cases:
16278- // float, double in gfx7. Manual claims denormal support.
16279- // Removed in gfx8 .
16280- // float, double restored in gfx10 .
16281- // double removed again in gfx11, so only f32 for gfx11/gfx12 .
16282- //
16283- // For gfx9, gfx90a and gfx940 support f64 for global (same as fadd), but no
16284- // f32.
16285- //
16286- // FIXME: Check scope and fine grained memory
16287- if (AS == AMDGPUAS::FLAT_ADDRESS) {
16288- if (Subtarget->hasAtomicFMinFMaxF32FlatInsts() && Ty->isFloatTy())
16289- return ReportUnsafeHWInst(AtomicExpansionKind::None);
16290- if (Subtarget->hasAtomicFMinFMaxF64FlatInsts() && Ty->isDoubleTy())
16291- return ReportUnsafeHWInst(AtomicExpansionKind::None);
16292- } else if (AMDGPU::isExtendedGlobalAddrSpace(AS) ||
16293- AS == AMDGPUAS::BUFFER_FAT_POINTER) {
16294- if (Subtarget->hasAtomicFMinFMaxF32GlobalInsts() && Ty->isFloatTy())
16295- return ReportUnsafeHWInst(AtomicExpansionKind::None);
16296- if (Subtarget->hasAtomicFMinFMaxF64GlobalInsts() && Ty->isDoubleTy())
16297- return ReportUnsafeHWInst(AtomicExpansionKind::None);
16300+ if (globalMemoryFPAtomicIsLegal(*Subtarget, RMW, HasSystemScope)) {
16301+ // For flat and global cases:
16302+ // float, double in gfx7. Manual claims denormal support .
16303+ // Removed in gfx8 .
16304+ // float, double restored in gfx10 .
16305+ // double removed again in gfx11, so only f32 for gfx11/gfx12.
16306+ //
16307+ // For gfx9, gfx90a and gfx940 support f64 for global (same as fadd), but
16308+ // no f32.
16309+ if (AS == AMDGPUAS::FLAT_ADDRESS) {
16310+ if (Subtarget->hasAtomicFMinFMaxF32FlatInsts() && Ty->isFloatTy())
16311+ return ReportUnsafeHWInst(AtomicExpansionKind::None);
16312+ if (Subtarget->hasAtomicFMinFMaxF64FlatInsts() && Ty->isDoubleTy())
16313+ return ReportUnsafeHWInst(AtomicExpansionKind::None);
16314+ } else if (AMDGPU::isExtendedGlobalAddrSpace(AS) ||
16315+ AS == AMDGPUAS::BUFFER_FAT_POINTER) {
16316+ if (Subtarget->hasAtomicFMinFMaxF32GlobalInsts() && Ty->isFloatTy())
16317+ return ReportUnsafeHWInst(AtomicExpansionKind::None);
16318+ if (Subtarget->hasAtomicFMinFMaxF64GlobalInsts() && Ty->isDoubleTy())
16319+ return ReportUnsafeHWInst(AtomicExpansionKind::None);
16320+ }
1629816321 }
1629916322
1630016323 return AtomicExpansionKind::CmpXChg;
0 commit comments