@@ -16093,6 +16093,34 @@ static bool isBFloat2(Type *Ty) {
   return VT && VT->getNumElements() == 2 && VT->getElementType()->isBFloatTy();
 }
 
+/// \returns true if it's valid to emit a native instruction for \p RMW, based
+/// on the properties of the target memory.
+static bool globalMemoryFPAtomicIsLegal(const GCNSubtarget &Subtarget,
+                                        const AtomicRMWInst *RMW,
+                                        bool HasSystemScope) {
+  // The remote/fine-grained access logic is different from the integer
+  // atomics. Without AgentScopeFineGrainedRemoteMemoryAtomics support,
+  // fine-grained access does not work, even for a device local allocation.
+  //
+  // With AgentScopeFineGrainedRemoteMemoryAtomics, system scoped device local
+  // allocations work.
+  if (HasSystemScope) {
+    if (Subtarget.supportsAgentScopeFineGrainedRemoteMemoryAtomics() &&
+        RMW->hasMetadata("amdgpu.no.remote.memory"))
+      return true;
+  } else if (Subtarget.supportsAgentScopeFineGrainedRemoteMemoryAtomics())
+    return true;
+
+  if (RMW->hasMetadata("amdgpu.no.fine.grained.memory"))
+    return true;
+
+  // TODO: Auto-upgrade this attribute to the metadata in function body and
+  // stop checking it.
+  return RMW->getFunction()
+      ->getFnAttribute("amdgpu-unsafe-fp-atomics")
+      .getValueAsBool();
+}
+
 TargetLowering::AtomicExpansionKind
 SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
   unsigned AS = RMW->getPointerAddressSpace();
@@ -16236,37 +16264,32 @@ SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
     Type *Ty = RMW->getType();
 
     // LDS float and double fmin/fmax were always supported.
-    if (AS == AMDGPUAS::LOCAL_ADDRESS && (Ty->isFloatTy() || Ty->isDoubleTy()))
-      return AtomicExpansionKind::None;
-
-    if (unsafeFPAtomicsDisabled(RMW->getFunction()))
-      return AtomicExpansionKind::CmpXChg;
-
-    // Always expand system scope fp atomics.
-    if (HasSystemScope)
-      return AtomicExpansionKind::CmpXChg;
+    if (AS == AMDGPUAS::LOCAL_ADDRESS) {
+      return Ty->isFloatTy() || Ty->isDoubleTy() ? AtomicExpansionKind::None
+                                                 : AtomicExpansionKind::CmpXChg;
+    }
 
-    // For flat and global cases:
-    //   float, double in gfx7. Manual claims denormal support.
-    //     Removed in gfx8.
-    //     float, double restored in gfx10.
-    //     double removed again in gfx11, so only f32 for gfx11/gfx12.
-    //
-    // For gfx9, gfx90a and gfx940 support f64 for global (same as fadd), but no
-    // f32.
-    //
-    // FIXME: Check scope and fine grained memory
-    if (AS == AMDGPUAS::FLAT_ADDRESS) {
-      if (Subtarget->hasAtomicFMinFMaxF32FlatInsts() && Ty->isFloatTy())
-        return ReportUnsafeHWInst(AtomicExpansionKind::None);
-      if (Subtarget->hasAtomicFMinFMaxF64FlatInsts() && Ty->isDoubleTy())
-        return ReportUnsafeHWInst(AtomicExpansionKind::None);
-    } else if (AMDGPU::isExtendedGlobalAddrSpace(AS) ||
-               AS == AMDGPUAS::BUFFER_FAT_POINTER) {
-      if (Subtarget->hasAtomicFMinFMaxF32GlobalInsts() && Ty->isFloatTy())
-        return ReportUnsafeHWInst(AtomicExpansionKind::None);
-      if (Subtarget->hasAtomicFMinFMaxF64GlobalInsts() && Ty->isDoubleTy())
-        return ReportUnsafeHWInst(AtomicExpansionKind::None);
+    if (globalMemoryFPAtomicIsLegal(*Subtarget, RMW, HasSystemScope)) {
+      // For flat and global cases:
+      //   float, double in gfx7. Manual claims denormal support.
+      //     Removed in gfx8.
+      //     float, double restored in gfx10.
+      //     double removed again in gfx11, so only f32 for gfx11/gfx12.
+      //
+      // For gfx9, gfx90a and gfx940 support f64 for global (same as fadd), but
+      // no f32.
+      if (AS == AMDGPUAS::FLAT_ADDRESS) {
+        if (Subtarget->hasAtomicFMinFMaxF32FlatInsts() && Ty->isFloatTy())
+          return ReportUnsafeHWInst(AtomicExpansionKind::None);
+        if (Subtarget->hasAtomicFMinFMaxF64FlatInsts() && Ty->isDoubleTy())
+          return ReportUnsafeHWInst(AtomicExpansionKind::None);
+      } else if (AMDGPU::isExtendedGlobalAddrSpace(AS) ||
+                 AS == AMDGPUAS::BUFFER_FAT_POINTER) {
+        if (Subtarget->hasAtomicFMinFMaxF32GlobalInsts() && Ty->isFloatTy())
+          return ReportUnsafeHWInst(AtomicExpansionKind::None);
+        if (Subtarget->hasAtomicFMinFMaxF64GlobalInsts() && Ty->isDoubleTy())
+          return ReportUnsafeHWInst(AtomicExpansionKind::None);
+      }
     }
 
     return AtomicExpansionKind::CmpXChg;
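
Editor's note: below is a minimal C++ sketch, not part of this patch, showing how a producer might tag an atomicrmw fmin so that the new globalMemoryFPAtomicIsLegal() check can succeed. The emitTaggedFMin helper and its parameters are hypothetical; the metadata kind names "amdgpu.no.fine.grained.memory" and "amdgpu.no.remote.memory" are the ones the patch queries via hasMetadata(). On subtargets without AgentScopeFineGrainedRemoteMemoryAtomics support, omitting these hints (and the legacy "amdgpu-unsafe-fp-atomics" function attribute) leaves the operation expanding to a cmpxchg loop.

// Hypothetical producer-side helper (illustrative only, not from the patch).
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"

using namespace llvm;

static AtomicRMWInst *emitTaggedFMin(IRBuilder<> &B, Value *Ptr, Value *Val) {
  // Agent-scoped fmin; with the metadata below, the AMDGPU backend may select
  // a native instruction instead of expanding to a cmpxchg loop.
  AtomicRMWInst *RMW = B.CreateAtomicRMW(
      AtomicRMWInst::FMin, Ptr, Val, MaybeAlign(), AtomicOrdering::Monotonic,
      B.getContext().getOrInsertSyncScopeID("agent"));
  // Empty nodes: only the presence of these metadata kinds is checked.
  MDNode *Empty = MDNode::get(B.getContext(), {});
  RMW->setMetadata("amdgpu.no.fine.grained.memory", Empty);
  RMW->setMetadata("amdgpu.no.remote.memory", Empty);
  return RMW;
}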