@@ -16118,6 +16118,34 @@ static bool isBFloat2(Type *Ty) {
1611816118 return VT && VT->getNumElements() == 2 && VT->getElementType()->isBFloatTy();
1611916119}
1612016120
16121+ /// \returns true if it's valid to emit a native instruction for \p RMW, based
16122+ /// on the properties of the target memory.
16123+ static bool globalMemoryFPAtomicIsLegal(const GCNSubtarget &Subtarget,
16124+ const AtomicRMWInst *RMW,
16125+ bool HasSystemScope) {
16126+ // The remote/fine-grained access logic is different from the integer
16127+ // atomics. Without AgentScopeFineGrainedRemoteMemoryAtomics support,
16128+ // fine-grained access does not work, even for a device local allocation.
16129+ //
16130+ // With AgentScopeFineGrainedRemoteMemoryAtomics, system scoped device local
16131+ // allocations work.
16132+ if (HasSystemScope) {
16133+ if (Subtarget.supportsAgentScopeFineGrainedRemoteMemoryAtomics() &&
16134+ RMW->hasMetadata("amdgpu.no.remote.memory"))
16135+ return true;
16136+ } else if (Subtarget.supportsAgentScopeFineGrainedRemoteMemoryAtomics())
16137+ return true;
16138+
16139+ if (RMW->hasMetadata("amdgpu.no.fine.grained.memory"))
16140+ return true;
16141+
16142+ // TODO: Auto-upgrade this attribute to the metadata in function body and stop
16143+ // checking it.
16144+ return RMW->getFunction()
16145+ ->getFnAttribute("amdgpu-unsafe-fp-atomics")
16146+ .getValueAsBool();
16147+ }
16148+
1612116149TargetLowering::AtomicExpansionKind
1612216150SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
1612316151 unsigned AS = RMW->getPointerAddressSpace();
@@ -16268,37 +16296,32 @@ SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
1626816296 Type *Ty = RMW->getType();
1626916297
1627016298 // LDS float and double fmin/fmax were always supported.
16271- if (AS == AMDGPUAS::LOCAL_ADDRESS && (Ty->isFloatTy() || Ty->isDoubleTy()))
16272- return AtomicExpansionKind::None;
16273-
16274- if (unsafeFPAtomicsDisabled(RMW->getFunction()))
16275- return AtomicExpansionKind::CmpXChg;
16276-
16277- // Always expand system scope fp atomics.
16278- if (HasSystemScope)
16279- return AtomicExpansionKind::CmpXChg;
16299+ if (AS == AMDGPUAS::LOCAL_ADDRESS) {
16300+ return Ty->isFloatTy() || Ty->isDoubleTy() ? AtomicExpansionKind::None
16301+ : AtomicExpansionKind::CmpXChg;
16302+ }
1628016303
16281- // For flat and global cases:
16282- // float, double in gfx7. Manual claims denormal support.
16283- // Removed in gfx8 .
16284- // float, double restored in gfx10 .
16285- // double removed again in gfx11, so only f32 for gfx11/gfx12 .
16286- //
16287- // For gfx9, gfx90a and gfx940 support f64 for global (same as fadd), but no
16288- // f32.
16289- //
16290- // FIXME: Check scope and fine grained memory
16291- if (AS == AMDGPUAS::FLAT_ADDRESS) {
16292- if (Subtarget->hasAtomicFMinFMaxF32FlatInsts() && Ty->isFloatTy())
16293- return ReportUnsafeHWInst(AtomicExpansionKind::None);
16294- if (Subtarget->hasAtomicFMinFMaxF64FlatInsts() && Ty->isDoubleTy())
16295- return ReportUnsafeHWInst(AtomicExpansionKind::None);
16296- } else if (AMDGPU::isExtendedGlobalAddrSpace(AS) ||
16297- AS == AMDGPUAS::BUFFER_FAT_POINTER) {
16298- if (Subtarget->hasAtomicFMinFMaxF32GlobalInsts() && Ty->isFloatTy())
16299- return ReportUnsafeHWInst(AtomicExpansionKind::None);
16300- if (Subtarget->hasAtomicFMinFMaxF64GlobalInsts() && Ty->isDoubleTy())
16301- return ReportUnsafeHWInst(AtomicExpansionKind::None);
16304+ if (globalMemoryFPAtomicIsLegal(*Subtarget, RMW, HasSystemScope)) {
16305+ // For flat and global cases:
16306+ // float, double in gfx7. Manual claims denormal support.
16307+ // Removed in gfx8.
16308+ // float, double restored in gfx10.
16309+ // double removed again in gfx11, so only f32 for gfx11/gfx12.
16310+ //
16311+ // For gfx9, gfx90a and gfx940 support f64 for global (same as fadd), but
16312+ // no f32.
16313+ if (AS == AMDGPUAS::FLAT_ADDRESS) {
16314+ if (Subtarget->hasAtomicFMinFMaxF32FlatInsts() && Ty->isFloatTy())
16315+ return ReportUnsafeHWInst(AtomicExpansionKind::None);
16316+ if (Subtarget->hasAtomicFMinFMaxF64FlatInsts() && Ty->isDoubleTy())
16317+ return ReportUnsafeHWInst(AtomicExpansionKind::None);
16318+ } else if (AMDGPU::isExtendedGlobalAddrSpace(AS) ||
16319+ AS == AMDGPUAS::BUFFER_FAT_POINTER) {
16320+ if (Subtarget->hasAtomicFMinFMaxF32GlobalInsts() && Ty->isFloatTy())
16321+ return ReportUnsafeHWInst(AtomicExpansionKind::None);
16322+ if (Subtarget->hasAtomicFMinFMaxF64GlobalInsts() && Ty->isDoubleTy())
16323+ return ReportUnsafeHWInst(AtomicExpansionKind::None);
16324+ }
1630216325 }
1630316326
1630416327 return AtomicExpansionKind::CmpXChg;
0 commit comments