@@ -16110,6 +16110,34 @@ static bool isBFloat2(Type *Ty) {
1611016110 return VT && VT->getNumElements() == 2 && VT->getElementType()->isBFloatTy();
1611116111}
1611216112
16113+ /// \returns true if it's valid to emit a native instruction for \p RMW, based
16114+ /// on the properties of the target memory.
16115+ static bool globalMemoryFPAtomicIsLegal(const GCNSubtarget &Subtarget,
16116+ const AtomicRMWInst *RMW,
16117+ bool HasSystemScope) {
16118+ // The remote/fine-grained access logic is different from the integer
16119+ // atomics. Without AgentScopeFineGrainedRemoteMemoryAtomics support,
16120+ // fine-grained access does not work, even for a device local allocation.
16121+ //
16122+ // With AgentScopeFineGrainedRemoteMemoryAtomics, system scoped device local
16123+ // allocations work.
16124+ if (HasSystemScope) {
16125+ if (Subtarget.supportsAgentScopeFineGrainedRemoteMemoryAtomics() &&
16126+ RMW->hasMetadata("amdgpu.no.remote.memory"))
16127+ return true;
16128+ } else if (Subtarget.supportsAgentScopeFineGrainedRemoteMemoryAtomics())
16129+ return true;
16130+
16131+ if (RMW->hasMetadata("amdgpu.no.fine.grained.memory"))
16132+ return true;
16133+
16134+  // TODO: Auto-upgrade this attribute to the metadata in the function body
16135+  // and stop checking it.
16136+ return RMW->getFunction()
16137+ ->getFnAttribute("amdgpu-unsafe-fp-atomics")
16138+ .getValueAsBool();
16139+ }
16140+
1611316141TargetLowering::AtomicExpansionKind
1611416142SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
1611516143 unsigned AS = RMW->getPointerAddressSpace();
@@ -16253,37 +16281,32 @@ SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
1625316281 Type *Ty = RMW->getType();
1625416282
1625516283 // LDS float and double fmin/fmax were always supported.
16256- if (AS == AMDGPUAS::LOCAL_ADDRESS && (Ty->isFloatTy() || Ty->isDoubleTy()))
16257- return AtomicExpansionKind::None;
16258-
16259- if (unsafeFPAtomicsDisabled(RMW->getFunction()))
16260- return AtomicExpansionKind::CmpXChg;
16261-
16262- // Always expand system scope fp atomics.
16263- if (HasSystemScope)
16264- return AtomicExpansionKind::CmpXChg;
16284+ if (AS == AMDGPUAS::LOCAL_ADDRESS) {
16285+ return Ty->isFloatTy() || Ty->isDoubleTy() ? AtomicExpansionKind::None
16286+ : AtomicExpansionKind::CmpXChg;
16287+ }
1626516288
16266- // For flat and global cases:
16267- // float, double in gfx7. Manual claims denormal support.
16268- // Removed in gfx8 .
16269- // float, double restored in gfx10 .
16270- // double removed again in gfx11, so only f32 for gfx11/gfx12 .
16271- //
16272- // For gfx9, gfx90a and gfx940 support f64 for global (same as fadd), but no
16273- // f32.
16274- //
16275- // FIXME: Check scope and fine grained memory
16276- if (AS == AMDGPUAS::FLAT_ADDRESS) {
16277- if (Subtarget->hasAtomicFMinFMaxF32FlatInsts() && Ty->isFloatTy())
16278- return ReportUnsafeHWInst(AtomicExpansionKind::None);
16279- if (Subtarget->hasAtomicFMinFMaxF64FlatInsts() && Ty->isDoubleTy())
16280- return ReportUnsafeHWInst(AtomicExpansionKind::None);
16281- } else if (AMDGPU::isExtendedGlobalAddrSpace(AS) ||
16282- AS == AMDGPUAS::BUFFER_FAT_POINTER) {
16283- if (Subtarget->hasAtomicFMinFMaxF32GlobalInsts() && Ty->isFloatTy())
16284- return ReportUnsafeHWInst(AtomicExpansionKind::None);
16285- if (Subtarget->hasAtomicFMinFMaxF64GlobalInsts() && Ty->isDoubleTy())
16286- return ReportUnsafeHWInst(AtomicExpansionKind::None);
16289+ if (globalMemoryFPAtomicIsLegal(*Subtarget, RMW, HasSystemScope)) {
16290+ // For flat and global cases:
16291+    // float, double in gfx7. Manual claims denormal support.
16292+    // Removed in gfx8.
16293+    // float, double restored in gfx10.
16294+ // double removed again in gfx11, so only f32 for gfx11/gfx12.
16295+ //
16296+ // For gfx9, gfx90a and gfx940 support f64 for global (same as fadd), but
16297+ // no f32.
16298+ if (AS == AMDGPUAS::FLAT_ADDRESS) {
16299+ if (Subtarget->hasAtomicFMinFMaxF32FlatInsts() && Ty->isFloatTy())
16300+ return ReportUnsafeHWInst(AtomicExpansionKind::None);
16301+ if (Subtarget->hasAtomicFMinFMaxF64FlatInsts() && Ty->isDoubleTy())
16302+ return ReportUnsafeHWInst(AtomicExpansionKind::None);
16303+ } else if (AMDGPU::isExtendedGlobalAddrSpace(AS) ||
16304+ AS == AMDGPUAS::BUFFER_FAT_POINTER) {
16305+ if (Subtarget->hasAtomicFMinFMaxF32GlobalInsts() && Ty->isFloatTy())
16306+ return ReportUnsafeHWInst(AtomicExpansionKind::None);
16307+ if (Subtarget->hasAtomicFMinFMaxF64GlobalInsts() && Ty->isDoubleTy())
16308+ return ReportUnsafeHWInst(AtomicExpansionKind::None);
16309+ }
1628716310 }
1628816311
1628916312 return AtomicExpansionKind::CmpXChg;
0 commit comments