@@ -16075,56 +16075,49 @@ SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
1607516075 return AtomicExpansionKind::CmpXChg;
1607616076 }
1607716077
16078- if (!Ty->isFloatTy( ) && (!Subtarget->hasGFX90AInsts() || !Ty->isDoubleTy()) )
16078+ if (!AMDGPU::isFlatGlobalAddrSpace(AS ) && AS != AMDGPUAS::BUFFER_FAT_POINTER )
1607916079 return AtomicExpansionKind::CmpXChg;
1608016080
16081- if ((AMDGPU::isFlatGlobalAddrSpace(AS) ||
16082- AS == AMDGPUAS::BUFFER_FAT_POINTER) &&
16083- Subtarget->hasAtomicFaddNoRtnInsts()) {
16084- if (Subtarget->hasGFX940Insts())
16085- return AtomicExpansionKind::None;
16081+ // TODO: gfx940 supports v2f16 and v2bf16
16082+ if (Subtarget->hasGFX940Insts() && (Ty->isFloatTy() || Ty->isDoubleTy()))
16083+ return AtomicExpansionKind::None;
1608616084
16087- if (unsafeFPAtomicsDisabled(RMW->getFunction()))
16088- return AtomicExpansionKind::CmpXChg;
16085+ if (unsafeFPAtomicsDisabled(RMW->getFunction()))
16086+ return AtomicExpansionKind::CmpXChg;
1608916087
16090- // Always expand system scope fp atomics.
16091- if (HasSystemScope)
16092- return AtomicExpansionKind::CmpXChg;
16088+ // Always expand system scope fp atomics.
16089+ if (HasSystemScope)
16090+ return AtomicExpansionKind::CmpXChg;
1609316091
16094- if ((AMDGPU::isExtendedGlobalAddrSpace(AS) ||
16095- AS == AMDGPUAS::BUFFER_FAT_POINTER) &&
16096- Ty->isFloatTy()) {
16097- // global/buffer atomic fadd f32 no-rtn: gfx908, gfx90a, gfx940, gfx11+.
16098- if (RMW->use_empty() && Subtarget->hasAtomicFaddNoRtnInsts())
16099- return ReportUnsafeHWInst(AtomicExpansionKind::None);
16100- // global/buffer atomic fadd f32 rtn: gfx90a, gfx940, gfx11+.
16101- if (!RMW->use_empty() && Subtarget->hasAtomicFaddRtnInsts())
16102- return ReportUnsafeHWInst(AtomicExpansionKind::None);
16103- }
16092+ // global and flat atomic fadd f64: gfx90a, gfx940.
16093+ if (Subtarget->hasGFX90AInsts() && Ty->isDoubleTy())
16094+ return ReportUnsafeHWInst(AtomicExpansionKind::None);
1610416095
16105- // flat atomic fadd f32: gfx940, gfx11+.
16106- if (AS == AMDGPUAS::FLAT_ADDRESS && Ty->isFloatTy() &&
16107- Subtarget->hasFlatAtomicFaddF32Inst())
16096+ if (AS != AMDGPUAS::FLAT_ADDRESS && Ty->isFloatTy()) {
16097+ // global/buffer atomic fadd f32 no-rtn: gfx908, gfx90a, gfx940, gfx11+.
16098+ if (RMW->use_empty() && Subtarget->hasAtomicFaddNoRtnInsts())
16099+ return ReportUnsafeHWInst(AtomicExpansionKind::None);
16100+ // global/buffer atomic fadd f32 rtn: gfx90a, gfx940, gfx11+.
16101+ if (!RMW->use_empty() && Subtarget->hasAtomicFaddRtnInsts())
1610816102 return ReportUnsafeHWInst(AtomicExpansionKind::None);
16103+ }
1610916104
16110- // global and flat atomic fadd f64: gfx90a, gfx940.
16111- if (Ty->isDoubleTy() && Subtarget->hasGFX90AInsts())
16105+ // flat atomic fadd f32: gfx940, gfx11+.
16106+ if (AS == AMDGPUAS::FLAT_ADDRESS && Ty->isFloatTy()) {
16107+ if (Subtarget->hasFlatAtomicFaddF32Inst())
1611216108 return ReportUnsafeHWInst(AtomicExpansionKind::None);
1611316109
1611416110 // If it is in flat address space, and the type is float, we will try to
1611516111 // expand it, if the target supports global and lds atomic fadd. The
1611616112 // reason we need that is, in the expansion, we emit the check of address
1611716113 // space. If it is in global address space, we emit the global atomic
1611816114 // fadd; if it is in shared address space, we emit the LDS atomic fadd.
16119- if (AS == AMDGPUAS::FLAT_ADDRESS && Ty->isFloatTy() &&
16120- Subtarget->hasLDSFPAtomicAddF32()) {
16115+ if (Subtarget->hasLDSFPAtomicAddF32()) {
1612116116 if (RMW->use_empty() && Subtarget->hasAtomicFaddNoRtnInsts())
1612216117 return AtomicExpansionKind::Expand;
1612316118 if (!RMW->use_empty() && Subtarget->hasAtomicFaddRtnInsts())
1612416119 return AtomicExpansionKind::Expand;
1612516120 }
16126-
16127- return AtomicExpansionKind::CmpXChg;
1612816121 }
1612916122
1613016123 return AtomicExpansionKind::CmpXChg;
0 commit comments