Skip to content

Commit baaf961

Browse files
committed
AMDGPU: Add subtarget feature for memory atomic fadd f64
1 parent 4594135 commit baaf961

File tree

5 files changed

+31
-18
lines changed

5 files changed

+31
-18
lines changed

llvm/lib/Target/AMDGPU/AMDGPU.td

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -788,6 +788,13 @@ def FeatureFlatAtomicFaddF32Inst
788788
"Has flat_atomic_add_f32 instruction"
789789
>;
790790

791+
def FeatureFlatBufferGlobalAtomicFaddF64Inst
792+
: SubtargetFeature<"flat-buffer-global-fadd-f64-inst",
793+
"HasFlatBufferGlobalAtomicFaddF64Inst",
794+
"true",
795+
"Has flat, buffer, and global instructions for f64 atomic fadd"
796+
>;
797+
791798
def FeatureMemoryAtomicFaddF32DenormalSupport
792799
: SubtargetFeature<"memory-atomic-fadd-f32-denormal-support",
793800
"HasAtomicMemoryAtomicFaddF32DenormalSupport",
@@ -1388,7 +1395,8 @@ def FeatureISAVersion9_0_A : FeatureSet<
13881395
FeatureBackOffBarrier,
13891396
FeatureKernargPreload,
13901397
FeatureAtomicFMinFMaxF64GlobalInsts,
1391-
FeatureAtomicFMinFMaxF64FlatInsts
1398+
FeatureAtomicFMinFMaxF64FlatInsts,
1399+
FeatureFlatBufferGlobalAtomicFaddF64Inst
13921400
])>;
13931401

13941402
def FeatureISAVersion9_0_C : FeatureSet<
@@ -1433,7 +1441,8 @@ def FeatureISAVersion9_4_Common : FeatureSet<
14331441
FeatureAtomicFMinFMaxF64GlobalInsts,
14341442
FeatureAtomicFMinFMaxF64FlatInsts,
14351443
FeatureAgentScopeFineGrainedRemoteMemoryAtomics,
1436-
FeatureMemoryAtomicFaddF32DenormalSupport
1444+
FeatureMemoryAtomicFaddF32DenormalSupport,
1445+
FeatureFlatBufferGlobalAtomicFaddF64Inst
14371446
]>;
14381447

14391448
def FeatureISAVersion9_4_0 : FeatureSet<
@@ -1928,11 +1937,9 @@ def isGFX12Plus :
19281937
def HasFlatAddressSpace : Predicate<"Subtarget->hasFlatAddressSpace()">,
19291938
AssemblerPredicate<(all_of FeatureFlatAddressSpace)>;
19301939

1931-
1932-
def HasBufferFlatGlobalAtomicsF64 : // FIXME: Rename to show it's only for fadd
1933-
Predicate<"Subtarget->hasBufferFlatGlobalAtomicsF64()">,
1934-
// FIXME: This is too coarse, and working around using pseudo's predicates on real instruction.
1935-
AssemblerPredicate<(any_of FeatureGFX90AInsts, FeatureGFX10Insts, FeatureSouthernIslands, FeatureSeaIslands)>;
1940+
def HasFlatBufferGlobalAtomicFaddF64Inst :
1941+
Predicate<"Subtarget->hasFlatBufferGlobalAtomicFaddF64Inst()">,
1942+
AssemblerPredicate<(any_of FeatureFlatBufferGlobalAtomicFaddF64Inst)>;
19361943

19371944
def HasAtomicFMinFMaxF32GlobalInsts :
19381945
Predicate<"Subtarget->hasAtomicFMinFMaxF32GlobalInsts()">,

llvm/lib/Target/AMDGPU/BUFInstructions.td

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1312,14 +1312,16 @@ let SubtargetPredicate = isGFX90APlus in {
13121312
}
13131313
} // End SubtargetPredicate = isGFX90APlus
13141314

1315-
let SubtargetPredicate = HasBufferFlatGlobalAtomicsF64 in {
1315+
let SubtargetPredicate = HasFlatBufferGlobalAtomicFaddF64Inst in {
13161316
defm BUFFER_ATOMIC_ADD_F64 : MUBUF_Pseudo_Atomics<"buffer_atomic_add_f64", VReg_64, f64>;
1317+
} // End SubtargetPredicate = HasFlatBufferGlobalAtomicFaddF64Inst
13171318

1319+
let SubtargetPredicate = HasAtomicFMinFMaxF64GlobalInsts in {
13181320
// Note the names can be buffer_atomic_fmin_x2/buffer_atomic_fmax_x2
13191321
// depending on some subtargets.
13201322
defm BUFFER_ATOMIC_MIN_F64 : MUBUF_Pseudo_Atomics<"buffer_atomic_min_f64", VReg_64, f64>;
13211323
defm BUFFER_ATOMIC_MAX_F64 : MUBUF_Pseudo_Atomics<"buffer_atomic_max_f64", VReg_64, f64>;
1322-
} // End SubtargetPredicate = HasBufferFlatGlobalAtomicsF64
1324+
} // End SubtargetPredicate = HasAtomicFMinFMaxF64GlobalInsts
13231325

13241326
def BUFFER_INV : MUBUF_Invalidate<"buffer_inv"> {
13251327
let SubtargetPredicate = isGFX940Plus;
@@ -1836,9 +1838,9 @@ let SubtargetPredicate = HasAtomicBufferGlobalPkAddF16Insts in {
18361838
defm : SIBufferAtomicPat<"SIbuffer_atomic_fadd", v2f16, "BUFFER_ATOMIC_PK_ADD_F16", ["ret"]>;
18371839
} // End SubtargetPredicate = HasAtomicBufferGlobalPkAddF16Insts
18381840

1839-
let SubtargetPredicate = HasBufferFlatGlobalAtomicsF64 in {
1841+
let SubtargetPredicate = HasFlatBufferGlobalAtomicFaddF64Inst in {
18401842
defm : SIBufferAtomicPat<"SIbuffer_atomic_fadd", f64, "BUFFER_ATOMIC_ADD_F64">;
1841-
} // End SubtargetPredicate = HasBufferFlatGlobalAtomicsF64
1843+
} // End SubtargetPredicate = HasFlatBufferGlobalAtomicFaddF64Inst
18421844

18431845
let SubtargetPredicate = HasAtomicFMinFMaxF64GlobalInsts in {
18441846
defm : SIBufferAtomicPat<"SIbuffer_atomic_fmin", f64, "BUFFER_ATOMIC_MIN_F64">;

llvm/lib/Target/AMDGPU/FLATInstructions.td

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -772,10 +772,10 @@ defm GLOBAL_ATOMIC_MIN_F64 : FLAT_Global_Atomic_Pseudo<"global_atomic_min_f64",
772772
defm GLOBAL_ATOMIC_MAX_F64 : FLAT_Global_Atomic_Pseudo<"global_atomic_max_f64", VReg_64, f64>;
773773
}
774774

775-
let SubtargetPredicate = HasBufferFlatGlobalAtomicsF64 in {
775+
let SubtargetPredicate = HasFlatBufferGlobalAtomicFaddF64Inst in {
776776
defm FLAT_ATOMIC_ADD_F64 : FLAT_Atomic_Pseudo<"flat_atomic_add_f64", VReg_64, f64>;
777777
defm GLOBAL_ATOMIC_ADD_F64 : FLAT_Global_Atomic_Pseudo<"global_atomic_add_f64", VReg_64, f64>;
778-
} // End SubtargetPredicate = HasBufferFlatGlobalAtomicsF64
778+
} // End SubtargetPredicate = HasFlatBufferGlobalAtomicFaddF64Inst
779779

780780
let SubtargetPredicate = HasAtomicFlatPkAdd16Insts in {
781781
defm FLAT_ATOMIC_PK_ADD_F16 : FLAT_Atomic_Pseudo<"flat_atomic_pk_add_f16", VGPR_32, v2f16>;
@@ -1654,7 +1654,7 @@ defm : FlatAtomicIntrPat <"FLAT_ATOMIC_MIN_F64", "int_amdgcn_flat_atomic_fmin",
16541654
defm : FlatAtomicIntrPat <"FLAT_ATOMIC_MAX_F64", "int_amdgcn_flat_atomic_fmax", f64>;
16551655
}
16561656

1657-
let OtherPredicates = [HasBufferFlatGlobalAtomicsF64] in {
1657+
let OtherPredicates = [HasFlatBufferGlobalAtomicFaddF64Inst] in {
16581658
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_ADD_F64", "atomic_load_fadd_global", f64>;
16591659
defm : GlobalFLATAtomicPatsWithAddrSpace<"GLOBAL_ATOMIC_ADD_F64", "int_amdgcn_flat_atomic_fadd", "global_addrspace", f64>;
16601660
defm : GlobalFLATAtomicPatsWithAddrSpace<"GLOBAL_ATOMIC_ADD_F64", "int_amdgcn_global_atomic_fadd", "global_addrspace", f64>;

llvm/lib/Target/AMDGPU/GCNSubtarget.h

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -174,6 +174,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
174174
bool HasAtomicGlobalPkAddBF16Inst = false;
175175
bool HasAtomicBufferPkAddBF16Inst = false;
176176
bool HasFlatAtomicFaddF32Inst = false;
177+
bool HasFlatBufferGlobalAtomicFaddF64Inst = false;
177178
bool HasDefaultComponentZero = false;
178179
bool HasAgentScopeFineGrainedRemoteMemoryAtomics = false;
179180
bool HasDefaultComponentBroadcast = false;
@@ -660,9 +661,6 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
660661
return GFX10_BEncoding;
661662
}
662663

663-
// BUFFER/FLAT/GLOBAL_ATOMIC_ADD/MIN/MAX_F64
664-
bool hasBufferFlatGlobalAtomicsF64() const { return hasGFX90AInsts(); }
665-
666664
bool hasExportInsts() const {
667665
return !hasGFX940Insts();
668666
}
@@ -873,6 +871,12 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
873871

874872
bool hasFlatAtomicFaddF32Inst() const { return HasFlatAtomicFaddF32Inst; }
875873

874+
/// \return true if the target has flat, global, and buffer atomic fadd for
875+
/// double.
876+
bool hasFlatBufferGlobalAtomicFaddF64Inst() const {
877+
return HasFlatBufferGlobalAtomicFaddF64Inst;
878+
}
879+
876880
/// \return true if the target's flat, global, and buffer atomic fadd for
877881
/// float supports denormal handling.
878882
bool hasMemoryAtomicFaddF32DenormalSupport() const {

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16028,7 +16028,7 @@ SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
1602816028
return AtomicExpansionKind::CmpXChg;
1602916029

1603016030
// global and flat atomic fadd f64: targets with
// FeatureFlatBufferGlobalAtomicFaddF64Inst (currently gfx90a, gfx940).
16031-
if (Subtarget->hasGFX90AInsts() && Ty->isDoubleTy())
16031+
if (Subtarget->hasFlatBufferGlobalAtomicFaddF64Inst() && Ty->isDoubleTy())
1603216032
return ReportUnsafeHWInst(AtomicExpansionKind::None);
1603316033

1603416034
if (AS != AMDGPUAS::FLAT_ADDRESS && Ty->isFloatTy()) {

0 commit comments

Comments
 (0)