Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 14 additions & 7 deletions llvm/lib/Target/AMDGPU/AMDGPU.td
Original file line number Diff line number Diff line change
Expand Up @@ -788,6 +788,13 @@ def FeatureFlatAtomicFaddF32Inst
"Has flat_atomic_add_f32 instruction"
>;

// Subtarget feature "flat-buffer-global-fadd-f64-inst". Enabling it sets the
// HasFlatBufferGlobalAtomicFaddF64Inst subtarget field to "true".
def FeatureFlatBufferGlobalAtomicFaddF64Inst
: SubtargetFeature<"flat-buffer-global-fadd-f64-inst",
"HasFlatBufferGlobalAtomicFaddF64Inst",
"true",
"Has flat, buffer, and global instructions for f64 atomic fadd"
>;

def FeatureMemoryAtomicFAddF32DenormalSupport
: SubtargetFeature<"memory-atomic-fadd-f32-denormal-support",
"HasMemoryAtomicFaddF32DenormalSupport",
Expand Down Expand Up @@ -1390,7 +1397,8 @@ def FeatureISAVersion9_0_A : FeatureSet<
FeatureBackOffBarrier,
FeatureKernargPreload,
FeatureAtomicFMinFMaxF64GlobalInsts,
FeatureAtomicFMinFMaxF64FlatInsts
FeatureAtomicFMinFMaxF64FlatInsts,
FeatureFlatBufferGlobalAtomicFaddF64Inst
])>;

def FeatureISAVersion9_0_C : FeatureSet<
Expand Down Expand Up @@ -1435,7 +1443,8 @@ def FeatureISAVersion9_4_Common : FeatureSet<
FeatureAtomicFMinFMaxF64GlobalInsts,
FeatureAtomicFMinFMaxF64FlatInsts,
FeatureAgentScopeFineGrainedRemoteMemoryAtomics,
FeatureMemoryAtomicFAddF32DenormalSupport
FeatureMemoryAtomicFAddF32DenormalSupport,
FeatureFlatBufferGlobalAtomicFaddF64Inst
]>;

def FeatureISAVersion9_4_0 : FeatureSet<
Expand Down Expand Up @@ -1932,11 +1941,9 @@ def isGFX12Plus :
def HasFlatAddressSpace : Predicate<"Subtarget->hasFlatAddressSpace()">,
AssemblerPredicate<(all_of FeatureFlatAddressSpace)>;


def HasBufferFlatGlobalAtomicsF64 : // FIXME: Rename to show it's only for fadd
Predicate<"Subtarget->hasBufferFlatGlobalAtomicsF64()">,
// FIXME: This is too coarse, and working around using pseudo's predicates on real instruction.
AssemblerPredicate<(any_of FeatureGFX90AInsts, FeatureGFX10Insts, FeatureSouthernIslands, FeatureSeaIslands)>;
// Predicate for the f64 atomic fadd instructions (flat, buffer, and global).
// The Predicate string tests Subtarget->hasFlatBufferGlobalAtomicFaddF64Inst();
// the AssemblerPredicate requires FeatureFlatBufferGlobalAtomicFaddF64Inst.
def HasFlatBufferGlobalAtomicFaddF64Inst :
Predicate<"Subtarget->hasFlatBufferGlobalAtomicFaddF64Inst()">,
AssemblerPredicate<(any_of FeatureFlatBufferGlobalAtomicFaddF64Inst)>;

def HasAtomicFMinFMaxF32GlobalInsts :
Predicate<"Subtarget->hasAtomicFMinFMaxF32GlobalInsts()">,
Expand Down
10 changes: 6 additions & 4 deletions llvm/lib/Target/AMDGPU/BUFInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -1312,14 +1312,16 @@ let SubtargetPredicate = isGFX90APlus in {
}
} // End SubtargetPredicate = isGFX90APlus

let SubtargetPredicate = HasBufferFlatGlobalAtomicsF64 in {
let SubtargetPredicate = HasFlatBufferGlobalAtomicFaddF64Inst in {
defm BUFFER_ATOMIC_ADD_F64 : MUBUF_Pseudo_Atomics<"buffer_atomic_add_f64", VReg_64, f64>;
} // End SubtargetPredicate = HasFlatBufferGlobalAtomicFaddF64Inst

let SubtargetPredicate = HasAtomicFMinFMaxF64GlobalInsts in {
// Note the names can be buffer_atomic_fmin_x2/buffer_atomic_fmax_x2
// depending on some subtargets.
defm BUFFER_ATOMIC_MIN_F64 : MUBUF_Pseudo_Atomics<"buffer_atomic_min_f64", VReg_64, f64>;
defm BUFFER_ATOMIC_MAX_F64 : MUBUF_Pseudo_Atomics<"buffer_atomic_max_f64", VReg_64, f64>;
} // End SubtargetPredicate = HasBufferFlatGlobalAtomicsF64
} // End SubtargetPredicate = HasAtomicFMinFMaxF64GlobalInsts

def BUFFER_INV : MUBUF_Invalidate<"buffer_inv"> {
let SubtargetPredicate = isGFX940Plus;
Expand Down Expand Up @@ -1836,9 +1838,9 @@ let SubtargetPredicate = HasAtomicBufferGlobalPkAddF16Insts in {
defm : SIBufferAtomicPat<"SIbuffer_atomic_fadd", v2f16, "BUFFER_ATOMIC_PK_ADD_F16", ["ret"]>;
} // End SubtargetPredicate = HasAtomicBufferGlobalPkAddF16Insts

let SubtargetPredicate = HasBufferFlatGlobalAtomicsF64 in {
let SubtargetPredicate = HasFlatBufferGlobalAtomicFaddF64Inst in {
defm : SIBufferAtomicPat<"SIbuffer_atomic_fadd", f64, "BUFFER_ATOMIC_ADD_F64">;
} // End SubtargetPredicate = HasBufferFlatGlobalAtomicsF64
} // End SubtargetPredicate = HasFlatBufferGlobalAtomicFaddF64Inst

let SubtargetPredicate = HasAtomicFMinFMaxF64GlobalInsts in {
defm : SIBufferAtomicPat<"SIbuffer_atomic_fmin", f64, "BUFFER_ATOMIC_MIN_F64">;
Expand Down
6 changes: 3 additions & 3 deletions llvm/lib/Target/AMDGPU/FLATInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -772,10 +772,10 @@ defm GLOBAL_ATOMIC_MIN_F64 : FLAT_Global_Atomic_Pseudo<"global_atomic_min_f64",
defm GLOBAL_ATOMIC_MAX_F64 : FLAT_Global_Atomic_Pseudo<"global_atomic_max_f64", VReg_64, f64>;
}

let SubtargetPredicate = HasBufferFlatGlobalAtomicsF64 in {
let SubtargetPredicate = HasFlatBufferGlobalAtomicFaddF64Inst in {
defm FLAT_ATOMIC_ADD_F64 : FLAT_Atomic_Pseudo<"flat_atomic_add_f64", VReg_64, f64>;
defm GLOBAL_ATOMIC_ADD_F64 : FLAT_Global_Atomic_Pseudo<"global_atomic_add_f64", VReg_64, f64>;
} // End SubtargetPredicate = HasBufferFlatGlobalAtomicsF64
} // End SubtargetPredicate = HasFlatBufferGlobalAtomicFaddF64Inst

let SubtargetPredicate = HasAtomicFlatPkAdd16Insts in {
defm FLAT_ATOMIC_PK_ADD_F16 : FLAT_Atomic_Pseudo<"flat_atomic_pk_add_f16", VGPR_32, v2f16>;
Expand Down Expand Up @@ -1655,7 +1655,7 @@ defm : FlatAtomicIntrPat <"FLAT_ATOMIC_MIN_F64", "int_amdgcn_flat_atomic_fmin",
defm : FlatAtomicIntrPat <"FLAT_ATOMIC_MAX_F64", "int_amdgcn_flat_atomic_fmax", f64>;
}

let OtherPredicates = [HasBufferFlatGlobalAtomicsF64] in {
let OtherPredicates = [HasFlatBufferGlobalAtomicFaddF64Inst] in {
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_ADD_F64", "atomic_load_fadd_global", f64>;
defm : GlobalFLATAtomicPatsWithAddrSpace<"GLOBAL_ATOMIC_ADD_F64", "int_amdgcn_flat_atomic_fadd", "global_addrspace", f64>;
defm : GlobalFLATAtomicPatsWithAddrSpace<"GLOBAL_ATOMIC_ADD_F64", "int_amdgcn_global_atomic_fadd", "global_addrspace", f64>;
Expand Down
10 changes: 7 additions & 3 deletions llvm/lib/Target/AMDGPU/GCNSubtarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool HasAtomicGlobalPkAddBF16Inst = false;
bool HasAtomicBufferPkAddBF16Inst = false;
bool HasFlatAtomicFaddF32Inst = false;
bool HasFlatBufferGlobalAtomicFaddF64Inst = false;
bool HasDefaultComponentZero = false;
bool HasAgentScopeFineGrainedRemoteMemoryAtomics = false;
bool HasDefaultComponentBroadcast = false;
Expand Down Expand Up @@ -660,9 +661,6 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
return GFX10_BEncoding;
}

// BUFFER/FLAT/GLOBAL_ATOMIC_ADD/MIN/MAX_F64
bool hasBufferFlatGlobalAtomicsF64() const { return hasGFX90AInsts(); }

bool hasExportInsts() const {
return !hasGFX940Insts();
}
Expand Down Expand Up @@ -873,6 +871,12 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,

bool hasFlatAtomicFaddF32Inst() const { return HasFlatAtomicFaddF32Inst; }

/// \return true when the subtarget has the f64 (double) atomic fadd
/// instructions in their flat, global, and buffer forms.
bool hasFlatBufferGlobalAtomicFaddF64Inst() const {
  return HasFlatBufferGlobalAtomicFaddF64Inst;
}

/// \return true if the target's flat, global, and buffer atomic fadd for
/// float supports denormal handling.
bool hasMemoryAtomicFaddF32DenormalSupport() const {
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/AMDGPU/SIISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16216,7 +16216,7 @@ SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
return AtomicExpansionKind::CmpXChg;

// global and flat atomic fadd f64: gfx90a, gfx940.
if (Subtarget->hasGFX90AInsts() && Ty->isDoubleTy())
if (Subtarget->hasFlatBufferGlobalAtomicFaddF64Inst() && Ty->isDoubleTy())
return ReportUnsafeHWInst(AtomicExpansionKind::None);

if (AS != AMDGPUAS::FLAT_ADDRESS) {
Expand Down