Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 0 additions & 5 deletions llvm/include/llvm/IR/IntrinsicsAMDGPU.td
Original file line number Diff line number Diff line change
Expand Up @@ -2966,11 +2966,6 @@ def int_amdgcn_mfma_f32_16x16x8bf16 : AMDGPUMfmaIntrinsic<llvm_v4f32_ty, llvm_v
// gfx90a intrinsics
// ===----------------------------------------------------------------------===//

def int_amdgcn_global_atomic_fmin : AMDGPUAtomicRtn<llvm_anyfloat_ty>;
def int_amdgcn_global_atomic_fmax : AMDGPUAtomicRtn<llvm_anyfloat_ty>;
def int_amdgcn_flat_atomic_fmin : AMDGPUAtomicRtn<llvm_anyfloat_ty>;
def int_amdgcn_flat_atomic_fmax : AMDGPUAtomicRtn<llvm_anyfloat_ty>;

defset list<Intrinsic> AMDGPUMFMAIntrinsics90A = {
def int_amdgcn_mfma_f32_32x32x4bf16_1k : AMDGPUMfmaIntrinsic<llvm_v32f32_ty, llvm_v4i16_ty>;
def int_amdgcn_mfma_f32_16x16x4bf16_1k : AMDGPUMfmaIntrinsic<llvm_v16f32_ty, llvm_v4i16_ty>;
Expand Down
25 changes: 16 additions & 9 deletions llvm/lib/IR/AutoUpgrade.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1041,14 +1041,17 @@ static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn,
break; // No other 'amdgcn.atomic.*'
}

if (Name.starts_with("ds.fadd") || Name.starts_with("ds.fmin") ||
Name.starts_with("ds.fmax") ||
Name.starts_with("global.atomic.fadd") ||
Name.starts_with("flat.atomic.fadd")) {
// Replaced with atomicrmw fadd/fmin/fmax, so there's no new
// declaration.
NewFn = nullptr;
return true;
if (Name.consume_front("ds.") || Name.consume_front("global.atomic.") ||
Name.consume_front("flat.atomic.")) {
if (Name.starts_with("fadd") ||
// FIXME: We should also remove fmin.num and fmax.num intrinsics.
(Name.starts_with("fmin") && !Name.starts_with("fmin.num")) ||
(Name.starts_with("fmax") && !Name.starts_with("fmax.num"))) {
// Replaced with atomicrmw fadd/fmin/fmax, so there's no new
// declaration.
NewFn = nullptr;
return true;
}
}

if (Name.starts_with("ldexp.")) {
Expand Down Expand Up @@ -4218,7 +4221,11 @@ static Value *upgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI,
.StartsWith("atomic.inc.", AtomicRMWInst::UIncWrap)
.StartsWith("atomic.dec.", AtomicRMWInst::UDecWrap)
.StartsWith("global.atomic.fadd", AtomicRMWInst::FAdd)
.StartsWith("flat.atomic.fadd", AtomicRMWInst::FAdd);
.StartsWith("flat.atomic.fadd", AtomicRMWInst::FAdd)
.StartsWith("global.atomic.fmin", AtomicRMWInst::FMin)
.StartsWith("flat.atomic.fmin", AtomicRMWInst::FMin)
.StartsWith("global.atomic.fmax", AtomicRMWInst::FMax)
.StartsWith("flat.atomic.fmax", AtomicRMWInst::FMax);

unsigned NumOperands = CI->getNumOperands();
if (NumOperands < 3) // Malformed bitcode.
Expand Down
4 changes: 0 additions & 4 deletions llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -618,10 +618,6 @@ multiclass local_addr_space_atomic_op {
}
}

defm int_amdgcn_flat_atomic_fmin : noret_op;
defm int_amdgcn_flat_atomic_fmax : noret_op;
defm int_amdgcn_global_atomic_fmin : noret_op;
defm int_amdgcn_global_atomic_fmax : noret_op;
defm int_amdgcn_global_atomic_csub : noret_op;
defm int_amdgcn_global_atomic_ordered_add_b64 : noret_op;
defm int_amdgcn_flat_atomic_fmin_num : noret_op;
Expand Down
4 changes: 0 additions & 4 deletions llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4913,12 +4913,8 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
break;
}
case Intrinsic::amdgcn_global_atomic_csub:
case Intrinsic::amdgcn_global_atomic_fmin:
case Intrinsic::amdgcn_global_atomic_fmax:
case Intrinsic::amdgcn_global_atomic_fmin_num:
case Intrinsic::amdgcn_global_atomic_fmax_num:
case Intrinsic::amdgcn_flat_atomic_fmin:
case Intrinsic::amdgcn_flat_atomic_fmax:
case Intrinsic::amdgcn_flat_atomic_fmin_num:
case Intrinsic::amdgcn_flat_atomic_fmax_num:
case Intrinsic::amdgcn_atomic_cond_sub_u32:
Expand Down
4 changes: 0 additions & 4 deletions llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td
Original file line number Diff line number Diff line change
Expand Up @@ -239,13 +239,9 @@ def : SourceOfDivergence<int_r600_read_tidig_y>;
def : SourceOfDivergence<int_r600_read_tidig_z>;
def : SourceOfDivergence<int_amdgcn_atomic_cond_sub_u32>;
def : SourceOfDivergence<int_amdgcn_global_atomic_csub>;
def : SourceOfDivergence<int_amdgcn_global_atomic_fmin>;
def : SourceOfDivergence<int_amdgcn_global_atomic_fmax>;
def : SourceOfDivergence<int_amdgcn_global_atomic_fmin_num>;
def : SourceOfDivergence<int_amdgcn_global_atomic_fmax_num>;
def : SourceOfDivergence<int_amdgcn_global_atomic_ordered_add_b64>;
def : SourceOfDivergence<int_amdgcn_flat_atomic_fmin>;
def : SourceOfDivergence<int_amdgcn_flat_atomic_fmax>;
def : SourceOfDivergence<int_amdgcn_flat_atomic_fmin_num>;
def : SourceOfDivergence<int_amdgcn_flat_atomic_fmax_num>;
def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_swap>;
Expand Down
4 changes: 0 additions & 4 deletions llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1045,8 +1045,6 @@ bool GCNTTIImpl::collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
switch (IID) {
case Intrinsic::amdgcn_is_shared:
case Intrinsic::amdgcn_is_private:
case Intrinsic::amdgcn_flat_atomic_fmax:
case Intrinsic::amdgcn_flat_atomic_fmin:
case Intrinsic::amdgcn_flat_atomic_fmax_num:
case Intrinsic::amdgcn_flat_atomic_fmin_num:
OpIndexes.push_back(0);
Expand Down Expand Up @@ -1106,8 +1104,6 @@ Value *GCNTTIImpl::rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
return B.CreateIntrinsic(Intrinsic::ptrmask, {NewV->getType(), MaskTy},
{NewV, MaskOp});
}
case Intrinsic::amdgcn_flat_atomic_fmax:
case Intrinsic::amdgcn_flat_atomic_fmin:
case Intrinsic::amdgcn_flat_atomic_fmax_num:
case Intrinsic::amdgcn_flat_atomic_fmin_num: {
Type *DestTy = II->getType();
Expand Down
11 changes: 0 additions & 11 deletions llvm/lib/Target/AMDGPU/FLATInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -1604,15 +1604,11 @@ let OtherPredicates = [isGFX12Plus] in {
let SubtargetPredicate = HasAtomicFMinFMaxF32GlobalInsts, OtherPredicates = [HasFlatGlobalInsts] in {
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_FMIN", "atomic_load_fmin_global", f32>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_FMAX", "atomic_load_fmax_global", f32>;
defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_FMIN", "int_amdgcn_global_atomic_fmin", f32>;
defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_FMAX", "int_amdgcn_global_atomic_fmax", f32>;
}

let SubtargetPredicate = HasAtomicFMinFMaxF32FlatInsts in {
defm : FlatAtomicPat <"FLAT_ATOMIC_FMIN", "atomic_load_fmin_flat", f32>;
defm : FlatAtomicPat <"FLAT_ATOMIC_FMAX", "atomic_load_fmax_flat", f32>;
defm : FlatAtomicIntrPat <"FLAT_ATOMIC_FMIN", "int_amdgcn_flat_atomic_fmin", f32>;
defm : FlatAtomicIntrPat <"FLAT_ATOMIC_FMAX", "int_amdgcn_flat_atomic_fmax", f32>;
}

let OtherPredicates = [isGFX12Only] in {
Expand Down Expand Up @@ -1642,13 +1638,6 @@ defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_PK_ADD_F16", "atomic_load_fadd_globa
let SubtargetPredicate = HasAtomicFMinFMaxF64GlobalInsts, OtherPredicates = [HasFlatGlobalInsts] in {
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_MIN_F64", "atomic_load_fmin_global", f64>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_MAX_F64", "atomic_load_fmax_global", f64>;
defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_MIN_F64", "int_amdgcn_global_atomic_fmin", f64>;
defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_MAX_F64", "int_amdgcn_global_atomic_fmax", f64>;
}

let SubtargetPredicate = HasAtomicFMinFMaxF64FlatInsts in {
defm : FlatAtomicIntrPat <"FLAT_ATOMIC_MIN_F64", "int_amdgcn_flat_atomic_fmin", f64>;
defm : FlatAtomicIntrPat <"FLAT_ATOMIC_MAX_F64", "int_amdgcn_flat_atomic_fmax", f64>;
}

let OtherPredicates = [HasFlatBufferGlobalAtomicFaddF64Inst] in {
Expand Down
16 changes: 0 additions & 16 deletions llvm/lib/Target/AMDGPU/SIISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1367,13 +1367,9 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
MachineMemOperand::MODereferenceable;
return true;
}
case Intrinsic::amdgcn_global_atomic_fmin:
case Intrinsic::amdgcn_global_atomic_fmax:
case Intrinsic::amdgcn_global_atomic_fmin_num:
case Intrinsic::amdgcn_global_atomic_fmax_num:
case Intrinsic::amdgcn_global_atomic_ordered_add_b64:
case Intrinsic::amdgcn_flat_atomic_fmin:
case Intrinsic::amdgcn_flat_atomic_fmax:
case Intrinsic::amdgcn_flat_atomic_fmin_num:
case Intrinsic::amdgcn_flat_atomic_fmax_num:
case Intrinsic::amdgcn_atomic_cond_sub_u32: {
Expand Down Expand Up @@ -1485,14 +1481,10 @@ bool SITargetLowering::getAddrModeArguments(IntrinsicInst *II,
case Intrinsic::amdgcn_ds_consume:
case Intrinsic::amdgcn_ds_ordered_add:
case Intrinsic::amdgcn_ds_ordered_swap:
case Intrinsic::amdgcn_flat_atomic_fmax:
case Intrinsic::amdgcn_flat_atomic_fmax_num:
case Intrinsic::amdgcn_flat_atomic_fmin:
case Intrinsic::amdgcn_flat_atomic_fmin_num:
case Intrinsic::amdgcn_global_atomic_csub:
case Intrinsic::amdgcn_global_atomic_fmax:
case Intrinsic::amdgcn_global_atomic_fmax_num:
case Intrinsic::amdgcn_global_atomic_fmin:
case Intrinsic::amdgcn_global_atomic_fmin_num:
case Intrinsic::amdgcn_global_atomic_ordered_add_b64:
case Intrinsic::amdgcn_global_load_tr_b64:
Expand Down Expand Up @@ -9397,12 +9389,8 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
DAG.setNodeMemRefs(NewNode, {MemRef});
return SDValue(NewNode, 0);
}
case Intrinsic::amdgcn_global_atomic_fmin:
case Intrinsic::amdgcn_global_atomic_fmax:
case Intrinsic::amdgcn_global_atomic_fmin_num:
case Intrinsic::amdgcn_global_atomic_fmax_num:
case Intrinsic::amdgcn_flat_atomic_fmin:
case Intrinsic::amdgcn_flat_atomic_fmax:
case Intrinsic::amdgcn_flat_atomic_fmin_num:
case Intrinsic::amdgcn_flat_atomic_fmax_num: {
MemSDNode *M = cast<MemSDNode>(Op);
Expand All @@ -9413,16 +9401,12 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
};
unsigned Opcode = 0;
switch (IntrID) {
case Intrinsic::amdgcn_global_atomic_fmin:
case Intrinsic::amdgcn_global_atomic_fmin_num:
case Intrinsic::amdgcn_flat_atomic_fmin:
case Intrinsic::amdgcn_flat_atomic_fmin_num: {
Opcode = ISD::ATOMIC_LOAD_FMIN;
break;
}
case Intrinsic::amdgcn_global_atomic_fmax:
case Intrinsic::amdgcn_global_atomic_fmax_num:
case Intrinsic::amdgcn_flat_atomic_fmax:
case Intrinsic::amdgcn_flat_atomic_fmax_num: {
Opcode = ISD::ATOMIC_LOAD_FMAX;
break;
Expand Down
64 changes: 64 additions & 0 deletions llvm/test/Bitcode/amdgcn-atomic.ll
Original file line number Diff line number Diff line change
Expand Up @@ -354,6 +354,70 @@ define float @upgrade_amdgcn_global_atomic_fadd_f32_p1_f32(ptr addrspace(1) %ptr
ret float %result
}

declare float @llvm.amdgcn.flat.atomic.fmin.f32.p0.f32(ptr nocapture, float) #0

define float @upgrade_amdgcn_flat_atomic_fmin_f32_p0_f32(ptr %ptr, float %data) {
; CHECK: %{{.+}} = atomicrmw fmin ptr %ptr, float %data syncscope("agent") seq_cst, align 4, !noalias.addrspace !0, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
%result = call float @llvm.amdgcn.flat.atomic.fmin.f32.p0.f32(ptr %ptr, float %data)
ret float %result
}

declare float @llvm.amdgcn.global.atomic.fmin.f32.p1.f32(ptr addrspace(1) nocapture, float) #0

define float @upgrade_amdgcn_global_atomic_fmin_f32_p1_f32(ptr addrspace(1) %ptr, float %data) {
; CHECK: %{{.+}} = atomicrmw fmin ptr addrspace(1) %ptr, float %data syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
%result = call float @llvm.amdgcn.global.atomic.fmin.f32.p1.f32(ptr addrspace(1) %ptr, float %data)
ret float %result
}

declare double @llvm.amdgcn.flat.atomic.fmin.f64.p0.f64(ptr nocapture, double) #0

define double @upgrade_amdgcn_flat_atomic_fmin_f64_p0_f64(ptr %ptr, double %data) {
; CHECK: %{{.+}} = atomicrmw fmin ptr %ptr, double %data syncscope("agent") seq_cst, align 8, !noalias.addrspace !0, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
%result = call double @llvm.amdgcn.flat.atomic.fmin.f64.p0.f64(ptr %ptr, double %data)
ret double %result
}

declare double @llvm.amdgcn.global.atomic.fmin.f64.p1.f64(ptr addrspace(1) nocapture, double) #0

define double @upgrade_amdgcn_global_atomic_fmin_f64_p1_f64(ptr addrspace(1) %ptr, double %data) {
; CHECK: %{{.+}} = atomicrmw fmin ptr addrspace(1) %ptr, double %data syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
%result = call double @llvm.amdgcn.global.atomic.fmin.f64.p1.f64(ptr addrspace(1) %ptr, double %data)
ret double %result
}

declare float @llvm.amdgcn.flat.atomic.fmax.f32.p0.f32(ptr nocapture, float) #0

define float @upgrade_amdgcn_flat_atomic_fmax_f32_p0_f32(ptr %ptr, float %data) {
; CHECK: %{{.+}} = atomicrmw fmax ptr %ptr, float %data syncscope("agent") seq_cst, align 4, !noalias.addrspace !0, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
%result = call float @llvm.amdgcn.flat.atomic.fmax.f32.p0.f32(ptr %ptr, float %data)
ret float %result
}

declare float @llvm.amdgcn.global.atomic.fmax.f32.p1.f32(ptr addrspace(1) nocapture, float) #0

define float @upgrade_amdgcn_global_atomic_fmax_f32_p1_f32(ptr addrspace(1) %ptr, float %data) {
; CHECK: %{{.+}} = atomicrmw fmax ptr addrspace(1) %ptr, float %data syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
%result = call float @llvm.amdgcn.global.atomic.fmax.f32.p1.f32(ptr addrspace(1) %ptr, float %data)
ret float %result
}

declare double @llvm.amdgcn.flat.atomic.fmax.f64.p0.f64(ptr nocapture, double) #0

define double @upgrade_amdgcn_flat_atomic_fmax_f64_p0_f64(ptr %ptr, double %data) {
; CHECK: %{{.+}} = atomicrmw fmax ptr %ptr, double %data syncscope("agent") seq_cst, align 8, !noalias.addrspace !0, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
%result = call double @llvm.amdgcn.flat.atomic.fmax.f64.p0.f64(ptr %ptr, double %data)
ret double %result
}

declare double @llvm.amdgcn.global.atomic.fmax.f64.p1.f64(ptr addrspace(1) nocapture, double) #0

define double @upgrade_amdgcn_global_atomic_fmax_f64_p1_f64(ptr addrspace(1) %ptr, double %data) {
; CHECK: %{{.+}} = atomicrmw fmax ptr addrspace(1) %ptr, double %data syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
%result = call double @llvm.amdgcn.global.atomic.fmax.f64.p1.f64(ptr addrspace(1) %ptr, double %data)
ret double %result
}

attributes #0 = { argmemonly nounwind willreturn }

; CHECK: !0 = !{i32 5, i32 6}
Expand Down
Loading
Loading