Skip to content

Commit ee08d9c

Browse files
authored
AMDGPU: Remove global/flat atomic fadd intrinsics (#97051)
These have been replaced with atomicrmw.
1 parent 3082a38 commit ee08d9c

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

43 files changed

+688
-2784
lines changed

llvm/docs/ReleaseNotes.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,11 @@ Changes to the AArch64 Backend
7575
Changes to the AMDGPU Backend
7676
-----------------------------
7777

78+
* Removed ``llvm.amdgcn.flat.atomic.fadd`` and
79+
``llvm.amdgcn.global.atomic.fadd`` intrinsics. Users should use the
80+
:ref:`atomicrmw <i_atomicrmw>` instruction with ``fadd`` and
81+
addrspace(0) or addrspace(1) instead.
82+
7883
Changes to the ARM Backend
7984
--------------------------
8085

llvm/include/llvm/IR/IntrinsicsAMDGPU.td

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2909,8 +2909,6 @@ def int_amdgcn_dot4_f32_bf8_bf8 : AMDGPU8bitFloatDot4Intrinsic;
29092909
// gfx908 intrinsics
29102910
// ===----------------------------------------------------------------------===//
29112911

2912-
def int_amdgcn_global_atomic_fadd : AMDGPUAtomicRtn<llvm_anyfloat_ty>;
2913-
29142912
// llvm.amdgcn.mfma.*.* vdst, srcA, srcB, srcC, cbsz, abid, blgp
29152913
class AMDGPUMfmaIntrinsic<LLVMType DestTy, LLVMType SrcABTy> :
29162914
ClangBuiltin<!subst("int", "__builtin", NAME)>,
@@ -2949,7 +2947,6 @@ def int_amdgcn_mfma_f32_16x16x8bf16 : AMDGPUMfmaIntrinsic<llvm_v4f32_ty, llvm_v
29492947

29502948
def int_amdgcn_global_atomic_fmin : AMDGPUAtomicRtn<llvm_anyfloat_ty>;
29512949
def int_amdgcn_global_atomic_fmax : AMDGPUAtomicRtn<llvm_anyfloat_ty>;
2952-
def int_amdgcn_flat_atomic_fadd : AMDGPUAtomicRtn<llvm_anyfloat_ty>;
29532950
def int_amdgcn_flat_atomic_fmin : AMDGPUAtomicRtn<llvm_anyfloat_ty>;
29542951
def int_amdgcn_flat_atomic_fmax : AMDGPUAtomicRtn<llvm_anyfloat_ty>;
29552952

llvm/lib/IR/AutoUpgrade.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1035,8 +1035,8 @@ static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn,
10351035

10361036
if (Name.starts_with("ds.fadd") || Name.starts_with("ds.fmin") ||
10371037
Name.starts_with("ds.fmax") ||
1038-
Name.starts_with("global.atomic.fadd.v2bf16") ||
1039-
Name.starts_with("flat.atomic.fadd.v2bf16")) {
1038+
Name.starts_with("global.atomic.fadd") ||
1039+
Name.starts_with("flat.atomic.fadd")) {
10401040
// Replaced with atomicrmw fadd/fmin/fmax, so there's no new
10411041
// declaration.
10421042
NewFn = nullptr;

llvm/lib/Target/AMDGPU/AMDGPUInstructions.td

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -618,16 +618,11 @@ multiclass local_addr_space_atomic_op {
618618
}
619619
}
620620

621-
defm int_amdgcn_flat_atomic_fadd : noret_op;
622-
defm int_amdgcn_flat_atomic_fadd : flat_addr_space_atomic_op;
623621
defm int_amdgcn_flat_atomic_fmin : noret_op;
624622
defm int_amdgcn_flat_atomic_fmax : noret_op;
625-
defm int_amdgcn_global_atomic_fadd : global_addr_space_atomic_op;
626-
defm int_amdgcn_flat_atomic_fadd : global_addr_space_atomic_op;
627623
defm int_amdgcn_global_atomic_fmin : noret_op;
628624
defm int_amdgcn_global_atomic_fmax : noret_op;
629625
defm int_amdgcn_global_atomic_csub : noret_op;
630-
defm int_amdgcn_flat_atomic_fadd : local_addr_space_atomic_op;
631626
defm int_amdgcn_global_atomic_ordered_add_b64 : noret_op;
632627
defm int_amdgcn_flat_atomic_fmin_num : noret_op;
633628
defm int_amdgcn_flat_atomic_fmax_num : noret_op;

llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4896,13 +4896,11 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
48964896
OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
48974897
break;
48984898
}
4899-
case Intrinsic::amdgcn_global_atomic_fadd:
49004899
case Intrinsic::amdgcn_global_atomic_csub:
49014900
case Intrinsic::amdgcn_global_atomic_fmin:
49024901
case Intrinsic::amdgcn_global_atomic_fmax:
49034902
case Intrinsic::amdgcn_global_atomic_fmin_num:
49044903
case Intrinsic::amdgcn_global_atomic_fmax_num:
4905-
case Intrinsic::amdgcn_flat_atomic_fadd:
49064904
case Intrinsic::amdgcn_flat_atomic_fmin:
49074905
case Intrinsic::amdgcn_flat_atomic_fmax:
49084906
case Intrinsic::amdgcn_flat_atomic_fmin_num:

llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -239,13 +239,11 @@ def : SourceOfDivergence<int_r600_read_tidig_y>;
239239
def : SourceOfDivergence<int_r600_read_tidig_z>;
240240
def : SourceOfDivergence<int_amdgcn_atomic_cond_sub_u32>;
241241
def : SourceOfDivergence<int_amdgcn_global_atomic_csub>;
242-
def : SourceOfDivergence<int_amdgcn_global_atomic_fadd>;
243242
def : SourceOfDivergence<int_amdgcn_global_atomic_fmin>;
244243
def : SourceOfDivergence<int_amdgcn_global_atomic_fmax>;
245244
def : SourceOfDivergence<int_amdgcn_global_atomic_fmin_num>;
246245
def : SourceOfDivergence<int_amdgcn_global_atomic_fmax_num>;
247246
def : SourceOfDivergence<int_amdgcn_global_atomic_ordered_add_b64>;
248-
def : SourceOfDivergence<int_amdgcn_flat_atomic_fadd>;
249247
def : SourceOfDivergence<int_amdgcn_flat_atomic_fmin>;
250248
def : SourceOfDivergence<int_amdgcn_flat_atomic_fmax>;
251249
def : SourceOfDivergence<int_amdgcn_flat_atomic_fmin_num>;

llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1045,7 +1045,6 @@ bool GCNTTIImpl::collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
10451045
switch (IID) {
10461046
case Intrinsic::amdgcn_is_shared:
10471047
case Intrinsic::amdgcn_is_private:
1048-
case Intrinsic::amdgcn_flat_atomic_fadd:
10491048
case Intrinsic::amdgcn_flat_atomic_fmax:
10501049
case Intrinsic::amdgcn_flat_atomic_fmin:
10511050
case Intrinsic::amdgcn_flat_atomic_fmax_num:
@@ -1107,7 +1106,6 @@ Value *GCNTTIImpl::rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
11071106
return B.CreateIntrinsic(Intrinsic::ptrmask, {NewV->getType(), MaskTy},
11081107
{NewV, MaskOp});
11091108
}
1110-
case Intrinsic::amdgcn_flat_atomic_fadd:
11111109
case Intrinsic::amdgcn_flat_atomic_fmax:
11121110
case Intrinsic::amdgcn_flat_atomic_fmin:
11131111
case Intrinsic::amdgcn_flat_atomic_fmax_num:

llvm/lib/Target/AMDGPU/DSInstructions.td

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1135,11 +1135,7 @@ class DSAtomicRetPatIntrinsic<DS_Pseudo inst, ValueType vt, PatFrag frag,
11351135
(vt (frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$value)),
11361136
(inst $ptr, getVregSrcForVT<vt>.ret:$value, Offset:$offset, (i1 gds))> {
11371137
}
1138-
1139-
def : DSAtomicRetPatIntrinsic<DS_ADD_RTN_F64, f64, int_amdgcn_flat_atomic_fadd_local_addrspace>;
1140-
let AddedComplexity = 1 in
1141-
def : DSAtomicRetPatIntrinsic<DS_ADD_F64, f64, int_amdgcn_flat_atomic_fadd_noret_local_addrspace>;
1142-
}
1138+
} // End SubtargetPredicate = HasLdsAtomicAddF64
11431139

11441140
let SubtargetPredicate = HasAtomicDsPkAdd16Insts in {
11451141
defm : DSAtomicRetNoRetPat_mc<DS_PK_ADD_RTN_F16, DS_PK_ADD_F16, v2f16, "atomic_load_fadd">;

llvm/lib/Target/AMDGPU/FLATInstructions.td

Lines changed: 0 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1625,25 +1625,17 @@ let OtherPredicates = [isGFX12Only] in {
16251625

16261626
let OtherPredicates = [HasAtomicFaddNoRtnInsts] in {
16271627
defm : GlobalFLATAtomicPatsNoRtn <"GLOBAL_ATOMIC_ADD_F32", "atomic_load_fadd_global", f32>;
1628-
defm : GlobalFLATAtomicPatsNoRtnWithAddrSpace <"GLOBAL_ATOMIC_ADD_F32", "int_amdgcn_flat_atomic_fadd", "global_addrspace", f32>;
1629-
defm : GlobalFLATAtomicPatsNoRtnWithAddrSpace <"GLOBAL_ATOMIC_ADD_F32", "int_amdgcn_global_atomic_fadd", "global_addrspace", f32>;
16301628
}
16311629

16321630
let OtherPredicates = [HasAtomicBufferGlobalPkAddF16NoRtnInsts] in {
16331631
defm : GlobalFLATAtomicPatsNoRtn <"GLOBAL_ATOMIC_PK_ADD_F16", "atomic_load_fadd_global", v2f16>;
1634-
defm : GlobalFLATAtomicPatsNoRtnWithAddrSpace <"GLOBAL_ATOMIC_PK_ADD_F16", "int_amdgcn_flat_atomic_fadd", "global_addrspace", v2f16>;
1635-
defm : GlobalFLATAtomicPatsNoRtnWithAddrSpace <"GLOBAL_ATOMIC_PK_ADD_F16", "int_amdgcn_global_atomic_fadd", "global_addrspace", v2f16>;
16361632
}
16371633

16381634
let OtherPredicates = [HasAtomicFaddRtnInsts] in {
16391635
defm : GlobalFLATAtomicPatsRtn <"GLOBAL_ATOMIC_ADD_F32", "atomic_load_fadd_global", f32>;
1640-
defm : GlobalFLATAtomicPatsRtnWithAddrSpace <"GLOBAL_ATOMIC_ADD_F32", "int_amdgcn_flat_atomic_fadd", "global_addrspace", f32>;
1641-
defm : GlobalFLATAtomicPatsRtnWithAddrSpace <"GLOBAL_ATOMIC_ADD_F32", "int_amdgcn_global_atomic_fadd", "global_addrspace", f32>;
16421636
}
16431637

16441638
let OtherPredicates = [HasAtomicBufferGlobalPkAddF16Insts] in {
1645-
defm : GlobalFLATAtomicPatsRtnWithAddrSpace <"GLOBAL_ATOMIC_PK_ADD_F16", "int_amdgcn_flat_atomic_fadd", "global_addrspace", v2f16>;
1646-
defm : GlobalFLATAtomicPatsRtnWithAddrSpace <"GLOBAL_ATOMIC_PK_ADD_F16", "int_amdgcn_global_atomic_fadd", "global_addrspace", v2f16>;
16471639
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_PK_ADD_F16", "atomic_load_fadd_global", v2f16>;
16481640
}
16491641

@@ -1661,19 +1653,14 @@ defm : FlatAtomicIntrPat <"FLAT_ATOMIC_MAX_F64", "int_amdgcn_flat_atomic_fmax",
16611653

16621654
let OtherPredicates = [HasFlatBufferGlobalAtomicFaddF64Inst] in {
16631655
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_ADD_F64", "atomic_load_fadd_global", f64>;
1664-
defm : GlobalFLATAtomicPatsWithAddrSpace<"GLOBAL_ATOMIC_ADD_F64", "int_amdgcn_flat_atomic_fadd", "global_addrspace", f64>;
1665-
defm : GlobalFLATAtomicPatsWithAddrSpace<"GLOBAL_ATOMIC_ADD_F64", "int_amdgcn_global_atomic_fadd", "global_addrspace", f64>;
16661656
defm : FlatAtomicPat <"FLAT_ATOMIC_ADD_F64", "atomic_load_fadd_flat", f64>;
1667-
defm : FlatAtomicIntrPat <"FLAT_ATOMIC_ADD_F64", "int_amdgcn_flat_atomic_fadd", f64>;
16681657
}
16691658

16701659
let OtherPredicates = [HasFlatAtomicFaddF32Inst] in {
16711660
defm : FlatAtomicPat <"FLAT_ATOMIC_ADD_F32", "atomic_load_fadd_flat", f32>;
1672-
defm : FlatAtomicIntrPat <"FLAT_ATOMIC_ADD_F32", "int_amdgcn_flat_atomic_fadd", f32>;
16731661
}
16741662

16751663
let OtherPredicates = [HasAtomicFlatPkAdd16Insts] in {
1676-
defm : FlatAtomicIntrPat <"FLAT_ATOMIC_PK_ADD_F16", "int_amdgcn_flat_atomic_fadd", v2f16>;
16771664
defm : FlatAtomicPat <"FLAT_ATOMIC_PK_ADD_F16", "atomic_load_fadd_flat", v2f16>;
16781665
defm : FlatAtomicPat <"FLAT_ATOMIC_PK_ADD_BF16", "atomic_load_fadd_flat", v2bf16>;
16791666
}

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1351,13 +1351,11 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
13511351
MachineMemOperand::MODereferenceable;
13521352
return true;
13531353
}
1354-
case Intrinsic::amdgcn_global_atomic_fadd:
13551354
case Intrinsic::amdgcn_global_atomic_fmin:
13561355
case Intrinsic::amdgcn_global_atomic_fmax:
13571356
case Intrinsic::amdgcn_global_atomic_fmin_num:
13581357
case Intrinsic::amdgcn_global_atomic_fmax_num:
13591358
case Intrinsic::amdgcn_global_atomic_ordered_add_b64:
1360-
case Intrinsic::amdgcn_flat_atomic_fadd:
13611359
case Intrinsic::amdgcn_flat_atomic_fmin:
13621360
case Intrinsic::amdgcn_flat_atomic_fmax:
13631361
case Intrinsic::amdgcn_flat_atomic_fmin_num:
@@ -1464,13 +1462,11 @@ bool SITargetLowering::getAddrModeArguments(IntrinsicInst *II,
14641462
case Intrinsic::amdgcn_ds_consume:
14651463
case Intrinsic::amdgcn_ds_ordered_add:
14661464
case Intrinsic::amdgcn_ds_ordered_swap:
1467-
case Intrinsic::amdgcn_flat_atomic_fadd:
14681465
case Intrinsic::amdgcn_flat_atomic_fmax:
14691466
case Intrinsic::amdgcn_flat_atomic_fmax_num:
14701467
case Intrinsic::amdgcn_flat_atomic_fmin:
14711468
case Intrinsic::amdgcn_flat_atomic_fmin_num:
14721469
case Intrinsic::amdgcn_global_atomic_csub:
1473-
case Intrinsic::amdgcn_global_atomic_fadd:
14741470
case Intrinsic::amdgcn_global_atomic_fmax:
14751471
case Intrinsic::amdgcn_global_atomic_fmax_num:
14761472
case Intrinsic::amdgcn_global_atomic_fmin:

0 commit comments

Comments
 (0)