Skip to content

Commit c99da46

Browse files
[AMDGPU][GFX12] Add Atomic cond_sub_u32 (llvm#76224)
Co-authored-by: Vang Thao <[email protected]>
1 parent badf0ee commit c99da46

25 files changed

+860
-12
lines changed

llvm/docs/AMDGPUUsage.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1182,6 +1182,11 @@ The AMDGPU backend implements the following LLVM IR intrinsics.
11821182

11831183
The iglp_opt strategy implementations are subject to change.
11841184

1185+
llvm.amdgcn.atomic.cond.sub.u32 Provides direct access to flat_atomic_cond_sub_u32, global_atomic_cond_sub_u32
1186+
and ds_cond_sub_u32 based on address space on gfx12 targets. This
1187+
performs subtraction only if the memory value is greater than or
1188+
equal to the data value.
1189+
11851190
============================================== ==========================================================
11861191

11871192
.. TODO::

llvm/include/llvm/IR/IntrinsicsAMDGPU.td

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1263,6 +1263,7 @@ def int_amdgcn_raw_buffer_atomic_or : AMDGPURawBufferAtomic;
12631263
def int_amdgcn_raw_buffer_atomic_xor : AMDGPURawBufferAtomic;
12641264
def int_amdgcn_raw_buffer_atomic_inc : AMDGPURawBufferAtomic;
12651265
def int_amdgcn_raw_buffer_atomic_dec : AMDGPURawBufferAtomic;
1266+
def int_amdgcn_raw_buffer_atomic_cond_sub_u32 : AMDGPURawBufferAtomic;
12661267
def int_amdgcn_raw_buffer_atomic_cmpswap : Intrinsic<
12671268
[llvm_anyint_ty],
12681269
[LLVMMatchType<0>, // src(VGPR)
@@ -1299,6 +1300,7 @@ def int_amdgcn_raw_ptr_buffer_atomic_or : AMDGPURawPtrBufferAtomic;
12991300
def int_amdgcn_raw_ptr_buffer_atomic_xor : AMDGPURawPtrBufferAtomic;
13001301
def int_amdgcn_raw_ptr_buffer_atomic_inc : AMDGPURawPtrBufferAtomic;
13011302
def int_amdgcn_raw_ptr_buffer_atomic_dec : AMDGPURawPtrBufferAtomic;
1303+
def int_amdgcn_raw_ptr_buffer_atomic_cond_sub_u32 : AMDGPURawPtrBufferAtomic;
13021304
def int_amdgcn_raw_ptr_buffer_atomic_cmpswap : Intrinsic<
13031305
[llvm_anyint_ty],
13041306
[LLVMMatchType<0>, // src(VGPR)
@@ -1337,6 +1339,7 @@ def int_amdgcn_struct_buffer_atomic_or : AMDGPUStructBufferAtomic;
13371339
def int_amdgcn_struct_buffer_atomic_xor : AMDGPUStructBufferAtomic;
13381340
def int_amdgcn_struct_buffer_atomic_inc : AMDGPUStructBufferAtomic;
13391341
def int_amdgcn_struct_buffer_atomic_dec : AMDGPUStructBufferAtomic;
1342+
def int_amdgcn_struct_buffer_atomic_cond_sub_u32 : AMDGPUStructBufferAtomic;
13401343
def int_amdgcn_struct_buffer_atomic_cmpswap : Intrinsic<
13411344
[llvm_anyint_ty],
13421345
[LLVMMatchType<0>, // src(VGPR)
@@ -1372,6 +1375,7 @@ def int_amdgcn_struct_ptr_buffer_atomic_or : AMDGPUStructPtrBufferAtomic;
13721375
def int_amdgcn_struct_ptr_buffer_atomic_xor : AMDGPUStructPtrBufferAtomic;
13731376
def int_amdgcn_struct_ptr_buffer_atomic_inc : AMDGPUStructPtrBufferAtomic;
13741377
def int_amdgcn_struct_ptr_buffer_atomic_dec : AMDGPUStructPtrBufferAtomic;
1378+
def int_amdgcn_struct_ptr_buffer_atomic_cond_sub_u32 : AMDGPUStructPtrBufferAtomic;
13751379
def int_amdgcn_struct_ptr_buffer_atomic_cmpswap : Intrinsic<
13761380
[llvm_anyint_ty],
13771381
[LLVMMatchType<0>, // src(VGPR)
@@ -2524,6 +2528,8 @@ def int_amdgcn_flat_atomic_fmax_num : AMDGPUAtomicRtn<llvm_anyfloat_ty>;
25242528
def int_amdgcn_global_atomic_fmin_num : AMDGPUAtomicRtn<llvm_anyfloat_ty>;
25252529
def int_amdgcn_global_atomic_fmax_num : AMDGPUAtomicRtn<llvm_anyfloat_ty>;
25262530

2531+
def int_amdgcn_atomic_cond_sub_u32 : AMDGPUAtomicRtn<llvm_i32_ty>;
2532+
25272533
//===----------------------------------------------------------------------===//
25282534
// Deep learning intrinsics.
25292535
//===----------------------------------------------------------------------===//

llvm/lib/Target/AMDGPU/AMDGPUGISel.td

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -264,6 +264,7 @@ def : GINodeEquiv<G_AMDGPU_BUFFER_ATOMIC_FADD, SIbuffer_atomic_fadd>;
264264
def : GINodeEquiv<G_AMDGPU_BUFFER_ATOMIC_FMIN, SIbuffer_atomic_fmin>;
265265
def : GINodeEquiv<G_AMDGPU_BUFFER_ATOMIC_FMAX, SIbuffer_atomic_fmax>;
266266
def : GINodeEquiv<G_AMDGPU_BUFFER_ATOMIC_CMPSWAP, SIbuffer_atomic_cmpswap>;
267+
def : GINodeEquiv<G_AMDGPU_BUFFER_ATOMIC_COND_SUB_U32, SIbuffer_atomic_cond_sub_u32>;
267268
def : GINodeEquiv<G_AMDGPU_S_BUFFER_LOAD, SIsbuffer_load>;
268269
def : GINodeEquiv<G_AMDGPU_S_BUFFER_LOAD_SBYTE, SIsbuffer_load_byte>;
269270
def : GINodeEquiv<G_AMDGPU_S_BUFFER_LOAD_UBYTE, SIsbuffer_load_ubyte>;

llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5480,6 +5480,7 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
54805480
NODE_NAME_CASE(BUFFER_ATOMIC_FADD)
54815481
NODE_NAME_CASE(BUFFER_ATOMIC_FMIN)
54825482
NODE_NAME_CASE(BUFFER_ATOMIC_FMAX)
5483+
NODE_NAME_CASE(BUFFER_ATOMIC_COND_SUB_U32)
54835484

54845485
case AMDGPUISD::LAST_AMDGPU_ISD_NUMBER: break;
54855486
}

llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -593,6 +593,7 @@ enum NodeType : unsigned {
593593
BUFFER_ATOMIC_FADD,
594594
BUFFER_ATOMIC_FMIN,
595595
BUFFER_ATOMIC_FMAX,
596+
BUFFER_ATOMIC_COND_SUB_U32,
596597

597598
LAST_AMDGPU_ISD_NUMBER
598599
};

llvm/lib/Target/AMDGPU/AMDGPUInstructions.td

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -647,6 +647,9 @@ defm int_amdgcn_flat_atomic_fmin_num : noret_op;
647647
defm int_amdgcn_flat_atomic_fmax_num : noret_op;
648648
defm int_amdgcn_global_atomic_fmin_num : noret_op;
649649
defm int_amdgcn_global_atomic_fmax_num : noret_op;
650+
defm int_amdgcn_atomic_cond_sub_u32 : local_addr_space_atomic_op;
651+
defm int_amdgcn_atomic_cond_sub_u32 : flat_addr_space_atomic_op;
652+
defm int_amdgcn_atomic_cond_sub_u32 : global_addr_space_atomic_op;
650653

651654
multiclass noret_binary_atomic_op<SDNode atomic_op, bit IsInt = 1> {
652655
let HasNoUse = true in

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5893,6 +5893,9 @@ static unsigned getBufferAtomicPseudo(Intrinsic::ID IntrID) {
58935893
case Intrinsic::amdgcn_struct_buffer_atomic_fmax:
58945894
case Intrinsic::amdgcn_struct_ptr_buffer_atomic_fmax:
58955895
return AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMAX;
5896+
case Intrinsic::amdgcn_raw_buffer_atomic_cond_sub_u32:
5897+
case Intrinsic::amdgcn_struct_buffer_atomic_cond_sub_u32:
5898+
return AMDGPU::G_AMDGPU_BUFFER_ATOMIC_COND_SUB_U32;
58965899
default:
58975900
llvm_unreachable("unhandled atomic opcode");
58985901
}

llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4856,6 +4856,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
48564856
case Intrinsic::amdgcn_flat_atomic_fmax_num:
48574857
case Intrinsic::amdgcn_global_atomic_fadd_v2bf16:
48584858
case Intrinsic::amdgcn_flat_atomic_fadd_v2bf16:
4859+
case Intrinsic::amdgcn_atomic_cond_sub_u32:
48594860
case Intrinsic::amdgcn_global_atomic_ordered_add_b64:
48604861
return getDefaultMappingAllVGPR(MI);
48614862
case Intrinsic::amdgcn_ds_ordered_add:

llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -237,6 +237,7 @@ def : SourceOfDivergence<int_amdgcn_mbcnt_lo>;
237237
def : SourceOfDivergence<int_r600_read_tidig_x>;
238238
def : SourceOfDivergence<int_r600_read_tidig_y>;
239239
def : SourceOfDivergence<int_r600_read_tidig_z>;
240+
def : SourceOfDivergence<int_amdgcn_atomic_cond_sub_u32>;
240241
def : SourceOfDivergence<int_amdgcn_global_atomic_csub>;
241242
def : SourceOfDivergence<int_amdgcn_global_atomic_fadd>;
242243
def : SourceOfDivergence<int_amdgcn_global_atomic_fmin>;
@@ -282,6 +283,7 @@ def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_fadd>;
282283
def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_fmin>;
283284
def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_fmax>;
284285
def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_cmpswap>;
286+
def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_cond_sub_u32>;
285287
def : SourceOfDivergence<int_amdgcn_raw_ptr_buffer_atomic_swap>;
286288
def : SourceOfDivergence<int_amdgcn_raw_ptr_buffer_atomic_add>;
287289
def : SourceOfDivergence<int_amdgcn_raw_ptr_buffer_atomic_sub>;
@@ -298,6 +300,7 @@ def : SourceOfDivergence<int_amdgcn_raw_ptr_buffer_atomic_fadd>;
298300
def : SourceOfDivergence<int_amdgcn_raw_ptr_buffer_atomic_fmin>;
299301
def : SourceOfDivergence<int_amdgcn_raw_ptr_buffer_atomic_fmax>;
300302
def : SourceOfDivergence<int_amdgcn_raw_ptr_buffer_atomic_cmpswap>;
303+
def : SourceOfDivergence<int_amdgcn_raw_ptr_buffer_atomic_cond_sub_u32>;
301304
def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_swap>;
302305
def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_add>;
303306
def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_sub>;
@@ -314,6 +317,7 @@ def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_fadd>;
314317
def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_fmin>;
315318
def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_fmax>;
316319
def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_cmpswap>;
320+
def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_cond_sub_u32>;
317321
def : SourceOfDivergence<int_amdgcn_struct_ptr_buffer_atomic_swap>;
318322
def : SourceOfDivergence<int_amdgcn_struct_ptr_buffer_atomic_add>;
319323
def : SourceOfDivergence<int_amdgcn_struct_ptr_buffer_atomic_sub>;
@@ -330,6 +334,7 @@ def : SourceOfDivergence<int_amdgcn_struct_ptr_buffer_atomic_fadd>;
330334
def : SourceOfDivergence<int_amdgcn_struct_ptr_buffer_atomic_fmin>;
331335
def : SourceOfDivergence<int_amdgcn_struct_ptr_buffer_atomic_fmax>;
332336
def : SourceOfDivergence<int_amdgcn_struct_ptr_buffer_atomic_cmpswap>;
337+
def : SourceOfDivergence<int_amdgcn_struct_ptr_buffer_atomic_cond_sub_u32>;
333338
def : SourceOfDivergence<int_amdgcn_buffer_atomic_csub>;
334339
def : SourceOfDivergence<int_amdgcn_ps_live>;
335340
def : SourceOfDivergence<int_amdgcn_live_mask>;

llvm/lib/Target/AMDGPU/BUFInstructions.td

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1241,6 +1241,12 @@ defm BUFFER_ATOMIC_PK_ADD_F16 : MUBUF_Pseudo_Atomics_RTN <
12411241
"buffer_atomic_pk_add_f16", VGPR_32, v2f16, null_frag
12421242
>;
12431243

1244+
let SubtargetPredicate = isGFX12Plus in {
1245+
defm BUFFER_ATOMIC_COND_SUB_U32 : MUBUF_Pseudo_Atomics <
1246+
"buffer_atomic_cond_sub_u32", VGPR_32, i32
1247+
>;
1248+
}
1249+
12441250
//===----------------------------------------------------------------------===//
12451251
// MTBUF Instructions
12461252
//===----------------------------------------------------------------------===//
@@ -1704,6 +1710,13 @@ defm : SIBufferAtomicPat<"SIbuffer_atomic_dec", i64, "BUFFER_ATOMIC_DEC_X2">;
17041710
let SubtargetPredicate = HasAtomicCSubNoRtnInsts in
17051711
defm : SIBufferAtomicPat<"SIbuffer_atomic_csub", i32, "BUFFER_ATOMIC_CSUB", ["noret"]>;
17061712

1713+
let SubtargetPredicate = isGFX12Plus in {
1714+
defm : SIBufferAtomicPat_Common<"SIbuffer_atomic_cond_sub_u32", i32, "BUFFER_ATOMIC_COND_SUB_U32_VBUFFER", ["ret"]>;
1715+
1716+
let OtherPredicates = [HasAtomicCSubNoRtnInsts] in
1717+
defm : SIBufferAtomicPat_Common<"SIbuffer_atomic_cond_sub_u32", i32, "BUFFER_ATOMIC_COND_SUB_U32_VBUFFER", ["noret"]>;
1718+
}
1719+
17071720
let SubtargetPredicate = isGFX6GFX7GFX10Plus in {
17081721
defm : SIBufferAtomicPat<"SIbuffer_atomic_fmin", f32, "BUFFER_ATOMIC_FMIN">;
17091722
defm : SIBufferAtomicPat<"SIbuffer_atomic_fmax", f32, "BUFFER_ATOMIC_FMAX">;
@@ -2607,6 +2620,7 @@ defm BUFFER_ATOMIC_AND_X2 : MUBUF_Real_Atomic_gfx11_gfx12_Renamed<0x049,
26072620
defm BUFFER_ATOMIC_CMPSWAP : MUBUF_Real_Atomic_gfx11_gfx12_Renamed<0x034, "buffer_atomic_cmpswap_b32">;
26082621
defm BUFFER_ATOMIC_CMPSWAP_X2 : MUBUF_Real_Atomic_gfx11_gfx12_Renamed<0x042, "buffer_atomic_cmpswap_b64">;
26092622
defm BUFFER_ATOMIC_FCMPSWAP : MUBUF_Real_Atomic_gfx11_Renamed<0x050, "buffer_atomic_cmpswap_f32">;
2623+
defm BUFFER_ATOMIC_COND_SUB_U32 : MUBUF_Real_Atomic_gfx12<0x050>;
26102624
defm BUFFER_ATOMIC_CSUB : MUBUF_Real_Atomic_gfx11_gfx12_Renamed_gfx12_Renamed<0x037, "buffer_atomic_sub_clamp_u32", "buffer_atomic_csub_u32">;
26112625
def : Mnem_gfx11_gfx12<"buffer_atomic_csub", "buffer_atomic_csub_u32">;
26122626
defm BUFFER_ATOMIC_DEC : MUBUF_Real_Atomic_gfx11_gfx12_Renamed<0x040, "buffer_atomic_dec_u32">;

0 commit comments

Comments
 (0)