Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 17 additions & 8 deletions llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2606,15 +2606,24 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) {

// Insert DEALLOC_VGPR messages before previously identified S_ENDPGM
// instructions.
for (MachineInstr *MI : ReleaseVGPRInsts) {
if (ST->requiresNopBeforeDeallocVGPRs()) {
BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII->get(AMDGPU::S_NOP))
.addImm(0);
// Skip deallocation if kernel is waveslot limited vs VGPR limited. A short
// waveslot limited kernel runs slower with the deallocation.
if (!ReleaseVGPRInsts.empty() &&
(MF.getFrameInfo().hasCalls() ||
ST->getOccupancyWithNumVGPRs(
TRI->getNumUsedPhysRegs(*MRI, AMDGPU::VGPR_32RegClass)) <
AMDGPU::IsaInfo::getMaxWavesPerEU(ST))) {
for (MachineInstr *MI : ReleaseVGPRInsts) {
if (ST->requiresNopBeforeDeallocVGPRs()) {
BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
TII->get(AMDGPU::S_NOP))
.addImm(0);
}
BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
TII->get(AMDGPU::S_SENDMSG))
.addImm(AMDGPU::SendMsg::ID_DEALLOC_VGPRS_GFX11Plus);
Modified = true;
}
BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
TII->get(AMDGPU::S_SENDMSG))
.addImm(AMDGPU::SendMsg::ID_DEALLOC_VGPRS_GFX11Plus);
Modified = true;
}
ReleaseVGPRInsts.clear();
PreheadersToFlush.clear();
Expand Down
12 changes: 0 additions & 12 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/addsubu64.ll
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,6 @@ define amdgpu_kernel void @s_add_u64(ptr addrspace(1) %out, i64 %a, i64 %b) {
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[4:5]
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX12-LABEL: s_add_u64:
Expand All @@ -30,8 +28,6 @@ define amdgpu_kernel void @s_add_u64(ptr addrspace(1) %out, i64 %a, i64 %b) {
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX12-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX12-NEXT: global_store_b64 v2, v[0:1], s[4:5]
; GFX12-NEXT: s_nop 0
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX12-NEXT: s_endpgm
entry:
%add = add i64 %a, %b
Expand All @@ -45,8 +41,6 @@ define amdgpu_ps void @v_add_u64(ptr addrspace(1) %out, i64 %a, i64 %b) {
; GCN-NEXT: v_add_co_u32 v2, vcc_lo, v2, v4
; GCN-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, v3, v5, vcc_lo
; GCN-NEXT: global_store_b64 v[0:1], v[2:3], off
; GCN-NEXT: s_nop 0
; GCN-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GCN-NEXT: s_endpgm
entry:
%add = add i64 %a, %b
Expand All @@ -67,8 +61,6 @@ define amdgpu_kernel void @s_sub_u64(ptr addrspace(1) %out, i64 %a, i64 %b) {
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[4:5]
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX12-LABEL: s_sub_u64:
Expand All @@ -82,8 +74,6 @@ define amdgpu_kernel void @s_sub_u64(ptr addrspace(1) %out, i64 %a, i64 %b) {
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX12-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX12-NEXT: global_store_b64 v2, v[0:1], s[4:5]
; GFX12-NEXT: s_nop 0
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX12-NEXT: s_endpgm
entry:
%sub = sub i64 %a, %b
Expand All @@ -97,8 +87,6 @@ define amdgpu_ps void @v_sub_u64(ptr addrspace(1) %out, i64 %a, i64 %b) {
; GCN-NEXT: v_sub_co_u32 v2, vcc_lo, v2, v4
; GCN-NEXT: v_sub_co_ci_u32_e32 v3, vcc_lo, v3, v5, vcc_lo
; GCN-NEXT: global_store_b64 v[0:1], v[2:3], off
; GCN-NEXT: s_nop 0
; GCN-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GCN-NEXT: s_endpgm
entry:
%sub = sub i64 %a, %b
Expand Down
28 changes: 0 additions & 28 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_udec_wrap.ll
Original file line number Diff line number Diff line change
Expand Up @@ -84,8 +84,6 @@ define amdgpu_kernel void @lds_atomic_dec_ret_i32(ptr addrspace(1) %out, ptr add
; GFX11-NEXT: buffer_gl0_inv
; GFX11-NEXT: v_mov_b32_e32 v1, 0
; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%result = atomicrmw udec_wrap ptr addrspace(3) %ptr, i32 42 syncscope("agent") seq_cst, align 4
store i32 %result, ptr addrspace(1) %out, align 4
Expand Down Expand Up @@ -163,8 +161,6 @@ define amdgpu_kernel void @lds_atomic_dec_ret_i32_offset(ptr addrspace(1) %out,
; GFX11-NEXT: buffer_gl0_inv
; GFX11-NEXT: v_mov_b32_e32 v1, 0
; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%gep = getelementptr i32, ptr addrspace(3) %ptr, i32 4
%result = atomicrmw udec_wrap ptr addrspace(3) %gep, i32 42 syncscope("agent") seq_cst, align 4
Expand Down Expand Up @@ -353,8 +349,6 @@ define amdgpu_kernel void @global_atomic_dec_ret_i32(ptr addrspace(1) %out, ptr
; GFX11-NEXT: buffer_gl1_inv
; GFX11-NEXT: buffer_gl0_inv
; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%result = atomicrmw udec_wrap ptr addrspace(1) %ptr, i32 42 syncscope("agent") seq_cst, align 4
store i32 %result, ptr addrspace(1) %out, align 4
Expand Down Expand Up @@ -431,8 +425,6 @@ define amdgpu_kernel void @global_atomic_dec_ret_i32_offset(ptr addrspace(1) %ou
; GFX11-NEXT: buffer_gl1_inv
; GFX11-NEXT: buffer_gl0_inv
; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%gep = getelementptr i32, ptr addrspace(1) %ptr, i32 4
%result = atomicrmw udec_wrap ptr addrspace(1) %gep, i32 42 syncscope("agent") seq_cst, align 4
Expand Down Expand Up @@ -510,8 +502,6 @@ define amdgpu_kernel void @global_atomic_dec_ret_i32_offset_system(ptr addrspace
; GFX11-NEXT: buffer_gl1_inv
; GFX11-NEXT: buffer_gl0_inv
; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%gep = getelementptr i32, ptr addrspace(1) %ptr, i32 4
%result = atomicrmw udec_wrap ptr addrspace(1) %gep, i32 42 seq_cst, align 4
Expand Down Expand Up @@ -797,8 +787,6 @@ define amdgpu_kernel void @global_atomic_dec_ret_i32_offset_addr64(ptr addrspace
; GFX11-NEXT: buffer_gl1_inv
; GFX11-NEXT: buffer_gl0_inv
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%id = call i32 @llvm.amdgcn.workitem.id.x()
%gep.tid = getelementptr i32, ptr addrspace(1) %ptr, i32 %id
Expand Down Expand Up @@ -2302,8 +2290,6 @@ define amdgpu_kernel void @atomic_dec_shl_base_lds_0(ptr addrspace(1) %out, ptr
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: global_store_b32 v2, v0, s[2:3]
; GFX11-NEXT: global_store_b32 v2, v1, s[0:1]
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #2
%idx.0 = add nsw i32 %tid.x, 2
Expand Down Expand Up @@ -2390,8 +2376,6 @@ define amdgpu_kernel void @lds_atomic_dec_ret_i64(ptr addrspace(1) %out, ptr add
; GFX11-NEXT: buffer_gl0_inv
; GFX11-NEXT: v_mov_b32_e32 v2, 0
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%result = atomicrmw udec_wrap ptr addrspace(3) %ptr, i64 42 syncscope("agent") seq_cst, align 8
store i64 %result, ptr addrspace(1) %out, align 4
Expand Down Expand Up @@ -2474,8 +2458,6 @@ define amdgpu_kernel void @lds_atomic_dec_ret_i64_offset(ptr addrspace(1) %out,
; GFX11-NEXT: buffer_gl0_inv
; GFX11-NEXT: v_mov_b32_e32 v2, 0
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%gep = getelementptr i64, ptr addrspace(3) %ptr, i32 4
%result = atomicrmw udec_wrap ptr addrspace(3) %gep, i64 42 syncscope("agent") seq_cst, align 8
Expand Down Expand Up @@ -2679,8 +2661,6 @@ define amdgpu_kernel void @global_atomic_dec_ret_i64(ptr addrspace(1) %out, ptr
; GFX11-NEXT: buffer_gl1_inv
; GFX11-NEXT: buffer_gl0_inv
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%result = atomicrmw udec_wrap ptr addrspace(1) %ptr, i64 42 syncscope("agent") seq_cst, align 8
store i64 %result, ptr addrspace(1) %out, align 4
Expand Down Expand Up @@ -2762,8 +2742,6 @@ define amdgpu_kernel void @global_atomic_dec_ret_i64_offset(ptr addrspace(1) %ou
; GFX11-NEXT: buffer_gl1_inv
; GFX11-NEXT: buffer_gl0_inv
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%gep = getelementptr i64, ptr addrspace(1) %ptr, i32 4
%result = atomicrmw udec_wrap ptr addrspace(1) %gep, i64 42 syncscope("agent") seq_cst, align 8
Expand Down Expand Up @@ -2846,8 +2824,6 @@ define amdgpu_kernel void @global_atomic_dec_ret_i64_offset_system(ptr addrspace
; GFX11-NEXT: buffer_gl1_inv
; GFX11-NEXT: buffer_gl0_inv
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%gep = getelementptr i64, ptr addrspace(1) %ptr, i32 4
%result = atomicrmw udec_wrap ptr addrspace(1) %gep, i64 42 seq_cst, align 8
Expand Down Expand Up @@ -3153,8 +3129,6 @@ define amdgpu_kernel void @global_atomic_dec_ret_i64_offset_addr64(ptr addrspace
; GFX11-NEXT: buffer_gl1_inv
; GFX11-NEXT: buffer_gl0_inv
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%id = call i32 @llvm.amdgcn.workitem.id.x()
%gep.tid = getelementptr i64, ptr addrspace(1) %ptr, i32 %id
Expand Down Expand Up @@ -3334,8 +3308,6 @@ define amdgpu_kernel void @atomic_dec_shl_base_lds_0_i64(ptr addrspace(1) %out,
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: global_store_b32 v3, v2, s[2:3]
; GFX11-NEXT: global_store_b64 v3, v[0:1], s[0:1]
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #2
%idx.0 = add nsw i32 %tid.x, 2
Expand Down
30 changes: 0 additions & 30 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_uinc_wrap.ll
Original file line number Diff line number Diff line change
Expand Up @@ -84,8 +84,6 @@ define amdgpu_kernel void @lds_atomic_inc_ret_i32(ptr addrspace(1) %out, ptr add
; GFX11-NEXT: buffer_gl0_inv
; GFX11-NEXT: v_mov_b32_e32 v1, 0
; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%result = atomicrmw uinc_wrap ptr addrspace(3) %ptr, i32 42 syncscope("agent") seq_cst, align 4
store i32 %result, ptr addrspace(1) %out, align 4
Expand Down Expand Up @@ -163,8 +161,6 @@ define amdgpu_kernel void @lds_atomic_inc_ret_i32_offset(ptr addrspace(1) %out,
; GFX11-NEXT: buffer_gl0_inv
; GFX11-NEXT: v_mov_b32_e32 v1, 0
; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%gep = getelementptr i32, ptr addrspace(3) %ptr, i32 4
%result = atomicrmw uinc_wrap ptr addrspace(3) %gep, i32 42 syncscope("agent") seq_cst, align 4
Expand Down Expand Up @@ -353,8 +349,6 @@ define amdgpu_kernel void @global_atomic_inc_ret_i32(ptr addrspace(1) %out, ptr
; GFX11-NEXT: buffer_gl1_inv
; GFX11-NEXT: buffer_gl0_inv
; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%result = atomicrmw uinc_wrap ptr addrspace(1) %ptr, i32 42 syncscope("agent") seq_cst, align 4
store i32 %result, ptr addrspace(1) %out, align 4
Expand Down Expand Up @@ -431,8 +425,6 @@ define amdgpu_kernel void @global_atomic_inc_ret_i32_offset(ptr addrspace(1) %ou
; GFX11-NEXT: buffer_gl1_inv
; GFX11-NEXT: buffer_gl0_inv
; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%gep = getelementptr i32, ptr addrspace(1) %ptr, i32 4
%result = atomicrmw uinc_wrap ptr addrspace(1) %gep, i32 42 syncscope("agent") seq_cst, align 4
Expand Down Expand Up @@ -510,8 +502,6 @@ define amdgpu_kernel void @global_atomic_inc_ret_i32_offset_sistem(ptr addrspace
; GFX11-NEXT: buffer_gl1_inv
; GFX11-NEXT: buffer_gl0_inv
; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%gep = getelementptr i32, ptr addrspace(1) %ptr, i32 4
%result = atomicrmw uinc_wrap ptr addrspace(1) %gep, i32 42 seq_cst, align 4
Expand Down Expand Up @@ -797,8 +787,6 @@ define amdgpu_kernel void @global_atomic_inc_ret_i32_offset_addr64(ptr addrspace
; GFX11-NEXT: buffer_gl1_inv
; GFX11-NEXT: buffer_gl0_inv
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%id = call i32 @llvm.amdgcn.workitem.id.x()
%gep.tid = getelementptr i32, ptr addrspace(1) %ptr, i32 %id
Expand Down Expand Up @@ -967,8 +955,6 @@ define amdgpu_kernel void @atomic_inc_shl_base_lds_0_i32(ptr addrspace(1) %out,
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: global_store_b32 v2, v0, s[2:3]
; GFX11-NEXT: global_store_b32 v2, v1, s[0:1]
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #2
%idx.0 = add nsw i32 %tid.x, 2
Expand Down Expand Up @@ -1055,8 +1041,6 @@ define amdgpu_kernel void @lds_atomic_inc_ret_i64(ptr addrspace(1) %out, ptr add
; GFX11-NEXT: buffer_gl0_inv
; GFX11-NEXT: v_mov_b32_e32 v2, 0
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%result = atomicrmw uinc_wrap ptr addrspace(3) %ptr, i64 42 syncscope("agent") seq_cst, align 8
store i64 %result, ptr addrspace(1) %out, align 4
Expand Down Expand Up @@ -1139,8 +1123,6 @@ define amdgpu_kernel void @lds_atomic_inc_ret_i64_offset(ptr addrspace(1) %out,
; GFX11-NEXT: buffer_gl0_inv
; GFX11-NEXT: v_mov_b32_e32 v2, 0
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%gep = getelementptr i64, ptr addrspace(3) %ptr, i32 4
%result = atomicrmw uinc_wrap ptr addrspace(3) %gep, i64 42 syncscope("agent") seq_cst, align 8
Expand Down Expand Up @@ -1344,8 +1326,6 @@ define amdgpu_kernel void @global_atomic_inc_ret_i64(ptr addrspace(1) %out, ptr
; GFX11-NEXT: buffer_gl1_inv
; GFX11-NEXT: buffer_gl0_inv
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%result = atomicrmw uinc_wrap ptr addrspace(1) %ptr, i64 42 syncscope("agent") seq_cst, align 8
store i64 %result, ptr addrspace(1) %out, align 4
Expand Down Expand Up @@ -1427,8 +1407,6 @@ define amdgpu_kernel void @global_atomic_inc_ret_i64_offset(ptr addrspace(1) %ou
; GFX11-NEXT: buffer_gl1_inv
; GFX11-NEXT: buffer_gl0_inv
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%gep = getelementptr i64, ptr addrspace(1) %ptr, i32 4
%result = atomicrmw uinc_wrap ptr addrspace(1) %gep, i64 42 syncscope("agent") seq_cst, align 8
Expand Down Expand Up @@ -1511,8 +1489,6 @@ define amdgpu_kernel void @global_atomic_inc_ret_i64_offset_system(ptr addrspace
; GFX11-NEXT: buffer_gl1_inv
; GFX11-NEXT: buffer_gl0_inv
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%gep = getelementptr i64, ptr addrspace(1) %ptr, i32 4
%result = atomicrmw uinc_wrap ptr addrspace(1) %gep, i64 42 seq_cst, align 8
Expand Down Expand Up @@ -1818,8 +1794,6 @@ define amdgpu_kernel void @global_atomic_inc_ret_i64_offset_addr64(ptr addrspace
; GFX11-NEXT: buffer_gl1_inv
; GFX11-NEXT: buffer_gl0_inv
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%id = call i32 @llvm.amdgcn.workitem.id.x()
%gep.tid = getelementptr i64, ptr addrspace(1) %ptr, i32 %id
Expand Down Expand Up @@ -2680,8 +2654,6 @@ define amdgpu_kernel void @atomic_inc_shl_base_lds_0_i64(ptr addrspace(1) %out,
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: global_store_b32 v3, v2, s[2:3]
; GFX11-NEXT: global_store_b64 v3, v[0:1], s[0:1]
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #2
%idx.0 = add nsw i32 %tid.x, 2
Expand Down Expand Up @@ -3541,8 +3513,6 @@ define amdgpu_kernel void @nocse_lds_atomic_inc_ret_i32(ptr addrspace(1) %out0,
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: global_store_b32 v1, v2, s[0:1]
; GFX11-NEXT: global_store_b32 v1, v0, s[2:3]
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%result0 = atomicrmw uinc_wrap ptr addrspace(3) %ptr, i32 42 syncscope("agent") seq_cst, align 4
%result1 = atomicrmw uinc_wrap ptr addrspace(3) %ptr, i32 42 syncscope("agent") seq_cst, align 4
Expand Down
Loading
Loading