@@ -84,8 +84,6 @@ define amdgpu_kernel void @lds_atomic_inc_ret_i32(ptr addrspace(1) %out, ptr add
84
84
; GFX11-NEXT: buffer_gl0_inv
85
85
; GFX11-NEXT: v_mov_b32_e32 v1, 0
86
86
; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
87
- ; GFX11-NEXT: s_nop 0
88
- ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
89
87
; GFX11-NEXT: s_endpgm
90
88
%result = atomicrmw uinc_wrap ptr addrspace (3 ) %ptr , i32 42 syncscope("agent" ) seq_cst , align 4
91
89
store i32 %result , ptr addrspace (1 ) %out , align 4
@@ -163,8 +161,6 @@ define amdgpu_kernel void @lds_atomic_inc_ret_i32_offset(ptr addrspace(1) %out,
163
161
; GFX11-NEXT: buffer_gl0_inv
164
162
; GFX11-NEXT: v_mov_b32_e32 v1, 0
165
163
; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
166
- ; GFX11-NEXT: s_nop 0
167
- ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
168
164
; GFX11-NEXT: s_endpgm
169
165
%gep = getelementptr i32 , ptr addrspace (3 ) %ptr , i32 4
170
166
%result = atomicrmw uinc_wrap ptr addrspace (3 ) %gep , i32 42 syncscope("agent" ) seq_cst , align 4
@@ -353,8 +349,6 @@ define amdgpu_kernel void @global_atomic_inc_ret_i32(ptr addrspace(1) %out, ptr
353
349
; GFX11-NEXT: buffer_gl1_inv
354
350
; GFX11-NEXT: buffer_gl0_inv
355
351
; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
356
- ; GFX11-NEXT: s_nop 0
357
- ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
358
352
; GFX11-NEXT: s_endpgm
359
353
%result = atomicrmw uinc_wrap ptr addrspace (1 ) %ptr , i32 42 syncscope("agent" ) seq_cst , align 4
360
354
store i32 %result , ptr addrspace (1 ) %out , align 4
@@ -431,8 +425,6 @@ define amdgpu_kernel void @global_atomic_inc_ret_i32_offset(ptr addrspace(1) %ou
431
425
; GFX11-NEXT: buffer_gl1_inv
432
426
; GFX11-NEXT: buffer_gl0_inv
433
427
; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
434
- ; GFX11-NEXT: s_nop 0
435
- ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
436
428
; GFX11-NEXT: s_endpgm
437
429
%gep = getelementptr i32 , ptr addrspace (1 ) %ptr , i32 4
438
430
%result = atomicrmw uinc_wrap ptr addrspace (1 ) %gep , i32 42 syncscope("agent" ) seq_cst , align 4
@@ -510,8 +502,6 @@ define amdgpu_kernel void @global_atomic_inc_ret_i32_offset_sistem(ptr addrspace
510
502
; GFX11-NEXT: buffer_gl1_inv
511
503
; GFX11-NEXT: buffer_gl0_inv
512
504
; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
513
- ; GFX11-NEXT: s_nop 0
514
- ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
515
505
; GFX11-NEXT: s_endpgm
516
506
%gep = getelementptr i32 , ptr addrspace (1 ) %ptr , i32 4
517
507
%result = atomicrmw uinc_wrap ptr addrspace (1 ) %gep , i32 42 seq_cst , align 4
@@ -797,8 +787,6 @@ define amdgpu_kernel void @global_atomic_inc_ret_i32_offset_addr64(ptr addrspace
797
787
; GFX11-NEXT: buffer_gl1_inv
798
788
; GFX11-NEXT: buffer_gl0_inv
799
789
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
800
- ; GFX11-NEXT: s_nop 0
801
- ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
802
790
; GFX11-NEXT: s_endpgm
803
791
%id = call i32 @llvm.amdgcn.workitem.id.x ()
804
792
%gep.tid = getelementptr i32 , ptr addrspace (1 ) %ptr , i32 %id
@@ -967,8 +955,6 @@ define amdgpu_kernel void @atomic_inc_shl_base_lds_0_i32(ptr addrspace(1) %out,
967
955
; GFX11-NEXT: s_clause 0x1
968
956
; GFX11-NEXT: global_store_b32 v2, v0, s[2:3]
969
957
; GFX11-NEXT: global_store_b32 v2, v1, s[0:1]
970
- ; GFX11-NEXT: s_nop 0
971
- ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
972
958
; GFX11-NEXT: s_endpgm
973
959
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x () #2
974
960
%idx.0 = add nsw i32 %tid.x , 2
@@ -1055,8 +1041,6 @@ define amdgpu_kernel void @lds_atomic_inc_ret_i64(ptr addrspace(1) %out, ptr add
1055
1041
; GFX11-NEXT: buffer_gl0_inv
1056
1042
; GFX11-NEXT: v_mov_b32_e32 v2, 0
1057
1043
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
1058
- ; GFX11-NEXT: s_nop 0
1059
- ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1060
1044
; GFX11-NEXT: s_endpgm
1061
1045
%result = atomicrmw uinc_wrap ptr addrspace (3 ) %ptr , i64 42 syncscope("agent" ) seq_cst , align 8
1062
1046
store i64 %result , ptr addrspace (1 ) %out , align 4
@@ -1139,8 +1123,6 @@ define amdgpu_kernel void @lds_atomic_inc_ret_i64_offset(ptr addrspace(1) %out,
1139
1123
; GFX11-NEXT: buffer_gl0_inv
1140
1124
; GFX11-NEXT: v_mov_b32_e32 v2, 0
1141
1125
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
1142
- ; GFX11-NEXT: s_nop 0
1143
- ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1144
1126
; GFX11-NEXT: s_endpgm
1145
1127
%gep = getelementptr i64 , ptr addrspace (3 ) %ptr , i32 4
1146
1128
%result = atomicrmw uinc_wrap ptr addrspace (3 ) %gep , i64 42 syncscope("agent" ) seq_cst , align 8
@@ -1344,8 +1326,6 @@ define amdgpu_kernel void @global_atomic_inc_ret_i64(ptr addrspace(1) %out, ptr
1344
1326
; GFX11-NEXT: buffer_gl1_inv
1345
1327
; GFX11-NEXT: buffer_gl0_inv
1346
1328
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
1347
- ; GFX11-NEXT: s_nop 0
1348
- ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1349
1329
; GFX11-NEXT: s_endpgm
1350
1330
%result = atomicrmw uinc_wrap ptr addrspace (1 ) %ptr , i64 42 syncscope("agent" ) seq_cst , align 8
1351
1331
store i64 %result , ptr addrspace (1 ) %out , align 4
@@ -1427,8 +1407,6 @@ define amdgpu_kernel void @global_atomic_inc_ret_i64_offset(ptr addrspace(1) %ou
1427
1407
; GFX11-NEXT: buffer_gl1_inv
1428
1408
; GFX11-NEXT: buffer_gl0_inv
1429
1409
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
1430
- ; GFX11-NEXT: s_nop 0
1431
- ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1432
1410
; GFX11-NEXT: s_endpgm
1433
1411
%gep = getelementptr i64 , ptr addrspace (1 ) %ptr , i32 4
1434
1412
%result = atomicrmw uinc_wrap ptr addrspace (1 ) %gep , i64 42 syncscope("agent" ) seq_cst , align 8
@@ -1511,8 +1489,6 @@ define amdgpu_kernel void @global_atomic_inc_ret_i64_offset_system(ptr addrspace
1511
1489
; GFX11-NEXT: buffer_gl1_inv
1512
1490
; GFX11-NEXT: buffer_gl0_inv
1513
1491
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
1514
- ; GFX11-NEXT: s_nop 0
1515
- ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1516
1492
; GFX11-NEXT: s_endpgm
1517
1493
%gep = getelementptr i64 , ptr addrspace (1 ) %ptr , i32 4
1518
1494
%result = atomicrmw uinc_wrap ptr addrspace (1 ) %gep , i64 42 seq_cst , align 8
@@ -1818,8 +1794,6 @@ define amdgpu_kernel void @global_atomic_inc_ret_i64_offset_addr64(ptr addrspace
1818
1794
; GFX11-NEXT: buffer_gl1_inv
1819
1795
; GFX11-NEXT: buffer_gl0_inv
1820
1796
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
1821
- ; GFX11-NEXT: s_nop 0
1822
- ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1823
1797
; GFX11-NEXT: s_endpgm
1824
1798
%id = call i32 @llvm.amdgcn.workitem.id.x ()
1825
1799
%gep.tid = getelementptr i64 , ptr addrspace (1 ) %ptr , i32 %id
@@ -2680,8 +2654,6 @@ define amdgpu_kernel void @atomic_inc_shl_base_lds_0_i64(ptr addrspace(1) %out,
2680
2654
; GFX11-NEXT: s_clause 0x1
2681
2655
; GFX11-NEXT: global_store_b32 v3, v2, s[2:3]
2682
2656
; GFX11-NEXT: global_store_b64 v3, v[0:1], s[0:1]
2683
- ; GFX11-NEXT: s_nop 0
2684
- ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
2685
2657
; GFX11-NEXT: s_endpgm
2686
2658
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x () #2
2687
2659
%idx.0 = add nsw i32 %tid.x , 2
@@ -3541,8 +3513,6 @@ define amdgpu_kernel void @nocse_lds_atomic_inc_ret_i32(ptr addrspace(1) %out0,
3541
3513
; GFX11-NEXT: s_clause 0x1
3542
3514
; GFX11-NEXT: global_store_b32 v1, v2, s[0:1]
3543
3515
; GFX11-NEXT: global_store_b32 v1, v0, s[2:3]
3544
- ; GFX11-NEXT: s_nop 0
3545
- ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
3546
3516
; GFX11-NEXT: s_endpgm
3547
3517
%result0 = atomicrmw uinc_wrap ptr addrspace (3 ) %ptr , i32 42 syncscope("agent" ) seq_cst , align 4
3548
3518
%result1 = atomicrmw uinc_wrap ptr addrspace (3 ) %ptr , i32 42 syncscope("agent" ) seq_cst , align 4
0 commit comments