@@ -14,11 +14,10 @@ define void @func1() {
1414; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0
1515; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0
1616; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
17- ; GFX12-SDAG-NEXT: s_mov_b32 m0, 0x70003
18- ; GFX12-SDAG-NEXT: s_wait_storecnt 0x0
19- ; GFX12-SDAG-NEXT: s_barrier_signal m0
2017; GFX12-SDAG-NEXT: s_mov_b32 m0, 3
2118; GFX12-SDAG-NEXT: s_barrier_join m0
19+ ; GFX12-SDAG-NEXT: s_mov_b32 m0, 0x70003
20+ ; GFX12-SDAG-NEXT: s_barrier_signal m0
2221; GFX12-SDAG-NEXT: s_barrier_wait 1
2322; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31]
2423;
@@ -30,13 +29,12 @@ define void @func1() {
3029; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
3130; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
3231; GFX12-GISEL-NEXT: s_mov_b32 m0, 0x70003
33- ; GFX12-GISEL-NEXT: s_wait_storecnt 0x0
34- ; GFX12-GISEL-NEXT: s_barrier_signal m0
3532; GFX12-GISEL-NEXT: s_barrier_join 3
33+ ; GFX12-GISEL-NEXT: s_barrier_signal m0
3634; GFX12-GISEL-NEXT: s_barrier_wait 1
3735; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
38- call void @llvm.amdgcn.s.barrier.signal.var (ptr addrspace (3 ) @bar3 , i32 7 )
3936 call void @llvm.amdgcn.s.barrier.join (ptr addrspace (3 ) @bar3 )
37+ call void @llvm.amdgcn.s.barrier.signal.var (ptr addrspace (3 ) @bar3 , i32 7 )
4038 call void @llvm.amdgcn.s.barrier.wait (i16 1 )
4139 ret void
4240}
@@ -49,11 +47,10 @@ define void @func2() {
4947; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0
5048; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0
5149; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
52- ; GFX12-SDAG-NEXT: s_mov_b32 m0, 0x70001
53- ; GFX12-SDAG-NEXT: s_wait_storecnt 0x0
54- ; GFX12-SDAG-NEXT: s_barrier_signal m0
5550; GFX12-SDAG-NEXT: s_mov_b32 m0, 1
5651; GFX12-SDAG-NEXT: s_barrier_join m0
52+ ; GFX12-SDAG-NEXT: s_mov_b32 m0, 0x70001
53+ ; GFX12-SDAG-NEXT: s_barrier_signal m0
5754; GFX12-SDAG-NEXT: s_barrier_wait 1
5855; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31]
5956;
@@ -65,13 +62,12 @@ define void @func2() {
6562; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
6663; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
6764; GFX12-GISEL-NEXT: s_mov_b32 m0, 0x70001
68- ; GFX12-GISEL-NEXT: s_wait_storecnt 0x0
69- ; GFX12-GISEL-NEXT: s_barrier_signal m0
7065; GFX12-GISEL-NEXT: s_barrier_join 1
66+ ; GFX12-GISEL-NEXT: s_barrier_signal m0
7167; GFX12-GISEL-NEXT: s_barrier_wait 1
7268; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
73- call void @llvm.amdgcn.s.barrier.signal.var (ptr addrspace (3 ) @bar2 , i32 7 )
7469 call void @llvm.amdgcn.s.barrier.join (ptr addrspace (3 ) @bar2 )
70+ call void @llvm.amdgcn.s.barrier.signal.var (ptr addrspace (3 ) @bar2 , i32 7 )
7571 call void @llvm.amdgcn.s.barrier.wait (i16 1 )
7672 ret void
7773}
@@ -102,9 +98,9 @@ define amdgpu_kernel void @kernel1(ptr addrspace(1) %out, ptr addrspace(3) %in)
10298; GFX12-SDAG-NEXT: s_barrier_signal m0
10399; GFX12-SDAG-NEXT: s_mov_b32 m0, s2
104100; GFX12-SDAG-NEXT: s_barrier_signal -1
105- ; GFX12-SDAG-NEXT: s_barrier_signal_isfirst -1
106101; GFX12-SDAG-NEXT: s_barrier_join m0
107102; GFX12-SDAG-NEXT: s_mov_b32 m0, 2
103+ ; GFX12-SDAG-NEXT: s_barrier_signal_isfirst -1
108104; GFX12-SDAG-NEXT: s_barrier_wait 1
109105; GFX12-SDAG-NEXT: s_barrier_leave
110106; GFX12-SDAG-NEXT: s_get_barrier_state s3, m0
@@ -155,11 +151,11 @@ define amdgpu_kernel void @kernel1(ptr addrspace(1) %out, ptr addrspace(3) %in)
155151; GFX12-GISEL-NEXT: s_barrier_signal m0
156152; GFX12-GISEL-NEXT: s_mov_b32 m0, s1
157153; GFX12-GISEL-NEXT: s_barrier_signal m0
154+ ; GFX12-GISEL-NEXT: s_mov_b32 m0, s0
158155; GFX12-GISEL-NEXT: s_barrier_signal -1
156+ ; GFX12-GISEL-NEXT: s_barrier_join m0
159157; GFX12-GISEL-NEXT: s_barrier_signal_isfirst -1
160- ; GFX12-GISEL-NEXT: s_mov_b32 m0, s0
161158; GFX12-GISEL-NEXT: s_add_co_u32 s8, s12, 48
162- ; GFX12-GISEL-NEXT: s_barrier_join m0
163159; GFX12-GISEL-NEXT: s_barrier_wait 1
164160; GFX12-GISEL-NEXT: s_barrier_leave
165161; GFX12-GISEL-NEXT: s_get_barrier_state s0, 2
@@ -194,8 +190,8 @@ define amdgpu_kernel void @kernel1(ptr addrspace(1) %out, ptr addrspace(3) %in)
194190 call void @llvm.amdgcn.s.barrier.signal.var (ptr addrspace (3 ) @bar , i32 12 )
195191 call void @llvm.amdgcn.s.barrier.signal.var (ptr addrspace (3 ) %in , i32 9 )
196192 call void @llvm.amdgcn.s.barrier.signal (i32 -1 )
197- %isfirst = call i1 @llvm.amdgcn.s.barrier.signal.isfirst (i32 -1 )
198193 call void @llvm.amdgcn.s.barrier.join (ptr addrspace (3 ) %in )
194+ %isfirst = call i1 @llvm.amdgcn.s.barrier.signal.isfirst (i32 -1 )
199195 call void @llvm.amdgcn.s.barrier.wait (i16 1 )
200196 call void @llvm.amdgcn.s.barrier.leave (i16 1 )
201197 %state = call i32 @llvm.amdgcn.s.get.named.barrier.state (ptr addrspace (3 ) @bar )
@@ -219,14 +215,14 @@ define amdgpu_kernel void @kernel2(ptr addrspace(1) %out, ptr addrspace(3) %in)
219215; GFX12-SDAG-NEXT: s_load_b64 s[12:13], s[6:7], 0x0
220216; GFX12-SDAG-NEXT: s_mov_b32 m0, 0x70002
221217; GFX12-SDAG-NEXT: s_add_nc_u64 s[8:9], s[4:5], 48
222- ; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
223218; GFX12-SDAG-NEXT: s_barrier_signal m0
224219; GFX12-SDAG-NEXT: s_mov_b32 m0, 2
225220; GFX12-SDAG-NEXT: s_mov_b64 s[4:5], s[0:1]
226221; GFX12-SDAG-NEXT: s_mov_b64 s[6:7], s[2:3]
227222; GFX12-SDAG-NEXT: s_mov_b32 s32, 0
228223; GFX12-SDAG-NEXT: s_barrier_join m0
229224; GFX12-SDAG-NEXT: s_barrier_wait 1
225+ ; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
230226; GFX12-SDAG-NEXT: s_swappc_b64 s[30:31], s[12:13]
231227; GFX12-SDAG-NEXT: s_endpgm
232228;
@@ -245,10 +241,10 @@ define amdgpu_kernel void @kernel2(ptr addrspace(1) %out, ptr addrspace(3) %in)
245241; GFX12-GISEL-NEXT: s_mov_b64 s[4:5], s[0:1]
246242; GFX12-GISEL-NEXT: s_mov_b64 s[6:7], s[2:3]
247243; GFX12-GISEL-NEXT: s_mov_b32 s32, 0
248- ; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
249244; GFX12-GISEL-NEXT: s_barrier_signal m0
250245; GFX12-GISEL-NEXT: s_barrier_join 2
251246; GFX12-GISEL-NEXT: s_barrier_wait 1
247+ ; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
252248; GFX12-GISEL-NEXT: s_swappc_b64 s[30:31], s[12:13]
253249; GFX12-GISEL-NEXT: s_endpgm
254250 call void @llvm.amdgcn.s.barrier.signal.var (ptr addrspace (3 ) @bar , i32 7 )
0 commit comments