@@ -50,8 +50,8 @@ if.end:
5050 ret void
5151}
5252
53- define void @uniform_br_unprofitable (i32 noundef inreg %value , ptr addrspace (8 ) nocapture writeonly inreg %res , i32 noundef inreg %v_offset , i32 noundef inreg %0 , i32 noundef inreg %flag ) {
54- ; GFX9-LABEL: uniform_br_unprofitable :
53+ define void @uniform_br_same_weight (i32 noundef inreg %value , ptr addrspace (8 ) nocapture writeonly inreg %res , i32 noundef inreg %v_offset , i32 noundef inreg %0 , i32 noundef inreg %flag ) {
54+ ; GFX9-LABEL: uniform_br_same_weight :
5555; GFX9: ; %bb.0: ; %entry
5656; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5757; GFX9-NEXT: s_cmp_lt_i32 s11, 1
@@ -68,7 +68,7 @@ define void @uniform_br_unprofitable(i32 noundef inreg %value, ptr addrspace(8)
6868; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6969; GFX9-NEXT: s_setpc_b64 s[30:31]
7070;
71- ; GFX10-LABEL: uniform_br_unprofitable :
71+ ; GFX10-LABEL: uniform_br_same_weight :
7272; GFX10: ; %bb.0: ; %entry
7373; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7474; GFX10-NEXT: s_cmp_lt_i32 s11, 1
@@ -97,8 +97,8 @@ if.end:
9797 ret void
9898}
9999
100- define void @uniform_br_profitable (i32 noundef inreg %value , ptr addrspace (8 ) nocapture writeonly inreg %res , i32 noundef inreg %v_offset , i32 noundef inreg %0 , i32 noundef inreg %flag ) {
101- ; GFX9-LABEL: uniform_br_profitable :
100+ define void @uniform_br_then_likely (i32 noundef inreg %value , ptr addrspace (8 ) nocapture writeonly inreg %res , i32 noundef inreg %v_offset , i32 noundef inreg %0 , i32 noundef inreg %flag ) {
101+ ; GFX9-LABEL: uniform_br_then_likely :
102102; GFX9: ; %bb.0: ; %entry
103103; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
104104; GFX9-NEXT: s_cmp_lt_i32 s11, 1
@@ -115,7 +115,7 @@ define void @uniform_br_profitable(i32 noundef inreg %value, ptr addrspace(8) no
115115; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
116116; GFX9-NEXT: s_setpc_b64 s[30:31]
117117;
118- ; GFX10-LABEL: uniform_br_profitable :
118+ ; GFX10-LABEL: uniform_br_then_likely :
119119; GFX10: ; %bb.0: ; %entry
120120; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
121121; GFX10-NEXT: s_cmp_lt_i32 s11, 1
@@ -215,8 +215,8 @@ if.end:
215215 ret void
216216}
217217
218- define void @divergent_br_unprofitable (i32 noundef inreg %value , ptr addrspace (8 ) nocapture writeonly inreg %res , i32 noundef inreg %v_offset , i32 noundef inreg %0 , i32 noundef %flag ) {
219- ; GFX9-LABEL: divergent_br_unprofitable :
218+ define void @divergent_br_same_weight (i32 noundef inreg %value , ptr addrspace (8 ) nocapture writeonly inreg %res , i32 noundef inreg %v_offset , i32 noundef inreg %0 , i32 noundef %flag ) {
219+ ; GFX9-LABEL: divergent_br_same_weight :
220220; GFX9: ; %bb.0: ; %entry
221221; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
222222; GFX9-NEXT: s_mov_b32 s14, s7
@@ -235,7 +235,7 @@ define void @divergent_br_unprofitable(i32 noundef inreg %value, ptr addrspace(8
235235; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
236236; GFX9-NEXT: s_setpc_b64 s[30:31]
237237;
238- ; GFX1010-LABEL: divergent_br_unprofitable :
238+ ; GFX1010-LABEL: divergent_br_same_weight :
239239; GFX1010: ; %bb.0: ; %entry
240240; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
241241; GFX1010-NEXT: v_cmp_lt_i32_e32 vcc_lo, 0, v0
@@ -255,7 +255,7 @@ define void @divergent_br_unprofitable(i32 noundef inreg %value, ptr addrspace(8
255255; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
256256; GFX1010-NEXT: s_setpc_b64 s[30:31]
257257;
258- ; GFX1030-LABEL: divergent_br_unprofitable :
258+ ; GFX1030-LABEL: divergent_br_same_weight :
259259; GFX1030: ; %bb.0: ; %entry
260260; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
261261; GFX1030-NEXT: s_mov_b32 s12, s5
@@ -286,61 +286,58 @@ if.end:
286286 ret void
287287}
288288
289- define void @divergent_br_profitable (i32 noundef inreg %value , ptr addrspace (8 ) nocapture writeonly inreg %res , i32 noundef inreg %v_offset , i32 noundef inreg %0 , i32 noundef %flag ) {
290- ; GFX9-LABEL: divergent_br_profitable :
289+ define void @divergent_br_then_likely (i32 noundef inreg %value , ptr addrspace (8 ) nocapture writeonly inreg %res , i32 noundef inreg %v_offset , i32 noundef inreg %0 , i32 noundef %flag ) {
290+ ; GFX9-LABEL: divergent_br_then_likely :
291291; GFX9: ; %bb.0: ; %entry
292292; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
293293; GFX9-NEXT: s_mov_b32 s14, s7
294294; GFX9-NEXT: s_mov_b32 s13, s6
295295; GFX9-NEXT: v_cmp_lt_i32_e32 vcc, 0, v0
296296; GFX9-NEXT: s_and_saveexec_b64 s[6:7], vcc
297- ; GFX9-NEXT: s_cbranch_execz .LBB5_2
298297; GFX9-NEXT: ; %bb.1: ; %if.then
299298; GFX9-NEXT: s_mov_b32 s15, s8
300299; GFX9-NEXT: s_mov_b32 s12, s5
301300; GFX9-NEXT: v_mov_b32_e32 v0, s4
302301; GFX9-NEXT: v_mov_b32_e32 v1, s9
303302; GFX9-NEXT: buffer_store_dword v0, v1, s[12:15], 0 offen
304- ; GFX9-NEXT: .LBB5_2 : ; %if.end
303+ ; GFX9-NEXT: ; %bb.2 : ; %if.end
305304; GFX9-NEXT: s_or_b64 exec, exec, s[6:7]
306305; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
307306; GFX9-NEXT: s_setpc_b64 s[30:31]
308307;
309- ; GFX1010-LABEL: divergent_br_profitable :
308+ ; GFX1010-LABEL: divergent_br_then_likely :
310309; GFX1010: ; %bb.0: ; %entry
311310; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
312311; GFX1010-NEXT: v_cmp_lt_i32_e32 vcc_lo, 0, v0
313312; GFX1010-NEXT: s_mov_b32 s12, s5
314313; GFX1010-NEXT: s_and_saveexec_b32 s5, vcc_lo
315- ; GFX1010-NEXT: s_cbranch_execz .LBB5_2
316314; GFX1010-NEXT: ; %bb.1: ; %if.then
317315; GFX1010-NEXT: v_mov_b32_e32 v0, s4
318316; GFX1010-NEXT: v_mov_b32_e32 v1, s9
319317; GFX1010-NEXT: s_mov_b32 s15, s8
320318; GFX1010-NEXT: s_mov_b32 s14, s7
321319; GFX1010-NEXT: s_mov_b32 s13, s6
322320; GFX1010-NEXT: buffer_store_dword v0, v1, s[12:15], 0 offen
323- ; GFX1010-NEXT: .LBB5_2 : ; %if.end
321+ ; GFX1010-NEXT: ; %bb.2 : ; %if.end
324322; GFX1010-NEXT: s_waitcnt_depctr 0xffe3
325323; GFX1010-NEXT: s_or_b32 exec_lo, exec_lo, s5
326324; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
327325; GFX1010-NEXT: s_setpc_b64 s[30:31]
328326;
329- ; GFX1030-LABEL: divergent_br_profitable :
327+ ; GFX1030-LABEL: divergent_br_then_likely :
330328; GFX1030: ; %bb.0: ; %entry
331329; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
332330; GFX1030-NEXT: s_mov_b32 s12, s5
333331; GFX1030-NEXT: s_mov_b32 s5, exec_lo
334332; GFX1030-NEXT: v_cmpx_lt_i32_e32 0, v0
335- ; GFX1030-NEXT: s_cbranch_execz .LBB5_2
336333; GFX1030-NEXT: ; %bb.1: ; %if.then
337334; GFX1030-NEXT: v_mov_b32_e32 v0, s4
338335; GFX1030-NEXT: v_mov_b32_e32 v1, s9
339336; GFX1030-NEXT: s_mov_b32 s15, s8
340337; GFX1030-NEXT: s_mov_b32 s14, s7
341338; GFX1030-NEXT: s_mov_b32 s13, s6
342339; GFX1030-NEXT: buffer_store_dword v0, v1, s[12:15], 0 offen
343- ; GFX1030-NEXT: .LBB5_2 : ; %if.end
340+ ; GFX1030-NEXT: ; %bb.2 : ; %if.end
344341; GFX1030-NEXT: s_or_b32 exec_lo, exec_lo, s5
345342; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
346343; GFX1030-NEXT: s_setpc_b64 s[30:31]
@@ -359,7 +356,6 @@ if.end:
359356
360357declare void @llvm.amdgcn.raw.ptr.buffer.store.i32 (i32 , ptr addrspace (8 ) nocapture writeonly , i32 , i32 , i32 immarg)
361358declare void @llvm.amdgcn.s.waitcnt (i32 )
362- declare i32 @llvm.amdgcn.workitem.id.x ()
363359
364360!0 = !{!"branch_weights" , i32 1000 , i32 1000 }
365361!1 = !{!"branch_weights" , i32 2000 , i32 1 }
0 commit comments