@@ -1330,13 +1330,7 @@ define amdgpu_kernel void @test_div_fmas_f32_logical_cond_to_vcc(ptr addrspace(1
13301330define amdgpu_kernel void @test_div_fmas_f32_i1_phi_vcc (ptr addrspace (1 ) %out , [8 x i32 ], ptr addrspace (1 ) %in , [8 x i32 ], ptr addrspace (1 ) %dummy ) {
13311331; GFX7-LABEL: test_div_fmas_f32_i1_phi_vcc:
13321332; GFX7: ; %bb.0: ; %entry
1333- ; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xa
1334- ; GFX7-NEXT: v_lshlrev_b32_e32 v1, 2, v0
1335- ; GFX7-NEXT: v_mov_b32_e32 v2, 0
13361333; GFX7-NEXT: s_mov_b32 s2, 0
1337- ; GFX7-NEXT: s_mov_b32 s3, 0xf000
1338- ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
1339- ; GFX7-NEXT: buffer_load_dwordx3 v[1:3], v[1:2], s[0:3], 0 addr64
13401334; GFX7-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v0
13411335; GFX7-NEXT: s_mov_b64 vcc, 0
13421336; GFX7-NEXT: s_and_saveexec_b64 s[6:7], s[0:1]
@@ -1355,24 +1349,22 @@ define amdgpu_kernel void @test_div_fmas_f32_i1_phi_vcc(ptr addrspace(1) %out, [
13551349; GFX7-NEXT: s_or_b64 vcc, s[8:9], s[0:1]
13561350; GFX7-NEXT: .LBB13_2: ; %exit
13571351; GFX7-NEXT: s_or_b64 exec, exec, s[6:7]
1352+ ; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xa
1353+ ; GFX7-NEXT: v_lshlrev_b32_e32 v0, 2, v0
1354+ ; GFX7-NEXT: v_mov_b32_e32 v1, 0
1355+ ; GFX7-NEXT: s_mov_b32 s3, 0xf000
1356+ ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
1357+ ; GFX7-NEXT: buffer_load_dwordx3 v[0:2], v[0:1], s[0:3], 0 addr64
13581358; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
1359- ; GFX7-NEXT: s_waitcnt vmcnt(0)
1360- ; GFX7-NEXT: v_div_fmas_f32 v0, v1, v2, v3
13611359; GFX7-NEXT: s_mov_b32 s2, -1
1360+ ; GFX7-NEXT: s_waitcnt vmcnt(0)
1361+ ; GFX7-NEXT: v_div_fmas_f32 v0, v0, v1, v2
13621362; GFX7-NEXT: s_waitcnt lgkmcnt(0)
13631363; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:8
13641364; GFX7-NEXT: s_endpgm
13651365;
13661366; GFX8-LABEL: test_div_fmas_f32_i1_phi_vcc:
13671367; GFX8: ; %bb.0: ; %entry
1368- ; GFX8-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x28
1369- ; GFX8-NEXT: v_lshlrev_b32_e32 v3, 2, v0
1370- ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
1371- ; GFX8-NEXT: v_mov_b32_e32 v2, s1
1372- ; GFX8-NEXT: v_mov_b32_e32 v1, s0
1373- ; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v3
1374- ; GFX8-NEXT: v_addc_u32_e32 v2, vcc, 0, v2, vcc
1375- ; GFX8-NEXT: flat_load_dwordx3 v[1:3], v[1:2]
13761368; GFX8-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v0
13771369; GFX8-NEXT: s_mov_b64 vcc, 0
13781370; GFX8-NEXT: s_and_saveexec_b64 s[2:3], s[0:1]
@@ -1391,25 +1383,29 @@ define amdgpu_kernel void @test_div_fmas_f32_i1_phi_vcc(ptr addrspace(1) %out, [
13911383; GFX8-NEXT: s_or_b64 vcc, s[6:7], s[0:1]
13921384; GFX8-NEXT: .LBB13_2: ; %exit
13931385; GFX8-NEXT: s_or_b64 exec, exec, s[2:3]
1394- ; GFX8-NEXT: s_waitcnt vmcnt(0)
1395- ; GFX8-NEXT: v_div_fmas_f32 v2, v1, v2, v3
1386+ ; GFX8-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x28
1387+ ; GFX8-NEXT: v_lshlrev_b32_e32 v2, 2, v0
1388+ ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
1389+ ; GFX8-NEXT: v_mov_b32_e32 v0, s0
1390+ ; GFX8-NEXT: v_mov_b32_e32 v1, s1
1391+ ; GFX8-NEXT: v_add_u32_e64 v0, s[0:1], v0, v2
1392+ ; GFX8-NEXT: v_addc_u32_e64 v1, s[0:1], 0, v1, s[0:1]
1393+ ; GFX8-NEXT: flat_load_dwordx3 v[0:2], v[0:1]
13961394; GFX8-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
13971395; GFX8-NEXT: s_waitcnt lgkmcnt(0)
13981396; GFX8-NEXT: s_add_u32 s0, s0, 8
13991397; GFX8-NEXT: s_addc_u32 s1, s1, 0
1398+ ; GFX8-NEXT: s_waitcnt vmcnt(0)
1399+ ; GFX8-NEXT: v_div_fmas_f32 v2, v0, v1, v2
14001400; GFX8-NEXT: v_mov_b32_e32 v0, s0
14011401; GFX8-NEXT: v_mov_b32_e32 v1, s1
14021402; GFX8-NEXT: flat_store_dword v[0:1], v2
14031403; GFX8-NEXT: s_endpgm
14041404;
14051405; GFX10_W32-LABEL: test_div_fmas_f32_i1_phi_vcc:
14061406; GFX10_W32: ; %bb.0: ; %entry
1407- ; GFX10_W32-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x28
1408- ; GFX10_W32-NEXT: v_lshlrev_b32_e32 v1, 2, v0
1409- ; GFX10_W32-NEXT: s_mov_b32 vcc_lo, 0
1410- ; GFX10_W32-NEXT: s_waitcnt lgkmcnt(0)
1411- ; GFX10_W32-NEXT: global_load_dwordx3 v[1:3], v1, s[0:1]
14121407; GFX10_W32-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
1408+ ; GFX10_W32-NEXT: s_mov_b32 vcc_lo, 0
14131409; GFX10_W32-NEXT: s_and_saveexec_b32 s1, s0
14141410; GFX10_W32-NEXT: s_cbranch_execz .LBB13_2
14151411; GFX10_W32-NEXT: ; %bb.1: ; %bb
@@ -1426,22 +1422,23 @@ define amdgpu_kernel void @test_div_fmas_f32_i1_phi_vcc(ptr addrspace(1) %out, [
14261422; GFX10_W32-NEXT: s_or_b32 vcc_lo, s2, s0
14271423; GFX10_W32-NEXT: .LBB13_2: ; %exit
14281424; GFX10_W32-NEXT: s_or_b32 exec_lo, exec_lo, s1
1425+ ; GFX10_W32-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x28
1426+ ; GFX10_W32-NEXT: v_lshlrev_b32_e32 v0, 2, v0
1427+ ; GFX10_W32-NEXT: s_waitcnt lgkmcnt(0)
1428+ ; GFX10_W32-NEXT: global_load_dwordx3 v[0:2], v0, s[0:1]
1429+ ; GFX10_W32-NEXT: s_waitcnt_depctr 0xffe3
14291430; GFX10_W32-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
14301431; GFX10_W32-NEXT: s_waitcnt vmcnt(0)
1431- ; GFX10_W32-NEXT: v_div_fmas_f32 v0, v1, v2, v3
1432+ ; GFX10_W32-NEXT: v_div_fmas_f32 v0, v0, v1, v2
14321433; GFX10_W32-NEXT: v_mov_b32_e32 v1, 0
14331434; GFX10_W32-NEXT: s_waitcnt lgkmcnt(0)
14341435; GFX10_W32-NEXT: global_store_dword v1, v0, s[0:1] offset:8
14351436; GFX10_W32-NEXT: s_endpgm
14361437;
14371438; GFX10_W64-LABEL: test_div_fmas_f32_i1_phi_vcc:
14381439; GFX10_W64: ; %bb.0: ; %entry
1439- ; GFX10_W64-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x28
1440- ; GFX10_W64-NEXT: v_lshlrev_b32_e32 v1, 2, v0
1441- ; GFX10_W64-NEXT: s_mov_b64 vcc, 0
1442- ; GFX10_W64-NEXT: s_waitcnt lgkmcnt(0)
1443- ; GFX10_W64-NEXT: global_load_dwordx3 v[1:3], v1, s[0:1]
14441440; GFX10_W64-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v0
1441+ ; GFX10_W64-NEXT: s_mov_b64 vcc, 0
14451442; GFX10_W64-NEXT: s_and_saveexec_b64 s[2:3], s[0:1]
14461443; GFX10_W64-NEXT: s_cbranch_execz .LBB13_2
14471444; GFX10_W64-NEXT: ; %bb.1: ; %bb
@@ -1458,24 +1455,25 @@ define amdgpu_kernel void @test_div_fmas_f32_i1_phi_vcc(ptr addrspace(1) %out, [
14581455; GFX10_W64-NEXT: s_or_b64 vcc, s[6:7], s[0:1]
14591456; GFX10_W64-NEXT: .LBB13_2: ; %exit
14601457; GFX10_W64-NEXT: s_or_b64 exec, exec, s[2:3]
1458+ ; GFX10_W64-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x28
1459+ ; GFX10_W64-NEXT: v_lshlrev_b32_e32 v0, 2, v0
1460+ ; GFX10_W64-NEXT: s_waitcnt lgkmcnt(0)
1461+ ; GFX10_W64-NEXT: global_load_dwordx3 v[0:2], v0, s[0:1]
1462+ ; GFX10_W64-NEXT: s_waitcnt_depctr 0xffe3
14611463; GFX10_W64-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
14621464; GFX10_W64-NEXT: s_waitcnt vmcnt(0)
1463- ; GFX10_W64-NEXT: v_div_fmas_f32 v0, v1, v2, v3
1465+ ; GFX10_W64-NEXT: v_div_fmas_f32 v0, v0, v1, v2
14641466; GFX10_W64-NEXT: v_mov_b32_e32 v1, 0
14651467; GFX10_W64-NEXT: s_waitcnt lgkmcnt(0)
14661468; GFX10_W64-NEXT: global_store_dword v1, v0, s[0:1] offset:8
14671469; GFX10_W64-NEXT: s_endpgm
14681470;
14691471; GFX11_W32-LABEL: test_div_fmas_f32_i1_phi_vcc:
14701472; GFX11_W32: ; %bb.0: ; %entry
1471- ; GFX11_W32-NEXT: s_load_b64 s[0:1], s[4:5], 0x28
1472- ; GFX11_W32-NEXT: v_and_b32_e32 v3, 0x3ff, v0
1473+ ; GFX11_W32-NEXT: v_and_b32_e32 v0, 0x3ff, v0
14731474; GFX11_W32-NEXT: s_mov_b32 vcc_lo, 0
1474- ; GFX11_W32-NEXT: v_lshlrev_b32_e32 v0, 2, v3
1475- ; GFX11_W32-NEXT: s_waitcnt lgkmcnt(0)
1476- ; GFX11_W32-NEXT: global_load_b96 v[0:2], v0, s[0:1]
14771475; GFX11_W32-NEXT: s_mov_b32 s1, exec_lo
1478- ; GFX11_W32-NEXT: v_cmpx_eq_u32_e32 0, v3
1476+ ; GFX11_W32-NEXT: v_cmpx_eq_u32_e32 0, v0
14791477; GFX11_W32-NEXT: s_cbranch_execz .LBB13_2
14801478; GFX11_W32-NEXT: ; %bb.1: ; %bb
14811479; GFX11_W32-NEXT: s_load_b64 s[2:3], s[4:5], 0x50
@@ -1491,6 +1489,10 @@ define amdgpu_kernel void @test_div_fmas_f32_i1_phi_vcc(ptr addrspace(1) %out, [
14911489; GFX11_W32-NEXT: s_or_b32 vcc_lo, s2, s0
14921490; GFX11_W32-NEXT: .LBB13_2: ; %exit
14931491; GFX11_W32-NEXT: s_or_b32 exec_lo, exec_lo, s1
1492+ ; GFX11_W32-NEXT: s_load_b64 s[0:1], s[4:5], 0x28
1493+ ; GFX11_W32-NEXT: v_lshlrev_b32_e32 v0, 2, v0
1494+ ; GFX11_W32-NEXT: s_waitcnt lgkmcnt(0)
1495+ ; GFX11_W32-NEXT: global_load_b96 v[0:2], v0, s[0:1]
14941496; GFX11_W32-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
14951497; GFX11_W32-NEXT: s_waitcnt vmcnt(0)
14961498; GFX11_W32-NEXT: v_div_fmas_f32 v0, v0, v1, v2
@@ -1501,14 +1503,10 @@ define amdgpu_kernel void @test_div_fmas_f32_i1_phi_vcc(ptr addrspace(1) %out, [
15011503;
15021504; GFX11_W64-LABEL: test_div_fmas_f32_i1_phi_vcc:
15031505; GFX11_W64: ; %bb.0: ; %entry
1504- ; GFX11_W64-NEXT: s_load_b64 s[0:1], s[4:5], 0x28
1505- ; GFX11_W64-NEXT: v_and_b32_e32 v3, 0x3ff, v0
1506+ ; GFX11_W64-NEXT: v_and_b32_e32 v0, 0x3ff, v0
15061507; GFX11_W64-NEXT: s_mov_b64 vcc, 0
15071508; GFX11_W64-NEXT: s_mov_b64 s[2:3], exec
1508- ; GFX11_W64-NEXT: v_lshlrev_b32_e32 v0, 2, v3
1509- ; GFX11_W64-NEXT: s_waitcnt lgkmcnt(0)
1510- ; GFX11_W64-NEXT: global_load_b96 v[0:2], v0, s[0:1]
1511- ; GFX11_W64-NEXT: v_cmpx_eq_u32_e32 0, v3
1509+ ; GFX11_W64-NEXT: v_cmpx_eq_u32_e32 0, v0
15121510; GFX11_W64-NEXT: s_cbranch_execz .LBB13_2
15131511; GFX11_W64-NEXT: ; %bb.1: ; %bb
15141512; GFX11_W64-NEXT: s_load_b64 s[0:1], s[4:5], 0x50
@@ -1524,6 +1522,10 @@ define amdgpu_kernel void @test_div_fmas_f32_i1_phi_vcc(ptr addrspace(1) %out, [
15241522; GFX11_W64-NEXT: s_or_b64 vcc, s[6:7], s[0:1]
15251523; GFX11_W64-NEXT: .LBB13_2: ; %exit
15261524; GFX11_W64-NEXT: s_or_b64 exec, exec, s[2:3]
1525+ ; GFX11_W64-NEXT: s_load_b64 s[0:1], s[4:5], 0x28
1526+ ; GFX11_W64-NEXT: v_lshlrev_b32_e32 v0, 2, v0
1527+ ; GFX11_W64-NEXT: s_waitcnt lgkmcnt(0)
1528+ ; GFX11_W64-NEXT: global_load_b96 v[0:2], v0, s[0:1]
15271529; GFX11_W64-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
15281530; GFX11_W64-NEXT: s_waitcnt vmcnt(0)
15291531; GFX11_W64-NEXT: v_div_fmas_f32 v0, v0, v1, v2
0 commit comments