@@ -921,60 +921,58 @@ define amdgpu_kernel void @s_test_srem33_64(ptr addrspace(1) %out, i64 %x, i64 %
921921; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
922922; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0xd
923923; GCN-NEXT: s_waitcnt lgkmcnt(0)
924- ; GCN-NEXT: s_ashr_i64 s[2:3 ], s[2:3], 31
925- ; GCN-NEXT: s_ashr_i64 s[4:5 ], s[4:5], 31
926- ; GCN-NEXT: s_ashr_i32 s6 , s5, 31
927- ; GCN-NEXT: s_add_u32 s4, s4, s6
928- ; GCN-NEXT: s_mov_b32 s7, s6
929- ; GCN-NEXT: s_addc_u32 s5, s5, s6
930- ; GCN-NEXT: s_xor_b64 s[8:9], s[4:5 ], s[6:7 ]
924+ ; GCN-NEXT: s_ashr_i64 s[10:11 ], s[2:3], 31
925+ ; GCN-NEXT: s_ashr_i64 s[6:7 ], s[4:5], 31
926+ ; GCN-NEXT: s_ashr_i32 s4 , s5, 31
927+ ; GCN-NEXT: s_add_u32 s6, s6, s4
928+ ; GCN-NEXT: s_mov_b32 s5, s4
929+ ; GCN-NEXT: s_addc_u32 s7, s7, s4
930+ ; GCN-NEXT: s_xor_b64 s[8:9], s[6:7 ], s[4:5 ]
931931; GCN-NEXT: v_cvt_f32_u32_e32 v0, s8
932932; GCN-NEXT: v_cvt_f32_u32_e32 v1, s9
933- ; GCN-NEXT: s_sub_u32 s4 , 0, s8
934- ; GCN-NEXT: s_subb_u32 s5 , 0, s9
935- ; GCN-NEXT: s_ashr_i32 s10 , s3, 31
933+ ; GCN-NEXT: s_sub_u32 s2 , 0, s8
934+ ; GCN-NEXT: s_subb_u32 s4 , 0, s9
935+ ; GCN-NEXT: s_ashr_i32 s12 , s3, 31
936936; GCN-NEXT: v_madmk_f32 v0, v1, 0x4f800000, v0
937937; GCN-NEXT: v_rcp_f32_e32 v0, v0
938- ; GCN-NEXT: s_add_u32 s2, s2, s10
939- ; GCN-NEXT: s_mov_b32 s11, s10
940- ; GCN-NEXT: s_addc_u32 s3, s3, s10
938+ ; GCN-NEXT: s_mov_b32 s13, s12
939+ ; GCN-NEXT: s_mov_b32 s5, s1
940+ ; GCN-NEXT: s_mov_b32 s7, 0xf000
941941; GCN-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0
942942; GCN-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0
943943; GCN-NEXT: v_trunc_f32_e32 v1, v1
944944; GCN-NEXT: v_madmk_f32 v0, v1, 0xcf800000, v0
945945; GCN-NEXT: v_cvt_u32_f32_e32 v1, v1
946946; GCN-NEXT: v_cvt_u32_f32_e32 v0, v0
947- ; GCN-NEXT: s_xor_b64 s[12:13], s[2:3], s[10:11]
948- ; GCN-NEXT: s_mov_b32 s7, 0xf000
949- ; GCN-NEXT: v_mul_lo_u32 v2, s4, v1
950- ; GCN-NEXT: v_mul_hi_u32 v3, s4, v0
951- ; GCN-NEXT: v_mul_lo_u32 v5, s5, v0
952- ; GCN-NEXT: v_mul_lo_u32 v4, s4, v0
953947; GCN-NEXT: s_mov_b32 s6, -1
948+ ; GCN-NEXT: v_mul_lo_u32 v2, s2, v1
949+ ; GCN-NEXT: v_mul_hi_u32 v3, s2, v0
950+ ; GCN-NEXT: v_mul_lo_u32 v5, s4, v0
951+ ; GCN-NEXT: v_mul_lo_u32 v4, s2, v0
954952; GCN-NEXT: v_add_i32_e32 v2, vcc, v2, v3
955953; GCN-NEXT: v_add_i32_e32 v2, vcc, v2, v5
956954; GCN-NEXT: v_mul_hi_u32 v3, v0, v4
957955; GCN-NEXT: v_mul_lo_u32 v5, v0, v2
958- ; GCN-NEXT: v_mul_hi_u32 v7, v0, v2
959- ; GCN-NEXT: v_mul_lo_u32 v6, v1, v4
960- ; GCN-NEXT: v_mul_hi_u32 v4, v1, v4
961- ; GCN-NEXT: v_add_i32_e32 v3, vcc, v3, v5
962- ; GCN-NEXT: v_addc_u32_e32 v5, vcc, 0, v7, vcc
956+ ; GCN-NEXT: v_mul_hi_u32 v6, v0, v2
963957; GCN-NEXT: v_mul_hi_u32 v7, v1, v2
964958; GCN-NEXT: v_mul_lo_u32 v2, v1, v2
959+ ; GCN-NEXT: v_add_i32_e32 v3, vcc, v3, v5
960+ ; GCN-NEXT: v_addc_u32_e32 v5, vcc, 0, v6, vcc
961+ ; GCN-NEXT: v_mul_lo_u32 v6, v1, v4
962+ ; GCN-NEXT: v_mul_hi_u32 v4, v1, v4
965963; GCN-NEXT: v_add_i32_e32 v3, vcc, v3, v6
966964; GCN-NEXT: v_addc_u32_e32 v3, vcc, v5, v4, vcc
967965; GCN-NEXT: v_addc_u32_e32 v4, vcc, 0, v7, vcc
968966; GCN-NEXT: v_add_i32_e32 v2, vcc, v3, v2
969967; GCN-NEXT: v_addc_u32_e32 v3, vcc, 0, v4, vcc
970968; GCN-NEXT: v_add_i32_e32 v0, vcc, v0, v2
971969; GCN-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc
972- ; GCN-NEXT: v_mul_lo_u32 v2, s4 , v1
973- ; GCN-NEXT: v_mul_hi_u32 v3, s4 , v0
974- ; GCN-NEXT: v_mul_lo_u32 v4, s5 , v0
975- ; GCN-NEXT: s_mov_b32 s5, s1
970+ ; GCN-NEXT: v_mul_lo_u32 v2, s2 , v1
971+ ; GCN-NEXT: v_mul_hi_u32 v3, s2 , v0
972+ ; GCN-NEXT: v_mul_lo_u32 v4, s4 , v0
973+ ; GCN-NEXT: s_mov_b32 s4, s0
976974; GCN-NEXT: v_add_i32_e32 v2, vcc, v2, v3
977- ; GCN-NEXT: v_mul_lo_u32 v3, s4 , v0
975+ ; GCN-NEXT: v_mul_lo_u32 v3, s2 , v0
978976; GCN-NEXT: v_add_i32_e32 v2, vcc, v2, v4
979977; GCN-NEXT: v_mul_lo_u32 v6, v0, v2
980978; GCN-NEXT: v_mul_hi_u32 v7, v0, v3
@@ -990,18 +988,20 @@ define amdgpu_kernel void @s_test_srem33_64(ptr addrspace(1) %out, i64 %x, i64 %
990988; GCN-NEXT: v_addc_u32_e32 v4, vcc, 0, v4, vcc
991989; GCN-NEXT: v_add_i32_e32 v2, vcc, v3, v2
992990; GCN-NEXT: v_addc_u32_e32 v3, vcc, 0, v4, vcc
991+ ; GCN-NEXT: s_add_u32 s2, s10, s12
993992; GCN-NEXT: v_add_i32_e32 v0, vcc, v0, v2
993+ ; GCN-NEXT: s_addc_u32 s3, s11, s12
994994; GCN-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc
995- ; GCN-NEXT: v_mul_lo_u32 v2, s12, v1
996- ; GCN-NEXT: v_mul_hi_u32 v3, s12, v0
997- ; GCN-NEXT: v_mul_hi_u32 v4, s12, v1
998- ; GCN-NEXT: v_mul_hi_u32 v5, s13, v1
999- ; GCN-NEXT: v_mul_lo_u32 v1, s13, v1
995+ ; GCN-NEXT: s_xor_b64 s[10:11], s[2:3], s[12:13]
996+ ; GCN-NEXT: v_mul_lo_u32 v2, s10, v1
997+ ; GCN-NEXT: v_mul_hi_u32 v3, s10, v0
998+ ; GCN-NEXT: v_mul_hi_u32 v4, s10, v1
999+ ; GCN-NEXT: v_mul_hi_u32 v5, s11, v1
1000+ ; GCN-NEXT: v_mul_lo_u32 v1, s11, v1
10001001; GCN-NEXT: v_add_i32_e32 v2, vcc, v3, v2
10011002; GCN-NEXT: v_addc_u32_e32 v3, vcc, 0, v4, vcc
1002- ; GCN-NEXT: v_mul_lo_u32 v4, s13, v0
1003- ; GCN-NEXT: v_mul_hi_u32 v0, s13, v0
1004- ; GCN-NEXT: s_mov_b32 s4, s0
1003+ ; GCN-NEXT: v_mul_lo_u32 v4, s11, v0
1004+ ; GCN-NEXT: v_mul_hi_u32 v0, s11, v0
10051005; GCN-NEXT: v_add_i32_e32 v2, vcc, v2, v4
10061006; GCN-NEXT: v_addc_u32_e32 v0, vcc, v3, v0, vcc
10071007; GCN-NEXT: v_addc_u32_e32 v2, vcc, 0, v5, vcc
@@ -1013,9 +1013,9 @@ define amdgpu_kernel void @s_test_srem33_64(ptr addrspace(1) %out, i64 %x, i64 %
10131013; GCN-NEXT: v_mul_lo_u32 v0, s8, v0
10141014; GCN-NEXT: v_add_i32_e32 v1, vcc, v1, v2
10151015; GCN-NEXT: v_add_i32_e32 v1, vcc, v3, v1
1016- ; GCN-NEXT: v_sub_i32_e32 v2, vcc, s13 , v1
1016+ ; GCN-NEXT: v_sub_i32_e32 v2, vcc, s11 , v1
10171017; GCN-NEXT: v_mov_b32_e32 v3, s9
1018- ; GCN-NEXT: v_sub_i32_e32 v0, vcc, s12 , v0
1018+ ; GCN-NEXT: v_sub_i32_e32 v0, vcc, s10 , v0
10191019; GCN-NEXT: v_subb_u32_e64 v2, s[0:1], v2, v3, vcc
10201020; GCN-NEXT: v_subrev_i32_e64 v4, s[0:1], s8, v0
10211021; GCN-NEXT: v_subbrev_u32_e64 v5, s[2:3], 0, v2, s[0:1]
@@ -1030,7 +1030,7 @@ define amdgpu_kernel void @s_test_srem33_64(ptr addrspace(1) %out, i64 %x, i64 %
10301030; GCN-NEXT: v_subbrev_u32_e64 v2, s[0:1], 0, v2, s[0:1]
10311031; GCN-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v6
10321032; GCN-NEXT: v_cndmask_b32_e64 v3, v4, v3, s[0:1]
1033- ; GCN-NEXT: v_mov_b32_e32 v4, s13
1033+ ; GCN-NEXT: v_mov_b32_e32 v4, s11
10341034; GCN-NEXT: v_subb_u32_e32 v1, vcc, v4, v1, vcc
10351035; GCN-NEXT: v_cmp_le_u32_e32 vcc, s9, v1
10361036; GCN-NEXT: v_cndmask_b32_e64 v4, 0, -1, vcc
@@ -1042,10 +1042,10 @@ define amdgpu_kernel void @s_test_srem33_64(ptr addrspace(1) %out, i64 %x, i64 %
10421042; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4
10431043; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
10441044; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
1045- ; GCN-NEXT: v_xor_b32_e32 v0, s10 , v0
1046- ; GCN-NEXT: v_xor_b32_e32 v1, s10 , v1
1047- ; GCN-NEXT: v_mov_b32_e32 v2, s10
1048- ; GCN-NEXT: v_subrev_i32_e32 v0, vcc, s10 , v0
1045+ ; GCN-NEXT: v_xor_b32_e32 v0, s12 , v0
1046+ ; GCN-NEXT: v_xor_b32_e32 v1, s12 , v1
1047+ ; GCN-NEXT: v_mov_b32_e32 v2, s12
1048+ ; GCN-NEXT: v_subrev_i32_e32 v0, vcc, s12 , v0
10491049; GCN-NEXT: v_subb_u32_e32 v1, vcc, v1, v2, vcc
10501050; GCN-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
10511051; GCN-NEXT: s_endpgm
0 commit comments