@@ -2854,89 +2854,90 @@ define i256 @v_mul_i256(i256 %num, i256 %den) {
2854
2854
; GFX1250: ; %bb.0:
2855
2855
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
2856
2856
; GFX1250-NEXT: s_wait_kmcnt 0x0
2857
- ; GFX1250-NEXT: v_mad_nc_u64_u32 v[16:17], v0, v14, 0
2858
- ; GFX1250-NEXT: v_mad_nc_u64_u32 v[18:19], v0, v12, 0
2857
+ ; GFX1250-NEXT: v_dual_mov_b32 v16, v0 :: v_dual_mov_b32 v17, v1
2859
2858
; GFX1250-NEXT: v_mul_lo_u32 v27, v5, v10
2860
2859
; GFX1250-NEXT: v_mul_lo_u32 v29, v3, v12
2861
- ; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
2862
- ; GFX1250-NEXT: v_mad_nc_u64_u32 v[16:17], v1, v13, v[16:17]
2863
- ; GFX1250-NEXT: v_mad_co_u64_u32 v[18:19], s0, v1, v11, v[18:19]
2864
- ; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
2860
+ ; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2)
2861
+ ; GFX1250-NEXT: v_mad_nc_u64_u32 v[0:1], v16, v14, 0
2862
+ ; GFX1250-NEXT: v_mad_nc_u64_u32 v[18:19], v16, v12, 0
2863
+ ; GFX1250-NEXT: v_mad_nc_u64_u32 v[0:1], v17, v13, v[0:1]
2864
+ ; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
2865
+ ; GFX1250-NEXT: v_mad_co_u64_u32 v[18:19], s0, v17, v11, v[18:19]
2865
2866
; GFX1250-NEXT: v_cndmask_b32_e64 v20, 0, 1, s0
2866
- ; GFX1250-NEXT: v_mad_nc_u64_u32 v[16:17], v2, v12, v[16:17]
2867
- ; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
2867
+ ; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
2868
+ ; GFX1250-NEXT: v_mad_nc_u64_u32 v[0:1], v2, v12, v[0:1]
2868
2869
; GFX1250-NEXT: v_mad_co_u64_u32 v[18:19], vcc_lo, v2, v10, v[18:19]
2870
+ ; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_4)
2869
2871
; GFX1250-NEXT: v_add_co_ci_u32_e64 v22, null, 0, v20, vcc_lo
2870
- ; GFX1250-NEXT: v_mad_nc_u64_u32 v[20:21], v0 , v10, 0
2871
- ; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
2872
- ; GFX1250-NEXT: v_mad_nc_u64_u32 v[16:17], v3, v11, v[16:17]
2872
+ ; GFX1250-NEXT: v_mad_nc_u64_u32 v[20:21], v16 , v10, 0
2873
+ ; GFX1250-NEXT: v_mad_nc_u64_u32 v[0:1], v3, v11, v[0:1]
2874
+ ; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_1)
2873
2875
; GFX1250-NEXT: v_mad_co_u64_u32 v[18:19], vcc_lo, v3, v9, v[18:19]
2874
- ; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
2875
2876
; GFX1250-NEXT: v_add_co_ci_u32_e64 v24, null, 0, v22, vcc_lo
2876
- ; GFX1250-NEXT: v_mad_nc_u64_u32 v[16:17], v4, v10, v[16:17]
2877
- ; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
2877
+ ; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
2878
+ ; GFX1250-NEXT: v_mad_nc_u64_u32 v[0:1], v4, v10, v[0:1]
2878
2879
; GFX1250-NEXT: v_mad_co_u64_u32 v[18:19], vcc_lo, v4, v8, v[18:19]
2880
+ ; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
2879
2881
; GFX1250-NEXT: v_add_co_ci_u32_e64 v26, null, 0, v24, vcc_lo
2880
- ; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
2881
- ; GFX1250-NEXT: v_mad_nc_u64_u32 v[16:17], v5, v9, v[16:17]
2882
- ; GFX1250-NEXT: v_mad_nc_u64_u32 v[22:23], v6, v8, v[16:17]
2883
- ; GFX1250-NEXT: v_mad_co_u64_u32 v[16:17], s0, v1, v9, v[20:21]
2884
- ; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_4)
2882
+ ; GFX1250-NEXT: v_mad_nc_u64_u32 v[0:1], v5, v9, v[0:1]
2883
+ ; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
2884
+ ; GFX1250-NEXT: v_mad_nc_u64_u32 v[22:23], v6, v8, v[0:1]
2885
+ ; GFX1250-NEXT: v_mad_co_u64_u32 v[0:1], s0, v17, v9, v[20:21]
2885
2886
; GFX1250-NEXT: v_dual_mov_b32 v20, v19 :: v_dual_mov_b32 v21, v22
2886
2887
; GFX1250-NEXT: v_mul_lo_u32 v22, v6, v9
2887
2888
; GFX1250-NEXT: v_cndmask_b32_e64 v6, 0, 1, s0
2888
- ; GFX1250-NEXT: v_mad_co_u64_u32 v[24:25], s0, v2, v8, v[16:17]
2889
- ; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
2890
- ; GFX1250-NEXT: v_mad_co_u64_u32 v[20:21], vcc_lo, v0, v13, v[20:21]
2889
+ ; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
2890
+ ; GFX1250-NEXT: v_mad_co_u64_u32 v[24:25], s0, v2, v8, v[0:1]
2891
+ ; GFX1250-NEXT: v_mad_co_u64_u32 v[20:21], vcc_lo, v16, v13, v[20:21]
2892
+ ; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
2891
2893
; GFX1250-NEXT: v_add_co_ci_u32_e64 v6, null, 0, v6, s0
2892
- ; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4)
2893
- ; GFX1250-NEXT: v_mad_co_u64_u32 v[16:17], s0, v1, v12, v[20:21]
2894
+ ; GFX1250-NEXT: v_mad_co_u64_u32 v[0:1], s0, v17, v12, v[20:21]
2895
+ ; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_2)
2894
2896
; GFX1250-NEXT: v_dual_mov_b32 v20, v25 :: v_dual_mov_b32 v21, v18
2895
2897
; GFX1250-NEXT: v_mul_lo_u32 v25, v4, v11
2896
- ; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
2897
- ; GFX1250-NEXT: v_mad_co_u64_u32 v[18:19], s2, v0, v11, v[20:21]
2898
+ ; GFX1250-NEXT: v_mad_co_u64_u32 v[18:19], s2, v16, v11, v[20:21]
2899
+ ; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_4)
2898
2900
; GFX1250-NEXT: v_cndmask_b32_e64 v28, 0, 1, s2
2899
- ; GFX1250-NEXT: v_mad_co_u64_u32 v[20:21], s1, v2, v11, v[16:17 ]
2900
- ; GFX1250-NEXT: v_mad_nc_u64_u32 v[16:17 ], v0 , v8, 0
2901
- ; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
2902
- ; GFX1250-NEXT: v_mad_co_u64_u32 v[18:19], s2, v1, v10, v[18:19]
2901
+ ; GFX1250-NEXT: v_mad_co_u64_u32 v[20:21], s1, v2, v11, v[0:1 ]
2902
+ ; GFX1250-NEXT: v_mad_nc_u64_u32 v[0:1 ], v16 , v8, 0
2903
+ ; GFX1250-NEXT: v_mad_co_u64_u32 v[18:19], s2, v17, v10, v[18:19]
2904
+ ; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_4)
2903
2905
; GFX1250-NEXT: v_mad_co_u64_u32 v[10:11], s3, v3, v10, v[20:21]
2904
2906
; GFX1250-NEXT: v_mul_lo_u32 v20, v2, v13
2905
2907
; GFX1250-NEXT: v_add_co_ci_u32_e64 v21, null, 0, v28, s2
2906
- ; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_4)
2907
2908
; GFX1250-NEXT: v_mad_co_u64_u32 v[12:13], s2, v2, v9, v[18:19]
2908
- ; GFX1250-NEXT: v_dual_mov_b32 v18, v17 :: v_dual_mov_b32 v19, v24
2909
+ ; GFX1250-NEXT: v_dual_mov_b32 v18, v1 :: v_dual_mov_b32 v19, v24
2909
2910
; GFX1250-NEXT: v_mad_co_u64_u32 v[10:11], s4, v4, v9, v[10:11]
2910
- ; GFX1250-NEXT: v_add_co_ci_u32_e64 v2, null, 0, v21, s2
2911
- ; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3)
2912
- ; GFX1250-NEXT: v_mad_co_u64_u32 v[18:19], s6, v0 , v9, v[18:19]
2913
- ; GFX1250-NEXT: v_mul_lo_u32 v0, v0 , v15
2911
+ ; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
2912
+ ; GFX1250-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v21, s2
2913
+ ; GFX1250-NEXT: v_mad_co_u64_u32 v[18:19], s6, v16 , v9, v[18:19]
2914
+ ; GFX1250-NEXT: v_mul_lo_u32 v2, v16 , v15
2914
2915
; GFX1250-NEXT: v_mad_co_u64_u32 v[12:13], s2, v3, v8, v[12:13]
2915
2916
; GFX1250-NEXT: v_cndmask_b32_e64 v3, 0, 1, s6
2916
- ; GFX1250-NEXT: v_mul_lo_u32 v9, v1 , v14
2917
+ ; GFX1250-NEXT: v_mul_lo_u32 v9, v17 , v14
2917
2918
; GFX1250-NEXT: v_mad_co_u64_u32 v[10:11], s5, v5, v8, v[10:11]
2918
- ; GFX1250-NEXT: v_add_co_ci_u32_e64 v2 , null, 0, v2 , s2
2919
- ; GFX1250-NEXT: v_mad_co_u64_u32 v[14:15], s2, v1 , v8, v[18:19]
2919
+ ; GFX1250-NEXT: v_add_co_ci_u32_e64 v1 , null, 0, v1 , s2
2920
+ ; GFX1250-NEXT: v_mad_co_u64_u32 v[14:15], s2, v17 , v8, v[18:19]
2920
2921
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2921
2922
; GFX1250-NEXT: v_add_co_ci_u32_e64 v3, s2, v3, v12, s2
2922
2923
; GFX1250-NEXT: v_add_co_ci_u32_e64 v4, s2, v6, v13, s2
2923
2924
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2924
- ; GFX1250-NEXT: v_add_co_ci_u32_e64 v5, s2, v2 , v10, s2
2925
+ ; GFX1250-NEXT: v_add_co_ci_u32_e64 v5, s2, v1 , v10, s2
2925
2926
; GFX1250-NEXT: v_add_co_ci_u32_e64 v6, s2, v26, v11, s2
2926
2927
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
2927
- ; GFX1250-NEXT: v_add_co_ci_u32_e64 v0 , null, v23, v0 , s2
2928
- ; GFX1250-NEXT: v_dual_mov_b32 v2, v15 :: v_dual_mov_b32 v1, v14
2929
- ; GFX1250-NEXT: v_add_co_ci_u32_e64 v0 , null, v0 , v9, s5
2928
+ ; GFX1250-NEXT: v_add_co_ci_u32_e64 v1 , null, v23, v2 , s2
2929
+ ; GFX1250-NEXT: v_mov_b32_e32 v2, v15
2930
+ ; GFX1250-NEXT: v_add_co_ci_u32_e64 v1 , null, v1 , v9, s5
2930
2931
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2931
- ; GFX1250-NEXT: v_add_co_ci_u32_e64 v0 , null, v0 , v20, s4
2932
- ; GFX1250-NEXT: v_add_co_ci_u32_e64 v0 , null, v0 , v29, s3
2932
+ ; GFX1250-NEXT: v_add_co_ci_u32_e64 v1 , null, v1 , v20, s4
2933
+ ; GFX1250-NEXT: v_add_co_ci_u32_e64 v1 , null, v1 , v29, s3
2933
2934
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2934
- ; GFX1250-NEXT: v_add_co_ci_u32_e64 v0 , null, v0 , v25, s1
2935
- ; GFX1250-NEXT: v_add_co_ci_u32_e64 v0 , null, v0 , v27, s0
2935
+ ; GFX1250-NEXT: v_add_co_ci_u32_e64 v1 , null, v1 , v25, s1
2936
+ ; GFX1250-NEXT: v_add_co_ci_u32_e64 v1 , null, v1 , v27, s0
2936
2937
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2937
- ; GFX1250-NEXT: v_add_co_ci_u32_e64 v0 , null, v0 , v22, vcc_lo
2938
- ; GFX1250-NEXT: v_mad_u32 v7, v7, v8, v0
2939
- ; GFX1250-NEXT: v_mov_b32_e32 v0, v16
2938
+ ; GFX1250-NEXT: v_add_co_ci_u32_e64 v1 , null, v1 , v22, vcc_lo
2939
+ ; GFX1250-NEXT: v_mad_u32 v7, v7, v8, v1
2940
+ ; GFX1250-NEXT: v_mov_b32_e32 v1, v14
2940
2941
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
2941
2942
%result = mul i256 %num , %den
2942
2943
ret i256 %result
0 commit comments