@@ -5294,15 +5294,15 @@ define amdgpu_ps i128 @s_saddsat_i128(i128 inreg %lhs, i128 inreg %rhs) {
52945294; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 1, s0
52955295; GFX11-NEXT: s_and_b32 s0, 1, s10
52965296; GFX11-NEXT: s_cmp_eq_u64 s[6:7], 0
5297- ; GFX11-NEXT: v_cmp_ne_u32_e64 s0, 0, s0
5298- ; GFX11-NEXT: s_cselect_b32 s1, 1, 0
52995297; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 1, s2
5298+ ; GFX11-NEXT: s_cselect_b32 s1, 1, 0
5299+ ; GFX11-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s0
53005300; GFX11-NEXT: s_and_b32 s1, 1, s1
5301- ; GFX11-NEXT: v_cmp_ne_u32_e64 s1, 0, s1
5302- ; GFX11-NEXT: v_cndmask_b16 v0, v1, v0, s0
5303- ; GFX11-NEXT: s_ashr_i32 s0, s9, 31
5304- ; GFX11-NEXT: v_cndmask_b16 v1, v2, 0, s1
5301+ ; GFX11-NEXT: v_cmp_ne_u32_e64 s0, 0, s1
5302+ ; GFX11-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
5303+ ; GFX11-NEXT: v_cndmask_b32_e64 v1, v2, 0, s0
53055304; GFX11-NEXT: v_mov_b32_e32 v2, s5
5305+ ; GFX11-NEXT: s_ashr_i32 s0, s9, 31
53065306; GFX11-NEXT: s_add_i32 s1, s0, 0x80000000
53075307; GFX11-NEXT: v_xor_b32_e32 v0, v1, v0
53085308; GFX11-NEXT: v_dual_mov_b32 v1, s4 :: v_dual_and_b32 v0, 1, v0
@@ -5447,20 +5447,20 @@ define amdgpu_ps <4 x float> @saddsat_i128_sv(i128 inreg %lhs, i128 %rhs) {
54475447; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, s1, v1, vcc_lo
54485448; GFX11-NEXT: v_add_co_ci_u32_e32 v4, vcc_lo, s2, v2, vcc_lo
54495449; GFX11-NEXT: v_add_co_ci_u32_e32 v5, vcc_lo, s3, v3, vcc_lo
5450- ; GFX11-NEXT: v_cmp_gt_u64_e64 s0, s[0:1], v[0:1]
5451- ; GFX11-NEXT: v_cmp_gt_i64_e64 s1, 0, v[2:3]
5450+ ; GFX11-NEXT: v_cmp_gt_u64_e32 vcc_lo, s[0:1], v[0:1]
5451+ ; GFX11-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc_lo
5452+ ; GFX11-NEXT: v_cmp_gt_i64_e32 vcc_lo, s[2:3], v[4:5]
5453+ ; GFX11-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc_lo
5454+ ; GFX11-NEXT: v_cmp_gt_i64_e32 vcc_lo, 0, v[2:3]
5455+ ; GFX11-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc_lo
54525456; GFX11-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[2:3], v[4:5]
5453- ; GFX11-NEXT: v_cndmask_b32_e64 v6, 0, 1, s0
5454- ; GFX11-NEXT: v_cmp_gt_i64_e64 s0, s[2:3], v[4:5]
5455- ; GFX11-NEXT: v_cndmask_b32_e64 v7, 0, 1, s0
5456- ; GFX11-NEXT: v_cmp_eq_u64_e64 s0, 0, v[2:3]
5457- ; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 1, s1
5458- ; GFX11-NEXT: v_cndmask_b16 v3, v7, v6, vcc_lo
5459- ; GFX11-NEXT: v_cndmask_b16 v2, v2, 0, s0
5460- ; GFX11-NEXT: v_xor_b32_e32 v2, v2, v3
5457+ ; GFX11-NEXT: v_cndmask_b32_e32 v6, v7, v6, vcc_lo
5458+ ; GFX11-NEXT: v_cmp_eq_u64_e32 vcc_lo, 0, v[2:3]
54615459; GFX11-NEXT: v_ashrrev_i32_e32 v3, 31, v5
5462- ; GFX11-NEXT: v_and_b32_e32 v2, 1, v2
5460+ ; GFX11-NEXT: v_cndmask_b32_e64 v2, v8, 0, vcc_lo
5461+ ; GFX11-NEXT: v_xor_b32_e32 v2, v2, v6
54635462; GFX11-NEXT: v_add_nc_u32_e32 v6, 0x80000000, v3
5463+ ; GFX11-NEXT: v_and_b32_e32 v2, 1, v2
54645464; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v2
54655465; GFX11-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo
54665466; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo
@@ -5606,22 +5606,21 @@ define amdgpu_ps <4 x float> @saddsat_i128_vs(i128 %lhs, i128 inreg %rhs) {
56065606; GFX11-NEXT: v_add_co_ci_u32_e32 v5, vcc_lo, s1, v1, vcc_lo
56075607; GFX11-NEXT: v_add_co_ci_u32_e32 v6, vcc_lo, s2, v2, vcc_lo
56085608; GFX11-NEXT: v_add_co_ci_u32_e32 v7, vcc_lo, s3, v3, vcc_lo
5609- ; GFX11-NEXT: v_cmp_lt_u64_e64 s0 , v[4:5], v[0:1]
5609+ ; GFX11-NEXT: v_cmp_lt_u64_e32 vcc_lo , v[4:5], v[0:1]
56105610; GFX11-NEXT: s_cmp_eq_u64 s[2:3], 0
5611- ; GFX11-NEXT: s_cselect_b32 s1, 1, 0
5612- ; GFX11-NEXT: v_cmp_eq_u64_e32 vcc_lo, v[6:7], v[2:3]
5613- ; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
5614- ; GFX11-NEXT: v_cmp_lt_i64_e64 s0, v[6:7], v[2:3]
5615- ; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 1, s0
5616- ; GFX11-NEXT: s_and_b32 s0, 1, s1
56175611; GFX11-NEXT: v_cmp_lt_i64_e64 s1, s[2:3], 0
5612+ ; GFX11-NEXT: s_cselect_b32 s0, 1, 0
5613+ ; GFX11-NEXT: s_and_b32 s0, 1, s0
5614+ ; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
5615+ ; GFX11-NEXT: v_cmp_lt_i64_e32 vcc_lo, v[6:7], v[2:3]
5616+ ; GFX11-NEXT: v_cndmask_b32_e64 v8, 0, 1, s1
56185617; GFX11-NEXT: v_cmp_ne_u32_e64 s0, 0, s0
5619- ; GFX11-NEXT: v_cndmask_b16 v0, v1, v0, vcc_lo
5620- ; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 1, s1
5621- ; GFX11-NEXT: v_cndmask_b16 v1, v2, 0, s0
5618+ ; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo
5619+ ; GFX11-NEXT: v_cmp_eq_u64_e32 vcc_lo, v[6:7], v[2:3]
56225620; GFX11-NEXT: v_ashrrev_i32_e32 v2, 31, v7
5621+ ; GFX11-NEXT: v_dual_cndmask_b32 v0, v1, v0 :: v_dual_add_nc_u32 v3, 0x80000000, v2
5622+ ; GFX11-NEXT: v_cndmask_b32_e64 v1, v8, 0, s0
56235623; GFX11-NEXT: v_xor_b32_e32 v0, v1, v0
5624- ; GFX11-NEXT: v_add_nc_u32_e32 v3, 0x80000000, v2
56255624; GFX11-NEXT: v_and_b32_e32 v0, 1, v0
56265625; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
56275626; GFX11-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc_lo
@@ -5847,33 +5846,33 @@ define <2 x i128> @v_saddsat_v2i128(<2 x i128> %lhs, <2 x i128> %rhs) {
58475846; GFX11-NEXT: v_add_co_ci_u32_e32 v9, vcc_lo, v1, v9, vcc_lo
58485847; GFX11-NEXT: v_add_co_ci_u32_e32 v16, vcc_lo, v2, v10, vcc_lo
58495848; GFX11-NEXT: v_add_co_ci_u32_e32 v17, vcc_lo, v3, v11, vcc_lo
5850- ; GFX11-NEXT: v_cmp_lt_u64_e64 s0, v[8:9], v[0:1]
5851- ; GFX11-NEXT: v_cmp_gt_i64_e64 s1, 0, v[10:11]
5849+ ; GFX11-NEXT: v_cmp_lt_u64_e32 vcc_lo, v[8:9], v[0:1]
5850+ ; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
5851+ ; GFX11-NEXT: v_cmp_lt_i64_e32 vcc_lo, v[16:17], v[2:3]
5852+ ; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo
58525853; GFX11-NEXT: v_cmp_eq_u64_e32 vcc_lo, v[16:17], v[2:3]
5853- ; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
5854- ; GFX11-NEXT: v_cmp_lt_i64_e64 s0, v[16:17], v[2:3]
5855- ; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 1, s1
5856- ; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 1, s0
5857- ; GFX11-NEXT: v_cmp_eq_u64_e64 s0, 0, v[10:11]
5858- ; GFX11-NEXT: v_add_co_u32 v10, s1, v4, v12
5859- ; GFX11-NEXT: v_add_co_ci_u32_e64 v11, s1, v5, v13, s1
5860- ; GFX11-NEXT: v_add_co_ci_u32_e64 v12, s1, v6, v14, s1
5861- ; GFX11-NEXT: v_cndmask_b16 v0, v1, v0, vcc_lo
5862- ; GFX11-NEXT: v_cndmask_b16 v1, v2, 0, s0
5863- ; GFX11-NEXT: v_cmp_lt_u64_e64 s0, v[10:11], v[4:5]
5864- ; GFX11-NEXT: v_add_co_ci_u32_e64 v13, s1, v7, v15, s1
5865- ; GFX11-NEXT: v_cmp_gt_i64_e64 s1, 0, v[14:15]
5854+ ; GFX11-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
5855+ ; GFX11-NEXT: v_cmp_gt_i64_e32 vcc_lo, 0, v[10:11]
5856+ ; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo
5857+ ; GFX11-NEXT: v_add_co_u32 v12, vcc_lo, v4, v12
5858+ ; GFX11-NEXT: v_add_co_ci_u32_e32 v13, vcc_lo, v5, v13, vcc_lo
5859+ ; GFX11-NEXT: v_add_co_ci_u32_e32 v18, vcc_lo, v6, v14, vcc_lo
5860+ ; GFX11-NEXT: v_add_co_ci_u32_e32 v19, vcc_lo, v7, v15, vcc_lo
5861+ ; GFX11-NEXT: v_cmp_eq_u64_e32 vcc_lo, 0, v[10:11]
5862+ ; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc_lo
5863+ ; GFX11-NEXT: v_cmp_lt_u64_e32 vcc_lo, v[12:13], v[4:5]
58665864; GFX11-NEXT: v_xor_b32_e32 v0, v1, v0
5867- ; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 1, s0
5868- ; GFX11-NEXT: v_cmp_lt_i64_e64 s0, v[12:13], v[6:7]
5869- ; GFX11-NEXT: v_cmp_eq_u64_e32 vcc_lo, v[12:13], v[6:7]
5870- ; GFX11-NEXT: v_cndmask_b32_e64 v4, 0, 1, s1
5871- ; GFX11-NEXT: v_ashrrev_i32_e32 v6, 31, v13
5872- ; GFX11-NEXT: v_cndmask_b32_e64 v3, 0, 1, s0
5873- ; GFX11-NEXT: v_cmp_eq_u64_e64 s0, 0, v[14:15]
5865+ ; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo
5866+ ; GFX11-NEXT: v_cmp_lt_i64_e32 vcc_lo, v[18:19], v[6:7]
5867+ ; GFX11-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc_lo
5868+ ; GFX11-NEXT: v_cmp_gt_i64_e32 vcc_lo, 0, v[14:15]
5869+ ; GFX11-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc_lo
5870+ ; GFX11-NEXT: v_cmp_eq_u64_e32 vcc_lo, v[18:19], v[6:7]
5871+ ; GFX11-NEXT: v_ashrrev_i32_e32 v6, 31, v19
5872+ ; GFX11-NEXT: v_cndmask_b32_e32 v1, v3, v2, vcc_lo
5873+ ; GFX11-NEXT: v_cmp_eq_u64_e32 vcc_lo, 0, v[14:15]
58745874; GFX11-NEXT: v_add_nc_u32_e32 v7, 0x80000000, v6
5875- ; GFX11-NEXT: v_cndmask_b16 v1, v3, v2, vcc_lo
5876- ; GFX11-NEXT: v_cndmask_b16 v2, v4, 0, s0
5875+ ; GFX11-NEXT: v_cndmask_b32_e64 v2, v4, 0, vcc_lo
58775876; GFX11-NEXT: v_xor_b32_e32 v1, v2, v1
58785877; GFX11-NEXT: v_ashrrev_i32_e32 v2, 31, v17
58795878; GFX11-NEXT: v_and_b32_e32 v0, 1, v0
@@ -5883,10 +5882,10 @@ define <2 x i128> @v_saddsat_v2i128(<2 x i128> %lhs, <2 x i128> %rhs) {
58835882; GFX11-NEXT: v_cmp_ne_u32_e64 s0, 0, v3
58845883; GFX11-NEXT: v_cndmask_b32_e32 v1, v9, v2, vcc_lo
58855884; GFX11-NEXT: v_dual_cndmask_b32 v2, v16, v2 :: v_dual_cndmask_b32 v3, v17, v4
5886- ; GFX11-NEXT: v_cndmask_b32_e64 v4, v10 , v6, s0
5887- ; GFX11-NEXT: v_cndmask_b32_e64 v5, v11 , v6, s0
5888- ; GFX11-NEXT: v_cndmask_b32_e64 v6, v12 , v6, s0
5889- ; GFX11-NEXT: v_cndmask_b32_e64 v7, v13 , v7, s0
5885+ ; GFX11-NEXT: v_cndmask_b32_e64 v4, v12 , v6, s0
5886+ ; GFX11-NEXT: v_cndmask_b32_e64 v5, v13 , v6, s0
5887+ ; GFX11-NEXT: v_cndmask_b32_e64 v6, v18 , v6, s0
5888+ ; GFX11-NEXT: v_cndmask_b32_e64 v7, v19 , v7, s0
58905889; GFX11-NEXT: s_setpc_b64 s[30:31]
58915890 %result = call <2 x i128 > @llvm.sadd.sat.v2i128 (<2 x i128 > %lhs , <2 x i128 > %rhs )
58925891 ret <2 x i128 > %result
@@ -6244,16 +6243,16 @@ define amdgpu_ps <2 x i128> @s_saddsat_v2i128(<2 x i128> inreg %lhs, <2 x i128>
62446243; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 1, s0
62456244; GFX11-NEXT: s_and_b32 s0, 1, s18
62466245; GFX11-NEXT: s_cmp_eq_u64 s[10:11], 0
6247- ; GFX11-NEXT: v_cmp_ne_u32_e64 s0, 0, s0
6248- ; GFX11-NEXT: s_cselect_b32 s1, 1, 0
62496246; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 1, s2
6247+ ; GFX11-NEXT: s_cselect_b32 s1, 1, 0
6248+ ; GFX11-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s0
62506249; GFX11-NEXT: s_and_b32 s1, 1, s1
62516250; GFX11-NEXT: s_ashr_i32 s10, s17, 31
6252- ; GFX11-NEXT: v_cmp_ne_u32_e64 s1 , 0, s1
6251+ ; GFX11-NEXT: v_cmp_ne_u32_e64 s0 , 0, s1
62536252; GFX11-NEXT: s_add_i32 s11, s10, 0x80000000
6254- ; GFX11-NEXT: v_cndmask_b16 v0, v1, v0, s0
6253+ ; GFX11-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
6254+ ; GFX11-NEXT: v_cndmask_b32_e64 v1, v2, 0, s0
62556255; GFX11-NEXT: s_add_u32 s0, s4, s12
6256- ; GFX11-NEXT: v_cndmask_b16 v1, v2, 0, s1
62576256; GFX11-NEXT: s_addc_u32 s1, s5, s13
62586257; GFX11-NEXT: s_addc_u32 s2, s6, s14
62596258; GFX11-NEXT: v_cmp_lt_u64_e64 s4, s[0:1], s[4:5]
@@ -6269,18 +6268,17 @@ define amdgpu_ps <2 x i128> @s_saddsat_v2i128(<2 x i128> inreg %lhs, <2 x i128>
62696268; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 1, s4
62706269; GFX11-NEXT: s_and_b32 s4, 1, s12
62716270; GFX11-NEXT: s_cmp_eq_u64 s[14:15], 0
6272- ; GFX11-NEXT: v_cmp_ne_u32_e64 s4, 0, s4
6273- ; GFX11-NEXT: s_cselect_b32 s5, 1, 0
62746271; GFX11-NEXT: v_cndmask_b32_e64 v3, 0, 1, s6
6272+ ; GFX11-NEXT: s_cselect_b32 s5, 1, 0
6273+ ; GFX11-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s4
62756274; GFX11-NEXT: s_and_b32 s5, 1, s5
6276- ; GFX11-NEXT: v_cmp_ne_u32_e64 s5, 0, s5
6277- ; GFX11-NEXT: v_cndmask_b16 v1, v2, v1, s4
6278- ; GFX11-NEXT: s_ashr_i32 s4, s3, 31
6279- ; GFX11-NEXT: s_add_i32 s0, s4, 0x80000000
6280- ; GFX11-NEXT: v_cndmask_b16 v2, v3, 0, s5
6275+ ; GFX11-NEXT: v_cmp_ne_u32_e64 s4, 0, s5
6276+ ; GFX11-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc_lo
6277+ ; GFX11-NEXT: v_cndmask_b32_e64 v2, v3, 0, s4
62816278; GFX11-NEXT: v_mov_b32_e32 v3, s8
62826279; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
62836280; GFX11-NEXT: v_mov_b32_e32 v0, s16
6281+ ; GFX11-NEXT: s_ashr_i32 s4, s3, 31
62846282; GFX11-NEXT: v_xor_b32_e32 v1, v2, v1
62856283; GFX11-NEXT: v_mov_b32_e32 v4, s9
62866284; GFX11-NEXT: v_mov_b32_e32 v2, s17
@@ -6289,6 +6287,7 @@ define amdgpu_ps <2 x i128> @s_saddsat_v2i128(<2 x i128> inreg %lhs, <2 x i128>
62896287; GFX11-NEXT: v_and_b32_e32 v1, 1, v1
62906288; GFX11-NEXT: v_cndmask_b32_e64 v4, v4, s10, vcc_lo
62916289; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s11, vcc_lo
6290+ ; GFX11-NEXT: s_add_i32 s0, s4, 0x80000000
62926291; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1
62936292; GFX11-NEXT: v_mov_b32_e32 v1, s2
62946293; GFX11-NEXT: v_readfirstlane_b32 s1, v4
0 commit comments