Skip to content

Commit dbcb1f8

Browse files
committed
address PR comment
1 parent 8cc493c commit dbcb1f8

26 files changed: +2464 additions, -2495 deletions

llvm/lib/Target/AMDGPU/SIInstructions.td

Lines changed: 3 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -1249,25 +1249,18 @@ class VOPSelectPat_t16 <ValueType vt> : GCNPat <
12491249
(vt (select i1:$src0, vt:$src1, vt:$src2)),
12501250
(V_CNDMASK_B16_t16_e64 0, VSrcT_b16:$src2, 0, VSrcT_b16:$src1, SSrc_i1:$src0)
12511251
>;
1252-
class VOPSelectPat_fake16 <ValueType vt> : GCNPat <
1253-
(vt (select i1:$src0, vt:$src1, vt:$src2)),
1254-
(V_CNDMASK_B16_fake16_e64 0, VSrc_b16:$src2, 0, VSrc_b16:$src1, SSrc_i1:$src0)
1255-
>;
12561252

12571253
def : VOPSelectModsPat <i32>;
12581254
def : VOPSelectModsPat <f32>;
1259-
let True16Predicate = NotHasTrue16BitInsts in {
1255+
foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
1256+
let True16Predicate = p in {
12601257
def : VOPSelectPat <f16>;
12611258
def : VOPSelectPat <i16>;
1262-
} // End True16Predicate = NotHasTrue16BitInsts
1259+
} // End True16Predicate = p
12631260
let True16Predicate = UseRealTrue16Insts in {
12641261
def : VOPSelectPat_t16 <f16>;
12651262
def : VOPSelectPat_t16 <i16>;
12661263
} // End True16Predicate = UseRealTrue16Insts
1267-
let True16Predicate = UseFakeTrue16Insts in {
1268-
def : VOPSelectPat_fake16 <f16>;
1269-
def : VOPSelectPat_fake16 <i16>;
1270-
} // End True16Predicate = UseFakeTrue16Insts
12711264

12721265
let AddedComplexity = 1 in {
12731266
def : GCNPat <

llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll

Lines changed: 67 additions & 68 deletions
Original file line number | Diff line number | Diff line change
@@ -5294,15 +5294,15 @@ define amdgpu_ps i128 @s_saddsat_i128(i128 inreg %lhs, i128 inreg %rhs) {
52945294
; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 1, s0
52955295
; GFX11-NEXT: s_and_b32 s0, 1, s10
52965296
; GFX11-NEXT: s_cmp_eq_u64 s[6:7], 0
5297-
; GFX11-NEXT: v_cmp_ne_u32_e64 s0, 0, s0
5298-
; GFX11-NEXT: s_cselect_b32 s1, 1, 0
52995297
; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 1, s2
5298+
; GFX11-NEXT: s_cselect_b32 s1, 1, 0
5299+
; GFX11-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s0
53005300
; GFX11-NEXT: s_and_b32 s1, 1, s1
5301-
; GFX11-NEXT: v_cmp_ne_u32_e64 s1, 0, s1
5302-
; GFX11-NEXT: v_cndmask_b16 v0, v1, v0, s0
5303-
; GFX11-NEXT: s_ashr_i32 s0, s9, 31
5304-
; GFX11-NEXT: v_cndmask_b16 v1, v2, 0, s1
5301+
; GFX11-NEXT: v_cmp_ne_u32_e64 s0, 0, s1
5302+
; GFX11-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
5303+
; GFX11-NEXT: v_cndmask_b32_e64 v1, v2, 0, s0
53055304
; GFX11-NEXT: v_mov_b32_e32 v2, s5
5305+
; GFX11-NEXT: s_ashr_i32 s0, s9, 31
53065306
; GFX11-NEXT: s_add_i32 s1, s0, 0x80000000
53075307
; GFX11-NEXT: v_xor_b32_e32 v0, v1, v0
53085308
; GFX11-NEXT: v_dual_mov_b32 v1, s4 :: v_dual_and_b32 v0, 1, v0
@@ -5447,20 +5447,20 @@ define amdgpu_ps <4 x float> @saddsat_i128_sv(i128 inreg %lhs, i128 %rhs) {
54475447
; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, s1, v1, vcc_lo
54485448
; GFX11-NEXT: v_add_co_ci_u32_e32 v4, vcc_lo, s2, v2, vcc_lo
54495449
; GFX11-NEXT: v_add_co_ci_u32_e32 v5, vcc_lo, s3, v3, vcc_lo
5450-
; GFX11-NEXT: v_cmp_gt_u64_e64 s0, s[0:1], v[0:1]
5451-
; GFX11-NEXT: v_cmp_gt_i64_e64 s1, 0, v[2:3]
5450+
; GFX11-NEXT: v_cmp_gt_u64_e32 vcc_lo, s[0:1], v[0:1]
5451+
; GFX11-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc_lo
5452+
; GFX11-NEXT: v_cmp_gt_i64_e32 vcc_lo, s[2:3], v[4:5]
5453+
; GFX11-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc_lo
5454+
; GFX11-NEXT: v_cmp_gt_i64_e32 vcc_lo, 0, v[2:3]
5455+
; GFX11-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc_lo
54525456
; GFX11-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[2:3], v[4:5]
5453-
; GFX11-NEXT: v_cndmask_b32_e64 v6, 0, 1, s0
5454-
; GFX11-NEXT: v_cmp_gt_i64_e64 s0, s[2:3], v[4:5]
5455-
; GFX11-NEXT: v_cndmask_b32_e64 v7, 0, 1, s0
5456-
; GFX11-NEXT: v_cmp_eq_u64_e64 s0, 0, v[2:3]
5457-
; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 1, s1
5458-
; GFX11-NEXT: v_cndmask_b16 v3, v7, v6, vcc_lo
5459-
; GFX11-NEXT: v_cndmask_b16 v2, v2, 0, s0
5460-
; GFX11-NEXT: v_xor_b32_e32 v2, v2, v3
5457+
; GFX11-NEXT: v_cndmask_b32_e32 v6, v7, v6, vcc_lo
5458+
; GFX11-NEXT: v_cmp_eq_u64_e32 vcc_lo, 0, v[2:3]
54615459
; GFX11-NEXT: v_ashrrev_i32_e32 v3, 31, v5
5462-
; GFX11-NEXT: v_and_b32_e32 v2, 1, v2
5460+
; GFX11-NEXT: v_cndmask_b32_e64 v2, v8, 0, vcc_lo
5461+
; GFX11-NEXT: v_xor_b32_e32 v2, v2, v6
54635462
; GFX11-NEXT: v_add_nc_u32_e32 v6, 0x80000000, v3
5463+
; GFX11-NEXT: v_and_b32_e32 v2, 1, v2
54645464
; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v2
54655465
; GFX11-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo
54665466
; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo
@@ -5606,22 +5606,21 @@ define amdgpu_ps <4 x float> @saddsat_i128_vs(i128 %lhs, i128 inreg %rhs) {
56065606
; GFX11-NEXT: v_add_co_ci_u32_e32 v5, vcc_lo, s1, v1, vcc_lo
56075607
; GFX11-NEXT: v_add_co_ci_u32_e32 v6, vcc_lo, s2, v2, vcc_lo
56085608
; GFX11-NEXT: v_add_co_ci_u32_e32 v7, vcc_lo, s3, v3, vcc_lo
5609-
; GFX11-NEXT: v_cmp_lt_u64_e64 s0, v[4:5], v[0:1]
5609+
; GFX11-NEXT: v_cmp_lt_u64_e32 vcc_lo, v[4:5], v[0:1]
56105610
; GFX11-NEXT: s_cmp_eq_u64 s[2:3], 0
5611-
; GFX11-NEXT: s_cselect_b32 s1, 1, 0
5612-
; GFX11-NEXT: v_cmp_eq_u64_e32 vcc_lo, v[6:7], v[2:3]
5613-
; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
5614-
; GFX11-NEXT: v_cmp_lt_i64_e64 s0, v[6:7], v[2:3]
5615-
; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 1, s0
5616-
; GFX11-NEXT: s_and_b32 s0, 1, s1
56175611
; GFX11-NEXT: v_cmp_lt_i64_e64 s1, s[2:3], 0
5612+
; GFX11-NEXT: s_cselect_b32 s0, 1, 0
5613+
; GFX11-NEXT: s_and_b32 s0, 1, s0
5614+
; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
5615+
; GFX11-NEXT: v_cmp_lt_i64_e32 vcc_lo, v[6:7], v[2:3]
5616+
; GFX11-NEXT: v_cndmask_b32_e64 v8, 0, 1, s1
56185617
; GFX11-NEXT: v_cmp_ne_u32_e64 s0, 0, s0
5619-
; GFX11-NEXT: v_cndmask_b16 v0, v1, v0, vcc_lo
5620-
; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 1, s1
5621-
; GFX11-NEXT: v_cndmask_b16 v1, v2, 0, s0
5618+
; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo
5619+
; GFX11-NEXT: v_cmp_eq_u64_e32 vcc_lo, v[6:7], v[2:3]
56225620
; GFX11-NEXT: v_ashrrev_i32_e32 v2, 31, v7
5621+
; GFX11-NEXT: v_dual_cndmask_b32 v0, v1, v0 :: v_dual_add_nc_u32 v3, 0x80000000, v2
5622+
; GFX11-NEXT: v_cndmask_b32_e64 v1, v8, 0, s0
56235623
; GFX11-NEXT: v_xor_b32_e32 v0, v1, v0
5624-
; GFX11-NEXT: v_add_nc_u32_e32 v3, 0x80000000, v2
56255624
; GFX11-NEXT: v_and_b32_e32 v0, 1, v0
56265625
; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
56275626
; GFX11-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc_lo
@@ -5847,33 +5846,33 @@ define <2 x i128> @v_saddsat_v2i128(<2 x i128> %lhs, <2 x i128> %rhs) {
58475846
; GFX11-NEXT: v_add_co_ci_u32_e32 v9, vcc_lo, v1, v9, vcc_lo
58485847
; GFX11-NEXT: v_add_co_ci_u32_e32 v16, vcc_lo, v2, v10, vcc_lo
58495848
; GFX11-NEXT: v_add_co_ci_u32_e32 v17, vcc_lo, v3, v11, vcc_lo
5850-
; GFX11-NEXT: v_cmp_lt_u64_e64 s0, v[8:9], v[0:1]
5851-
; GFX11-NEXT: v_cmp_gt_i64_e64 s1, 0, v[10:11]
5849+
; GFX11-NEXT: v_cmp_lt_u64_e32 vcc_lo, v[8:9], v[0:1]
5850+
; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
5851+
; GFX11-NEXT: v_cmp_lt_i64_e32 vcc_lo, v[16:17], v[2:3]
5852+
; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo
58525853
; GFX11-NEXT: v_cmp_eq_u64_e32 vcc_lo, v[16:17], v[2:3]
5853-
; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
5854-
; GFX11-NEXT: v_cmp_lt_i64_e64 s0, v[16:17], v[2:3]
5855-
; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 1, s1
5856-
; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 1, s0
5857-
; GFX11-NEXT: v_cmp_eq_u64_e64 s0, 0, v[10:11]
5858-
; GFX11-NEXT: v_add_co_u32 v10, s1, v4, v12
5859-
; GFX11-NEXT: v_add_co_ci_u32_e64 v11, s1, v5, v13, s1
5860-
; GFX11-NEXT: v_add_co_ci_u32_e64 v12, s1, v6, v14, s1
5861-
; GFX11-NEXT: v_cndmask_b16 v0, v1, v0, vcc_lo
5862-
; GFX11-NEXT: v_cndmask_b16 v1, v2, 0, s0
5863-
; GFX11-NEXT: v_cmp_lt_u64_e64 s0, v[10:11], v[4:5]
5864-
; GFX11-NEXT: v_add_co_ci_u32_e64 v13, s1, v7, v15, s1
5865-
; GFX11-NEXT: v_cmp_gt_i64_e64 s1, 0, v[14:15]
5854+
; GFX11-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
5855+
; GFX11-NEXT: v_cmp_gt_i64_e32 vcc_lo, 0, v[10:11]
5856+
; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo
5857+
; GFX11-NEXT: v_add_co_u32 v12, vcc_lo, v4, v12
5858+
; GFX11-NEXT: v_add_co_ci_u32_e32 v13, vcc_lo, v5, v13, vcc_lo
5859+
; GFX11-NEXT: v_add_co_ci_u32_e32 v18, vcc_lo, v6, v14, vcc_lo
5860+
; GFX11-NEXT: v_add_co_ci_u32_e32 v19, vcc_lo, v7, v15, vcc_lo
5861+
; GFX11-NEXT: v_cmp_eq_u64_e32 vcc_lo, 0, v[10:11]
5862+
; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc_lo
5863+
; GFX11-NEXT: v_cmp_lt_u64_e32 vcc_lo, v[12:13], v[4:5]
58665864
; GFX11-NEXT: v_xor_b32_e32 v0, v1, v0
5867-
; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 1, s0
5868-
; GFX11-NEXT: v_cmp_lt_i64_e64 s0, v[12:13], v[6:7]
5869-
; GFX11-NEXT: v_cmp_eq_u64_e32 vcc_lo, v[12:13], v[6:7]
5870-
; GFX11-NEXT: v_cndmask_b32_e64 v4, 0, 1, s1
5871-
; GFX11-NEXT: v_ashrrev_i32_e32 v6, 31, v13
5872-
; GFX11-NEXT: v_cndmask_b32_e64 v3, 0, 1, s0
5873-
; GFX11-NEXT: v_cmp_eq_u64_e64 s0, 0, v[14:15]
5865+
; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo
5866+
; GFX11-NEXT: v_cmp_lt_i64_e32 vcc_lo, v[18:19], v[6:7]
5867+
; GFX11-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc_lo
5868+
; GFX11-NEXT: v_cmp_gt_i64_e32 vcc_lo, 0, v[14:15]
5869+
; GFX11-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc_lo
5870+
; GFX11-NEXT: v_cmp_eq_u64_e32 vcc_lo, v[18:19], v[6:7]
5871+
; GFX11-NEXT: v_ashrrev_i32_e32 v6, 31, v19
5872+
; GFX11-NEXT: v_cndmask_b32_e32 v1, v3, v2, vcc_lo
5873+
; GFX11-NEXT: v_cmp_eq_u64_e32 vcc_lo, 0, v[14:15]
58745874
; GFX11-NEXT: v_add_nc_u32_e32 v7, 0x80000000, v6
5875-
; GFX11-NEXT: v_cndmask_b16 v1, v3, v2, vcc_lo
5876-
; GFX11-NEXT: v_cndmask_b16 v2, v4, 0, s0
5875+
; GFX11-NEXT: v_cndmask_b32_e64 v2, v4, 0, vcc_lo
58775876
; GFX11-NEXT: v_xor_b32_e32 v1, v2, v1
58785877
; GFX11-NEXT: v_ashrrev_i32_e32 v2, 31, v17
58795878
; GFX11-NEXT: v_and_b32_e32 v0, 1, v0
@@ -5883,10 +5882,10 @@ define <2 x i128> @v_saddsat_v2i128(<2 x i128> %lhs, <2 x i128> %rhs) {
58835882
; GFX11-NEXT: v_cmp_ne_u32_e64 s0, 0, v3
58845883
; GFX11-NEXT: v_cndmask_b32_e32 v1, v9, v2, vcc_lo
58855884
; GFX11-NEXT: v_dual_cndmask_b32 v2, v16, v2 :: v_dual_cndmask_b32 v3, v17, v4
5886-
; GFX11-NEXT: v_cndmask_b32_e64 v4, v10, v6, s0
5887-
; GFX11-NEXT: v_cndmask_b32_e64 v5, v11, v6, s0
5888-
; GFX11-NEXT: v_cndmask_b32_e64 v6, v12, v6, s0
5889-
; GFX11-NEXT: v_cndmask_b32_e64 v7, v13, v7, s0
5885+
; GFX11-NEXT: v_cndmask_b32_e64 v4, v12, v6, s0
5886+
; GFX11-NEXT: v_cndmask_b32_e64 v5, v13, v6, s0
5887+
; GFX11-NEXT: v_cndmask_b32_e64 v6, v18, v6, s0
5888+
; GFX11-NEXT: v_cndmask_b32_e64 v7, v19, v7, s0
58905889
; GFX11-NEXT: s_setpc_b64 s[30:31]
58915890
%result = call <2 x i128> @llvm.sadd.sat.v2i128(<2 x i128> %lhs, <2 x i128> %rhs)
58925891
ret <2 x i128> %result
@@ -6244,16 +6243,16 @@ define amdgpu_ps <2 x i128> @s_saddsat_v2i128(<2 x i128> inreg %lhs, <2 x i128>
62446243
; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 1, s0
62456244
; GFX11-NEXT: s_and_b32 s0, 1, s18
62466245
; GFX11-NEXT: s_cmp_eq_u64 s[10:11], 0
6247-
; GFX11-NEXT: v_cmp_ne_u32_e64 s0, 0, s0
6248-
; GFX11-NEXT: s_cselect_b32 s1, 1, 0
62496246
; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 1, s2
6247+
; GFX11-NEXT: s_cselect_b32 s1, 1, 0
6248+
; GFX11-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s0
62506249
; GFX11-NEXT: s_and_b32 s1, 1, s1
62516250
; GFX11-NEXT: s_ashr_i32 s10, s17, 31
6252-
; GFX11-NEXT: v_cmp_ne_u32_e64 s1, 0, s1
6251+
; GFX11-NEXT: v_cmp_ne_u32_e64 s0, 0, s1
62536252
; GFX11-NEXT: s_add_i32 s11, s10, 0x80000000
6254-
; GFX11-NEXT: v_cndmask_b16 v0, v1, v0, s0
6253+
; GFX11-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
6254+
; GFX11-NEXT: v_cndmask_b32_e64 v1, v2, 0, s0
62556255
; GFX11-NEXT: s_add_u32 s0, s4, s12
6256-
; GFX11-NEXT: v_cndmask_b16 v1, v2, 0, s1
62576256
; GFX11-NEXT: s_addc_u32 s1, s5, s13
62586257
; GFX11-NEXT: s_addc_u32 s2, s6, s14
62596258
; GFX11-NEXT: v_cmp_lt_u64_e64 s4, s[0:1], s[4:5]
@@ -6269,18 +6268,17 @@ define amdgpu_ps <2 x i128> @s_saddsat_v2i128(<2 x i128> inreg %lhs, <2 x i128>
62696268
; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 1, s4
62706269
; GFX11-NEXT: s_and_b32 s4, 1, s12
62716270
; GFX11-NEXT: s_cmp_eq_u64 s[14:15], 0
6272-
; GFX11-NEXT: v_cmp_ne_u32_e64 s4, 0, s4
6273-
; GFX11-NEXT: s_cselect_b32 s5, 1, 0
62746271
; GFX11-NEXT: v_cndmask_b32_e64 v3, 0, 1, s6
6272+
; GFX11-NEXT: s_cselect_b32 s5, 1, 0
6273+
; GFX11-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s4
62756274
; GFX11-NEXT: s_and_b32 s5, 1, s5
6276-
; GFX11-NEXT: v_cmp_ne_u32_e64 s5, 0, s5
6277-
; GFX11-NEXT: v_cndmask_b16 v1, v2, v1, s4
6278-
; GFX11-NEXT: s_ashr_i32 s4, s3, 31
6279-
; GFX11-NEXT: s_add_i32 s0, s4, 0x80000000
6280-
; GFX11-NEXT: v_cndmask_b16 v2, v3, 0, s5
6275+
; GFX11-NEXT: v_cmp_ne_u32_e64 s4, 0, s5
6276+
; GFX11-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc_lo
6277+
; GFX11-NEXT: v_cndmask_b32_e64 v2, v3, 0, s4
62816278
; GFX11-NEXT: v_mov_b32_e32 v3, s8
62826279
; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
62836280
; GFX11-NEXT: v_mov_b32_e32 v0, s16
6281+
; GFX11-NEXT: s_ashr_i32 s4, s3, 31
62846282
; GFX11-NEXT: v_xor_b32_e32 v1, v2, v1
62856283
; GFX11-NEXT: v_mov_b32_e32 v4, s9
62866284
; GFX11-NEXT: v_mov_b32_e32 v2, s17
@@ -6289,6 +6287,7 @@ define amdgpu_ps <2 x i128> @s_saddsat_v2i128(<2 x i128> inreg %lhs, <2 x i128>
62896287
; GFX11-NEXT: v_and_b32_e32 v1, 1, v1
62906288
; GFX11-NEXT: v_cndmask_b32_e64 v4, v4, s10, vcc_lo
62916289
; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s11, vcc_lo
6290+
; GFX11-NEXT: s_add_i32 s0, s4, 0x80000000
62926291
; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1
62936292
; GFX11-NEXT: v_mov_b32_e32 v1, s2
62946293
; GFX11-NEXT: v_readfirstlane_b32 s1, v4

0 commit comments

Comments
 (0)