@@ -2473,6 +2473,7 @@ def : AMDGPUPat <
24732473 $src1), sub1)
24742474>;
24752475
2476+ let True16Predicate = NotHasTrue16BitInsts in {
24762477def : ROTRPattern <V_ALIGNBIT_B32_e64>;
24772478
24782479def : GCNPat<(i32 (trunc (srl i64:$src0, (and i32:$src1, (i32 31))))),
@@ -2482,6 +2483,42 @@ def : GCNPat<(i32 (trunc (srl i64:$src0, (and i32:$src1, (i32 31))))),
24822483def : GCNPat<(i32 (trunc (srl i64:$src0, (i32 ShiftAmt32Imm:$src1)))),
24832484 (V_ALIGNBIT_B32_e64 (i32 (EXTRACT_SUBREG (i64 $src0), sub1)),
24842485 (i32 (EXTRACT_SUBREG (i64 $src0), sub0)), $src1)>;
2486+ } //end True16Predicate = NotHasTrue16BitInsts
2487+
2488+ let True16Predicate = UseFakeTrue16Insts in { // fake16 counterparts of the V_ALIGNBIT_B32_e64 patterns above; every modifier, clamp and op_sel operand is passed as 0
2489+ def ROTRPattern_fake16 : GCNPat < // rotr: $src0 is used for both data inputs, $src1 is the amount
2490+ (rotr i32:$src0, i32:$src1),
2491+ (V_ALIGNBIT_B32_fake16_e64 /* src0_modifiers */ 0, $src0,
2492+ /* src1_modifiers */ 0, $src0,
2493+ /* src2_modifiers */ 0,
2494+ $src1, /* clamp */ 0, /* op_sel */ 0)
2495+ >;
2496+
2497+ def : GCNPat<(i32 (trunc (srl i64:$src0, (and i32:$src1, (i32 31))))), // truncated 64-bit srl with the amount masked by 31
2498+ (V_ALIGNBIT_B32_fake16_e64 0, /* src0_modifiers */
2499+ (i32 (EXTRACT_SUBREG (i64 $src0), sub1)),
2500+ 0, /* src1_modifiers */
2501+ (i32 (EXTRACT_SUBREG (i64 $src0), sub0)),
2502+ 0, /* src2_modifiers */
2503+ $src1, /* clamp */ 0, /* op_sel */ 0) // note: modifier labels above trail their 0, while the clamp/op_sel labels precede theirs
2504+ >;
2505+
2506+ def : GCNPat<(i32 (trunc (srl i64:$src0, (i32 ShiftAmt32Imm:$src1)))), // same selection, with the amount matched by ShiftAmt32Imm
2507+ (V_ALIGNBIT_B32_fake16_e64 0, /* src0_modifiers */
2508+ (i32 (EXTRACT_SUBREG (i64 $src0), sub1)),
2509+ 0, /* src1_modifiers */
2510+ (i32 (EXTRACT_SUBREG (i64 $src0), sub0)),
2511+ 0, /* src2_modifiers */
2512+ $src1, /* clamp */ 0, /* op_sel */ 0)
2513+ >;
2514+
2515+ def : GCNPat<(fshr i32:$src0, i32:$src1, i32:$src2), // fshr: the three operands are forwarded in order
2516+ (V_ALIGNBIT_B32_fake16_e64 /* src0_modifiers */ 0, $src0,
2517+ /* src1_modifiers */ 0, $src1,
2518+ /* src2_modifiers */ 0,
2519+ $src2, /* clamp */ 0, /* op_sel */ 0)
2520+ >;
2521+ } // end True16Predicate = UseFakeTrue16Insts
24852522
24862523/********** ====================== **********/
24872524/********** Indirect addressing **********/
@@ -2984,15 +3021,35 @@ def : GCNPat <
29843021 (i32 (EXTRACT_SUBREG $a, sub0))), (i32 1))
29853022>;
29863023
3024+ let True16Predicate = NotHasTrue16BitInsts in // i32 bswap when true16 instructions are not available
29873025def : GCNPat <
29883026 (i32 (bswap i32:$a)),
29893027 (V_BFI_B32_e64 (S_MOV_B32 (i32 0x00ff00ff)), // BFI under mask 0x00ff00ff combines the two V_ALIGNBIT results below
29903028 (V_ALIGNBIT_B32_e64 VSrc_b32:$a, VSrc_b32:$a, (i32 24)), // $a in both data inputs, amount 24
29913029 (V_ALIGNBIT_B32_e64 VSrc_b32:$a, VSrc_b32:$a, (i32 8))) // $a in both data inputs, amount 8
29923030>;
29933031
3032+ let True16Predicate = UseFakeTrue16Insts in // fake16 counterpart of the i32 bswap pattern above
3033+ def : GCNPat <
3034+ (i32 (bswap i32:$a)),
3035+ (V_BFI_B32_e64 (S_MOV_B32 (i32 0x00ff00ff)), // BFI under mask 0x00ff00ff combines the two V_ALIGNBIT results below
3036+ (V_ALIGNBIT_B32_fake16_e64 0, /* src0_modifiers */
3037+ VSrc_b32:$a,
3038+ 0, /* src1_modifiers */
3039+ VSrc_b32:$a,
3040+ 0, /* src2_modifiers */
3041+ (i32 24), /* clamp */ 0, /* op_sel */ 0), // $a in both data inputs, amount 24
3042+ (V_ALIGNBIT_B32_fake16_e64 0, /* src0_modifiers */
3043+ VSrc_b32:$a,
3044+ 0, /* src1_modifiers */
3045+ VSrc_b32:$a,
3046+ 0, /* src2_modifiers */
3047+ (i32 8), /* clamp */ 0, /* op_sel */ 0)) // $a in both data inputs, amount 8
3048+ >;
3049+
29943050// FIXME: This should have been narrowed to i32 during legalization.
29953051// This pattern should also be skipped for GlobalISel
3052+ let True16Predicate = NotHasTrue16BitInsts in
29963053def : GCNPat <
29973054 (i64 (bswap i64:$a)),
29983055 (REG_SEQUENCE VReg_64,
@@ -3014,6 +3071,40 @@ def : GCNPat <
30143071 sub1)
30153072>;
30163073
3074+ let True16Predicate = UseFakeTrue16Insts in // fake16 i64 bswap: byte-swap each 32-bit word and exchange the two words
3075+ def : GCNPat <
3076+ (i64 (bswap i64:$a)),
3077+ (REG_SEQUENCE VReg_64,
3078+ (V_BFI_B32_e64 (S_MOV_B32 (i32 0x00ff00ff)), // 32-bit byte swap of $a.sub1, same shape as the i32 bswap pattern
3079+ (V_ALIGNBIT_B32_fake16_e64 0, /* src0_modifiers */
3080+ (i32 (EXTRACT_SUBREG VReg_64:$a, sub1)),
3081+ 0, /* src1_modifiers */
3082+ (i32 (EXTRACT_SUBREG VReg_64:$a, sub1)),
3083+ 0, /* src2_modifiers */
3084+ (i32 24), /* clamp */ 0, /* op_sel */ 0),
3085+ (V_ALIGNBIT_B32_fake16_e64 0, /* src0_modifiers */
3086+ (i32 (EXTRACT_SUBREG VReg_64:$a, sub1)),
3087+ 0, /* src1_modifiers */
3088+ (i32 (EXTRACT_SUBREG VReg_64:$a, sub1)),
3089+ 0, /* src2_modifiers */
3090+ (i32 8), /* clamp */ 0, /* op_sel */ 0)),
3091+ sub0, // low result word <- byte-swapped high input word
3092+ (V_BFI_B32_e64 (S_MOV_B32 (i32 0x00ff00ff)), // 32-bit byte swap of $a.sub0
3093+ (V_ALIGNBIT_B32_fake16_e64 0, /* src0_modifiers */
3094+ (i32 (EXTRACT_SUBREG VReg_64:$a, sub0)),
3095+ 0, /* src1_modifiers */
3096+ (i32 (EXTRACT_SUBREG VReg_64:$a, sub0)),
3097+ 0, /* src2_modifiers */
3098+ (i32 24), /* clamp */ 0, /* op_sel */ 0),
3099+ (V_ALIGNBIT_B32_fake16_e64 0, /* src0_modifiers */
3100+ (i32 (EXTRACT_SUBREG VReg_64:$a, sub0)),
3101+ 0, /* src1_modifiers */
3102+ (i32 (EXTRACT_SUBREG VReg_64:$a, sub0)),
3103+ 0, /* src2_modifiers */
3104+ (i32 8), /* clamp */ 0, /* op_sel */ 0)),
3105+ sub1) // high result word <- byte-swapped low input word
3106+ >;
3107+
30173108// FIXME: The AddedComplexity should not be needed, but in GlobalISel
30183109// the BFI pattern ends up taking precedence without it.
30193110let SubtargetPredicate = isGFX8Plus, AddedComplexity = 1 in {
@@ -3379,6 +3470,7 @@ def : GCNPat <
33793470
33803471// Take the upper 16 bits from V[0] and the lower 16 bits from V[1]
33813472// Special case, can use V_ALIGNBIT (always uses encoded literal)
3473+ let True16Predicate = NotHasTrue16BitInsts in
33823474def : GCNPat <
33833475 (vecTy (DivergentBinFrag<build_vector>
33843476 (Ty !if(!eq(Ty, i16),
@@ -3388,6 +3480,16 @@ def : GCNPat <
33883480 (V_ALIGNBIT_B32_e64 VGPR_32:$b, VGPR_32:$a, (i32 16))
33893481>;
33903482
3483+ let True16Predicate = UseFakeTrue16Insts in // fake16 counterpart of the V_ALIGNBIT build_vector special case above
3484+ def : GCNPat <
3485+ (vecTy (DivergentBinFrag<build_vector>
3486+ (Ty !if(!eq(Ty, i16), // for non-i16 Ty the truncated value is wrapped in a bitconvert from i16
3487+ (Ty (trunc (srl VGPR_32:$a, (i32 16)))),
3488+ (Ty (bitconvert (i16 (trunc (srl VGPR_32:$a, (i32 16)))))))),
3489+ (Ty VGPR_32:$b))),
3490+ (V_ALIGNBIT_B32_fake16_e64 0, VGPR_32:$b, 0, VGPR_32:$a, 0, (i16 16), 0, 0) // each 0 before a data operand is its modifiers field; trailing 0, 0 are clamp and op_sel. NOTE(review): src2 is typed i16 here but i32 in the other fake16 patterns -- confirm intended
3491+ >;
3492+
33913493// Take the upper 16 bits from each VGPR_32 and concat them
33923494def : GCNPat <
33933495 (vecTy (DivergentBinFrag<build_vector>
0 commit comments