@@ -2498,6 +2498,7 @@ def : AMDGPUPat <
24982498 $src1), sub1)
24992499>;
25002500
2501+ let True16Predicate = NotHasTrue16BitInsts in {
25012502def : ROTRPattern <V_ALIGNBIT_B32_e64>;
25022503
25032504def : GCNPat<(i32 (trunc (srl i64:$src0, (and i32:$src1, (i32 31))))),
@@ -2507,6 +2508,42 @@ def : GCNPat<(i32 (trunc (srl i64:$src0, (and i32:$src1, (i32 31))))),
25072508def : GCNPat<(i32 (trunc (srl i64:$src0, (i32 ShiftAmt32Imm:$src1)))),
25082509 (V_ALIGNBIT_B32_e64 (i32 (EXTRACT_SUBREG (i64 $src0), sub1)),
25092510 (i32 (EXTRACT_SUBREG (i64 $src0), sub0)), $src1)>;
2511+ } // end True16Predicate = NotHasTrue16BitInsts
2512+
// Selection patterns that apply when True16Predicate is UseFakeTrue16Insts.
// Every pattern in this group produces V_ALIGNBIT_B32_fake16_e64. As labelled
// by the inline comments, its operands are written in the order
//   src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2,
//   clamp, op_sel
// and all modifier, clamp and op_sel operands are passed as 0.
//
// Reading note: in the rotr and fshr patterns each /* ..._modifiers */ label
// precedes the 0 it names; in the two srl/trunc patterns the label follows
// its 0, while /* clamp */ and /* op_sel */ still precede theirs. The operand
// after "0, /* src2_modifiers */" is therefore src2, not clamp.
2513+ let True16Predicate = UseFakeTrue16Insts in {
// rotr: the rotated value $src0 is supplied as both src0 and src1, and the
// rotate amount $src1 is supplied as src2.
2514+ def : GCNPat <
2515+ (rotr i32:$src0, i32:$src1),
2516+ (V_ALIGNBIT_B32_fake16_e64 /* src0_modifiers */ 0, $src0,
2517+ /* src1_modifiers */ 0, $src0,
2518+ /* src2_modifiers */ 0,
2519+ $src1, /* clamp */ 0, /* op_sel */ 0)
2520+ >;
2521+
// i64 logical right shift truncated to i32, with the amount masked by 31:
// the sub1 half of $src0 is src0, the sub0 half is src1, $src1 is src2.
2522+ def : GCNPat<(i32 (trunc (srl i64:$src0, (and i32:$src1, (i32 31))))),
2523+ (V_ALIGNBIT_B32_fake16_e64 0, /* src0_modifiers */
2524+ (i32 (EXTRACT_SUBREG (i64 $src0), sub1)),
2525+ 0, /* src1_modifiers */
2526+ (i32 (EXTRACT_SUBREG (i64 $src0), sub0)),
2527+ 0, /* src2_modifiers */
2528+ $src1, /* clamp */ 0, /* op_sel */ 0)
2529+ >;
2530+
// Same shape as the previous pattern, but the shift amount is an immediate
// matched by ShiftAmt32Imm instead of a masked register value.
2531+ def : GCNPat<(i32 (trunc (srl i64:$src0, (i32 ShiftAmt32Imm:$src1)))),
2532+ (V_ALIGNBIT_B32_fake16_e64 0, /* src0_modifiers */
2533+ (i32 (EXTRACT_SUBREG (i64 $src0), sub1)),
2534+ 0, /* src1_modifiers */
2535+ (i32 (EXTRACT_SUBREG (i64 $src0), sub0)),
2536+ 0, /* src2_modifiers */
2537+ $src1, /* clamp */ 0, /* op_sel */ 0)
2538+ >;
2539+
// fshr: its three operands map one-to-one onto src0, src1 and src2.
2540+ def : GCNPat<(fshr i32:$src0, i32:$src1, i32:$src2),
2541+ (V_ALIGNBIT_B32_fake16_e64 /* src0_modifiers */ 0, $src0,
2542+ /* src1_modifiers */ 0, $src1,
2543+ /* src2_modifiers */ 0,
2544+ $src2, /* clamp */ 0, /* op_sel */ 0)
2545+ >;
2546+ } // end True16Predicate = UseFakeTrue16Insts
25102547
25112548/********** ====================== **********/
25122549/********** Indirect addressing **********/
@@ -3014,35 +3051,69 @@ def : GCNPat <
30143051 (i32 (EXTRACT_SUBREG $a, sub0))), (i32 1))
30153052>;
30163053
// i32 bswap when True16Predicate is NotHasTrue16BitInsts.
// V_BFI_B32_e64 takes the constant 0x00ff00ff (materialised with S_MOV_B32)
// as its first operand and, as the other two, V_ALIGNBIT_B32_e64 applied to
// $a paired with itself using amounts 24 and 8 respectively.
3054+ let True16Predicate = NotHasTrue16BitInsts in
30173055def : GCNPat <
30183056 (i32 (bswap i32:$a)),
30193057 (V_BFI_B32_e64 (S_MOV_B32 (i32 0x00ff00ff)),
30203058 (V_ALIGNBIT_B32_e64 VSrc_b32:$a, VSrc_b32:$a, (i32 24)),
30213059 (V_ALIGNBIT_B32_e64 VSrc_b32:$a, VSrc_b32:$a, (i32 8)))
30223060>;
30233061
3024- // FIXME: This should have been narrowed to i32 during legalization.
3025- // This pattern should also be skipped for GlobalISel
// i32 bswap when True16Predicate is UseFakeTrue16Insts.
// Same V_BFI_B32_e64 / 0x00ff00ff structure, but each alignbit is
// V_ALIGNBIT_B32_fake16_e64 with explicit zero operands. In this pattern the
// /* ..._modifiers */ labels trail the 0 they describe, whereas /* clamp */
// and /* op_sel */ precede theirs; (i32 24) and (i32 8) are the src2 amounts.
3062+ let True16Predicate = UseFakeTrue16Insts in
3063+ def : GCNPat <
3064+ (i32 (bswap i32:$a)),
3065+ (V_BFI_B32_e64 (S_MOV_B32 (i32 0x00ff00ff)),
3066+ (V_ALIGNBIT_B32_fake16_e64 0, /* src0_modifiers */
3067+ VSrc_b32:$a,
3068+ 0, /* src1_modifiers */
3069+ VSrc_b32:$a,
3070+ 0, /* src2_modifiers */
3071+ (i32 24), /* clamp */ 0, /* op_sel */ 0),
3072+ (V_ALIGNBIT_B32_fake16_e64 0, /* src0_modifiers */
3073+ VSrc_b32:$a,
3074+ 0, /* src1_modifiers */
3075+ VSrc_b32:$a,
3076+ 0, /* src2_modifiers */
3077+ (i32 8), /* clamp */ 0, /* op_sel */ 0))
3078+ >;
3079+
// Helper that assembles an alignbit result dag from three operand dags.
//   op1, op2, op3 - the three source operands, in order.
//   isTrue16      - 1 selects V_ALIGNBIT_B32_fake16_e64, 0 selects
//                   V_ALIGNBIT_B32_e64.
// The assembled dag is exposed as the field `ret`.
3080+ class AlignBit32Inst<dag op1, dag op2, dag op3, bit isTrue16> {
3081+ defvar inst = !if(isTrue16, V_ALIGNBIT_B32_fake16_e64, V_ALIGNBIT_B32_e64);
// NoMods is a one-operand dag holding 0 for the fake16 form and an empty dag
// otherwise, so concatenating it adds an operand only in the fake16 case.
3082+ defvar NoMods = !if(isTrue16, (inst 0), (inst));
// Concatenation order: NoMods, op1, NoMods, op2, NoMods, op3, NoMods, NoMods.
// For fake16 this yields a 0 before each source plus two trailing zeros
// (presumably the clamp and op_sel slots seen in the hand-written fake16
// patterns - confirm against the instruction definition); for the plain form
// it reduces to (inst op1, op2, op3).
3083+ dag ret = !con(NoMods, (inst op1), NoMods, (inst op2),
3084+ NoMods, (inst op3), NoMods, NoMods);
3085+ }
3086+
// Emits the i64 bswap pattern. hasTrue16 is forwarded to AlignBit32Inst to
// choose which alignbit instruction form is used.
// The result is a REG_SEQUENCE over VReg_64: the value placed in sub0 is
// computed from the input's sub1 half, and the value placed in sub1 is
// computed from the input's sub0 half. Each half is a V_BFI_B32_e64 with the
// constant 0x00ff00ff and two alignbits of that half with itself, using
// amounts 24 and 8.
// (Lines marked '-' are the pre-change inline V_ALIGNBIT_B32_e64 operands that
// the AlignBit32Inst<...>.ret lines marked '+' replace.)
3087+ multiclass bswapi64ExtPat<bit hasTrue16> {
30263088def : GCNPat <
30273089 (i64 (bswap i64:$a)),
30283090 (REG_SEQUENCE VReg_64,
30293091 (V_BFI_B32_e64 (S_MOV_B32 (i32 0x00ff00ff)),
3030- (V_ALIGNBIT_B32_e64 (i32 (EXTRACT_SUBREG VReg_64:$a, sub1)),
3031- (i32 (EXTRACT_SUBREG VReg_64:$a, sub1)),
3032- (i32 24)) ,
3033- (V_ALIGNBIT_B32_e64 (i32 (EXTRACT_SUBREG VReg_64:$a, sub1)),
3034- (i32 (EXTRACT_SUBREG VReg_64:$a, sub1)),
3035- (i32 8)) ),
3092+ AlignBit32Inst< (i32 (EXTRACT_SUBREG VReg_64:$a, sub1)),
3093+ (i32 (EXTRACT_SUBREG VReg_64:$a, sub1)),
3094+ (i32 24), hasTrue16>.ret ,
3095+ AlignBit32Inst< (i32 (EXTRACT_SUBREG VReg_64:$a, sub1)),
3096+ (i32 (EXTRACT_SUBREG VReg_64:$a, sub1)),
3097+ (i32 8), hasTrue16>.ret ),
30363098 sub0,
30373099 (V_BFI_B32_e64 (S_MOV_B32 (i32 0x00ff00ff)),
3038- (V_ALIGNBIT_B32_e64 (i32 (EXTRACT_SUBREG VReg_64:$a, sub0)),
3039- (i32 (EXTRACT_SUBREG VReg_64:$a, sub0)),
3040- (i32 24)) ,
3041- (V_ALIGNBIT_B32_e64 (i32 (EXTRACT_SUBREG VReg_64:$a, sub0)),
3042- (i32 (EXTRACT_SUBREG VReg_64:$a, sub0)),
3043- (i32 8)) ),
3100+ AlignBit32Inst< (i32 (EXTRACT_SUBREG VReg_64:$a, sub0)),
3101+ (i32 (EXTRACT_SUBREG VReg_64:$a, sub0)),
3102+ (i32 24), hasTrue16>.ret ,
3103+ AlignBit32Inst< (i32 (EXTRACT_SUBREG VReg_64:$a, sub0)),
3104+ (i32 (EXTRACT_SUBREG VReg_64:$a, sub0)),
3105+ (i32 8), hasTrue16>.ret ),
30443106 sub1)
30453107>;
3108+ }
3109+
// Instantiate the i64 bswap pattern once per True16 predicate; the template
// argument tells bswapi64ExtPat which alignbit form to build.
3110+ // FIXME: This should have been narrowed to i32 during legalization.
3111+ // This pattern should also be skipped for GlobalISel
// Non-True16 targets: hasTrue16 = 0.
3112+ let True16Predicate = NotHasTrue16BitInsts in
3113+ defm : bswapi64ExtPat</*hasTrue16*/0>;
3114+
// Fake-True16 targets: hasTrue16 = 1.
3115+ let True16Predicate = UseFakeTrue16Insts in
3116+ defm : bswapi64ExtPat</*hasTrue16*/1>;
30463117
30473118// FIXME: The AddedComplexity should not be needed, but in GlobalISel
30483119// the BFI pattern ends up taking precedence without it.
@@ -3455,8 +3526,7 @@ def : GCNPat <
34553526
34563527// Take the upper 16 bits from V[0] and the lower 16 bits from V[1]
34573528// Special case, can use V_ALIGNBIT (always uses encoded literal)
3458- foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
3459- let True16Predicate = p in {
3529+ let True16Predicate = NotHasTrue16BitInsts in
34603530def : GCNPat <
34613531 (vecTy (DivergentBinFrag<build_vector>
34623532 (Ty !if(!eq(Ty, i16),
@@ -3466,7 +3536,19 @@ def : GCNPat <
34663536 (V_ALIGNBIT_B32_e64 VGPR_32:$b, VGPR_32:$a, (i32 16))
34673537>;
34683538
// UseFakeTrue16Insts form of the divergent build_vector pattern whose first
// element is the upper 16 bits of $a (srl by 16, then trunc, with a bitconvert
// from i16 when Ty is not i16) and whose second element is $b.
// Selected as V_ALIGNBIT_B32_fake16_e64 with src0 = $b, src1 = $a, amount 16,
// and zeros in the modifier, clamp and op_sel positions.
// NOTE(review): the amount is written (i16 16) here, while the
// V_ALIGNBIT_B32_e64 pattern uses (i32 16); presumably the fake16 src2
// operand is 16-bit - confirm against the instruction definition.
3539+ let True16Predicate = UseFakeTrue16Insts in
3540+ def : GCNPat <
3541+ (vecTy (DivergentBinFrag<build_vector>
3542+ (Ty !if(!eq(Ty, i16),
3543+ (Ty (trunc (srl VGPR_32:$a, (i32 16)))),
3544+ (Ty (bitconvert (i16 (trunc (srl VGPR_32:$a, (i32 16)))))))),
3545+ (Ty VGPR_32:$b))),
3546+ (V_ALIGNBIT_B32_fake16_e64 0, VGPR_32:$b, 0, VGPR_32:$a, 0, (i16 16), 0, 0)
3547+ >;
3548+
34693549// Take the upper 16 bits from each VGPR_32 and concat them
3550+ foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
3551+ let True16Predicate = p in
34703552def : GCNPat <
34713553 (vecTy (DivergentBinFrag<build_vector>
34723554 (Ty !if(!eq(Ty, i16),
@@ -3477,7 +3559,6 @@ def : GCNPat <
34773559 (Ty (bitconvert (i16 (trunc (srl VGPR_32:$b, (i32 16)))))))))),
34783560 (V_PERM_B32_e64 VGPR_32:$b, VGPR_32:$a, (S_MOV_B32 (i32 0x07060302)))
34793561>;
3480- }
34813562
34823563} // end foreach Ty
34833564
0 commit comments