@@ -791,6 +791,15 @@ def : GCNPat<
791791 (SI_CALL_ISEL $src0, (i64 0))
792792>;
793793
794+ // Handle fshr with uniform inputs to map to scalar instructions
// Register shift-amount form: builds a 64-bit scalar value with $src1 in sub0
// and $src0 in sub1, shifts it right with S_LSHR_B64, and returns sub0 of the
// result. The shift amount is first masked with 31 via S_AND_B32, so only its
// low five bits reach the shift (fshr uses the amount modulo the bit width,
// per the LLVM LangRef).
795+ def : GCNPat<(UniformTernaryFrag<fshr> i32:$src0, i32:$src1, i32:$src2),
796+ (i32 (EXTRACT_SUBREG (S_LSHR_B64 (REG_SEQUENCE SReg_64, $src1, sub0, $src0, sub1), (S_AND_B32 $src2, (i32 31))), sub0))
797+ >;
798+
// Uniform fshr whose shift amount matches ShiftAmt32Imm: same 64-bit
// S_LSHR_B64 + sub0 extraction with $src1 in sub0 and $src0 in sub1, but
// $src2 is passed to the shift directly, with no S_AND_B32 masking.
// NOTE(review): presumably ShiftAmt32Imm only matches immediates already in
// the 0..31 range, which would make the mask unnecessary; confirm against its
// definition elsewhere in the file.
799+ def : GCNPat<(UniformTernaryFrag<fshr> i32:$src0, i32:$src1, (i32 ShiftAmt32Imm:$src2)),
800+ (i32 (EXTRACT_SUBREG (S_LSHR_B64 (REG_SEQUENCE SReg_64, $src1, sub0, $src0, sub1), $src2), sub0))
801+ >;
802+
794803// Wrapper around s_swappc_b64 with extra $callee parameter to track
795804// the called function after regalloc.
796805def SI_CALL : SPseudoInstSI <
@@ -2694,7 +2703,7 @@ def : GCNPat<pat,
26942703 $src1, /* clamp */ 0, /* op_sel */ 0)
26952704>;
26962705
2697- def : GCNPat<(DivergentTernaryFrag< fshr> i32:$src0, i32:$src1, i32:$src2),
2706+ def : GCNPat<(fshr i32:$src0, i32:$src1, i32:$src2),
26982707 (V_ALIGNBIT_B32_opsel_e64 /* src0_modifiers */ 0, $src0,
26992708 /* src1_modifiers */ 0, $src1,
27002709 /* src2_modifiers */ 0,
@@ -2723,7 +2732,7 @@ def : GCNPat<(i32 (DivergentUnaryFrag<trunc> (srl i64:$src0, (i32 ShiftAmt32Imm:
27232732 (i16 (EXTRACT_SUBREG VGPR_32:$src1, lo16)),
27242733 /* clamp */ 0, /* op_sel */ 0)>;
27252734
2726- def : GCNPat<(DivergentTernaryFrag< fshr> i32:$src0, i32:$src1, i32:$src2),
2735+ def : GCNPat<(fshr i32:$src0, i32:$src1, i32:$src2),
27272736 (V_ALIGNBIT_B32_t16_e64 /* src0_modifiers */ 0, $src0,
27282737 /* src1_modifiers */ 0, $src1,
27292738 /* src2_modifiers */ 0,
@@ -2759,7 +2768,7 @@ def : GCNPat<(i32 (DivergentUnaryFrag<trunc> (srl i64:$src0, (i32 ShiftAmt32Imm:
27592768 $src1, /* clamp */ 0, /* op_sel */ 0)
27602769>;
27612770
2762- def : GCNPat<(DivergentTernaryFrag< fshr> i32:$src0, i32:$src1, i32:$src2),
2771+ def : GCNPat<(fshr i32:$src0, i32:$src1, i32:$src2),
27632772 (V_ALIGNBIT_B32_fake16_e64 /* src0_modifiers */ 0, $src0,
27642773 /* src1_modifiers */ 0, $src1,
27652774 /* src2_modifiers */ 0,
@@ -3854,15 +3863,8 @@ class PackB32Pat<Instruction inst> : GCNPat <
38543863 (inst $src0_mods, VGPR_32:$src0, $src1_mods, VGPR_32:$src1)
38553864>;
38563865}
3857- let SubtargetPredicate = isGFX9Plus in {
3858- def : GCNPat<(UniformTernaryFrag<fshr> i32:$src0, i32:$src1, i32:$src2),
3859- (i32 (EXTRACT_SUBREG (S_LSHR_B64 (REG_SEQUENCE SReg_64, $src1, sub0, $src0, sub1), (S_AND_B32 $src2, (i32 31))), sub0))
3860- >;
38613866
3862- def : GCNPat<(UniformTernaryFrag<fshr> i32:$src0, i32:$src1, (i32 ShiftAmt32Imm:$src2)),
3863- (i32 (EXTRACT_SUBREG (S_LSHR_B64 (REG_SEQUENCE SReg_64, $src1, sub0, $src0, sub1), $src2), sub0))
3864- >;
3865-
3867+ let SubtargetPredicate = isGFX9Plus in {
38663868let True16Predicate = NotHasTrue16BitInsts in
38673869 def : PackB32Pat<V_PACK_B32_F16_e64>;
38683870
0 commit comments