@@ -2201,6 +2201,8 @@ def : GCNPat <
22012201}
22022202
22032203foreach fp16vt = [f16, bf16] in {
2204+ foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
2205+ let True16Predicate = p in {
22042206def : GCNPat <
22052207 (fcopysign fp16vt:$src0, fp16vt:$src1),
22062208 (V_BFI_B32_e64 (S_MOV_B32 (i32 0x00007fff)), $src0, $src1)
@@ -2231,6 +2233,42 @@ def : GCNPat <
22312233 (V_BFI_B32_e64 (S_MOV_B32 (i32 0x00007fff)), $src0,
22322234 (V_LSHRREV_B32_e64 (i32 16), (EXTRACT_SUBREG $src1, sub1)))
22332235>;
2236+ }
2237+ let True16Predicate = UseRealTrue16Insts in { // fcopysign patterns for fp16vt, applied with True16Predicate = UseRealTrue16Insts
2238+ def : GCNPat < // fp16vt magnitude, fp16vt sign
2239+ (fcopysign fp16vt:$src0, fp16vt:$src1),
2240+ (EXTRACT_SUBREG (V_BFI_B32_e64 (S_MOV_B32 (i32 0x00007fff)), // mask 0x00007fff = bits 0..14; the lo16 half of the 32-bit result is returned
2241+ (REG_SEQUENCE VGPR_32, $src0, lo16, (i16 (IMPLICIT_DEF)), hi16), // $src0 in lo16, hi16 left undefined
2242+ (REG_SEQUENCE VGPR_32, $src1, lo16, (i16 (IMPLICIT_DEF)), hi16)), lo16) // $src1 in lo16, hi16 left undefined
2243+ >;
2244+
2245+ def : GCNPat < // f32 magnitude, fp16vt sign
2246+ (fcopysign f32:$src0, fp16vt:$src1),
2247+ (V_BFI_B32_e64 (S_MOV_B32 (i32 0x7fffffff)), $src0, // mask 0x7fffffff = bits 0..30
2248+ (REG_SEQUENCE VGPR_32, (i16 (IMPLICIT_DEF)), lo16, $src1, hi16)) // $src1 in hi16 so its bit 15 lands on bit 31; lo16 left undefined
2249+ >;
2250+
2251+ def : GCNPat < // f64 magnitude, fp16vt sign
2252+ (fcopysign f64:$src0, fp16vt:$src1),
2253+ (REG_SEQUENCE VReg_64,
2254+ (i32 (EXTRACT_SUBREG $src0, sub0)), sub0, // sub0 of $src0 is passed through unchanged
2255+ (V_BFI_B32_e64 (S_MOV_B32 (i32 0x7fffffff)), (i32 (EXTRACT_SUBREG $src0, sub1)), // only sub1 of $src0 goes through the BFI
2256+ (REG_SEQUENCE VGPR_32, (i16 (IMPLICIT_DEF)), lo16, $src1, hi16)), sub1) // $src1 in hi16, lo16 left undefined
2257+ >;
2258+
2259+ def : GCNPat < // fp16vt magnitude, f32 sign
2260+ (fcopysign fp16vt:$src0, f32:$src1),
2261+ (EXTRACT_SUBREG (V_BFI_B32_e64 (S_MOV_B32 (i32 0x7fff0000)), // mask 0x7fff0000 = bits 16..30; the hi16 half of the 32-bit result is returned
2262+ (REG_SEQUENCE VGPR_32, (i16 (IMPLICIT_DEF)), lo16, $src0, hi16), $src1), hi16) // $src0 in hi16, lo16 left undefined
2263+ >;
2264+
2265+ def : GCNPat < // fp16vt magnitude, f64 sign
2266+ (fcopysign fp16vt:$src0, f64:$src1),
2267+ (V_BFI_B32_e64 (S_MOV_B32 (i32 0x00007fff)), // NOTE(review): unlike the other fp16vt-result patterns in this block, the BFI result is not wrapped in EXTRACT_SUBREG ..., lo16 - confirm this is intended
2268+ (REG_SEQUENCE VGPR_32, $src0, lo16, (i16 (IMPLICIT_DEF)), hi16), // $src0 in lo16, hi16 left undefined
2269+ (V_LSHRREV_B32_e64 (i32 16), (EXTRACT_SUBREG $src1, sub1))) // sub1 of $src1 shifted by 16; presumably moves bit 31 down to bit 15 - confirm operand order of V_LSHRREV
2270+ >;
2271+ } // End True16Predicate = UseRealTrue16Insts
22342272} // End foreach fp16vt = [f16, bf16]
22352273
22362274
@@ -3154,6 +3192,11 @@ def : GCNPat<
31543192 (i32 (zext (i16 (bitconvert fp16_zeros_high_16bits:$src)))),
31553193 (COPY VSrc_b16:$src)
31563194>;
3195+
3196+ def : GCNPat < // i16 -> i1 trunc wrapped in DivergentUnaryFrag, lowered with 32-bit AND and compare
3197+ (i1 (DivergentUnaryFrag<trunc> i16:$a)),
3198+ (V_CMP_EQ_U32_e64 (V_AND_B32_e64 (i32 1), $a), (i32 1)) // ($a & 1) == 1
3199+ >;
31573200}
31583201
31593202let True16Predicate = UseRealTrue16Insts in {
@@ -3171,6 +3214,11 @@ def : GCNPat<
31713214 (i32 (zext (i16 (bitconvert fp16_zeros_high_16bits:$src)))),
31723215 (REG_SEQUENCE VGPR_32, VGPR_16:$src, lo16, (V_MOV_B16_t16_e64 0, (i16 0), 0), hi16)
31733216>;
3217+
3218+ def : GCNPat < // i16 -> i1 trunc wrapped in DivergentUnaryFrag, lowered with the _t16 AND and compare instructions
3219+ (i1 (DivergentUnaryFrag<trunc> i16:$a)),
3220+ (V_CMP_EQ_U16_t16_e64 (i32 0), (V_AND_B16_t16_e64 (i32 0), (i16 1), (i32 0), $a), (i32 0), (i16 1), (i32 0)) // compares (1 & $a) with 1; the (i32 0) operands are presumably modifier/op_sel fields - confirm against the instruction definitions
3221+ >;
31743222}
31753223
31763224def : GCNPat <
@@ -3199,11 +3247,6 @@ def : GCNPat <
31993247 (V_CMP_EQ_U32_e64 (V_AND_B32_e64 (i32 1), $a), (i32 1))
32003248>;
32013249
3202- def : GCNPat <
3203- (i1 (DivergentUnaryFrag<trunc> i16:$a)),
3204- (V_CMP_EQ_U32_e64 (V_AND_B32_e64 (i32 1), $a), (i32 1))
3205- >;
3206-
32073250def IMMBitSelConst : SDNodeXForm<imm, [{
32083251 return CurDAG->getTargetConstant(1ULL << N->getZExtValue(), SDLoc(N),
32093252 MVT::i32);
@@ -3807,7 +3850,8 @@ def : GCNPat <
38073850 (v2i16 (S_PACK_HL_B32_B16 SReg_32:$src0, SReg_32:$src1))
38083851>;
38093852
3810-
3853+ foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
3854+ let True16Predicate = p in {
38113855def : GCNPat <
38123856 (v2f16 (scalar_to_vector f16:$src0)),
38133857 (COPY $src0)
@@ -3827,6 +3871,29 @@ def : GCNPat <
38273871 (v4f16 (scalar_to_vector f16:$src0)),
38283872 (INSERT_SUBREG (IMPLICIT_DEF), $src0, sub0)
38293873>;
3874+ }
3875+
3876+ let True16Predicate = UseRealTrue16Insts in { // scalar_to_vector patterns applied with True16Predicate = UseRealTrue16Insts
3877+ def : GCNPat < // f16 scalar -> v2f16
3878+ (v2f16 (scalar_to_vector f16:$src0)),
3879+ (REG_SEQUENCE VGPR_32, $src0, lo16, (i16 (IMPLICIT_DEF)), hi16) // $src0 in lo16, hi16 left undefined
3880+ >;
3881+
3882+ def : GCNPat < // i16 scalar -> v2i16
3883+ (v2i16 (scalar_to_vector i16:$src0)),
3884+ (REG_SEQUENCE VGPR_32, $src0, lo16, (i16 (IMPLICIT_DEF)), hi16) // $src0 in lo16, hi16 left undefined
3885+ >;
3886+
3887+ def : GCNPat < // i16 scalar -> v4i16
3888+ (v4i16 (scalar_to_vector i16:$src0)),
3889+ (REG_SEQUENCE VGPR_32, $src0, lo16, (i16 (IMPLICIT_DEF)), hi16, (i32 (IMPLICIT_DEF)), sub1) // NOTE(review): names VGPR_32 but also supplies a 32-bit sub1 operand - confirm the register class and subregister indices
3890+ >;
3891+
3892+ def : GCNPat < // f16 scalar -> v4f16
3893+ (v4f16 (scalar_to_vector f16:$src0)),
3894+ (REG_SEQUENCE VGPR_32, $src0, lo16, (i16 (IMPLICIT_DEF)), hi16, (i32 (IMPLICIT_DEF)), sub1) // NOTE(review): same VGPR_32 + sub1 combination as the v4i16 pattern - confirm
3895+ >;
3896+ } // End True16Predicate = UseRealTrue16Insts
38303897
38313898def : GCNPat <
38323899 (i64 (int_amdgcn_mov_dpp i64:$src, timm:$dpp_ctrl, timm:$row_mask,
0 commit comments