Skip to content

Commit 1290077

Browse files
authored
[AMDGPU][True16][CodeGen] update isel pattern with vgpr16 for 16 bit types (llvm#154875)
Update the isel patterns for 16-bit types to use vgpr16 in true16 mode. This stops isel from generating the illegal `vgpr32 = copy vgpr16`. This includes fcopysign, scalar_to_vector and i1 trunc. Updated the lit tests and added a few MIR tests. These changes are stacked in one patch because I realized that landing them separately could lead to unexpected failures in between.
1 parent 2ad7a1d commit 1290077

18 files changed

+1908
-1225
lines changed

llvm/lib/Target/AMDGPU/SIInstructions.td

Lines changed: 73 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -2201,6 +2201,8 @@ def : GCNPat <
22012201
}
22022202

22032203
foreach fp16vt = [f16, bf16] in {
2204+
foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
2205+
let True16Predicate = p in {
22042206
def : GCNPat <
22052207
(fcopysign fp16vt:$src0, fp16vt:$src1),
22062208
(V_BFI_B32_e64 (S_MOV_B32 (i32 0x00007fff)), $src0, $src1)
@@ -2231,6 +2233,42 @@ def : GCNPat <
22312233
(V_BFI_B32_e64 (S_MOV_B32 (i32 0x00007fff)), $src0,
22322234
(V_LSHRREV_B32_e64 (i32 16), (EXTRACT_SUBREG $src1, sub1)))
22332235
>;
2236+
}
2237+
let True16Predicate = UseRealTrue16Insts in {
2238+
def : GCNPat <
2239+
(fcopysign fp16vt:$src0, fp16vt:$src1),
2240+
(EXTRACT_SUBREG (V_BFI_B32_e64 (S_MOV_B32 (i32 0x00007fff)),
2241+
(REG_SEQUENCE VGPR_32, $src0, lo16, (i16 (IMPLICIT_DEF)), hi16),
2242+
(REG_SEQUENCE VGPR_32, $src1, lo16, (i16 (IMPLICIT_DEF)), hi16)), lo16)
2243+
>;
2244+
2245+
def : GCNPat <
2246+
(fcopysign f32:$src0, fp16vt:$src1),
2247+
(V_BFI_B32_e64 (S_MOV_B32 (i32 0x7fffffff)), $src0,
2248+
(REG_SEQUENCE VGPR_32, (i16 (IMPLICIT_DEF)), lo16, $src1, hi16))
2249+
>;
2250+
2251+
def : GCNPat <
2252+
(fcopysign f64:$src0, fp16vt:$src1),
2253+
(REG_SEQUENCE VReg_64,
2254+
(i32 (EXTRACT_SUBREG $src0, sub0)), sub0,
2255+
(V_BFI_B32_e64 (S_MOV_B32 (i32 0x7fffffff)), (i32 (EXTRACT_SUBREG $src0, sub1)),
2256+
(REG_SEQUENCE VGPR_32, (i16 (IMPLICIT_DEF)), lo16, $src1, hi16)), sub1)
2257+
>;
2258+
2259+
def : GCNPat <
2260+
(fcopysign fp16vt:$src0, f32:$src1),
2261+
(EXTRACT_SUBREG (V_BFI_B32_e64 (S_MOV_B32 (i32 0x7fff0000)),
2262+
(REG_SEQUENCE VGPR_32, (i16 (IMPLICIT_DEF)), lo16, $src0, hi16), $src1), hi16)
2263+
>;
2264+
2265+
def : GCNPat <
2266+
(fcopysign fp16vt:$src0, f64:$src1),
2267+
(V_BFI_B32_e64 (S_MOV_B32 (i32 0x00007fff)),
2268+
(REG_SEQUENCE VGPR_32, $src0, lo16, (i16 (IMPLICIT_DEF)), hi16),
2269+
(V_LSHRREV_B32_e64 (i32 16), (EXTRACT_SUBREG $src1, sub1)))
2270+
>;
2271+
}
22342272
} // End foreach fp16vt = [f16, bf16]
22352273

22362274

@@ -3154,6 +3192,11 @@ def : GCNPat<
31543192
(i32 (zext (i16 (bitconvert fp16_zeros_high_16bits:$src)))),
31553193
(COPY VSrc_b16:$src)
31563194
>;
3195+
3196+
def : GCNPat <
3197+
(i1 (DivergentUnaryFrag<trunc> i16:$a)),
3198+
(V_CMP_EQ_U32_e64 (V_AND_B32_e64 (i32 1), $a), (i32 1))
3199+
>;
31573200
}
31583201

31593202
let True16Predicate = UseRealTrue16Insts in {
@@ -3171,6 +3214,11 @@ def : GCNPat<
31713214
(i32 (zext (i16 (bitconvert fp16_zeros_high_16bits:$src)))),
31723215
(REG_SEQUENCE VGPR_32, VGPR_16:$src, lo16, (V_MOV_B16_t16_e64 0, (i16 0), 0), hi16)
31733216
>;
3217+
3218+
def : GCNPat <
3219+
(i1 (DivergentUnaryFrag<trunc> i16:$a)),
3220+
(V_CMP_EQ_U16_t16_e64 (i32 0), (V_AND_B16_t16_e64 (i32 0), (i16 1), (i32 0), $a), (i32 0), (i16 1), (i32 0))
3221+
>;
31743222
}
31753223

31763224
def : GCNPat <
@@ -3199,11 +3247,6 @@ def : GCNPat <
31993247
(V_CMP_EQ_U32_e64 (V_AND_B32_e64 (i32 1), $a), (i32 1))
32003248
>;
32013249

3202-
def : GCNPat <
3203-
(i1 (DivergentUnaryFrag<trunc> i16:$a)),
3204-
(V_CMP_EQ_U32_e64 (V_AND_B32_e64 (i32 1), $a), (i32 1))
3205-
>;
3206-
32073250
def IMMBitSelConst : SDNodeXForm<imm, [{
32083251
return CurDAG->getTargetConstant(1ULL << N->getZExtValue(), SDLoc(N),
32093252
MVT::i32);
@@ -3807,7 +3850,8 @@ def : GCNPat <
38073850
(v2i16 (S_PACK_HL_B32_B16 SReg_32:$src0, SReg_32:$src1))
38083851
>;
38093852

3810-
3853+
foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
3854+
let True16Predicate = p in {
38113855
def : GCNPat <
38123856
(v2f16 (scalar_to_vector f16:$src0)),
38133857
(COPY $src0)
@@ -3827,6 +3871,29 @@ def : GCNPat <
38273871
(v4f16 (scalar_to_vector f16:$src0)),
38283872
(INSERT_SUBREG (IMPLICIT_DEF), $src0, sub0)
38293873
>;
3874+
}
3875+
3876+
let True16Predicate = UseRealTrue16Insts in {
3877+
def : GCNPat <
3878+
(v2f16 (scalar_to_vector f16:$src0)),
3879+
(REG_SEQUENCE VGPR_32, $src0, lo16, (i16 (IMPLICIT_DEF)), hi16)
3880+
>;
3881+
3882+
def : GCNPat <
3883+
(v2i16 (scalar_to_vector i16:$src0)),
3884+
(REG_SEQUENCE VGPR_32, $src0, lo16, (i16 (IMPLICIT_DEF)), hi16)
3885+
>;
3886+
3887+
def : GCNPat <
3888+
(v4i16 (scalar_to_vector i16:$src0)),
3889+
(REG_SEQUENCE VGPR_32, $src0, lo16, (i16 (IMPLICIT_DEF)), hi16, (i32 (IMPLICIT_DEF)), sub1)
3890+
>;
3891+
3892+
def : GCNPat <
3893+
(v4f16 (scalar_to_vector f16:$src0)),
3894+
(REG_SEQUENCE VGPR_32, $src0, lo16, (i16 (IMPLICIT_DEF)), hi16, (i32 (IMPLICIT_DEF)), sub1)
3895+
>;
3896+
}
38303897

38313898
def : GCNPat <
38323899
(i64 (int_amdgcn_mov_dpp i64:$src, timm:$dpp_ctrl, timm:$row_mask,

0 commit comments

Comments
 (0)