Skip to content

Commit 32ee01d

Browse files
[AArch64] Generate zeroing forms of certain SVE2.2 instructions (2/11)
SVE2.2 introduces predicated instruction forms that zero the inactive lanes. In some cases this saves a `movprfx` or a `mov` instruction when emitting code for the `_x` or `_z` variants of intrinsics. This patch adds support for emitting the zeroing forms of certain `FCVT` and `BFCVT` instructions.
1 parent 67a7378 commit 32ee01d

File tree

3 files changed

+365
-3
lines changed

3 files changed

+365
-3
lines changed

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2419,6 +2419,10 @@ let Predicates = [HasSVEorSME] in {
24192419
(nxv2i64 (splat_vector (i64 0xFFFFFFFF)))), nxv2f64:$Zd)),
24202420
(UCVTF_ZPmZ_StoD_UNDEF ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
24212421

2422+
// Instantiate the sve_fp_2op_p_zd_pat patterns for the int_aarch64_sve_fcvt
// intrinsic family under the name prefix FCVT_ZPmZ. The patterns are guarded
// by both HasSVEorSME and UseUnaryUndefPseudos.
let Predicates = [HasSVEorSME, UseUnaryUndefPseudos] in {
2423+
defm FCVT_ZPmZ : sve_fp_2op_p_zd_pat<"int_aarch64_sve_fcvt">;
2424+
}
2425+
24222426
defm FRINTN_ZPmZ : sve_fp_2op_p_zd_HSD<0b00000, "frintn", AArch64frintn_mt>;
24232427
defm FRINTP_ZPmZ : sve_fp_2op_p_zd_HSD<0b00001, "frintp", AArch64frintp_mt>;
24242428
defm FRINTM_ZPmZ : sve_fp_2op_p_zd_HSD<0b00010, "frintm", AArch64frintm_mt>;
@@ -4254,7 +4258,7 @@ defm TBLQ_ZZZ : sve2p1_tblq<"tblq", int_aarch64_sve_tblq>;
42544258
//===----------------------------------------------------------------------===//
42554259
let Predicates = [HasSVE2p2orSME2p2] in {
42564260
// SVE Floating-point convert precision, zeroing predicate
4257-
defm FCVT_ZPzZ : sve_fp_z2op_p_zd_b_0<"fcvt">;
4261+
defm FCVT_ZPzZ : sve_fp_z2op_p_zd_b_0<"fcvt", "int_aarch64_sve_fcvt">;
42584262

42594263
// SVE2p2 floating-point convert precision down (placing odd), zeroing predicate
42604264
defm FCVTNT_ZPzZ : sve_fp_fcvtntz<"fcvtnt">;
@@ -4268,7 +4272,7 @@ let Predicates = [HasSVE2p2orSME2p2] in {
42684272
// SVE2p2 floating-point convert single-to-bf (placing odd), zeroing predicate
42694273
def BFCVTNT_ZPzZ : sve_fp_fcvt2z<0b1010, "bfcvtnt", ZPR16, ZPR32>;
42704274
// Placing corresponding
4271-
def BFCVT_ZPzZ_StoH : sve_fp_z2op_p_zd<0b1001010, "bfcvt", ZPR32, ZPR16>;
4275+
defm BFCVT_ZPzZ_StoH : sve_fp_z2op_p_zd_bfcvt<0b1001010, "bfcvt", int_aarch64_sve_fcvt_bf16f32_v2>;
42724276

42734277
// Floating-point convert to integer, zeroing predicate
42744278
defm FCVTZS_ZPzZ : sve_fp_z2op_p_zd_d<0b0, "fcvtzs">;

llvm/lib/Target/AArch64/SVEInstrFormats.td

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -576,6 +576,11 @@ multiclass SVE_3_Op_Undef_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1
576576
(inst $Op1, $Op2, $Op3)>;
577577
}
578578

579+
// Selection pattern for a three-operand `op` producing `vtd` whose first
// operand (of type vt1) matches SVEDup0Undef. That first operand is dropped:
// `inst` is emitted with only the remaining two operands, $Op1 and $Op2.
// NOTE(review): SVEDup0Undef presumably matches an all-zero or undef vector;
// confirm against its definition, which is not visible here.
class SVE_3_Op_UndefZero_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
580+
ValueType vt2, ValueType vt3, Instruction inst>
581+
: Pat<(vtd (op (vt1 (SVEDup0Undef)), vt2:$Op1, vt3:$Op2)),
582+
(inst $Op1, $Op2)>;
583+
579584
class SVE_4_Op_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
580585
ValueType vt2, ValueType vt3, ValueType vt4,
581586
Instruction inst>
@@ -3139,6 +3144,16 @@ multiclass sve_fp_2op_p_zdr<bits<7> opc, string asm,
31393144
defm : SVE_1_Op_PassthruUndef_Round_Pat<vt1, ir_op, vt2, vt3, !cast<Instruction>(NAME # _UNDEF)>;
31403145
}
31413146

3147+
// Emits six SVE_3_Op_Undef_Pat instantiations, one per conversion variant.
// `op` is the intrinsic name prefix as a string; each pattern looks up the
// operator by pasting a type suffix onto it (_f16f32, _f16f64, _f32f64,
// _f32f16, _f64f16, _f64f32) and the instruction by pasting the matching
// suffix onto NAME (_StoH, _DtoH, _DtoS, _HtoS, _HtoD, _StoD).
multiclass sve_fp_2op_p_zd_pat<string op> {
3148+
defm : SVE_3_Op_Undef_Pat<nxv8f16, !cast<SDPatternOperator>(op # _f16f32), nxv8f16, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _StoH)>;
3149+
defm : SVE_3_Op_Undef_Pat<nxv8f16, !cast<SDPatternOperator>(op # _f16f64), nxv8f16, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _DtoH)>;
3150+
defm : SVE_3_Op_Undef_Pat<nxv4f32, !cast<SDPatternOperator>(op # _f32f64), nxv4f32, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _DtoS)>;
3151+
3152+
defm : SVE_3_Op_Undef_Pat<nxv4f32, !cast<SDPatternOperator>(op # _f32f16), nxv4f32, nxv4i1, nxv8f16, !cast<Instruction>(NAME # _HtoS)>;
3153+
defm : SVE_3_Op_Undef_Pat<nxv2f64, !cast<SDPatternOperator>(op # _f64f16), nxv2f64, nxv2i1, nxv8f16, !cast<Instruction>(NAME # _HtoD)>;
3154+
defm : SVE_3_Op_Undef_Pat<nxv2f64, !cast<SDPatternOperator>(op # _f64f32), nxv2f64, nxv2i1, nxv4f32, !cast<Instruction>(NAME # _StoD)>;
3155+
}
3156+
31423157
multiclass sve_fp_2op_p_zd_HSD<bits<5> opc, string asm, SDPatternOperator op> {
31433158
def _H : sve_fp_2op_p_zd<{ 0b01, opc }, asm, ZPR16, ZPR16, ElementSizeH>,
31443159
SVEPseudo2Instr<NAME # _H, 1>;
@@ -3273,6 +3288,12 @@ multiclass sve_fp_z2op_p_zd_frint<bits<2> opc, string asm> {
32733288
def _D : sve_fp_z2op_p_zd<{ 0b0010, opc{1}, 1, opc{0} }, asm, ZPR64, ZPR64>;
32743289
}
32753290

3291+
// Defines the _StoH instruction record from sve_fp_z2op_p_zd (using ZPR32 and
// ZPR16) together with an SVE_3_Op_UndefZero_Pat selection pattern that maps
// `op` (result type nxv8bf16, with nxv4i1 and nxv4f32 operands) onto that
// instruction.
multiclass sve_fp_z2op_p_zd_bfcvt<bits<7> opc, string asm, SDPatternOperator op> {
3292+
def _StoH : sve_fp_z2op_p_zd<opc, asm, ZPR32, ZPR16>;
3293+
3294+
def : SVE_3_Op_UndefZero_Pat<nxv8bf16, op, nxv8bf16, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _StoH)>;
3295+
}
3296+
32763297
multiclass sve_fp_z2op_p_zd_d<bit U, string asm> {
32773298
def _HtoH : sve_fp_z2op_p_zd<{ 0b011101, U }, asm, ZPR16, ZPR16>;
32783299
def _HtoS : sve_fp_z2op_p_zd<{ 0b011110, U }, asm, ZPR16, ZPR32>;
@@ -3299,13 +3320,20 @@ multiclass sve_fp_z2op_p_zd_d_flogb<string asm> {
32993320
def _D : sve_fp_z2op_p_zd<0b0011011, asm, ZPR64, ZPR64>;
33003321
}
33013322

3302-
multiclass sve_fp_z2op_p_zd_b_0<string asm> {
3323+
// Defines six instruction records (_StoH, _HtoS, _DtoH, _HtoD, _DtoS, _StoD)
// from sve_fp_z2op_p_zd, each with its own 7-bit opcode, followed by one
// SVE_3_Op_UndefZero_Pat selection pattern per record.
// `asm` is the mnemonic string; `op` is the intrinsic name prefix as a string,
// to which a type suffix is pasted to look up the SDPatternOperator.
multiclass sve_fp_z2op_p_zd_b_0<string asm, string op> {
33033324
def _StoH : sve_fp_z2op_p_zd<0b1001000, asm, ZPR32, ZPR16>;
33043325
def _HtoS : sve_fp_z2op_p_zd<0b1001001, asm, ZPR16, ZPR32>;
33053326
def _DtoH : sve_fp_z2op_p_zd<0b1101000, asm, ZPR64, ZPR16>;
33063327
def _HtoD : sve_fp_z2op_p_zd<0b1101001, asm, ZPR16, ZPR64>;
33073328
def _DtoS : sve_fp_z2op_p_zd<0b1101010, asm, ZPR64, ZPR32>;
33083329
def _StoD : sve_fp_z2op_p_zd<0b1101011, asm, ZPR32, ZPR64>;
3330+
3331+
// One pattern per record above: the operator is `op` plus a type suffix, the
// instruction is NAME plus the corresponding record suffix.
def : SVE_3_Op_UndefZero_Pat<nxv8f16, !cast<SDPatternOperator>(op # _f16f32), nxv8f16, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _StoH)>;
3332+
def : SVE_3_Op_UndefZero_Pat<nxv8f16, !cast<SDPatternOperator>(op # _f16f64), nxv8f16, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _DtoH)>;
3333+
def : SVE_3_Op_UndefZero_Pat<nxv4f32, !cast<SDPatternOperator>(op # _f32f64), nxv4f32, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _DtoS)>;
3334+
def : SVE_3_Op_UndefZero_Pat<nxv4f32, !cast<SDPatternOperator>(op # _f32f16), nxv4f32, nxv4i1, nxv8f16, !cast<Instruction>(NAME # _HtoS)>;
3335+
def : SVE_3_Op_UndefZero_Pat<nxv2f64, !cast<SDPatternOperator>(op # _f64f16), nxv2f64, nxv2i1, nxv8f16, !cast<Instruction>(NAME # _HtoD)>;
3336+
def : SVE_3_Op_UndefZero_Pat<nxv2f64, !cast<SDPatternOperator>(op # _f64f32), nxv2f64, nxv2i1, nxv4f32, !cast<Instruction>(NAME # _StoD)>;
33093337
}
33103338

33113339
//===----------------------------------------------------------------------===//

0 commit comments

Comments
 (0)