Skip to content

Commit 2bae96d

Browse files
MDevereaullvmbot
authored andcommitted
[AArch64][SVE] Remove false register dependency for unary FP convert operations
Generate movprfx for floating point convert zeroing pseudo operations Differential Revision: https://reviews.llvm.org/D118617 (cherry picked from commit 6b73a4c)
1 parent 0f27bf2 commit 2bae96d

File tree

5 files changed

+470
-45
lines changed

5 files changed

+470
-45
lines changed

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

Lines changed: 27 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1679,60 +1679,61 @@ let Predicates = [HasSVEorStreamingSVE] in {
16791679
defm FCVTZS_ZPmZ_DtoD : sve_fp_2op_p_zd< 0b1111110, "fcvtzs", ZPR64, ZPR64, null_frag, AArch64fcvtzs_mt, nxv2i64, nxv2i1, nxv2f64, ElementSizeD>;
16801680
defm FCVTZU_ZPmZ_DtoD : sve_fp_2op_p_zd< 0b1111111, "fcvtzu", ZPR64, ZPR64, null_frag, AArch64fcvtzu_mt, nxv2i64, nxv2i1, nxv2f64, ElementSizeD>;
16811681

1682-
def : Pat<(nxv2f32 (AArch64fcvte_mt (nxv2i1 PPR:$Pg), (nxv2f16 ZPR:$Zs), (nxv2f32 ZPR:$Zd))),
1683-
(FCVT_ZPmZ_HtoS ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
1682+
//These patterns exist to improve the code quality of conversions on unpacked types.
1683+
def : Pat<(nxv2f32 (AArch64fcvte_mt (nxv2i1 (SVEAllActive):$Pg), (nxv2f16 ZPR:$Zs), (nxv2f32 ZPR:$Zd))),
1684+
(FCVT_ZPmZ_HtoS_UNDEF ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
16841685

16851686
// FP_ROUND has an additional 'precise' flag which indicates the type of rounding.
16861687
// This is ignored by the pattern below where it is matched by (i64 timm0_1)
1687-
def : Pat<(nxv2f16 (AArch64fcvtr_mt (nxv2i1 PPR:$Pg), (nxv2f32 ZPR:$Zs), (i64 timm0_1), (nxv2f16 ZPR:$Zd))),
1688-
(FCVT_ZPmZ_StoH ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
1688+
def : Pat<(nxv2f16 (AArch64fcvtr_mt (nxv2i1 (SVEAllActive):$Pg), (nxv2f32 ZPR:$Zs), (i64 timm0_1), (nxv2f16 ZPR:$Zd))),
1689+
(FCVT_ZPmZ_StoH_UNDEF ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
16891690

1690-
// Floating-point -> signed integer
1691-
def : Pat<(nxv2f16 (AArch64scvtf_mt (nxv2i1 PPR:$Pg),
1691+
// Signed integer -> Floating-point
1692+
def : Pat<(nxv2f16 (AArch64scvtf_mt (nxv2i1 (SVEAllActive):$Pg),
16921693
(sext_inreg (nxv2i64 ZPR:$Zs), nxv2i16), (nxv2f16 ZPR:$Zd))),
1693-
(SCVTF_ZPmZ_HtoH ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
1694+
(SCVTF_ZPmZ_HtoH_UNDEF ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
16941695

1695-
def : Pat<(nxv4f16 (AArch64scvtf_mt (nxv4i1 PPR:$Pg),
1696+
def : Pat<(nxv4f16 (AArch64scvtf_mt (nxv4i1 (SVEAllActive):$Pg),
16961697
(sext_inreg (nxv4i32 ZPR:$Zs), nxv4i16), (nxv4f16 ZPR:$Zd))),
1697-
(SCVTF_ZPmZ_HtoH ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
1698+
(SCVTF_ZPmZ_HtoH_UNDEF ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
16981699

1699-
def : Pat<(nxv2f16 (AArch64scvtf_mt (nxv2i1 PPR:$Pg),
1700+
def : Pat<(nxv2f16 (AArch64scvtf_mt (nxv2i1 (SVEAllActive):$Pg),
17001701
(sext_inreg (nxv2i64 ZPR:$Zs), nxv2i32), (nxv2f16 ZPR:$Zd))),
1701-
(SCVTF_ZPmZ_StoH ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
1702+
(SCVTF_ZPmZ_StoH_UNDEF ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
17021703

1703-
def : Pat<(nxv2f32 (AArch64scvtf_mt (nxv2i1 PPR:$Pg),
1704+
def : Pat<(nxv2f32 (AArch64scvtf_mt (nxv2i1 (SVEAllActive):$Pg),
17041705
(sext_inreg (nxv2i64 ZPR:$Zs), nxv2i32), (nxv2f32 ZPR:$Zd))),
1705-
(SCVTF_ZPmZ_StoS ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
1706+
(SCVTF_ZPmZ_StoS_UNDEF ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
17061707

1707-
def : Pat<(nxv2f64 (AArch64scvtf_mt (nxv2i1 PPR:$Pg),
1708+
def : Pat<(nxv2f64 (AArch64scvtf_mt (nxv2i1 (SVEAllActive):$Pg),
17081709
(sext_inreg (nxv2i64 ZPR:$Zs), nxv2i32), (nxv2f64 ZPR:$Zd))),
1709-
(SCVTF_ZPmZ_StoD ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
1710+
(SCVTF_ZPmZ_StoD_UNDEF ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
17101711

1711-
// Floating-point -> unsigned integer
1712-
def : Pat<(nxv2f16 (AArch64ucvtf_mt (nxv2i1 PPR:$Pg),
1712+
// Unsigned integer -> Floating-point
1713+
def : Pat<(nxv2f16 (AArch64ucvtf_mt (nxv2i1 (SVEAllActive):$Pg),
17131714
(and (nxv2i64 ZPR:$Zs),
17141715
(nxv2i64 (AArch64dup (i64 0xFFFF)))), (nxv2f16 ZPR:$Zd))),
1715-
(UCVTF_ZPmZ_HtoH ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
1716+
(UCVTF_ZPmZ_HtoH_UNDEF ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
17161717

1717-
def : Pat<(nxv2f16 (AArch64ucvtf_mt (nxv2i1 PPR:$Pg),
1718+
def : Pat<(nxv2f16 (AArch64ucvtf_mt (nxv2i1 (SVEAllActive):$Pg),
17181719
(and (nxv2i64 ZPR:$Zs),
17191720
(nxv2i64 (AArch64dup (i64 0xFFFFFFFF)))), (nxv2f16 ZPR:$Zd))),
1720-
(UCVTF_ZPmZ_StoH ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
1721+
(UCVTF_ZPmZ_StoH_UNDEF ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
17211722

1722-
def : Pat<(nxv4f16 (AArch64ucvtf_mt (nxv4i1 PPR:$Pg),
1723+
def : Pat<(nxv4f16 (AArch64ucvtf_mt (nxv4i1 (SVEAllActive):$Pg),
17231724
(and (nxv4i32 ZPR:$Zs),
17241725
(nxv4i32 (AArch64dup (i32 0xFFFF)))), (nxv4f16 ZPR:$Zd))),
1725-
(UCVTF_ZPmZ_HtoH ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
1726+
(UCVTF_ZPmZ_HtoH_UNDEF ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
17261727

1727-
def : Pat<(nxv2f32 (AArch64ucvtf_mt (nxv2i1 PPR:$Pg),
1728+
def : Pat<(nxv2f32 (AArch64ucvtf_mt (nxv2i1 (SVEAllActive):$Pg),
17281729
(and (nxv2i64 ZPR:$Zs),
17291730
(nxv2i64 (AArch64dup (i64 0xFFFFFFFF)))), (nxv2f32 ZPR:$Zd))),
1730-
(UCVTF_ZPmZ_StoS ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
1731+
(UCVTF_ZPmZ_StoS_UNDEF ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
17311732

1732-
def : Pat<(nxv2f64 (AArch64ucvtf_mt (nxv2i1 PPR:$Pg),
1733+
def : Pat<(nxv2f64 (AArch64ucvtf_mt (nxv2i1 (SVEAllActive):$Pg),
17331734
(and (nxv2i64 ZPR:$Zs),
17341735
(nxv2i64 (AArch64dup (i64 0xFFFFFFFF)))), (nxv2f64 ZPR:$Zd))),
1735-
(UCVTF_ZPmZ_StoD ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
1736+
(UCVTF_ZPmZ_StoD_UNDEF ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
17361737

17371738
defm FRINTN_ZPmZ : sve_fp_2op_p_zd_HSD<0b00000, "frintn", AArch64frintn_mt>;
17381739
defm FRINTP_ZPmZ : sve_fp_2op_p_zd_HSD<0b00001, "frintp", AArch64frintp_mt>;

llvm/lib/Target/AArch64/SVEInstrFormats.td

Lines changed: 20 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -370,6 +370,14 @@ class SVE_1_Op_Passthru_Round_Pat<ValueType vtd, SDPatternOperator op, ValueType
370370
: Pat<(vtd (op pg:$Op1, vts:$Op2, (i64 timm0_1), vtd:$Op3)),
371371
(inst $Op3, $Op1, $Op2)>;
372372

373+
multiclass SVE_1_Op_PassthruUndef_Round_Pat<ValueType vtd, SDPatternOperator op, ValueType pg,
374+
ValueType vts, Instruction inst>{
375+
def : Pat<(vtd (op pg:$Op1, vts:$Op2, (i64 timm0_1), (vtd undef))),
376+
(inst (IMPLICIT_DEF), $Op1, $Op2)>;
377+
def : Pat<(vtd (op (pg (SVEAllActive:$Op1)), vts:$Op2, (i64 timm0_1), vtd:$Op3)),
378+
(inst $Op3, $Op1, $Op2)>;
379+
}
380+
373381
class SVE_1_Op_Imm_OptLsl_Reverse_Pat<ValueType vt, SDPatternOperator op, ZPRRegOp zprty,
374382
ValueType it, ComplexPattern cpx, Instruction inst>
375383
: Pat<(vt (op (vt (AArch64dup (it (cpx i32:$imm, i32:$shift)))), (vt zprty:$Op1))),
@@ -2589,8 +2597,8 @@ multiclass sve_fp_2op_p_zd<bits<7> opc, string asm,
25892597
SDPatternOperator int_op,
25902598
SDPatternOperator ir_op, ValueType vt1,
25912599
ValueType vt2, ValueType vt3, ElementSizeEnum Sz> {
2592-
def NAME : sve_fp_2op_p_zd<opc, asm, i_zprtype, o_zprtype, Sz>;
2593-
2600+
def NAME : sve_fp_2op_p_zd<opc, asm, i_zprtype, o_zprtype, Sz>,
2601+
SVEPseudo2Instr<NAME, 1>;
25942602
// convert vt1 to a packed type for the intrinsic patterns
25952603
defvar packedvt1 = !cond(!eq(!cast<string>(vt1), "nxv2f16"): nxv8f16,
25962604
!eq(!cast<string>(vt1), "nxv4f16"): nxv8f16,
@@ -2604,8 +2612,11 @@ multiclass sve_fp_2op_p_zd<bits<7> opc, string asm,
26042612
1 : vt3);
26052613

26062614
def : SVE_3_Op_Pat<packedvt1, int_op, packedvt1, vt2, packedvt3, !cast<Instruction>(NAME)>;
2607-
26082615
def : SVE_1_Op_Passthru_Pat<vt1, ir_op, vt2, vt3, !cast<Instruction>(NAME)>;
2616+
2617+
def _UNDEF : PredOneOpPassthruPseudo<NAME, !cast<ZPRRegOp>(i_zprtype)>;
2618+
2619+
defm : SVE_1_Op_PassthruUndef_Pat<vt1, ir_op, vt2, vt3, !cast<Instruction>(NAME # _UNDEF)>;
26092620
}
26102621

26112622
multiclass sve_fp_2op_p_zdr<bits<7> opc, string asm,
@@ -2614,7 +2625,8 @@ multiclass sve_fp_2op_p_zdr<bits<7> opc, string asm,
26142625
SDPatternOperator int_op,
26152626
SDPatternOperator ir_op, ValueType vt1,
26162627
ValueType vt2, ValueType vt3, ElementSizeEnum Sz> {
2617-
def NAME : sve_fp_2op_p_zd<opc, asm, i_zprtype, o_zprtype, Sz>;
2628+
def NAME : sve_fp_2op_p_zd<opc, asm, i_zprtype, o_zprtype, Sz>,
2629+
SVEPseudo2Instr<NAME, 1>;
26182630

26192631
// convert vt1 to a packed type for the intrinsic patterns
26202632
defvar packedvt1 = !cond(!eq(!cast<string>(vt1), "nxv2f16"): nxv8f16,
@@ -2623,8 +2635,11 @@ multiclass sve_fp_2op_p_zdr<bits<7> opc, string asm,
26232635
1 : vt1);
26242636

26252637
def : SVE_3_Op_Pat<packedvt1, int_op, packedvt1, vt2, vt3, !cast<Instruction>(NAME)>;
2626-
26272638
def : SVE_1_Op_Passthru_Round_Pat<vt1, ir_op, vt2, vt3, !cast<Instruction>(NAME)>;
2639+
2640+
def _UNDEF : PredOneOpPassthruPseudo<NAME, !cast<ZPRRegOp>(i_zprtype)>;
2641+
2642+
defm : SVE_1_Op_PassthruUndef_Round_Pat<vt1, ir_op, vt2, vt3, !cast<Instruction>(NAME # _UNDEF)>;
26282643
}
26292644

26302645
multiclass sve_fp_2op_p_zd_HSD<bits<5> opc, string asm, SDPatternOperator op> {

0 commit comments

Comments
 (0)