@@ -3315,6 +3315,10 @@ defm LDRSW : Load32RO<0b10, 0, 0b10, GPR64, "ldrsw", i64, sextloadi32>;
33153315// Pre-fetch.
33163316defm PRFM : PrefetchRO<0b11, 0, 0b10, "prfm">;
33173317
// vec_ins_or_scal_vec: pattern fragment taking one scalar operand ($src) and
// listing two alternative DAG shapes for it:
//   (vector_insert undef, $src, (i64 0))  -- insert into lane 0 of an undef vector
//   (scalar_to_vector $src)
// A pattern written against this fragment is therefore offered both shapes.
3318+ def vec_ins_or_scal_vec : PatFrags<(ops node:$src),
3319+ [(vector_insert undef, node:$src, (i64 0)),
3320+ (scalar_to_vector node:$src)]>;
3321+
33183322// For regular load, we do not have any alignment requirement.
33193323// Thus, it is safe to directly map the vector loads with interesting
33203324// addressing modes.
@@ -3323,13 +3327,13 @@ multiclass ScalToVecROLoadPat<ROAddrMode ro, SDPatternOperator loadop,
33233327 ValueType ScalTy, ValueType VecTy,
33243328 Instruction LOADW, Instruction LOADX,
33253329 SubRegIndex sub> {
3326- def : Pat<(VecTy (scalar_to_vector (ScalTy
3330+ def : Pat<(VecTy (vec_ins_or_scal_vec (ScalTy
33273331 (loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$offset))))),
33283332 (INSERT_SUBREG (VecTy (IMPLICIT_DEF)),
33293333 (LOADW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$offset),
33303334 sub)>;
33313335
3332- def : Pat<(VecTy (scalar_to_vector (ScalTy
3336+ def : Pat<(VecTy (vec_ins_or_scal_vec (ScalTy
33333337 (loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$offset))))),
33343338 (INSERT_SUBREG (VecTy (IMPLICIT_DEF)),
33353339 (LOADX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$offset),
@@ -3357,12 +3361,12 @@ defm : ScalToVecROLoadPat<ro64, load, i64, v2i64, LDRDroW, LDRDroX, dsub>;
33573361defm : ScalToVecROLoadPat<ro64, load, f64, v2f64, LDRDroW, LDRDroX, dsub>;
33583362
33593363
// v1i64 built from a 64-bit register-offset load. Unlike the multi-element
// cases, the result is the load instruction itself (LDRDroW / LDRDroX) with no
// INSERT_SUBREG wrapper. In each -/+ pair the only change is the matched
// operator: scalar_to_vector becomes vec_ins_or_scal_vec.
// First pattern: 32-bit index register (ro_Windexed64 / ro_Wextend64).
3360- def : Pat <(v1i64 (scalar_to_vector (i64
3364+ def : Pat <(v1i64 (vec_ins_or_scal_vec (i64
33613365 (load (ro_Windexed64 GPR64sp:$Rn, GPR32:$Rm,
33623366 ro_Wextend64:$extend))))),
33633367 (LDRDroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend64:$extend)>;
33643368
// Second pattern: 64-bit index register (ro_Xindexed64 / ro_Xextend64).
3365- def : Pat <(v1i64 (scalar_to_vector (i64
3369+ def : Pat <(v1i64 (vec_ins_or_scal_vec (i64
33663370 (load (ro_Xindexed64 GPR64sp:$Rn, GPR64:$Rm,
33673371 ro_Xextend64:$extend))))),
33683372 (LDRDroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend64:$extend)>;
@@ -3495,34 +3499,34 @@ def : Pat <(bf16 (load (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))),
34953499// Thus, it is safe to directly map the vector loads with interesting
34963500// addressing modes.
34973501// FIXME: We could do the same for bitconvert to floating point vectors.
// Scalar load (base register + scaled unsigned immediate) placed into a vector.
// Each pattern selects the load named in its result (LDRBui, LDRHui, LDRSui or
// LDRDui) and inserts it into an IMPLICIT_DEF vector at the sub-register index
// matching the loaded width (bsub, hsub, ssub, dsub). The v1i64 case uses
// LDRDui directly with no INSERT_SUBREG.
// In every -/+ pair the only change is scalar_to_vector -> vec_ins_or_scal_vec.
//
// 8-bit extending loads -> v8i8 / v16i8 via bsub.
3498- def : Pat <(v8i8 (scalar_to_vector (i32
3502+ def : Pat <(v8i8 (vec_ins_or_scal_vec (i32
34993503 (extloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))),
35003504 (INSERT_SUBREG (v8i8 (IMPLICIT_DEF)),
35013505 (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub)>;
3502- def : Pat <(v16i8 (scalar_to_vector (i32
3506+ def : Pat <(v16i8 (vec_ins_or_scal_vec (i32
35033507 (extloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))),
35043508 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
35053509 (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub)>;
// 16-bit extending loads -> v4i16 / v8i16 via hsub.
3506- def : Pat <(v4i16 (scalar_to_vector (i32
3510+ def : Pat <(v4i16 (vec_ins_or_scal_vec (i32
35073511 (extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))),
35083512 (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)),
35093513 (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub)>;
3510- def : Pat <(v8i16 (scalar_to_vector (i32
3514+ def : Pat <(v8i16 (vec_ins_or_scal_vec (i32
35113515 (extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))),
35123516 (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
35133517 (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub)>;
// 32-bit loads -> v2i32 / v4i32 via ssub.
3514- def : Pat <(v2i32 (scalar_to_vector (i32
3518+ def : Pat <(v2i32 (vec_ins_or_scal_vec (i32
35153519 (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))))),
35163520 (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)),
35173521 (LDRSui GPR64sp:$Rn, uimm12s4:$offset), ssub)>;
3518- def : Pat <(v4i32 (scalar_to_vector (i32
3522+ def : Pat <(v4i32 (vec_ins_or_scal_vec (i32
35193523 (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))))),
35203524 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
35213525 (LDRSui GPR64sp:$Rn, uimm12s4:$offset), ssub)>;
// 64-bit loads -> v1i64 (load used as-is) / v2i64 via dsub.
3522- def : Pat <(v1i64 (scalar_to_vector (i64
3526+ def : Pat <(v1i64 (vec_ins_or_scal_vec (i64
35233527 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))))),
35243528 (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
3525- def : Pat <(v2i64 (scalar_to_vector (i64
3529+ def : Pat <(v2i64 (vec_ins_or_scal_vec (i64
35263530 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))))),
35273531 (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)),
35283532 (LDRDui GPR64sp:$Rn, uimm12s8:$offset), dsub)>;
@@ -6848,61 +6852,60 @@ def : Pat<(i64 (and (i64 (anyext (i32 (vector_extract (v8i16 V128:$Rn),
68486852
68496853defm INS : SIMDIns;
68506854
// GPR32 scalar -> byte/halfword element vectors (v16i8, v8i8, v8i16, v4i16).
// All four patterns produce the same result: copy the GPR32 into the FPR32
// register class, then wrap it with SUBREG_TO_REG at ssub.
// In every -/+ pair the only change is scalar_to_vector -> vec_ins_or_scal_vec.
6851- def : Pat<(v16i8 (scalar_to_vector GPR32:$Rn)),
6855+ def : Pat<(v16i8 (vec_ins_or_scal_vec GPR32:$Rn)),
68526856 (SUBREG_TO_REG (i32 0),
68536857 (f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>;
6854- def : Pat<(v8i8 (scalar_to_vector GPR32:$Rn)),
6858+ def : Pat<(v8i8 (vec_ins_or_scal_vec GPR32:$Rn)),
68556859 (SUBREG_TO_REG (i32 0),
68566860 (f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>;
68576861
// Unchanged context: v8i8 bitconvert of a zero-extended GPR32 goes through
// FMOVWSr rather than COPY_TO_REGCLASS.
68586862// The top bits will be zero from the FMOVWSr
68596863def : Pat<(v8i8 (bitconvert (i64 (zext GPR32:$Rn)))),
68606864 (SUBREG_TO_REG (i32 0), (f32 (FMOVWSr GPR32:$Rn)), ssub)>;
68616865
6862- def : Pat<(v8i16 (scalar_to_vector GPR32:$Rn)),
6866+ def : Pat<(v8i16 (vec_ins_or_scal_vec GPR32:$Rn)),
68636867 (SUBREG_TO_REG (i32 0),
68646868 (f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>;
6865- def : Pat<(v4i16 (scalar_to_vector GPR32:$Rn)),
6869+ def : Pat<(v4i16 (vec_ins_or_scal_vec GPR32:$Rn)),
68666870 (SUBREG_TO_REG (i32 0),
68676871 (f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>;
68686872
// f16 / bf16 scalar already in FPR16 -> 4- and 8-element vectors: insert the
// register into an IMPLICIT_DEF vector at hsub.
// In every -/+ pair the only change is scalar_to_vector -> vec_ins_or_scal_vec.
6869- def : Pat<(v4f16 (scalar_to_vector (f16 FPR16:$Rn))),
6873+ def : Pat<(v4f16 (vec_ins_or_scal_vec (f16 FPR16:$Rn))),
68706874 (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;
6871- def : Pat<(v8f16 (scalar_to_vector (f16 FPR16:$Rn))),
6875+ def : Pat<(v8f16 (vec_ins_or_scal_vec (f16 FPR16:$Rn))),
68726876 (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;
68736877
6874- def : Pat<(v4bf16 (scalar_to_vector (bf16 FPR16:$Rn))),
6878+ def : Pat<(v4bf16 (vec_ins_or_scal_vec (bf16 FPR16:$Rn))),
68756879 (INSERT_SUBREG (v4bf16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;
6876- def : Pat<(v8bf16 (scalar_to_vector (bf16 FPR16:$Rn))),
6880+ def : Pat<(v8bf16 (vec_ins_or_scal_vec (bf16 FPR16:$Rn))),
68776881 (INSERT_SUBREG (v8bf16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;
68786882
// Integer scalar typed as living in an FP register (i32 in FPR32, i64 in
// FPR64) -> v2i32 / v4i32 / v2i64: insert into an IMPLICIT_DEF vector at ssub
// (32-bit) or dsub (64-bit).
// In every -/+ pair the only change is scalar_to_vector -> vec_ins_or_scal_vec;
// the diff also drops the blank line that preceded the v2i64 pattern.
6879- def : Pat<(v2i32 (scalar_to_vector (i32 FPR32:$Rn))),
6883+ def : Pat<(v2i32 (vec_ins_or_scal_vec (i32 FPR32:$Rn))),
68806884 (v2i32 (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)),
68816885 (i32 FPR32:$Rn), ssub))>;
6882- def : Pat<(v4i32 (scalar_to_vector (i32 FPR32:$Rn))),
6886+ def : Pat<(v4i32 (vec_ins_or_scal_vec (i32 FPR32:$Rn))),
68836887 (v4i32 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
68846888 (i32 FPR32:$Rn), ssub))>;
6885-
6886- def : Pat<(v2i64 (scalar_to_vector (i64 FPR64:$Rn))),
6889+ def : Pat<(v2i64 (vec_ins_or_scal_vec (i64 FPR64:$Rn))),
68876890 (v2i64 (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)),
68886891 (i64 FPR64:$Rn), dsub))>;
68896892
// f16 / bf16 scalar in FPR16 -> 4- and 8-element vectors via INSERT_SUBREG at
// hsub.
// NOTE(review): these four patterns are textually identical (source and result)
// to the v4f16 / v8f16 / v4bf16 / v8bf16 patterns that appear shortly before
// them in this same hunk, both before and after the change. They look
// redundant -- confirm and consider removing one copy in a follow-up.
6890- def : Pat<(v4f16 (scalar_to_vector (f16 FPR16:$Rn))),
6893+ def : Pat<(v4f16 (vec_ins_or_scal_vec (f16 FPR16:$Rn))),
68916894 (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;
6892- def : Pat<(v8f16 (scalar_to_vector (f16 FPR16:$Rn))),
6895+ def : Pat<(v8f16 (vec_ins_or_scal_vec (f16 FPR16:$Rn))),
68936896 (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;
68946897
6895- def : Pat<(v4bf16 (scalar_to_vector (bf16 FPR16:$Rn))),
6898+ def : Pat<(v4bf16 (vec_ins_or_scal_vec (bf16 FPR16:$Rn))),
68966899 (INSERT_SUBREG (v4bf16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;
6897- def : Pat<(v8bf16 (scalar_to_vector (bf16 FPR16:$Rn))),
6900+ def : Pat<(v8bf16 (vec_ins_or_scal_vec (bf16 FPR16:$Rn))),
68986901 (INSERT_SUBREG (v8bf16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;
68996902
// f32 / f64 scalar -> v4f32 / v2f32 / v2f64: insert the FPR32 or FPR64
// register into an IMPLICIT_DEF vector at ssub or dsub respectively.
// In every -/+ pair the only change is scalar_to_vector -> vec_ins_or_scal_vec.
6900- def : Pat<(v4f32 (scalar_to_vector (f32 FPR32:$Rn))),
6903+ def : Pat<(v4f32 (vec_ins_or_scal_vec (f32 FPR32:$Rn))),
69016904 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR32:$Rn, ssub)>;
6902- def : Pat<(v2f32 (scalar_to_vector (f32 FPR32:$Rn))),
6905+ def : Pat<(v2f32 (vec_ins_or_scal_vec (f32 FPR32:$Rn))),
69036906 (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), FPR32:$Rn, ssub)>;
69046907
6905- def : Pat<(v2f64 (scalar_to_vector (f64 FPR64:$Rn))),
6908+ def : Pat<(v2f64 (vec_ins_or_scal_vec (f64 FPR64:$Rn))),
69066909 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FPR64:$Rn, dsub)>;
69076910
69086911def : Pat<(v4f16 (vector_insert (v4f16 V64:$Rn),
@@ -8507,7 +8510,7 @@ def : Ld1Lane64IdxOpPat<extloadi8, VectorIndexH, v4i16, i32, LD1i8, VectorIndexH
85078510let Predicates = [HasNEON] in {
// Ld1Lane128FirstElm: pattern class for a ResultTy vector whose first element
// comes from an i32 extending load at [Rn].
//   ResultTy - vector type produced by the matched DAG and by the result.
//   VecTy    - type of the IMPLICIT_DEF vector handed to LD1.
//   ExtLoad  - load operator applied to GPR64sp:$Rn in the source pattern.
//   LD1      - instruction used for selection; called with lane index 0.
// The result takes the dsub sub-register of the LD1 output.
// The diff changes only the matched operator (scalar_to_vector ->
// vec_ins_or_scal_vec).
85098512 class Ld1Lane128FirstElm<ValueType ResultTy, ValueType VecTy,
85108513 SDPatternOperator ExtLoad, Instruction LD1>
8510- : Pat<(ResultTy (scalar_to_vector (i32 (ExtLoad GPR64sp:$Rn)))),
8513+ : Pat<(ResultTy (vec_ins_or_scal_vec (i32 (ExtLoad GPR64sp:$Rn)))),
85118514 (ResultTy (EXTRACT_SUBREG
85128515 (LD1 (VecTy (IMPLICIT_DEF)), 0, GPR64sp:$Rn), dsub))>;
85138516
@@ -8940,11 +8943,11 @@ def : Pat<(v1i64 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
// Conversions between a 64-bit scalar and single-element 64-bit vectors.
// Bitconverts between GPR64 and v1i64 / v1f64 are selected as a plain
// COPY_TO_REGCLASS (unchanged context lines).
89408943def : Pat<(v1f64 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
89418944def : Pat<(i64 (bitconvert (v1i64 V64:$Vn))),
89428945 (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
// Building v1i64 / v1f64 from a GPR64 scalar is likewise a register-class
// copy to FPR64; from an f64 already in FPR64 the register is reused as-is.
// In every -/+ pair the only change is scalar_to_vector -> vec_ins_or_scal_vec.
8943- def : Pat<(v1i64 (scalar_to_vector GPR64:$Xn)),
8946+ def : Pat<(v1i64 (vec_ins_or_scal_vec GPR64:$Xn)),
89448947 (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
8945- def : Pat<(v1f64 (scalar_to_vector GPR64:$Xn)),
8948+ def : Pat<(v1f64 (vec_ins_or_scal_vec GPR64:$Xn)),
89468949 (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
8947- def : Pat<(v1f64 (scalar_to_vector (f64 FPR64:$Xn))), (v1f64 FPR64:$Xn)>;
8950+ def : Pat<(v1f64 (vec_ins_or_scal_vec (f64 FPR64:$Xn))), (v1f64 FPR64:$Xn)>;
89488951
89498952def : Pat<(f32 (bitconvert (i32 GPR32:$Xn))),
89508953 (COPY_TO_REGCLASS GPR32:$Xn, FPR32)>;
0 commit comments