@@ -3590,7 +3590,7 @@ defm LDRW : LoadUI<0b10, 0, 0b01, GPR32z, uimm12s4, "ldr",
35903590 (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset)))]>;
35913591let Predicates = [HasFPARMv8] in {
// LDRB: LoadUI instantiation with mnemonic "ldr" for an FPR8Op destination.
// The selection pattern loads from an am_indexed8 address (GPR64sp base plus
// uimm12s1 offset). This change spells the result as (i8 FPR8Op:$Rt), i.e.
// with an explicit value type, in the same way the neighbouring LDRH pattern
// already writes (f16 FPR16Op:$Rt).
// NOTE(review): presumably required because the FPR8 class can now carry more
// than one value type (e.g. aarch64mfp8) -- confirm against the register
// class definition.
35923592defm LDRB : LoadUI<0b00, 1, 0b01, FPR8Op, uimm12s1, "ldr",
3593- [(set FPR8Op:$Rt,
3593+ [(set (i8 FPR8Op:$Rt) ,
35943594 (load (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset)))]>;
35953595defm LDRH : LoadUI<0b01, 1, 0b01, FPR16Op, uimm12s2, "ldr",
35963596 [(set (f16 FPR16Op:$Rt),
@@ -3778,7 +3778,7 @@ defm LDURW : LoadUnscaled<0b10, 0, 0b01, GPR32z, "ldur",
37783778 (load (am_unscaled32 GPR64sp:$Rn, simm9:$offset)))]>;
37793779let Predicates = [HasFPARMv8] in {
// LDURB: LoadUnscaled instantiation with mnemonic "ldur" for an FPR8Op
// destination. The selection pattern loads from an am_unscaled8 address
// (GPR64sp base plus simm9 offset). This change gives the result an explicit
// i8 value type, matching the typed form (f16 FPR16Op:$Rt) used by the
// neighbouring LDURH pattern.
// NOTE(review): presumably needed so the pattern stays unambiguous once FPR8
// holds more than one value type -- confirm against the register class.
37803780defm LDURB : LoadUnscaled<0b00, 1, 0b01, FPR8Op, "ldur",
3781- [(set FPR8Op:$Rt,
3781+ [(set (i8 FPR8Op:$Rt) ,
37823782 (load (am_unscaled8 GPR64sp:$Rn, simm9:$offset)))]>;
37833783defm LDURH : LoadUnscaled<0b01, 1, 0b01, FPR16Op, "ldur",
37843784 [(set (f16 FPR16Op:$Rt),
@@ -4348,7 +4348,7 @@ defm STRW : StoreUIz<0b10, 0, 0b00, GPR32z, uimm12s4, "str",
43484348 (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))]>;
43494349let Predicates = [HasFPARMv8] in {
// STRB: StoreUI instantiation with mnemonic "str" for an FPR8Op source.
// The selection pattern stores to an am_indexed8 address (GPR64sp base plus
// uimm12s1 offset). This change gives the stored value an explicit i8 value
// type, matching the typed form (f16 FPR16Op:$Rt) used by the neighbouring
// STRH pattern.
// NOTE(review): presumably needed so the pattern stays unambiguous once FPR8
// holds more than one value type -- confirm against the register class.
43504350defm STRB : StoreUI<0b00, 1, 0b00, FPR8Op, uimm12s1, "str",
4351- [(store FPR8Op:$Rt,
4351+ [(store (i8 FPR8Op:$Rt) ,
43524352 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))]>;
43534353defm STRH : StoreUI<0b01, 1, 0b00, FPR16Op, uimm12s2, "str",
43544354 [(store (f16 FPR16Op:$Rt),
@@ -4484,7 +4484,7 @@ defm STURW : StoreUnscaled<0b10, 0, 0b00, GPR32z, "stur",
44844484 (am_unscaled32 GPR64sp:$Rn, simm9:$offset))]>;
44854485let Predicates = [HasFPARMv8] in {
// STURB: StoreUnscaled instantiation with mnemonic "stur" for an FPR8Op
// source. The selection pattern stores to an am_unscaled8 address (GPR64sp
// base plus simm9 offset). This change gives the stored value an explicit i8
// value type, matching the typed form (f16 FPR16Op:$Rt) used by the
// neighbouring STURH pattern.
// NOTE(review): presumably needed so the pattern stays unambiguous once FPR8
// holds more than one value type -- confirm against the register class.
44864486defm STURB : StoreUnscaled<0b00, 1, 0b00, FPR8Op, "stur",
4487- [(store FPR8Op:$Rt,
4487+ [(store (i8 FPR8Op:$Rt) ,
44884488 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))]>;
44894489defm STURH : StoreUnscaled<0b01, 1, 0b00, FPR16Op, "stur",
44904490 [(store (f16 FPR16Op:$Rt),
@@ -4604,6 +4604,12 @@ def : Pat<(truncstorei16 GPR64:$Rt, (am_unscaled16 GPR64sp:$Rn, simm9:$offset)),
// truncstorei8 of a GPR64 value to an am_unscaled8 address (GPR64sp base plus
// simm9 offset): select STURBBi, passing the sub_32 subregister extracted
// from $Rt as the value operand.
46044604def : Pat<(truncstorei8 GPR64:$Rt, (am_unscaled8 GPR64sp:$Rn, simm9:$offset)),
46054605 (STURBBi (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, simm9:$offset)>;
46064606
4607+ // aarch64mfp8 (bsub) stores
// Plain (non-truncating) stores of an aarch64mfp8 value reuse the FPR8 store
// instructions: STURBi for the am_unscaled8 form (simm9 offset) and STRBui
// for the am_indexed8 form (uimm12s1 offset). In both patterns $Rt is passed
// to the instruction as an FPR8 register.
4608+ def : Pat<(store aarch64mfp8:$Rt, (am_unscaled8 GPR64sp:$Rn, simm9:$offset)),
4609+ (STURBi FPR8:$Rt, GPR64sp:$Rn, simm9:$offset)>;
4610+ def : Pat<(store aarch64mfp8:$Rt, (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset)),
4611+ (STRBui FPR8:$Rt, GPR64sp:$Rn, uimm12s1:$offset)>;
4612+
46074613// Match stores from lane 0 to the appropriate subreg's store.
46084614multiclass VecStoreULane0Pat<SDPatternOperator StoreOp,
46094615 ValueType VTy, ValueType STy,
@@ -7245,8 +7251,15 @@ def : Pat<(v2i64 (int_aarch64_neon_vcopy_lane
72457251
72467252// Move elements between vectors
72477253multiclass Neon_INS_elt_pattern<ValueType VT128, ValueType VT64, ValueType VTSVE,
7248- ValueType VTScal, Operand SVEIdxTy, Instruction INS> {
7254+ ValueType VTScal, Operand SVEIdxTy, Instruction INS, Instruction DUP, SubRegIndex DUPSub > {
72497255 // Extracting from the lowest 128 bits of an SVE vector
// Insert into lane 0 of an undef VT128 an element extracted from the SVE
// vector $Rm at index $Immn. INS is not used here: DUP reads the element
// from the zsub subregister of $Rm (viewed as VT128), and INSERT_SUBREG
// places that scalar into the DUPSub subregister of an IMPLICIT_DEF VT128.
7256+ def : Pat<(VT128 (vector_insert undef,
7257+ (VTScal (vector_extract VTSVE:$Rm, (i64 SVEIdxTy:$Immn))),
7258+ (i64 0))),
7259+ (INSERT_SUBREG (VT128 (IMPLICIT_DEF)),
7260+ (DUP (VT128 (EXTRACT_SUBREG VTSVE:$Rm, zsub)), SVEIdxTy:$Immn),
7261+ DUPSub)>;
7262+
72507263 def : Pat<(VT128 (vector_insert VT128:$Rn,
72517264 (VTScal (vector_extract VTSVE:$Rm, (i64 SVEIdxTy:$Immn))),
72527265 (i64 imm:$Immd))),
@@ -7265,6 +7278,11 @@ multiclass Neon_INS_elt_pattern<ValueType VT128, ValueType VT64, ValueType VTSVE
72657278 (i64 imm:$Immd))),
72667279 (INS V128:$src, imm:$Immd, V128:$Rn, imm:$Immn)>;
72677280
// Insert into lane 0 of an undef VT128 an element extracted from the 128-bit
// vector $Rn at index $Immn. INS is not used here: DUP extracts the element
// from $Rn, and INSERT_SUBREG places that scalar into the DUPSub subregister
// of an IMPLICIT_DEF VT128.
7281+ def : Pat<(VT128 (vector_insert undef,
7282+ (VTScal (vector_extract (VT128 V128:$Rn), (i64 imm:$Immn))),
7283+ (i64 0))),
7284+ (INSERT_SUBREG (VT128 (IMPLICIT_DEF)), (DUP V128:$Rn, imm:$Immn), DUPSub)>;
7285+
72687286 def : Pat<(VT128 (vector_insert V128:$src,
72697287 (VTScal (vector_extract (VT64 V64:$Rn), (i64 imm:$Immn))),
72707288 (i64 imm:$Immd))),
@@ -7287,15 +7305,15 @@ multiclass Neon_INS_elt_pattern<ValueType VT128, ValueType VT64, ValueType VTSVE
72877305 dsub)>;
72887306}
72897307
// Floating-point instantiations of Neon_INS_elt_pattern. The change appends
// two arguments to each one, matching the multiclass's new DUP and DUPSub
// parameters: the DUP instruction and the subregister index used by the
// insert-into-undef lane-0 patterns (DUPi16/hsub for f16 and bf16,
// DUPi32/ssub for f32, DUPi64/dsub for f64).
7290- defm : Neon_INS_elt_pattern<v8f16, v4f16, nxv8f16, f16, VectorIndexH, INSvi16lane>;
7291- defm : Neon_INS_elt_pattern<v8bf16, v4bf16, nxv8bf16, bf16, VectorIndexH, INSvi16lane>;
7292- defm : Neon_INS_elt_pattern<v4f32, v2f32, nxv4f32, f32, VectorIndexS, INSvi32lane>;
7293- defm : Neon_INS_elt_pattern<v2f64, v1f64, nxv2f64, f64, VectorIndexD, INSvi64lane>;
7308+ defm : Neon_INS_elt_pattern<v8f16, v4f16, nxv8f16, f16, VectorIndexH, INSvi16lane, DUPi16, hsub >;
7309+ defm : Neon_INS_elt_pattern<v8bf16, v4bf16, nxv8bf16, bf16, VectorIndexH, INSvi16lane, DUPi16, hsub >;
7310+ defm : Neon_INS_elt_pattern<v4f32, v2f32, nxv4f32, f32, VectorIndexS, INSvi32lane, DUPi32, ssub >;
7311+ defm : Neon_INS_elt_pattern<v2f64, v1f64, nxv2f64, f64, VectorIndexD, INSvi64lane, DUPi64, dsub >;
72947312
// Integer instantiations of Neon_INS_elt_pattern. The scalar type (VTScal) is
// i32 for the v16i8, v8i16 and v4i32 vectors and i64 for v2i64. The change
// appends the DUP instruction and subregister index used by the
// insert-into-undef lane-0 patterns: DUPi8/bsub, DUPi16/hsub, DUPi32/ssub,
// DUPi64/dsub.
7295- defm : Neon_INS_elt_pattern<v16i8, v8i8, nxv16i8, i32, VectorIndexB, INSvi8lane>;
7296- defm : Neon_INS_elt_pattern<v8i16, v4i16, nxv8i16, i32, VectorIndexH, INSvi16lane>;
7297- defm : Neon_INS_elt_pattern<v4i32, v2i32, nxv4i32, i32, VectorIndexS, INSvi32lane>;
7298- defm : Neon_INS_elt_pattern<v2i64, v1i64, nxv2i64, i64, VectorIndexD, INSvi64lane>;
7313+ defm : Neon_INS_elt_pattern<v16i8, v8i8, nxv16i8, i32, VectorIndexB, INSvi8lane, DUPi8, bsub >;
7314+ defm : Neon_INS_elt_pattern<v8i16, v4i16, nxv8i16, i32, VectorIndexH, INSvi16lane, DUPi16, hsub >;
7315+ defm : Neon_INS_elt_pattern<v4i32, v2i32, nxv4i32, i32, VectorIndexS, INSvi32lane, DUPi32, ssub >;
7316+ defm : Neon_INS_elt_pattern<v2i64, v1i64, nxv2i64, i64, VectorIndexD, INSvi64lane, DUPi64, dsub >;
72997317
73007318// Insert from bitcast
73017319// vector_insert(bitcast(f32 src), n, lane) -> INSvi32lane(src, lane, INSERT_SUBREG(-, n), 0)
0 commit comments