@@ -7222,44 +7222,101 @@ def : Pat<(v2i64 (int_aarch64_neon_vcopy_lane
72227222 V128:$Vd, VectorIndexD:$idx, V128:$Vs, VectorIndexD:$idx2)
72237223 )>;
72247224
7225- multiclass Neon_INS_elt_pattern<ValueType VT128, ValueType VT64,
7226- ValueType VTScal, Instruction INS> {
7227- def : Pat<(VT128 (vector_insert V128:$src,
7228- (VTScal (vector_extract (VT128 V128:$Rn), (i64 imm:$Immn))),
7225+ // Insert a lane extracted from an SVE, 128-bit Neon or 64-bit Neon vector into a 128-bit Neon vector.
7226+ multiclass Neon_INS_elt_pattern_v128<ValueType VT128, ValueType VT64, ValueType VTSVE,
7227+ ValueType VTScal, Operand ExIdxTy, Instruction INS> {
7228+ // SVE source: the extract index must match ExIdxTy and the lane is read from the zsub (low 128-bit) subregister.
7229+ def : Pat<(VT128 (vector_insert VT128:$Rn,
7230+ (VTScal (vector_extract VTSVE:$Rm, (i64 ExIdxTy:$Immn))),
7231+ (i64 imm:$Immd))),
7232+ (INS VT128:$Rn, imm:$Immd, (VT128 (EXTRACT_SUBREG VTSVE:$Rm, zsub)), ExIdxTy:$Immn)>;
7233+
7234+ // Neon source: a 128-bit register is used directly; a 64-bit register is first wrapped with SUBREG_TO_REG (dsub).
7235+ def : Pat<(VT128 (vector_insert V128:$Rn,
7236+ (VTScal (vector_extract (VT128 V128:$Rm), (i64 imm:$Immn))),
72297237 (i64 imm:$Immd))),
7230- (INS V128:$src, imm:$Immd, V128:$Rn, imm:$Immn)>;
7231-
7232- def : Pat<(VT128 (vector_insert V128:$src,
7233- (VTScal (vector_extract (VT64 V64:$Rn), (i64 imm:$Immn))),
7234- (i64 imm:$Immd))),
7235- (INS V128:$src, imm:$Immd,
7236- (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), imm:$Immn)>;
7238+ (INS V128:$Rn, imm:$Immd, V128:$Rm, imm:$Immn)>;
7239+
7240+ def : Pat<(VT128 (vector_insert V128:$Rn,
7241+ (VTScal (vector_extract (VT64 V64:$Rm), (i64 imm:$Immn))),
7242+ (i64 imm:$Immd))),
7243+ (INS V128:$Rn, imm:$Immd,
7244+ (SUBREG_TO_REG (i64 0), V64:$Rm, dsub), imm:$Immn)>;
7245+ }
7246+
7247+ // Insert a lane extracted from an SVE, 128-bit Neon or 64-bit Neon vector into a 64-bit Neon vector.
7248+ multiclass Neon_INS_elt_pattern_v64<ValueType VT128, ValueType VT64, ValueType VTSVE,
7249+ ValueType VTScal, Operand ExIdxTy, Instruction INS> {
7250+ // SVE source: the 64-bit destination is wrapped with SUBREG_TO_REG (dsub), INS reads the lane from zsub of the source, and dsub of the result is extracted.
7251+ def : Pat<(VT64 (vector_insert VT64:$Rn,
7252+ (VTScal (vector_extract VTSVE:$Rm, (i64 ExIdxTy:$Immn))),
7253+ (i64 imm:$Immd))),
7254+ (EXTRACT_SUBREG (INS (SUBREG_TO_REG (i64 0), VT64:$Rn, dsub), imm:$Immd,
7255+ (VT128 (EXTRACT_SUBREG VTSVE:$Rm, zsub)), ExIdxTy:$Immn),
7256+ dsub)>;
72377257
7238- def : Pat<(VT64 (vector_insert V64:$src,
7239- (VTScal (vector_extract (VT128 V128:$Rn), (i64 imm:$Immn))),
7258+ // Neon source: same wrap / INS / extract-dsub sequence; a 128-bit source is used directly, a 64-bit source is wrapped with SUBREG_TO_REG (dsub).
7259+ def : Pat<(VT64 (vector_insert V64:$Rn,
7260+ (VTScal (vector_extract (VT128 V128:$Rm), (i64 imm:$Immn))),
72407261 (i64 imm:$Immd))),
7241- (EXTRACT_SUBREG (INS (SUBREG_TO_REG (i64 0), V64:$src , dsub),
7242- imm:$Immd, V128:$Rn , imm:$Immn),
7262+ (EXTRACT_SUBREG (INS (SUBREG_TO_REG (i64 0), V64:$Rn , dsub),
7263+ imm:$Immd, V128:$Rm , imm:$Immn),
72437264 dsub)>;
72447265
7245- def : Pat<(VT64 (vector_insert V64:$src ,
7246- (VTScal (vector_extract (VT64 V64:$Rn ), (i64 imm:$Immn))),
7266+ def : Pat<(VT64 (vector_insert V64:$Rn ,
7267+ (VTScal (vector_extract (VT64 V64:$Rm ), (i64 imm:$Immn))),
72477268 (i64 imm:$Immd))),
72487269 (EXTRACT_SUBREG
7249- (INS (SUBREG_TO_REG (i64 0), V64:$src, dsub), imm:$Immd,
7250- (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), imm:$Immn),
7270+ (INS (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), imm:$Immd,
7271+ (SUBREG_TO_REG (i64 0), V64:$Rm, dsub), imm:$Immn),
7272+ dsub)>;
7273+ }
7274+
7275+ // Special case for <1 x double/i64> - insertion may be vector_from_scalar or
7276+ // (vector_insert (vec) 0).
7277+ multiclass Neon_INS_elt_pattern_v64d<ValueType VT128, ValueType VT64, ValueType VTSVE,
7278+ ValueType VTScal> {
7279+ // Extracting from the low 128 bits (zsub) of an SVE vector; the lane is inserted at index 0 of an IMPLICIT_DEF register.
7280+ def : Pat<(VT64 (vec_ins_or_scal_vec
7281+ (VTScal (vector_extract VTSVE:$Rm, VectorIndexD:$Immn)))),
7282+ (EXTRACT_SUBREG
7283+ (INSvi64lane (IMPLICIT_DEF), 0, (VT128 (EXTRACT_SUBREG VTSVE:$Rm, zsub)), // NOTE(review): bare 0 here vs (i64 0) in the Neon patterns below - confirm equivalent
7284+ VectorIndexD:$Immn),
72517285 dsub)>;
7286+ // Extracting from a 128-bit Neon vector
7287+ def : Pat<(VT64 (vec_ins_or_scal_vec
7288+ (VTScal (vector_extract (VT128 V128:$Rm), (i64 imm:$Immn))))),
7289+ (EXTRACT_SUBREG
7290+ (INSvi64lane (IMPLICIT_DEF), (i64 0), V128:$Rm, imm:$Immn),
7291+ dsub)>;
7292+
7293+ // Extracting from a 64-bit Neon vector (wrapped with SUBREG_TO_REG, dsub)
7294+ def : Pat<(VT64 (vec_ins_or_scal_vec
7295+ (VTScal (vector_extract (VT64 V64:$Rm), (i64 imm:$Immn))))),
7296+ (EXTRACT_SUBREG
7297+ (INSvi64lane (IMPLICIT_DEF), (i64 0),
7298+ (SUBREG_TO_REG (i64 0), V64:$Rm, dsub), imm:$Immn),
7299+ dsub)>;
7300+ }
7301+
7302+ multiclass Neon_INS_elt_pattern<ValueType VT128, ValueType VT64, ValueType SVESrcVT,
7303+ ValueType VTScal, Operand ExIdxTy, Instruction INS> {
7304+ defm : Neon_INS_elt_pattern_v64<VT128, VT64, SVESrcVT, VTScal, ExIdxTy, INS>; // patterns with a 64-bit (VT64) destination
7305+ defm : Neon_INS_elt_pattern_v128<VT128, VT64, SVESrcVT, VTScal, ExIdxTy, INS>; // patterns with a 128-bit (VT128) destination
72527306}
72537307
7254- defm : Neon_INS_elt_pattern<v8f16, v4f16, f16, INSvi16lane>;
7255- defm : Neon_INS_elt_pattern<v8bf16, v4bf16, bf16, INSvi16lane>;
7256- defm : Neon_INS_elt_pattern<v4f32, v2f32, f32, INSvi32lane>;
7257- defm : Neon_INS_elt_pattern<v2f64, v1f64, f64, INSvi64lane>;
7309+ defm : Neon_INS_elt_pattern<v8f16, v4f16, nxv8f16, f16, VectorIndexH, INSvi16lane>; // 8/16/32-bit element types: both 64-bit and 128-bit destination patterns
7310+ defm : Neon_INS_elt_pattern<v8bf16, v4bf16, nxv8bf16, bf16, VectorIndexH, INSvi16lane>;
7311+ defm : Neon_INS_elt_pattern<v4f32, v2f32, nxv4f32, f32, VectorIndexS, INSvi32lane>;
7312+ defm : Neon_INS_elt_pattern<v16i8, v8i8, nxv16i8, i32, VectorIndexB, INSvi8lane>;
7313+ defm : Neon_INS_elt_pattern<v8i16, v4i16, nxv8i16, i32, VectorIndexH, INSvi16lane>;
7314+ defm : Neon_INS_elt_pattern<v4i32, v2i32, nxv4i32, i32, VectorIndexS, INSvi32lane>;
72587315
7259- defm : Neon_INS_elt_pattern<v16i8, v8i8, i32, INSvi8lane >;
7260- defm : Neon_INS_elt_pattern<v8i16, v4i16, i32, INSvi16lane >;
7261- defm : Neon_INS_elt_pattern<v4i32, v2i32, i32, INSvi32lane >;
7262- defm : Neon_INS_elt_pattern <v2i64, v1i64, i64, INSvi64lane >;
7316+ defm : Neon_INS_elt_pattern_v128<v2f64, v1f64, nxv2f64, f64, VectorIndexD, INSvi64lane >; // 64-bit element types: 128-bit destination via _v128 ...
7317+ defm : Neon_INS_elt_pattern_v64d<v2f64, v1f64, nxv2f64, f64 >; // ... and the <1 x f64>/<1 x i64> destination via the _v64d special case
7318+ defm : Neon_INS_elt_pattern_v128<v2i64, v1i64, nxv2i64, i64, VectorIndexD, INSvi64lane >;
7319+ defm : Neon_INS_elt_pattern_v64d <v2i64, v1i64, nxv2i64, i64 >;
72637320
72647321// Insert from bitcast
72657322// vector_insert(bitcast(f32 src), n, lane) -> INSvi32lane(src, lane, INSERT_SUBREG(-, n), 0)
0 commit comments