Skip to content

Commit 5fce5b4

Browse files
committed
[AArch64][SVE] Use INS when moving elements from bottom 128b of SVE type
Moving elements from a scalable vector to a fixed-length vector should use INS when we know that the extracted element is in the bottom 128 bits of the scalable vector. This avoids inserting unnecessary UMOV/FMOV instructions.
1 parent 98c8d64 commit 5fce5b4

File tree

4 files changed

+578
-86
lines changed

4 files changed

+578
-86
lines changed

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3367,6 +3367,47 @@ let Predicates = [HasSVEorSME] in {
33673367
(UMOVvi64 (v2i64 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexD:$index)>;
33683368
} // End HasNEON
33693369

3370+
// Use INS (element) when moving an element from the bottom 128-bits of an SVE type to a NEON vector.
3371+
// Parameters:
//   NeonTy      - result type of the pattern that inserts into a V64 source.
//   NeonQTy     - result type of the pattern that inserts into a V128 source;
//                 also the type given to the zsub subregister of the SVE value.
//   SVETy       - scalable vector type the element is extracted from.
//   ScalTy      - scalar type produced by the vector_extract.
//   IdxTy       - index operand used for the extract, and for the insert into V128.
//   NarrowIdxTy - index operand used for the insert into V64.
//   INS         - instruction emitted by both patterns.
multiclass Neon_ins_sve_elt_pattern<ValueType NeonTy, ValueType NeonQTy, ValueType SVETy, ValueType ScalTy,
3372+
Operand IdxTy, Operand NarrowIdxTy, Instruction INS> {
3373+
// Insert into 128-bit NEON type from lowest 128-bits of SVE type
// The SVE source is read through its zsub subregister, typed as NeonQTy, so
// INS can take it directly as the source vector operand.
3374+
def : Pat<(NeonQTy (vector_insert V128:$src,
3375+
(ScalTy (vector_extract SVETy:$Rn, IdxTy:$idx_extract)),
3376+
(IdxTy:$idx_insert))),
3377+
(INS V128:$src, IdxTy:$idx_insert,
3378+
(NeonQTy (EXTRACT_SUBREG SVETy:$Rn, zsub)), IdxTy:$idx_extract)>;
3379+
3380+
// Insert into 64-bit NEON type from lowest 128-bits of SVE type
// The V64 source is first placed in the dsub subregister of an otherwise
// undefined (IMPLICIT_DEF) NeonQTy value, INS is applied to that value, and
// the result is read back out through dsub.
3381+
def : Pat<(NeonTy (vector_insert V64:$src,
3382+
(ScalTy (vector_extract SVETy:$Rn, IdxTy:$idx_extract)),
3383+
(NarrowIdxTy:$idx_insert))),
3384+
(EXTRACT_SUBREG
3385+
(INS
3386+
(INSERT_SUBREG (NeonQTy (IMPLICIT_DEF)), V64:$src, dsub), NarrowIdxTy:$idx_insert,
3387+
(NeonQTy (EXTRACT_SUBREG SVETy:$Rn, zsub)), IdxTy:$idx_extract),
3388+
dsub)>;
3389+
}
3390+
3391+
// Inserting into <1 x double/i64> will just create a new vector from the scalar value.
3392+
// Parameters:
//   NeonTy  - result type of the scalar_to_vector pattern.
//   NeonQTy - result type of the pattern that inserts into a V128 source; also
//             the type given to the zsub subregister of the SVE value.
//   SVETy   - scalable vector type the element is extracted from.
//   ScalTy  - scalar type produced by the vector_extract.
// Both patterns use VectorIndexD for every index and always emit INSvi64lane.
multiclass Neon_ins_64b_sve_elt_pattern<ValueType NeonTy, ValueType NeonQTy, ValueType SVETy,
3393+
ValueType ScalTy> {
3394+
// Insert into 128-bit NEON type from lowest 128-bits of SVE type
3395+
def : Pat<(NeonQTy (vector_insert V128:$src,
3396+
(ScalTy (vector_extract SVETy:$Rn, VectorIndexD:$idx_extract)),
3397+
(VectorIndexD:$idx_insert))),
3398+
(INSvi64lane
3399+
V128:$src, VectorIndexD:$idx_insert, (NeonQTy (EXTRACT_SUBREG SVETy:$Rn, zsub)),
3400+
VectorIndexD:$idx_extract)>;
3401+
3402+
// Create a 64-bit NEON vector (scalar_to_vector) from an element in the lowest 128-bits of SVE type
// The INS destination is an undefined (IMPLICIT_DEF) register written at
// index 0; only the dsub subregister of the result is kept.
3403+
def : Pat<(NeonTy (scalar_to_vector
3404+
(ScalTy (vector_extract SVETy:$Rn, VectorIndexD:$idx_extract)))),
3405+
(EXTRACT_SUBREG
3406+
(INSvi64lane (IMPLICIT_DEF), 0, (NeonQTy (EXTRACT_SUBREG SVETy:$Rn, zsub)),
3407+
VectorIndexD:$idx_extract),
3408+
dsub)>;
3409+
}
3410+
33703411
let Predicates = [HasNEON] in {
33713412
def : Pat<(sext_inreg (vector_extract nxv16i8:$vec, VectorIndexB:$index), i8),
33723413
(SMOVvi8to32 (v16i8 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexB:$index)>;
@@ -3380,6 +3421,15 @@ let Predicates = [HasSVEorSME] in {
33803421

33813422
def : Pat<(sext (i32 (vector_extract nxv4i32:$vec, VectorIndexS:$index))),
33823423
(SMOVvi32to64 (v4i32 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexS:$index)>;
3424+
3425+
// Instantiate the INS patterns for each element type. Arguments are, in order:
// 64-bit NEON type, 128-bit NEON type, SVE type, scalar type, extract index
// operand, 64-bit insert index operand, and the INS instruction.
// The nxv16i8 and nxv8i16 instantiations pass i32 as the scalar type.
// NOTE(review): the 64-bit insert index operand is always the one for the next
// wider element size (B -> H, H -> S, S -> D), presumably because the 64-bit
// vector has half as many lanes as the 128-bit one; confirm against the
// VectorIndex* operand definitions.
defm : Neon_ins_sve_elt_pattern<v8i8, v16i8, nxv16i8, i32, VectorIndexB, VectorIndexH, INSvi8lane>;
3426+
defm : Neon_ins_sve_elt_pattern<v4f16, v8f16, nxv8f16, f16, VectorIndexH, VectorIndexS, INSvi16lane>;
3427+
defm : Neon_ins_sve_elt_pattern<v4bf16, v8bf16, nxv8bf16, bf16, VectorIndexH, VectorIndexS, INSvi16lane>;
3428+
defm : Neon_ins_sve_elt_pattern<v4i16, v8i16, nxv8i16, i32, VectorIndexH, VectorIndexS, INSvi16lane>;
3429+
defm : Neon_ins_sve_elt_pattern<v2f32, v4f32, nxv4f32, f32, VectorIndexS, VectorIndexD, INSvi32lane>;
3430+
defm : Neon_ins_sve_elt_pattern<v2i32, v4i32, nxv4i32, i32, VectorIndexS, VectorIndexD, INSvi32lane>;
3431+
defm : Neon_ins_64b_sve_elt_pattern<v1f64, v2f64, nxv2f64, f64>;
3432+
defm : Neon_ins_64b_sve_elt_pattern<v1i64, v2i64, nxv2i64, i64>;
33833433
} // End HasNEON
33843434

33853435
// Extract first element from vector.

0 commit comments

Comments
 (0)