@@ -59,6 +59,57 @@ class SVEType<ValueType VT> {
5959 !eq(VT, nxv8f16): nxv2f16,
6060 !eq(VT, nxv8bf16): nxv2bf16,
6161 true : untyped);
62+
63+ // The 64-bit fixed-length vector type used for the 64-bit subreg of VT; VTs that share an element type share a result (e.g. nxv2f16, nxv4f16 and nxv8f16 all give v4f16).
64+ ValueType DSub = !cond(
65+ !eq(VT, nxv16i8): v8i8,
66+ !eq(VT, nxv8i16): v4i16,
67+ !eq(VT, nxv4i32): v2i32,
68+ !eq(VT, nxv2i64): v1i64,
69+ !eq(VT, nxv2f16): v4f16,
70+ !eq(VT, nxv4f16): v4f16,
71+ !eq(VT, nxv8f16): v4f16,
72+ !eq(VT, nxv2f32): v2f32,
73+ !eq(VT, nxv4f32): v2f32,
74+ !eq(VT, nxv2f64): v1f64,
75+ !eq(VT, nxv2bf16): v4bf16,
76+ !eq(VT, nxv4bf16): v4bf16,
77+ !eq(VT, nxv8bf16): v4bf16,
78+ true : untyped); // Fallback: any VT not listed above yields untyped.
79+
80+ // The 128-bit fixed-length vector type used for the 128-bit subreg of VT; VTs that share an element type share a result (e.g. nxv2f32 and nxv4f32 both give v4f32).
81+ ValueType ZSub = !cond(
82+ !eq(VT, nxv16i8): v16i8,
83+ !eq(VT, nxv8i16): v8i16,
84+ !eq(VT, nxv4i32): v4i32,
85+ !eq(VT, nxv2i64): v2i64,
86+ !eq(VT, nxv2f16): v8f16,
87+ !eq(VT, nxv4f16): v8f16,
88+ !eq(VT, nxv8f16): v8f16,
89+ !eq(VT, nxv2f32): v4f32,
90+ !eq(VT, nxv4f32): v4f32,
91+ !eq(VT, nxv2f64): v2f64,
92+ !eq(VT, nxv2bf16): v8bf16,
93+ !eq(VT, nxv4bf16): v8bf16,
94+ !eq(VT, nxv8bf16): v8bf16,
95+ true : untyped); // Fallback: any VT not listed above yields untyped.
96+
97+ // The legal scalar used to hold a vector element: nxv16i8 and nxv8i16 elements are held in an i32, while every other listed VT uses its own element type.
98+ ValueType EltAsScalar = !cond(
99+ !eq(VT, nxv16i8): i32,
100+ !eq(VT, nxv8i16): i32,
101+ !eq(VT, nxv4i32): i32,
102+ !eq(VT, nxv2i64): i64,
103+ !eq(VT, nxv2f16): f16,
104+ !eq(VT, nxv4f16): f16,
105+ !eq(VT, nxv8f16): f16,
106+ !eq(VT, nxv2f32): f32,
107+ !eq(VT, nxv4f32): f32,
108+ !eq(VT, nxv2f64): f64,
109+ !eq(VT, nxv2bf16): bf16,
110+ !eq(VT, nxv4bf16): bf16,
111+ !eq(VT, nxv8bf16): bf16,
112+ true : untyped); // Fallback: any VT not listed above yields untyped.
62113}
63114
64115def SDT_AArch64Setcc : SDTypeProfile<1, 4, [
@@ -1402,29 +1453,61 @@ multiclass sve_int_perm_dup_i<string asm> {
14021453 def : InstAlias<"mov $Zd, $Qn", // Assembly alias: "mov Zd, Qn" is expressed via the _Q instruction of this multiclass.
14031454 (!cast<Instruction>(NAME # _Q) ZPR128:$Zd, FPR128asZPR:$Qn, 0), 2>; // The final instruction operand is fixed at 0. NOTE(review): the trailing 2 is presumably the alias emit priority -- confirm.
14041455
1405- // Duplicate extracted element of vector into all vector elements
1456+ // Duplicate an extracted vector element across a vector: a splat_vector of a vector_extract at an immediate index is matched to the indexed instruction variant for that element size.
1457+
14061458 def : Pat<(nxv16i8 (splat_vector (i32 (vector_extract (nxv16i8 ZPR:$vec), sve_elm_idx_extdup_b:$index)))), // Byte elements, source vector in a ZPR; the extracted element is typed i32.
14071459 (!cast<Instruction>(NAME # _B) ZPR:$vec, sve_elm_idx_extdup_b:$index)>;
1408- def : Pat<(nxv8i16 (splat_vector (i32 (vector_extract (nxv8i16 ZPR:$vec), sve_elm_idx_extdup_h:$index)))),
1409- (!cast<Instruction>(NAME # _H) ZPR:$vec, sve_elm_idx_extdup_h:$index)>;
1410- def : Pat<(nxv4i32 (splat_vector (i32 (vector_extract (nxv4i32 ZPR:$vec), sve_elm_idx_extdup_s:$index)))),
1411- (!cast<Instruction>(NAME # _S) ZPR:$vec, sve_elm_idx_extdup_s:$index)>;
1412- def : Pat<(nxv2i64 (splat_vector (i64 (vector_extract (nxv2i64 ZPR:$vec), sve_elm_idx_extdup_d:$index)))),
1413- (!cast<Instruction>(NAME # _D) ZPR:$vec, sve_elm_idx_extdup_d:$index)>;
1414- def : Pat<(nxv8f16 (splat_vector (f16 (vector_extract (nxv8f16 ZPR:$vec), sve_elm_idx_extdup_h:$index)))),
1415- (!cast<Instruction>(NAME # _H) ZPR:$vec, sve_elm_idx_extdup_h:$index)>;
1416- def : Pat<(nxv8bf16 (splat_vector (bf16 (vector_extract (nxv8bf16 ZPR:$vec), sve_elm_idx_extdup_h:$index)))),
1417- (!cast<Instruction>(NAME # _H) ZPR:$vec, sve_elm_idx_extdup_h:$index)>;
1418- def : Pat<(nxv4f16 (splat_vector (f16 (vector_extract (nxv4f16 ZPR:$vec), sve_elm_idx_extdup_s:$index)))),
1419- (!cast<Instruction>(NAME # _S) ZPR:$vec, sve_elm_idx_extdup_s:$index)>;
1420- def : Pat<(nxv2f16 (splat_vector (f16 (vector_extract (nxv2f16 ZPR:$vec), sve_elm_idx_extdup_d:$index)))),
1421- (!cast<Instruction>(NAME # _D) ZPR:$vec, sve_elm_idx_extdup_d:$index)>;
1422- def : Pat<(nxv4f32 (splat_vector (f32 (vector_extract (nxv4f32 ZPR:$vec), sve_elm_idx_extdup_s:$index)))),
1423- (!cast<Instruction>(NAME # _S) ZPR:$vec, sve_elm_idx_extdup_s:$index)>;
1424- def : Pat<(nxv2f32 (splat_vector (f32 (vector_extract (nxv2f32 ZPR:$vec), sve_elm_idx_extdup_d:$index)))),
1425- (!cast<Instruction>(NAME # _D) ZPR:$vec, sve_elm_idx_extdup_d:$index)>;
1426- def : Pat<(nxv2f64 (splat_vector (f64 (vector_extract (nxv2f64 ZPR:$vec), sve_elm_idx_extdup_d:$index)))),
1427- (!cast<Instruction>(NAME # _D) ZPR:$vec, sve_elm_idx_extdup_d:$index)>;
1460+ def : Pat<(nxv16i8 (splat_vector (i32 (vector_extract (v16i8 V128:$vec), sve_elm_idx_extdup_b:$index)))), // Byte splat of an element extracted from a v16i8 held in V128.
1461+ (!cast<Instruction>(NAME # _B) (SUBREG_TO_REG (i64 0), $vec, zsub), sve_elm_idx_extdup_b:$index)>; // The V128 source is wrapped in SUBREG_TO_REG with the zsub index before being passed to the _B variant.
1462+ def : Pat<(nxv16i8 (splat_vector (i32 (vector_extract (v8i8 V64:$vec), sve_elm_idx_extdup_b:$index)))), // Byte splat of an element extracted from a v8i8 held in V64.
1463+ (!cast<Instruction>(NAME # _B) (SUBREG_TO_REG (i64 0), $vec, dsub), sve_elm_idx_extdup_b:$index)>; // The V64 source is wrapped in SUBREG_TO_REG with the dsub index before being passed to the _B variant.
1464+
1465+ foreach VT = [nxv8i16, nxv2f16, nxv4f16, nxv8f16, nxv2bf16, nxv4bf16, nxv8bf16] in { // Result types selected to the _H variant; each gets one pattern per source register class (ZPR, V128, V64).
1466+ def : Pat<(VT (splat_vector (SVEType<VT>.EltAsScalar (vector_extract (SVEType<VT>.Packed ZPR:$vec), sve_elm_idx_extdup_h:$index)))), // ZPR source, typed as SVEType<VT>.Packed.
1467+ (!cast<Instruction>(NAME # _H) ZPR:$vec, sve_elm_idx_extdup_h:$index)>;
1468+ def : Pat<(VT (splat_vector (SVEType<VT>.EltAsScalar (vector_extract (SVEType<VT>.ZSub V128:$vec), sve_elm_idx_extdup_h:$index)))), // V128 source, typed as SVEType<VT>.ZSub.
1469+ (!cast<Instruction>(NAME # _H) (SUBREG_TO_REG (i64 0), $vec, zsub), sve_elm_idx_extdup_h:$index)>;
1470+ def : Pat<(VT (splat_vector (SVEType<VT>.EltAsScalar (vector_extract (SVEType<VT>.DSub V64:$vec), sve_elm_idx_extdup_h:$index)))), // V64 source, typed as SVEType<VT>.DSub.
1471+ (!cast<Instruction>(NAME # _H) (SUBREG_TO_REG (i64 0), $vec, dsub), sve_elm_idx_extdup_h:$index)>;
1472+ }
1473+
1474+ foreach VT = [nxv4i32, nxv2f32, nxv4f32 ] in { // Result types selected to the _S variant; each gets one pattern per source register class (ZPR, V128, V64).
1475+ def : Pat<(VT (splat_vector (SVEType<VT>.EltAsScalar (vector_extract (SVEType<VT>.Packed ZPR:$vec), sve_elm_idx_extdup_s:$index)))), // ZPR source, typed as SVEType<VT>.Packed.
1476+ (!cast<Instruction>(NAME # _S) ZPR:$vec, sve_elm_idx_extdup_s:$index)>;
1477+ def : Pat<(VT (splat_vector (SVEType<VT>.EltAsScalar (vector_extract (SVEType<VT>.ZSub V128:$vec), sve_elm_idx_extdup_s:$index)))), // V128 source, typed as SVEType<VT>.ZSub.
1478+ (!cast<Instruction>(NAME # _S) (SUBREG_TO_REG (i64 0), $vec, zsub), sve_elm_idx_extdup_s:$index)>;
1479+ def : Pat<(VT (splat_vector (SVEType<VT>.EltAsScalar (vector_extract (SVEType<VT>.DSub V64:$vec), sve_elm_idx_extdup_s:$index)))), // V64 source, typed as SVEType<VT>.DSub.
1480+ (!cast<Instruction>(NAME # _S) (SUBREG_TO_REG (i64 0), $vec, dsub), sve_elm_idx_extdup_s:$index)>;
1481+ }
1482+
1483+ foreach VT = [nxv2i64, nxv2f64] in { // Result types selected to the _D variant; each gets one pattern per source register class (ZPR, V128, V64).
1484+ def : Pat<(VT (splat_vector (SVEType<VT>.EltAsScalar (vector_extract (VT ZPR:$vec), sve_elm_idx_extdup_d:$index)))), // ZPR source, typed as VT itself (not via SVEType<VT>.Packed as in the _H/_S loops).
1485+ (!cast<Instruction>(NAME # _D) ZPR:$vec, sve_elm_idx_extdup_d:$index)>;
1486+ def : Pat<(VT (splat_vector (SVEType<VT>.EltAsScalar (vector_extract (SVEType<VT>.ZSub V128:$vec), sve_elm_idx_extdup_d:$index)))), // V128 source, typed as SVEType<VT>.ZSub.
1487+ (!cast<Instruction>(NAME # _D) (SUBREG_TO_REG (i64 0), $vec, zsub), sve_elm_idx_extdup_d:$index)>;
1488+ def : Pat<(VT (splat_vector (SVEType<VT>.EltAsScalar (vector_extract (SVEType<VT>.DSub V64:$vec), sve_elm_idx_extdup_d:$index)))), // V64 source, typed as SVEType<VT>.DSub.
1489+ (!cast<Instruction>(NAME # _D) (SUBREG_TO_REG (i64 0), $vec, dsub), sve_elm_idx_extdup_d:$index)>;
1490+ }
1491+
1492+ // When extracting from an unpacked vector the index must be scaled to account
1493+ // for the "holes" in the underlying packed vector type. We get the scaling
1494+ // for free by "promoting" the element type to one whose underlying vector
1495+ // type is packed. This is only valid when extracting from a vector whose
1496+ // length is the same as, or bigger than, that of the splat's result.
1497+
1498+ foreach VT = [nxv4f16, nxv4bf16] in { // Sources of these types are extracted with an _s index and selected to the _S variant.
1499+ def : Pat<(SVEType<VT>.HalfLength (splat_vector (SVEType<VT>.EltAsScalar (vector_extract (VT ZPR:$vec), sve_elm_idx_extdup_s:$index)))), // Result type is SVEType<VT>.HalfLength; the source is VT.
1500+ (!cast<Instruction>(NAME # _S) ZPR:$vec, sve_elm_idx_extdup_s:$index)>;
1501+ def : Pat<(VT (splat_vector (SVEType<VT>.EltAsScalar (vector_extract (VT ZPR:$vec), sve_elm_idx_extdup_s:$index)))), // Result type and source type are both VT.
1502+ (!cast<Instruction>(NAME # _S) ZPR:$vec, sve_elm_idx_extdup_s:$index)>;
1503+ }
1504+
1505+ foreach VT = [nxv2f16, nxv2f32, nxv2bf16] in { // Sources of these types are extracted with a _d index and selected to the _D variant.
1506+ def : Pat<(VT (splat_vector (SVEType<VT>.EltAsScalar (vector_extract (VT ZPR:$vec), sve_elm_idx_extdup_d:$index)))), // Result type and source type are both VT.
1507+ (!cast<Instruction>(NAME # _D) ZPR:$vec, sve_elm_idx_extdup_d:$index)>;
1508+ }
1509+
1510+ // Duplicate an indexed 128-bit segment across a vector: AArch64duplane128 nodes are selected to the _Q variant.
14281511
14291512 def : Pat<(nxv16i8 (AArch64duplane128 nxv16i8:$Op1, i64:$imm)), // nxv16i8 form; the index operand is typed i64.
14301513 (!cast<Instruction>(NAME # _Q) $Op1, $imm)>; // Both operands are passed through unchanged.
0 commit comments