Skip to content

Commit 51a2974

Browse files
[LLVM][SVE] Extend dup(extract_elt(v,i)) isel patterns to cover all combinations.
Adds missing bfloat patterns for unpacked scalable vectors. Adds patterns for splatting extracts from fixed-length vectors.
1 parent fc0443c commit 51a2974

File tree

2 files changed

+157
-95
lines changed

2 files changed

+157
-95
lines changed

llvm/lib/Target/AArch64/SVEInstrFormats.td

Lines changed: 109 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,57 @@ class SVEType<ValueType VT> {
5959
!eq(VT, nxv8f16): nxv2f16,
6060
!eq(VT, nxv8bf16): nxv2bf16,
6161
true : untyped);
62+
63+
// The 64-bit vector subreg of VT.
64+
ValueType DSub = !cond(
65+
!eq(VT, nxv16i8): v8i8,
66+
!eq(VT, nxv8i16): v4i16,
67+
!eq(VT, nxv4i32): v2i32,
68+
!eq(VT, nxv2i64): v1i64,
69+
!eq(VT, nxv2f16): v4f16,
70+
!eq(VT, nxv4f16): v4f16,
71+
!eq(VT, nxv8f16): v4f16,
72+
!eq(VT, nxv2f32): v2f32,
73+
!eq(VT, nxv4f32): v2f32,
74+
!eq(VT, nxv2f64): v1f64,
75+
!eq(VT, nxv2bf16): v4bf16,
76+
!eq(VT, nxv4bf16): v4bf16,
77+
!eq(VT, nxv8bf16): v4bf16,
78+
true : untyped);
79+
80+
// The 128-bit vector subreg of VT.
81+
ValueType ZSub = !cond(
82+
!eq(VT, nxv16i8): v16i8,
83+
!eq(VT, nxv8i16): v8i16,
84+
!eq(VT, nxv4i32): v4i32,
85+
!eq(VT, nxv2i64): v2i64,
86+
!eq(VT, nxv2f16): v8f16,
87+
!eq(VT, nxv4f16): v8f16,
88+
!eq(VT, nxv8f16): v8f16,
89+
!eq(VT, nxv2f32): v4f32,
90+
!eq(VT, nxv4f32): v4f32,
91+
!eq(VT, nxv2f64): v2f64,
92+
!eq(VT, nxv2bf16): v8bf16,
93+
!eq(VT, nxv4bf16): v8bf16,
94+
!eq(VT, nxv8bf16): v8bf16,
95+
true : untyped);
96+
97+
// The legal scalar used to hold a vector element.
98+
ValueType EltAsScalar = !cond(
99+
!eq(VT, nxv16i8): i32,
100+
!eq(VT, nxv8i16): i32,
101+
!eq(VT, nxv4i32): i32,
102+
!eq(VT, nxv2i64): i64,
103+
!eq(VT, nxv2f16): f16,
104+
!eq(VT, nxv4f16): f16,
105+
!eq(VT, nxv8f16): f16,
106+
!eq(VT, nxv2f32): f32,
107+
!eq(VT, nxv4f32): f32,
108+
!eq(VT, nxv2f64): f64,
109+
!eq(VT, nxv2bf16): bf16,
110+
!eq(VT, nxv4bf16): bf16,
111+
!eq(VT, nxv8bf16): bf16,
112+
true : untyped);
62113
}
63114

64115
def SDT_AArch64Setcc : SDTypeProfile<1, 4, [
@@ -1402,29 +1453,67 @@ multiclass sve_int_perm_dup_i<string asm> {
14021453
def : InstAlias<"mov $Zd, $Qn",
14031454
(!cast<Instruction>(NAME # _Q) ZPR128:$Zd, FPR128asZPR:$Qn, 0), 2>;
14041455

1405-
// Duplicate extracted element of vector into all vector elements
1456+
// Duplicate an extracted vector element across a vector.
1457+
14061458
def : Pat<(nxv16i8 (splat_vector (i32 (vector_extract (nxv16i8 ZPR:$vec), sve_elm_idx_extdup_b:$index)))),
14071459
(!cast<Instruction>(NAME # _B) ZPR:$vec, sve_elm_idx_extdup_b:$index)>;
1408-
def : Pat<(nxv8i16 (splat_vector (i32 (vector_extract (nxv8i16 ZPR:$vec), sve_elm_idx_extdup_h:$index)))),
1409-
(!cast<Instruction>(NAME # _H) ZPR:$vec, sve_elm_idx_extdup_h:$index)>;
1410-
def : Pat<(nxv4i32 (splat_vector (i32 (vector_extract (nxv4i32 ZPR:$vec), sve_elm_idx_extdup_s:$index)))),
1411-
(!cast<Instruction>(NAME # _S) ZPR:$vec, sve_elm_idx_extdup_s:$index)>;
1412-
def : Pat<(nxv2i64 (splat_vector (i64 (vector_extract (nxv2i64 ZPR:$vec), sve_elm_idx_extdup_d:$index)))),
1413-
(!cast<Instruction>(NAME # _D) ZPR:$vec, sve_elm_idx_extdup_d:$index)>;
1414-
def : Pat<(nxv8f16 (splat_vector (f16 (vector_extract (nxv8f16 ZPR:$vec), sve_elm_idx_extdup_h:$index)))),
1415-
(!cast<Instruction>(NAME # _H) ZPR:$vec, sve_elm_idx_extdup_h:$index)>;
1416-
def : Pat<(nxv8bf16 (splat_vector (bf16 (vector_extract (nxv8bf16 ZPR:$vec), sve_elm_idx_extdup_h:$index)))),
1417-
(!cast<Instruction>(NAME # _H) ZPR:$vec, sve_elm_idx_extdup_h:$index)>;
1418-
def : Pat<(nxv4f16 (splat_vector (f16 (vector_extract (nxv4f16 ZPR:$vec), sve_elm_idx_extdup_s:$index)))),
1419-
(!cast<Instruction>(NAME # _S) ZPR:$vec, sve_elm_idx_extdup_s:$index)>;
1420-
def : Pat<(nxv2f16 (splat_vector (f16 (vector_extract (nxv2f16 ZPR:$vec), sve_elm_idx_extdup_d:$index)))),
1421-
(!cast<Instruction>(NAME # _D) ZPR:$vec, sve_elm_idx_extdup_d:$index)>;
1422-
def : Pat<(nxv4f32 (splat_vector (f32 (vector_extract (nxv4f32 ZPR:$vec), sve_elm_idx_extdup_s:$index)))),
1423-
(!cast<Instruction>(NAME # _S) ZPR:$vec, sve_elm_idx_extdup_s:$index)>;
1424-
def : Pat<(nxv2f32 (splat_vector (f32 (vector_extract (nxv2f32 ZPR:$vec), sve_elm_idx_extdup_d:$index)))),
1425-
(!cast<Instruction>(NAME # _D) ZPR:$vec, sve_elm_idx_extdup_d:$index)>;
1426-
def : Pat<(nxv2f64 (splat_vector (f64 (vector_extract (nxv2f64 ZPR:$vec), sve_elm_idx_extdup_d:$index)))),
1460+
def : Pat<(nxv16i8 (splat_vector (i32 (vector_extract (v16i8 V128:$vec), sve_elm_idx_extdup_b:$index)))),
1461+
(!cast<Instruction>(NAME # _B) (SUBREG_TO_REG (i64 0), $vec, zsub), sve_elm_idx_extdup_b:$index)>;
1462+
def : Pat<(nxv16i8 (splat_vector (i32 (vector_extract (v8i8 V64:$vec), sve_elm_idx_extdup_b:$index)))),
1463+
(!cast<Instruction>(NAME # _B) (SUBREG_TO_REG (i64 0), $vec, dsub), sve_elm_idx_extdup_b:$index)>;
1464+
1465+
foreach VT = [nxv8i16, nxv2f16, nxv4f16, nxv8f16, nxv2bf16, nxv4bf16, nxv8bf16] in {
1466+
def : Pat<(VT (splat_vector (SVEType<VT>.EltAsScalar (vector_extract (SVEType<VT>.Packed ZPR:$vec), sve_elm_idx_extdup_h:$index)))),
1467+
(!cast<Instruction>(NAME # _H) ZPR:$vec, sve_elm_idx_extdup_h:$index)>;
1468+
def : Pat<(VT (splat_vector (SVEType<VT>.EltAsScalar (vector_extract (SVEType<VT>.ZSub V128:$vec), sve_elm_idx_extdup_h:$index)))),
1469+
(!cast<Instruction>(NAME # _H) (SUBREG_TO_REG (i64 0), $vec, zsub), sve_elm_idx_extdup_h:$index)>;
1470+
def : Pat<(VT (splat_vector (SVEType<VT>.EltAsScalar (vector_extract (SVEType<VT>.DSub V64:$vec), sve_elm_idx_extdup_h:$index)))),
1471+
(!cast<Instruction>(NAME # _H) (SUBREG_TO_REG (i64 0), $vec, dsub), sve_elm_idx_extdup_h:$index)>;
1472+
}
1473+
1474+
foreach VT = [nxv4i32, nxv2f32, nxv4f32 ] in {
1475+
def : Pat<(VT (splat_vector (SVEType<VT>.EltAsScalar (vector_extract (SVEType<VT>.Packed ZPR:$vec), sve_elm_idx_extdup_s:$index)))),
1476+
(!cast<Instruction>(NAME # _S) ZPR:$vec, sve_elm_idx_extdup_s:$index)>;
1477+
def : Pat<(VT (splat_vector (SVEType<VT>.EltAsScalar (vector_extract (SVEType<VT>.ZSub V128:$vec), sve_elm_idx_extdup_s:$index)))),
1478+
(!cast<Instruction>(NAME # _S) (SUBREG_TO_REG (i64 0), $vec, zsub), sve_elm_idx_extdup_s:$index)>;
1479+
def : Pat<(VT (splat_vector (SVEType<VT>.EltAsScalar (vector_extract (SVEType<VT>.DSub V64:$vec), sve_elm_idx_extdup_s:$index)))),
1480+
(!cast<Instruction>(NAME # _S) (SUBREG_TO_REG (i64 0), $vec, dsub), sve_elm_idx_extdup_s:$index)>;
1481+
}
1482+
1483+
foreach VT = [nxv2i64, nxv2f64] in {
1484+
def : Pat<(VT (splat_vector (SVEType<VT>.EltAsScalar (vector_extract (VT ZPR:$vec), sve_elm_idx_extdup_d:$index)))),
1485+
(!cast<Instruction>(NAME # _D) ZPR:$vec, sve_elm_idx_extdup_d:$index)>;
1486+
def : Pat<(VT (splat_vector (SVEType<VT>.EltAsScalar (vector_extract (SVEType<VT>.ZSub V128:$vec), sve_elm_idx_extdup_d:$index)))),
1487+
(!cast<Instruction>(NAME # _D) (SUBREG_TO_REG (i64 0), $vec, zsub), sve_elm_idx_extdup_d:$index)>;
1488+
def : Pat<(VT (splat_vector (SVEType<VT>.EltAsScalar (vector_extract (SVEType<VT>.DSub V64:$vec), sve_elm_idx_extdup_d:$index)))),
1489+
(!cast<Instruction>(NAME # _D) (SUBREG_TO_REG (i64 0), $vec, dsub), sve_elm_idx_extdup_d:$index)>;
1490+
}
1491+
1492+
// When extracting from an unpacked vector the index must be scaled to account
1493+
// for the "holes" in the underlying packed vector type. We get the scaling
1494+
// for free by "promoting" the element type to one whose underlying vector type
1495+
// is packed.
1496+
1497+
foreach VT = [nxv2f16, nxv4f16, nxv8f16] in {
1498+
def : Pat<(VT (splat_vector (f16 (vector_extract (nxv4f16 ZPR:$vec), sve_elm_idx_extdup_s:$index)))),
1499+
(!cast<Instruction>(NAME # _S) ZPR:$vec, sve_elm_idx_extdup_s:$index)>;
1500+
def : Pat<(VT (splat_vector (f16 (vector_extract (nxv2f16 ZPR:$vec), sve_elm_idx_extdup_d:$index)))),
1501+
(!cast<Instruction>(NAME # _D) ZPR:$vec, sve_elm_idx_extdup_d:$index)>;
1502+
}
1503+
1504+
foreach VT = [nxv2bf16, nxv4bf16, nxv8bf16] in {
1505+
def : Pat<(VT (splat_vector (bf16 (vector_extract (nxv4bf16 ZPR:$vec), sve_elm_idx_extdup_s:$index)))),
1506+
(!cast<Instruction>(NAME # _S) ZPR:$vec, sve_elm_idx_extdup_s:$index)>;
1507+
def : Pat<(VT (splat_vector (bf16 (vector_extract (nxv2bf16 ZPR:$vec), sve_elm_idx_extdup_d:$index)))),
1508+
(!cast<Instruction>(NAME # _D) ZPR:$vec, sve_elm_idx_extdup_d:$index)>;
1509+
}
1510+
1511+
foreach VT = [nxv2f32, nxv4f32] in {
1512+
def : Pat<(VT (splat_vector (f32 (vector_extract (nxv2f32 ZPR:$vec), sve_elm_idx_extdup_d:$index)))),
14271513
(!cast<Instruction>(NAME # _D) ZPR:$vec, sve_elm_idx_extdup_d:$index)>;
1514+
}
1515+
1516+
// Duplicate an indexed 128-bit segment across a vector.
14281517

14291518
def : Pat<(nxv16i8 (AArch64duplane128 nxv16i8:$Op1, i64:$imm)),
14301519
(!cast<Instruction>(NAME # _Q) $Op1, $imm)>;

0 commit comments

Comments
 (0)