Skip to content

Commit 3ab3915

Browse files
Remove patterns for cases where the extract index must be explicitly scaled.
1 parent 51a2974 commit 3ab3915

File tree

2 files changed

+16
-15
lines changed

2 files changed

+16
-15
lines changed

llvm/lib/Target/AArch64/SVEInstrFormats.td

Lines changed: 9 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1491,26 +1491,20 @@ multiclass sve_int_perm_dup_i<string asm> {
14911491

14921492
// When extracting from an unpacked vector the index must be scaled to account
14931493
// for the "holes" in the underlying packed vector type. We get the scaling
1494-
// for free by "promoting" the element type to one whose underlying vector type
1495-
// is packed.
1494+
// for free by "promoting" the element type to one whose underlying vector
1495+
// type is packed. This is only valid when extracting from a vector whose
1496+
// length is the same as or bigger than the result of the splat.
14961497

1497-
foreach VT = [nxv2f16, nxv4f16, nxv8f16] in {
1498-
def : Pat<(VT (splat_vector (f16 (vector_extract (nxv4f16 ZPR:$vec), sve_elm_idx_extdup_s:$index)))),
1498+
foreach VT = [nxv4f16, nxv4bf16] in {
1499+
def : Pat<(SVEType<VT>.HalfLength (splat_vector (SVEType<VT>.EltAsScalar (vector_extract (VT ZPR:$vec), sve_elm_idx_extdup_s:$index)))),
14991500
(!cast<Instruction>(NAME # _S) ZPR:$vec, sve_elm_idx_extdup_s:$index)>;
1500-
def : Pat<(VT (splat_vector (f16 (vector_extract (nxv2f16 ZPR:$vec), sve_elm_idx_extdup_d:$index)))),
1501-
(!cast<Instruction>(NAME # _D) ZPR:$vec, sve_elm_idx_extdup_d:$index)>;
1502-
}
1503-
1504-
foreach VT = [nxv2bf16, nxv4bf16, nxv8bf16] in {
1505-
def : Pat<(VT (splat_vector (bf16 (vector_extract (nxv4bf16 ZPR:$vec), sve_elm_idx_extdup_s:$index)))),
1501+
def : Pat<(VT (splat_vector (SVEType<VT>.EltAsScalar (vector_extract (VT ZPR:$vec), sve_elm_idx_extdup_s:$index)))),
15061502
(!cast<Instruction>(NAME # _S) ZPR:$vec, sve_elm_idx_extdup_s:$index)>;
1507-
def : Pat<(VT (splat_vector (bf16 (vector_extract (nxv2bf16 ZPR:$vec), sve_elm_idx_extdup_d:$index)))),
1508-
(!cast<Instruction>(NAME # _D) ZPR:$vec, sve_elm_idx_extdup_d:$index)>;
15091503
}
15101504

1511-
foreach VT = [nxv2f32, nxv4f32] in {
1512-
def : Pat<(VT (splat_vector (f32 (vector_extract (nxv2f32 ZPR:$vec), sve_elm_idx_extdup_d:$index)))),
1513-
(!cast<Instruction>(NAME # _D) ZPR:$vec, sve_elm_idx_extdup_d:$index)>;
1505+
foreach VT = [nxv2f16, nxv2f32, nxv2bf16] in {
1506+
def : Pat<(VT (splat_vector (SVEType<VT>.EltAsScalar (vector_extract (VT ZPR:$vec), sve_elm_idx_extdup_d:$index)))),
1507+
(!cast<Instruction>(NAME # _D) ZPR:$vec, sve_elm_idx_extdup_d:$index)>;
15141508
}
15151509

15161510
// Duplicate an indexed 128-bit segment across a vector.

llvm/test/CodeGen/AArch64/aarch64-dup-extract-scalable.ll

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,7 @@ define <vscale x 8 x half> @dup_extract_nxv8f16_nxv4f16(<vscale x 4 x half> %dat
155155
; CHECK-LABEL: dup_extract_nxv8f16_nxv4f16:
156156
; CHECK: // %bb.0:
157157
; CHECK-NEXT: mov z0.s, z0.s[1]
158+
; CHECK-NEXT: mov z0.h, h0
158159
; CHECK-NEXT: ret
159160
%1 = extractelement <vscale x 4 x half> %data, i16 1
160161
%.splatinsert = insertelement <vscale x 8 x half> poison, half %1, i32 0
@@ -166,6 +167,7 @@ define <vscale x 8 x half> @dup_extract_nxv8f16_nxv2f16(<vscale x 2 x half> %dat
166167
; CHECK-LABEL: dup_extract_nxv8f16_nxv2f16:
167168
; CHECK: // %bb.0:
168169
; CHECK-NEXT: mov z0.d, z0.d[1]
170+
; CHECK-NEXT: mov z0.h, h0
169171
; CHECK-NEXT: ret
170172
%1 = extractelement <vscale x 2 x half> %data, i16 1
171173
%.splatinsert = insertelement <vscale x 8 x half> poison, half %1, i32 0
@@ -223,6 +225,7 @@ define <vscale x 4 x half> @dup_extract_nxv4f16_nxv2f16(<vscale x 2 x half> %dat
223225
; CHECK-LABEL: dup_extract_nxv4f16_nxv2f16:
224226
; CHECK: // %bb.0:
225227
; CHECK-NEXT: mov z0.d, z0.d[1]
228+
; CHECK-NEXT: mov z0.h, h0
226229
; CHECK-NEXT: ret
227230
%1 = extractelement <vscale x 2 x half> %data, i16 1
228231
%.splatinsert = insertelement <vscale x 4 x half> poison, half %1, i32 0
@@ -326,6 +329,7 @@ define <vscale x 4 x float> @dup_extract_nxv4f32_nxv2f32(<vscale x 2 x float> %d
326329
; CHECK-LABEL: dup_extract_nxv4f32_nxv2f32:
327330
; CHECK: // %bb.0:
328331
; CHECK-NEXT: mov z0.d, z0.d[1]
332+
; CHECK-NEXT: mov z0.s, s0
329333
; CHECK-NEXT: ret
330334
%1 = extractelement <vscale x 2 x float> %data, i32 1
331335
%.splatinsert = insertelement <vscale x 4 x float> poison, float %1, i32 0
@@ -452,6 +456,7 @@ define <vscale x 8 x bfloat> @dup_extract_nxv8bf16_nxv4bf16(<vscale x 4 x bfloat
452456
; CHECK-LABEL: dup_extract_nxv8bf16_nxv4bf16:
453457
; CHECK: // %bb.0:
454458
; CHECK-NEXT: mov z0.s, z0.s[1]
459+
; CHECK-NEXT: mov z0.h, h0
455460
; CHECK-NEXT: ret
456461
%1 = extractelement <vscale x 4 x bfloat> %data, i16 1
457462
%.splatinsert = insertelement <vscale x 8 x bfloat> poison, bfloat %1, i32 0
@@ -463,6 +468,7 @@ define <vscale x 8 x bfloat> @dup_extract_nxv8bf16_nxv2bf16(<vscale x 2 x bfloat
463468
; CHECK-LABEL: dup_extract_nxv8bf16_nxv2bf16:
464469
; CHECK: // %bb.0:
465470
; CHECK-NEXT: mov z0.d, z0.d[1]
471+
; CHECK-NEXT: mov z0.h, h0
466472
; CHECK-NEXT: ret
467473
%1 = extractelement <vscale x 2 x bfloat> %data, i16 1
468474
%.splatinsert = insertelement <vscale x 8 x bfloat> poison, bfloat %1, i32 0
@@ -520,6 +526,7 @@ define <vscale x 4 x bfloat> @dup_extract_nxv4bf16_nxv2bf16(<vscale x 2 x bfloat
520526
; CHECK-LABEL: dup_extract_nxv4bf16_nxv2bf16:
521527
; CHECK: // %bb.0:
522528
; CHECK-NEXT: mov z0.d, z0.d[1]
529+
; CHECK-NEXT: mov z0.h, h0
523530
; CHECK-NEXT: ret
524531
%1 = extractelement <vscale x 2 x bfloat> %data, i16 1
525532
%.splatinsert = insertelement <vscale x 4 x bfloat> poison, bfloat %1, i32 0

0 commit comments

Comments
 (0)