Skip to content

Commit 9828745

Browse files
authored
[AArch64][ISel] Select constructive EXT_ZZI pseudo instruction (llvm#152554)
This patch adds patterns that select the EXT_ZZI_CONSTRUCTIVE pseudo instead of the destructive EXT_ZZI instruction for vector_splice. The new patterns only apply when the two inputs to vector_splice are identical. Because the source and destination registers are no longer tied, the register allocator has more freedom, and many MOVs are replaced with MOVPRFX. In some cases, however, the allocator could simply have chosen the same register for the input and the output but preferred not to. As a result, some test cases now contain more instructions: a MOVPRFX is emitted where previously no MOV was needed.
1 parent 649762c commit 9828745

23 files changed

+1228
-1042
lines changed

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2133,21 +2133,37 @@ let Predicates = [HasSVE_or_SME] in {
21332133
(LASTB_VPZ_D (PTRUE_D 31), ZPR:$Z1), dsub))>;
21342134

21352135
// Splice with lane bigger or equal to 0
2136-
foreach VT = [nxv16i8] in
2136+
foreach VT = [nxv16i8] in {
21372137
def : Pat<(VT (vector_splice VT:$Z1, VT:$Z2, (i64 (sve_ext_imm_0_255 i32:$index)))),
21382138
(EXT_ZZI ZPR:$Z1, ZPR:$Z2, imm0_255:$index)>;
2139+
let AddedComplexity = 1 in
2140+
def : Pat<(VT (vector_splice VT:$Z1, VT:$Z1, (i64 (sve_ext_imm_0_255 i32:$index)))),
2141+
(EXT_ZZI_CONSTRUCTIVE ZPR:$Z1, imm0_255:$index)>;
2142+
}
21392143

2140-
foreach VT = [nxv8i16, nxv8f16, nxv8bf16] in
2144+
foreach VT = [nxv8i16, nxv8f16, nxv8bf16] in {
21412145
def : Pat<(VT (vector_splice VT:$Z1, VT:$Z2, (i64 (sve_ext_imm_0_127 i32:$index)))),
21422146
(EXT_ZZI ZPR:$Z1, ZPR:$Z2, imm0_255:$index)>;
2147+
let AddedComplexity = 1 in
2148+
def : Pat<(VT (vector_splice VT:$Z1, VT:$Z1, (i64 (sve_ext_imm_0_127 i32:$index)))),
2149+
(EXT_ZZI_CONSTRUCTIVE ZPR:$Z1, imm0_255:$index)>;
2150+
}
21432151

2144-
foreach VT = [nxv4i32, nxv4f16, nxv4f32, nxv4bf16] in
2152+
foreach VT = [nxv4i32, nxv4f16, nxv4f32, nxv4bf16] in {
21452153
def : Pat<(VT (vector_splice VT:$Z1, VT:$Z2, (i64 (sve_ext_imm_0_63 i32:$index)))),
21462154
(EXT_ZZI ZPR:$Z1, ZPR:$Z2, imm0_255:$index)>;
2155+
let AddedComplexity = 1 in
2156+
def : Pat<(VT (vector_splice VT:$Z1, VT:$Z1, (i64 (sve_ext_imm_0_63 i32:$index)))),
2157+
(EXT_ZZI_CONSTRUCTIVE ZPR:$Z1, imm0_255:$index)>;
2158+
}
21472159

2148-
foreach VT = [nxv2i64, nxv2f16, nxv2f32, nxv2f64, nxv2bf16] in
2160+
foreach VT = [nxv2i64, nxv2f16, nxv2f32, nxv2f64, nxv2bf16] in {
21492161
def : Pat<(VT (vector_splice VT:$Z1, VT:$Z2, (i64 (sve_ext_imm_0_31 i32:$index)))),
21502162
(EXT_ZZI ZPR:$Z1, ZPR:$Z2, imm0_255:$index)>;
2163+
let AddedComplexity = 1 in
2164+
def : Pat<(VT (vector_splice VT:$Z1, VT:$Z1, (i64 (sve_ext_imm_0_31 i32:$index)))),
2165+
(EXT_ZZI_CONSTRUCTIVE ZPR:$Z1, imm0_255:$index)>;
2166+
}
21512167

21522168
defm CMPHS_PPzZZ : sve_int_cmp_0<0b000, "cmphs", SETUGE, SETULE>;
21532169
defm CMPHI_PPzZZ : sve_int_cmp_0<0b001, "cmphi", SETUGT, SETULT>;

llvm/test/CodeGen/AArch64/named-vector-shuffles-sve.ll

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ define <vscale x 16 x i8> @splice_nxv16i8_last_idx(<vscale x 16 x i8> %a, <vscal
3636
define <vscale x 16 x i8> @splice_nxv16i8_first_idx_unary(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
3737
; CHECK-LABEL: splice_nxv16i8_first_idx_unary:
3838
; CHECK: // %bb.0:
39-
; CHECK-NEXT: mov z0.d, z1.d
39+
; CHECK-NEXT: movprfx z0, z1
4040
; CHECK-NEXT: ext z0.b, z0.b, z1.b, #1
4141
; CHECK-NEXT: ret
4242
%res = call <vscale x 16 x i8> @llvm.vector.splice.nxv16i8(<vscale x 16 x i8> %b, <vscale x 16 x i8> %b, i32 1)
@@ -55,7 +55,7 @@ define <vscale x 8 x i16> @splice_nxv8i16_first_idx(<vscale x 8 x i16> %a, <vsca
5555
define <vscale x 8 x i16> @splice_nxv8i16_first_idx_unary(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
5656
; CHECK-LABEL: splice_nxv8i16_first_idx_unary:
5757
; CHECK: // %bb.0:
58-
; CHECK-NEXT: mov z0.d, z1.d
58+
; CHECK-NEXT: movprfx z0, z1
5959
; CHECK-NEXT: ext z0.b, z0.b, z1.b, #2
6060
; CHECK-NEXT: ret
6161
%res = call <vscale x 8 x i16> @llvm.vector.splice.nxv8i16(<vscale x 8 x i16> %b, <vscale x 8 x i16> %b, i32 1)
@@ -83,7 +83,7 @@ define <vscale x 4 x i32> @splice_nxv4i32_last_idx(<vscale x 4 x i32> %a, <vscal
8383
define <vscale x 4 x i32> @splice_nxv4i32_first_idx_unary(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
8484
; CHECK-LABEL: splice_nxv4i32_first_idx_unary:
8585
; CHECK: // %bb.0:
86-
; CHECK-NEXT: mov z0.d, z1.d
86+
; CHECK-NEXT: movprfx z0, z1
8787
; CHECK-NEXT: ext z0.b, z0.b, z1.b, #4
8888
; CHECK-NEXT: ret
8989
%res = call <vscale x 4 x i32> @llvm.vector.splice.nxv4i32(<vscale x 4 x i32> %b, <vscale x 4 x i32> %b, i32 1)
@@ -111,7 +111,7 @@ define <vscale x 2 x i64> @splice_nxv2i64_last_idx(<vscale x 2 x i64> %a, <vscal
111111
define <vscale x 2 x i64> @splice_nxv2i64_first_idx_unary(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
112112
; CHECK-LABEL: splice_nxv2i64_first_idx_unary:
113113
; CHECK: // %bb.0:
114-
; CHECK-NEXT: mov z0.d, z1.d
114+
; CHECK-NEXT: movprfx z0, z1
115115
; CHECK-NEXT: ext z0.b, z0.b, z1.b, #8
116116
; CHECK-NEXT: ret
117117
%res = call <vscale x 2 x i64> @llvm.vector.splice.nxv2i64(<vscale x 2 x i64> %b, <vscale x 2 x i64> %b, i32 1)
@@ -173,7 +173,7 @@ define <vscale x 2 x half> @splice_nxv2f16_last_idx(<vscale x 2 x half> %a, <vsc
173173
define <vscale x 2 x half> @splice_nxv2f16_first_idx_unary(<vscale x 2 x half> %a, <vscale x 2 x half> %b) #0 {
174174
; CHECK-LABEL: splice_nxv2f16_first_idx_unary:
175175
; CHECK: // %bb.0:
176-
; CHECK-NEXT: mov z0.d, z1.d
176+
; CHECK-NEXT: movprfx z0, z1
177177
; CHECK-NEXT: ext z0.b, z0.b, z1.b, #8
178178
; CHECK-NEXT: ret
179179
%res = call <vscale x 2 x half> @llvm.vector.splice.nxv2f16(<vscale x 2 x half> %b, <vscale x 2 x half> %b, i32 1)
@@ -235,7 +235,7 @@ define <vscale x 4 x half> @splice_nxv4f16_last_idx(<vscale x 4 x half> %a, <vsc
235235
define <vscale x 4 x half> @splice_nxv4f16_first_idx_unary(<vscale x 4 x half> %a, <vscale x 4 x half> %b) #0 {
236236
; CHECK-LABEL: splice_nxv4f16_first_idx_unary:
237237
; CHECK: // %bb.0:
238-
; CHECK-NEXT: mov z0.d, z1.d
238+
; CHECK-NEXT: movprfx z0, z1
239239
; CHECK-NEXT: ext z0.b, z0.b, z1.b, #4
240240
; CHECK-NEXT: ret
241241
%res = call <vscale x 4 x half> @llvm.vector.splice.nxv4f16(<vscale x 4 x half> %b, <vscale x 4 x half> %b, i32 1)
@@ -263,7 +263,7 @@ define <vscale x 8 x half> @splice_nxv8f16_last_idx(<vscale x 8 x half> %a, <vsc
263263
define <vscale x 8 x half> @splice_nxv8f16_first_idx_unary(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
264264
; CHECK-LABEL: splice_nxv8f16_first_idx_unary:
265265
; CHECK: // %bb.0:
266-
; CHECK-NEXT: mov z0.d, z1.d
266+
; CHECK-NEXT: movprfx z0, z1
267267
; CHECK-NEXT: ext z0.b, z0.b, z1.b, #2
268268
; CHECK-NEXT: ret
269269
%res = call <vscale x 8 x half> @llvm.vector.splice.nxv8f16(<vscale x 8 x half> %b, <vscale x 8 x half> %b, i32 1)
@@ -325,7 +325,7 @@ define <vscale x 2 x float> @splice_nxv2f32_last_idx(<vscale x 2 x float> %a, <v
325325
define <vscale x 2 x float> @splice_nxv2f32_first_idx_unary(<vscale x 2 x float> %a, <vscale x 2 x float> %b) #0 {
326326
; CHECK-LABEL: splice_nxv2f32_first_idx_unary:
327327
; CHECK: // %bb.0:
328-
; CHECK-NEXT: mov z0.d, z1.d
328+
; CHECK-NEXT: movprfx z0, z1
329329
; CHECK-NEXT: ext z0.b, z0.b, z1.b, #8
330330
; CHECK-NEXT: ret
331331
%res = call <vscale x 2 x float> @llvm.vector.splice.nxv2f32(<vscale x 2 x float> %b, <vscale x 2 x float> %b, i32 1)
@@ -353,7 +353,7 @@ define <vscale x 4 x float> @splice_nxv4f32_last_idx(<vscale x 4 x float> %a, <v
353353
define <vscale x 4 x float> @splice_nxv4f32_first_idx_unary(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
354354
; CHECK-LABEL: splice_nxv4f32_first_idx_unary:
355355
; CHECK: // %bb.0:
356-
; CHECK-NEXT: mov z0.d, z1.d
356+
; CHECK-NEXT: movprfx z0, z1
357357
; CHECK-NEXT: ext z0.b, z0.b, z1.b, #4
358358
; CHECK-NEXT: ret
359359
%res = call <vscale x 4 x float> @llvm.vector.splice.nxv4f32(<vscale x 4 x float> %b, <vscale x 4 x float> %b, i32 1)
@@ -381,7 +381,7 @@ define <vscale x 2 x double> @splice_nxv2f64_last_idx(<vscale x 2 x double> %a,
381381
define <vscale x 2 x double> @splice_nxv2f64_first_idx_unary(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
382382
; CHECK-LABEL: splice_nxv2f64_first_idx_unary:
383383
; CHECK: // %bb.0:
384-
; CHECK-NEXT: mov z0.d, z1.d
384+
; CHECK-NEXT: movprfx z0, z1
385385
; CHECK-NEXT: ext z0.b, z0.b, z1.b, #8
386386
; CHECK-NEXT: ret
387387
%res = call <vscale x 2 x double> @llvm.vector.splice.nxv2f64(<vscale x 2 x double> %b, <vscale x 2 x double> %b, i32 1)
@@ -879,7 +879,7 @@ define <vscale x 2 x bfloat> @splice_nxv2bf16_last_idx(<vscale x 2 x bfloat> %a,
879879
define <vscale x 2 x bfloat> @splice_nxv2bf16_first_idx_unary(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b) #0 {
880880
; CHECK-LABEL: splice_nxv2bf16_first_idx_unary:
881881
; CHECK: // %bb.0:
882-
; CHECK-NEXT: mov z0.d, z1.d
882+
; CHECK-NEXT: movprfx z0, z1
883883
; CHECK-NEXT: ext z0.b, z0.b, z1.b, #8
884884
; CHECK-NEXT: ret
885885
%res = call <vscale x 2 x bfloat> @llvm.vector.splice.nxv2bf16(<vscale x 2 x bfloat> %b, <vscale x 2 x bfloat> %b, i32 1)
@@ -941,7 +941,7 @@ define <vscale x 4 x bfloat> @splice_nxv4bf16_last_idx(<vscale x 4 x bfloat> %a,
941941
define <vscale x 4 x bfloat> @splice_nxv4bf16_first_idx_unary(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b) #0 {
942942
; CHECK-LABEL: splice_nxv4bf16_first_idx_unary:
943943
; CHECK: // %bb.0:
944-
; CHECK-NEXT: mov z0.d, z1.d
944+
; CHECK-NEXT: movprfx z0, z1
945945
; CHECK-NEXT: ext z0.b, z0.b, z1.b, #4
946946
; CHECK-NEXT: ret
947947
%res = call <vscale x 4 x bfloat> @llvm.vector.splice.nxv4bf16(<vscale x 4 x bfloat> %b, <vscale x 4 x bfloat> %b, i32 1)
@@ -969,7 +969,7 @@ define <vscale x 8 x bfloat> @splice_nxv8bf16_last_idx(<vscale x 8 x bfloat> %a,
969969
define <vscale x 8 x bfloat> @splice_nxv8bf16_first_idx_unary(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) #0 {
970970
; CHECK-LABEL: splice_nxv8bf16_first_idx_unary:
971971
; CHECK: // %bb.0:
972-
; CHECK-NEXT: mov z0.d, z1.d
972+
; CHECK-NEXT: movprfx z0, z1
973973
; CHECK-NEXT: ext z0.b, z0.b, z1.b, #2
974974
; CHECK-NEXT: ret
975975
%res = call <vscale x 8 x bfloat> @llvm.vector.splice.nxv8bf16(<vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %b, i32 1)

llvm/test/CodeGen/AArch64/sve-fixed-length-extract-subvector.ll

Lines changed: 21 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ define void @extract_v32i8_halves(ptr %in, ptr %out, ptr %out2) #0 vscale_range(
5050
; CHECK-LABEL: extract_v32i8_halves:
5151
; CHECK: // %bb.0: // %entry
5252
; CHECK-NEXT: ldr z0, [x0]
53-
; CHECK-NEXT: mov z1.d, z0.d
53+
; CHECK-NEXT: movprfx z1, z0
5454
; CHECK-NEXT: ext z1.b, z1.b, z0.b, #16
5555
; CHECK-NEXT: str q1, [x1]
5656
; CHECK-NEXT: str q0, [x2]
@@ -68,7 +68,7 @@ define void @extract_v32i8_half_unaligned(ptr %in, ptr %out) #0 vscale_range(2,2
6868
; CHECK-LABEL: extract_v32i8_half_unaligned:
6969
; CHECK: // %bb.0: // %entry
7070
; CHECK-NEXT: ldr z0, [x0]
71-
; CHECK-NEXT: mov z1.d, z0.d
71+
; CHECK-NEXT: movprfx z1, z0
7272
; CHECK-NEXT: ext z1.b, z1.b, z0.b, #16
7373
; CHECK-NEXT: ext v0.16b, v0.16b, v1.16b, #4
7474
; CHECK-NEXT: str q0, [x1]
@@ -84,15 +84,16 @@ define void @extract_v32i8_quarters(ptr %in, ptr %out, ptr %out2, ptr %out3, ptr
8484
; CHECK-LABEL: extract_v32i8_quarters:
8585
; CHECK: // %bb.0: // %entry
8686
; CHECK-NEXT: ldr z0, [x0]
87-
; CHECK-NEXT: mov z1.d, z0.d
88-
; CHECK-NEXT: mov z2.d, z0.d
87+
; CHECK-NEXT: movprfx z1, z0
8988
; CHECK-NEXT: ext z1.b, z1.b, z0.b, #16
89+
; CHECK-NEXT: movprfx z2, z0
9090
; CHECK-NEXT: ext z2.b, z2.b, z0.b, #24
91+
; CHECK-NEXT: movprfx z3, z0
92+
; CHECK-NEXT: ext z3.b, z3.b, z0.b, #8
9193
; CHECK-NEXT: str d1, [x1]
9294
; CHECK-NEXT: str d2, [x2]
9395
; CHECK-NEXT: str d0, [x3]
94-
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
95-
; CHECK-NEXT: str d0, [x4]
96+
; CHECK-NEXT: str d3, [x4]
9697
; CHECK-NEXT: ret
9798
entry:
9899
%b = load <32 x i8>, ptr %in
@@ -126,7 +127,7 @@ define void @extract_v64i8_halves(ptr %in, ptr %out, ptr %out2) #0 vscale_range(
126127
; CHECK: // %bb.0: // %entry
127128
; CHECK-NEXT: ldr z0, [x0]
128129
; CHECK-NEXT: ptrue p0.b, vl32
129-
; CHECK-NEXT: mov z1.d, z0.d
130+
; CHECK-NEXT: movprfx z1, z0
130131
; CHECK-NEXT: ext z1.b, z1.b, z0.b, #32
131132
; CHECK-NEXT: st1b { z1.b }, p0, [x1]
132133
; CHECK-NEXT: st1b { z0.b }, p0, [x2]
@@ -207,7 +208,7 @@ define void @extract_v16i16_halves(ptr %in, ptr %out, ptr %out2) #0 vscale_range
207208
; CHECK-LABEL: extract_v16i16_halves:
208209
; CHECK: // %bb.0: // %entry
209210
; CHECK-NEXT: ldr z0, [x0]
210-
; CHECK-NEXT: mov z1.d, z0.d
211+
; CHECK-NEXT: movprfx z1, z0
211212
; CHECK-NEXT: ext z1.b, z1.b, z0.b, #16
212213
; CHECK-NEXT: str q1, [x1]
213214
; CHECK-NEXT: str q0, [x2]
@@ -240,7 +241,7 @@ define void @extract_v32i16_halves(ptr %in, ptr %out, ptr %out2) #0 vscale_range
240241
; CHECK: // %bb.0: // %entry
241242
; CHECK-NEXT: ldr z0, [x0]
242243
; CHECK-NEXT: ptrue p0.h, vl16
243-
; CHECK-NEXT: mov z1.d, z0.d
244+
; CHECK-NEXT: movprfx z1, z0
244245
; CHECK-NEXT: ext z1.b, z1.b, z0.b, #32
245246
; CHECK-NEXT: st1h { z1.h }, p0, [x1]
246247
; CHECK-NEXT: st1h { z0.h }, p0, [x2]
@@ -322,7 +323,7 @@ define void @extract_v8i32_halves(ptr %in, ptr %out, ptr %out2) #0 vscale_range(
322323
; CHECK-LABEL: extract_v8i32_halves:
323324
; CHECK: // %bb.0: // %entry
324325
; CHECK-NEXT: ldr z0, [x0]
325-
; CHECK-NEXT: mov z1.d, z0.d
326+
; CHECK-NEXT: movprfx z1, z0
326327
; CHECK-NEXT: ext z1.b, z1.b, z0.b, #16
327328
; CHECK-NEXT: str q1, [x1]
328329
; CHECK-NEXT: str q0, [x2]
@@ -355,7 +356,7 @@ define void @extract_v16i32_halves(ptr %in, ptr %out, ptr %out2) #0 vscale_range
355356
; CHECK: // %bb.0: // %entry
356357
; CHECK-NEXT: ldr z0, [x0]
357358
; CHECK-NEXT: ptrue p0.s, vl8
358-
; CHECK-NEXT: mov z1.d, z0.d
359+
; CHECK-NEXT: movprfx z1, z0
359360
; CHECK-NEXT: ext z1.b, z1.b, z0.b, #32
360361
; CHECK-NEXT: st1w { z1.s }, p0, [x1]
361362
; CHECK-NEXT: st1w { z0.s }, p0, [x2]
@@ -426,7 +427,7 @@ define void @extract_v4i64_halves(ptr %in, ptr %out, ptr %out2) #0 vscale_range(
426427
; CHECK-LABEL: extract_v4i64_halves:
427428
; CHECK: // %bb.0: // %entry
428429
; CHECK-NEXT: ldr z0, [x0]
429-
; CHECK-NEXT: mov z1.d, z0.d
430+
; CHECK-NEXT: movprfx z1, z0
430431
; CHECK-NEXT: ext z1.b, z1.b, z0.b, #16
431432
; CHECK-NEXT: str q1, [x1]
432433
; CHECK-NEXT: str q0, [x2]
@@ -459,7 +460,7 @@ define void @extract_v8i64_halves(ptr %in, ptr %out, ptr %out2) #0 vscale_range(
459460
; CHECK: // %bb.0: // %entry
460461
; CHECK-NEXT: ldr z0, [x0]
461462
; CHECK-NEXT: ptrue p0.d, vl4
462-
; CHECK-NEXT: mov z1.d, z0.d
463+
; CHECK-NEXT: movprfx z1, z0
463464
; CHECK-NEXT: ext z1.b, z1.b, z0.b, #32
464465
; CHECK-NEXT: st1d { z1.d }, p0, [x1]
465466
; CHECK-NEXT: st1d { z0.d }, p0, [x2]
@@ -553,7 +554,7 @@ define void @extract_v16half_halves(ptr %in, ptr %out, ptr %out2) #0 vscale_rang
553554
; CHECK-LABEL: extract_v16half_halves:
554555
; CHECK: // %bb.0: // %entry
555556
; CHECK-NEXT: ldr z0, [x0]
556-
; CHECK-NEXT: mov z1.d, z0.d
557+
; CHECK-NEXT: movprfx z1, z0
557558
; CHECK-NEXT: ext z1.b, z1.b, z0.b, #16
558559
; CHECK-NEXT: str q1, [x1]
559560
; CHECK-NEXT: str q0, [x2]
@@ -586,7 +587,7 @@ define void @extract_v32half_halves(ptr %in, ptr %out, ptr %out2) #0 vscale_rang
586587
; CHECK: // %bb.0: // %entry
587588
; CHECK-NEXT: ldr z0, [x0]
588589
; CHECK-NEXT: ptrue p0.h, vl16
589-
; CHECK-NEXT: mov z1.d, z0.d
590+
; CHECK-NEXT: movprfx z1, z0
590591
; CHECK-NEXT: ext z1.b, z1.b, z0.b, #32
591592
; CHECK-NEXT: st1h { z1.h }, p0, [x1]
592593
; CHECK-NEXT: st1h { z0.h }, p0, [x2]
@@ -668,7 +669,7 @@ define void @extract_v8float_halves(ptr %in, ptr %out, ptr %out2) #0 vscale_rang
668669
; CHECK-LABEL: extract_v8float_halves:
669670
; CHECK: // %bb.0: // %entry
670671
; CHECK-NEXT: ldr z0, [x0]
671-
; CHECK-NEXT: mov z1.d, z0.d
672+
; CHECK-NEXT: movprfx z1, z0
672673
; CHECK-NEXT: ext z1.b, z1.b, z0.b, #16
673674
; CHECK-NEXT: str q1, [x1]
674675
; CHECK-NEXT: str q0, [x2]
@@ -701,7 +702,7 @@ define void @extract_v16float_halves(ptr %in, ptr %out, ptr %out2) #0 vscale_ran
701702
; CHECK: // %bb.0: // %entry
702703
; CHECK-NEXT: ldr z0, [x0]
703704
; CHECK-NEXT: ptrue p0.s, vl8
704-
; CHECK-NEXT: mov z1.d, z0.d
705+
; CHECK-NEXT: movprfx z1, z0
705706
; CHECK-NEXT: ext z1.b, z1.b, z0.b, #32
706707
; CHECK-NEXT: st1w { z1.s }, p0, [x1]
707708
; CHECK-NEXT: st1w { z0.s }, p0, [x2]
@@ -772,7 +773,7 @@ define void @extract_v4double_halves(ptr %in, ptr %out, ptr %out2) #0 vscale_ran
772773
; CHECK-LABEL: extract_v4double_halves:
773774
; CHECK: // %bb.0: // %entry
774775
; CHECK-NEXT: ldr z0, [x0]
775-
; CHECK-NEXT: mov z1.d, z0.d
776+
; CHECK-NEXT: movprfx z1, z0
776777
; CHECK-NEXT: ext z1.b, z1.b, z0.b, #16
777778
; CHECK-NEXT: str q1, [x1]
778779
; CHECK-NEXT: str q0, [x2]
@@ -805,7 +806,7 @@ define void @extract_v8double_halves(ptr %in, ptr %out, ptr %out2) #0 vscale_ran
805806
; CHECK: // %bb.0: // %entry
806807
; CHECK-NEXT: ldr z0, [x0]
807808
; CHECK-NEXT: ptrue p0.d, vl4
808-
; CHECK-NEXT: mov z1.d, z0.d
809+
; CHECK-NEXT: movprfx z1, z0
809810
; CHECK-NEXT: ext z1.b, z1.b, z0.b, #32
810811
; CHECK-NEXT: st1d { z1.d }, p0, [x1]
811812
; CHECK-NEXT: st1d { z0.d }, p0, [x2]
@@ -908,7 +909,7 @@ define void @extract_subvector_legalization_v8i32() vscale_range(2,2) #0 {
908909
; CHECK-NEXT: add x8, x8, :lo12:.LCPI59_0
909910
; CHECK-NEXT: ptrue p1.d
910911
; CHECK-NEXT: ldr z0, [x8]
911-
; CHECK-NEXT: mov z1.d, z0.d
912+
; CHECK-NEXT: movprfx z1, z0
912913
; CHECK-NEXT: ext z1.b, z1.b, z0.b, #16
913914
; CHECK-NEXT: cmeq v0.4s, v0.4s, #0
914915
; CHECK-NEXT: cmeq v1.4s, v1.4s, #0

0 commit comments

Comments
 (0)