Skip to content

Commit f573d2e

Browse files
committed
[AArch64][ISel] Select constructive SVE2 ext instruction
This adds patterns for selecting EXT_ZZI_B, the constructive form of the SVE2 EXT instruction. The patterns are tested for fixed-length vectors using extract shuffles, and for scalable vectors using llvm.vector.splice intrinsics. Codegen will improve once subregister liveness is enabled: without it, any use of a ZPR2 tuple is always treated as using both Z registers of the pair.
1 parent ebcb492 commit f573d2e

14 files changed

+2236
-1390
lines changed

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4069,6 +4069,22 @@ let Predicates = [HasSVE2_or_SME] in {
40694069
let AddedComplexity = 2 in {
40704070
def : Pat<(nxv16i8 (AArch64ext nxv16i8:$zn1, nxv16i8:$zn2, (i32 imm0_255:$imm))),
40714071
(EXT_ZZI_B (REG_SEQUENCE ZPR2, $zn1, zsub0, $zn2, zsub1), imm0_255:$imm)>;
4072+
4073+
foreach VT = [nxv16i8] in
4074+
def : Pat<(VT (vector_splice VT:$Z1, VT:$Z2, (i64 (sve_ext_imm_0_255 i32:$index)))),
4075+
(EXT_ZZI_B (REG_SEQUENCE ZPR2, $Z1, zsub0, $Z2, zsub1), imm0_255:$index)>;
4076+
4077+
foreach VT = [nxv8i16, nxv8f16, nxv8bf16] in
4078+
def : Pat<(VT (vector_splice VT:$Z1, VT:$Z2, (i64 (sve_ext_imm_0_127 i32:$index)))),
4079+
(EXT_ZZI_B (REG_SEQUENCE ZPR2, $Z1, zsub0, $Z2, zsub1), imm0_255:$index)>;
4080+
4081+
foreach VT = [nxv4i32, nxv4f16, nxv4f32, nxv4bf16] in
4082+
def : Pat<(VT (vector_splice VT:$Z1, VT:$Z2, (i64 (sve_ext_imm_0_63 i32:$index)))),
4083+
(EXT_ZZI_B (REG_SEQUENCE ZPR2, $Z1, zsub0, $Z2, zsub1), imm0_255:$index)>;
4084+
4085+
foreach VT = [nxv2i64, nxv2f16, nxv2f32, nxv2f64, nxv2bf16] in
4086+
def : Pat<(VT (vector_splice VT:$Z1, VT:$Z2, (i64 (sve_ext_imm_0_31 i32:$index)))),
4087+
(EXT_ZZI_B (REG_SEQUENCE ZPR2, $Z1, zsub0, $Z2, zsub1), imm0_255:$index)>;
40724088
}
40734089
} // End HasSVE2_or_SME
40744090

llvm/test/CodeGen/AArch64/get-active-lane-mask-extract.ll

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -192,7 +192,7 @@ define void @test_fixed_extract(i64 %i, i64 %n) #0 {
192192
; CHECK-SVE2p1-NEXT: mov z1.s, p0/z, #1 // =0x1
193193
; CHECK-SVE2p1-NEXT: fmov s0, w8
194194
; CHECK-SVE2p1-NEXT: mov v0.s[1], v1.s[1]
195-
; CHECK-SVE2p1-NEXT: ext z1.b, z1.b, z0.b, #8
195+
; CHECK-SVE2p1-NEXT: ext z1.b, { z1.b, z2.b }, #8
196196
; CHECK-SVE2p1-NEXT: // kill: def $d0 killed $d0 killed $q0
197197
; CHECK-SVE2p1-NEXT: // kill: def $d1 killed $d1 killed $z1
198198
; CHECK-SVE2p1-NEXT: b use
@@ -202,12 +202,12 @@ define void @test_fixed_extract(i64 %i, i64 %n) #0 {
202202
; CHECK-SME2-NEXT: whilelo p0.s, x0, x1
203203
; CHECK-SME2-NEXT: cset w8, mi
204204
; CHECK-SME2-NEXT: mov z1.s, p0/z, #1 // =0x1
205-
; CHECK-SME2-NEXT: fmov s2, w8
205+
; CHECK-SME2-NEXT: fmov s3, w8
206206
; CHECK-SME2-NEXT: mov z0.s, z1.s[1]
207-
; CHECK-SME2-NEXT: zip1 z0.s, z2.s, z0.s
208-
; CHECK-SME2-NEXT: ext z1.b, z1.b, z0.b, #8
209-
; CHECK-SME2-NEXT: // kill: def $d0 killed $d0 killed $z0
207+
; CHECK-SME2-NEXT: ext z1.b, { z1.b, z2.b }, #8
210208
; CHECK-SME2-NEXT: // kill: def $d1 killed $d1 killed $z1
209+
; CHECK-SME2-NEXT: zip1 z0.s, z3.s, z0.s
210+
; CHECK-SME2-NEXT: // kill: def $d0 killed $d0 killed $z0
211211
; CHECK-SME2-NEXT: b use
212212
%r = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 %i, i64 %n)
213213
%v0 = call <2 x i1> @llvm.vector.extract.v2i1.nxv4i1.i64(<vscale x 4 x i1> %r, i64 0)

llvm/test/CodeGen/AArch64/sve-fixed-length-partial-reduce.ll

Lines changed: 42 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -109,14 +109,13 @@ define <16 x i16> @two_way_i8_i16_vl256(ptr %accptr, ptr %uptr, ptr %sptr) vscal
109109
; SME-LABEL: two_way_i8_i16_vl256:
110110
; SME: // %bb.0:
111111
; SME-NEXT: ldr z0, [x0]
112-
; SME-NEXT: ldr z1, [x1]
113-
; SME-NEXT: ldr z2, [x2]
114-
; SME-NEXT: umlalb z0.h, z2.b, z1.b
115-
; SME-NEXT: umlalt z0.h, z2.b, z1.b
116-
; SME-NEXT: mov z1.d, z0.d
117-
; SME-NEXT: ext z1.b, z1.b, z0.b, #16
118-
; SME-NEXT: // kill: def $q0 killed $q0 killed $z0
119-
; SME-NEXT: // kill: def $q1 killed $q1 killed $z1
112+
; SME-NEXT: ldr z2, [x1]
113+
; SME-NEXT: ldr z3, [x2]
114+
; SME-NEXT: umlalb z0.h, z3.b, z2.b
115+
; SME-NEXT: umlalt z0.h, z3.b, z2.b
116+
; SME-NEXT: ext z2.b, { z0.b, z1.b }, #16
117+
; SME-NEXT: // kill: def $q0 killed $q0 killed $z0_z1
118+
; SME-NEXT: mov z1.d, z2.d
120119
; SME-NEXT: ret
121120
%acc = load <16 x i16>, ptr %accptr
122121
%u = load <32 x i8>, ptr %uptr
@@ -232,14 +231,13 @@ define <8 x i32> @two_way_i16_i32_vl256(ptr %accptr, ptr %uptr, ptr %sptr) vscal
232231
; SME-LABEL: two_way_i16_i32_vl256:
233232
; SME: // %bb.0:
234233
; SME-NEXT: ldr z0, [x0]
235-
; SME-NEXT: ldr z1, [x1]
236-
; SME-NEXT: ldr z2, [x2]
237-
; SME-NEXT: umlalb z0.s, z2.h, z1.h
238-
; SME-NEXT: umlalt z0.s, z2.h, z1.h
239-
; SME-NEXT: mov z1.d, z0.d
240-
; SME-NEXT: ext z1.b, z1.b, z0.b, #16
241-
; SME-NEXT: // kill: def $q0 killed $q0 killed $z0
242-
; SME-NEXT: // kill: def $q1 killed $q1 killed $z1
234+
; SME-NEXT: ldr z2, [x1]
235+
; SME-NEXT: ldr z3, [x2]
236+
; SME-NEXT: umlalb z0.s, z3.h, z2.h
237+
; SME-NEXT: umlalt z0.s, z3.h, z2.h
238+
; SME-NEXT: ext z2.b, { z0.b, z1.b }, #16
239+
; SME-NEXT: // kill: def $q0 killed $q0 killed $z0_z1
240+
; SME-NEXT: mov z1.d, z2.d
243241
; SME-NEXT: ret
244242
%acc = load <8 x i32>, ptr %accptr
245243
%u = load <16 x i16>, ptr %uptr
@@ -355,14 +353,13 @@ define <4 x i64> @two_way_i32_i64_vl256(ptr %accptr, ptr %uptr, ptr %sptr) vscal
355353
; SME-LABEL: two_way_i32_i64_vl256:
356354
; SME: // %bb.0:
357355
; SME-NEXT: ldr z0, [x0]
358-
; SME-NEXT: ldr z1, [x1]
359-
; SME-NEXT: ldr z2, [x2]
360-
; SME-NEXT: umlalb z0.d, z2.s, z1.s
361-
; SME-NEXT: umlalt z0.d, z2.s, z1.s
362-
; SME-NEXT: mov z1.d, z0.d
363-
; SME-NEXT: ext z1.b, z1.b, z0.b, #16
364-
; SME-NEXT: // kill: def $q0 killed $q0 killed $z0
365-
; SME-NEXT: // kill: def $q1 killed $q1 killed $z1
356+
; SME-NEXT: ldr z2, [x1]
357+
; SME-NEXT: ldr z3, [x2]
358+
; SME-NEXT: umlalb z0.d, z3.s, z2.s
359+
; SME-NEXT: umlalt z0.d, z3.s, z2.s
360+
; SME-NEXT: ext z2.b, { z0.b, z1.b }, #16
361+
; SME-NEXT: // kill: def $q0 killed $q0 killed $z0_z1
362+
; SME-NEXT: mov z1.d, z2.d
366363
; SME-NEXT: ret
367364
%acc = load <4 x i64>, ptr %accptr
368365
%u = load <8 x i32>, ptr %uptr
@@ -644,13 +641,12 @@ define <8 x i32> @four_way_i8_i32_vl256(ptr %accptr, ptr %uptr, ptr %sptr) vscal
644641
; SME-LABEL: four_way_i8_i32_vl256:
645642
; SME: // %bb.0:
646643
; SME-NEXT: ldr z0, [x0]
647-
; SME-NEXT: ldr z1, [x1]
648-
; SME-NEXT: ldr z2, [x2]
649-
; SME-NEXT: udot z0.s, z2.b, z1.b
650-
; SME-NEXT: mov z1.d, z0.d
651-
; SME-NEXT: ext z1.b, z1.b, z0.b, #16
652-
; SME-NEXT: // kill: def $q0 killed $q0 killed $z0
653-
; SME-NEXT: // kill: def $q1 killed $q1 killed $z1
644+
; SME-NEXT: ldr z2, [x1]
645+
; SME-NEXT: ldr z3, [x2]
646+
; SME-NEXT: udot z0.s, z3.b, z2.b
647+
; SME-NEXT: ext z2.b, { z0.b, z1.b }, #16
648+
; SME-NEXT: // kill: def $q0 killed $q0 killed $z0_z1
649+
; SME-NEXT: mov z1.d, z2.d
654650
; SME-NEXT: ret
655651
%acc = load <8 x i32>, ptr %accptr
656652
%u = load <32 x i8>, ptr %uptr
@@ -689,13 +685,12 @@ define <8 x i32> @four_way_i8_i32_vl256_usdot(ptr %accptr, ptr %uptr, ptr %sptr)
689685
; SME-LABEL: four_way_i8_i32_vl256_usdot:
690686
; SME: // %bb.0:
691687
; SME-NEXT: ldr z0, [x0]
692-
; SME-NEXT: ldr z1, [x1]
693-
; SME-NEXT: ldr z2, [x2]
694-
; SME-NEXT: usdot z0.s, z1.b, z2.b
695-
; SME-NEXT: mov z1.d, z0.d
696-
; SME-NEXT: ext z1.b, z1.b, z0.b, #16
697-
; SME-NEXT: // kill: def $q0 killed $q0 killed $z0
698-
; SME-NEXT: // kill: def $q1 killed $q1 killed $z1
688+
; SME-NEXT: ldr z2, [x1]
689+
; SME-NEXT: ldr z3, [x2]
690+
; SME-NEXT: usdot z0.s, z2.b, z3.b
691+
; SME-NEXT: ext z2.b, { z0.b, z1.b }, #16
692+
; SME-NEXT: // kill: def $q0 killed $q0 killed $z0_z1
693+
; SME-NEXT: mov z1.d, z2.d
699694
; SME-NEXT: ret
700695
%acc = load <8 x i32>, ptr %accptr
701696
%u = load <32 x i8>, ptr %uptr
@@ -822,13 +817,12 @@ define <4 x i64> @four_way_i16_i64_vl256(ptr %accptr, ptr %uptr, ptr %sptr) vsca
822817
; SME-LABEL: four_way_i16_i64_vl256:
823818
; SME: // %bb.0:
824819
; SME-NEXT: ldr z0, [x0]
825-
; SME-NEXT: ldr z1, [x1]
826-
; SME-NEXT: ldr z2, [x2]
827-
; SME-NEXT: udot z0.d, z2.h, z1.h
828-
; SME-NEXT: mov z1.d, z0.d
829-
; SME-NEXT: ext z1.b, z1.b, z0.b, #16
830-
; SME-NEXT: // kill: def $q0 killed $q0 killed $z0
831-
; SME-NEXT: // kill: def $q1 killed $q1 killed $z1
820+
; SME-NEXT: ldr z2, [x1]
821+
; SME-NEXT: ldr z3, [x2]
822+
; SME-NEXT: udot z0.d, z3.h, z2.h
823+
; SME-NEXT: ext z2.b, { z0.b, z1.b }, #16
824+
; SME-NEXT: // kill: def $q0 killed $q0 killed $z0_z1
825+
; SME-NEXT: mov z1.d, z2.d
832826
; SME-NEXT: ret
833827
%acc = load <4 x i64>, ptr %accptr
834828
%u = load <16 x i16>, ptr %uptr
@@ -999,10 +993,9 @@ define <4 x i64> @four_way_i8_i64_vl256(ptr %accptr, ptr %uptr, ptr %sptr) vscal
999993
; SME-NEXT: ldr z0, [x0]
1000994
; SME-NEXT: uaddwb z0.d, z0.d, z2.s
1001995
; SME-NEXT: uaddwt z0.d, z0.d, z2.s
1002-
; SME-NEXT: mov z1.d, z0.d
1003-
; SME-NEXT: ext z1.b, z1.b, z0.b, #16
1004-
; SME-NEXT: // kill: def $q0 killed $q0 killed $z0
1005-
; SME-NEXT: // kill: def $q1 killed $q1 killed $z1
996+
; SME-NEXT: ext z2.b, { z0.b, z1.b }, #16
997+
; SME-NEXT: // kill: def $q0 killed $q0 killed $z0_z1
998+
; SME-NEXT: mov z1.d, z2.d
1006999
; SME-NEXT: ret
10071000
%acc = load <4 x i64>, ptr %accptr
10081001
%u = load <32 x i8>, ptr %uptr

llvm/test/CodeGen/AArch64/sve-pr92779.ll

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -5,16 +5,15 @@ define void @main(ptr %0) {
55
; CHECK-LABEL: main:
66
; CHECK: // %bb.0: // %entry
77
; CHECK-NEXT: movi v0.2d, #0000000000000000
8-
; CHECK-NEXT: movi v1.2d, #0000000000000000
98
; CHECK-NEXT: ptrue p0.d, vl1
10-
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
11-
; CHECK-NEXT: uzp1 v0.2s, v1.2s, v0.2s
12-
; CHECK-NEXT: neg v0.2s, v0.2s
13-
; CHECK-NEXT: smov x8, v0.s[0]
14-
; CHECK-NEXT: smov x9, v0.s[1]
15-
; CHECK-NEXT: mov z1.d, p0/m, x8
16-
; CHECK-NEXT: mov z1.d, p0/m, x9
17-
; CHECK-NEXT: str z1, [x0]
9+
; CHECK-NEXT: ext z2.b, { z0.b, z1.b }, #8
10+
; CHECK-NEXT: uzp1 v2.2s, v0.2s, v2.2s
11+
; CHECK-NEXT: neg v2.2s, v2.2s
12+
; CHECK-NEXT: smov x8, v2.s[0]
13+
; CHECK-NEXT: smov x9, v2.s[1]
14+
; CHECK-NEXT: mov z0.d, p0/m, x8
15+
; CHECK-NEXT: mov z0.d, p0/m, x9
16+
; CHECK-NEXT: str z0, [x0]
1817
; CHECK-NEXT: ret
1918
"entry":
2019
%1 = bitcast <vscale x 2 x i64> zeroinitializer to <vscale x 4 x i32>

llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ext-loads.ll

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
2+
; RUN: llc -mattr=+sve2 -force-streaming-compatible < %s | FileCheck %s
33
; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
44
; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
55

@@ -228,25 +228,25 @@ define <4 x i256> @load_sext_v4i32i256(ptr %ap) {
228228
; CHECK-LABEL: load_sext_v4i32i256:
229229
; CHECK: // %bb.0:
230230
; CHECK-NEXT: ldr q0, [x0]
231-
; CHECK-NEXT: sunpklo z1.d, z0.s
232-
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
231+
; CHECK-NEXT: sunpklo z2.d, z0.s
232+
; CHECK-NEXT: ext z0.b, { z0.b, z1.b }, #8
233233
; CHECK-NEXT: sunpklo z0.d, z0.s
234-
; CHECK-NEXT: fmov x9, d1
235-
; CHECK-NEXT: mov z1.d, z1.d[1]
236-
; CHECK-NEXT: fmov x11, d0
237-
; CHECK-NEXT: mov z0.d, z0.d[1]
234+
; CHECK-NEXT: fmov x9, d2
235+
; CHECK-NEXT: mov z2.d, z2.d[1]
238236
; CHECK-NEXT: asr x10, x9, #63
237+
; CHECK-NEXT: fmov x11, d2
239238
; CHECK-NEXT: stp x9, x10, [x8]
240-
; CHECK-NEXT: fmov x9, d1
239+
; CHECK-NEXT: fmov x9, d0
240+
; CHECK-NEXT: mov z0.d, z0.d[1]
241241
; CHECK-NEXT: asr x12, x11, #63
242242
; CHECK-NEXT: stp x10, x10, [x8, #16]
243-
; CHECK-NEXT: stp x11, x12, [x8, #64]
243+
; CHECK-NEXT: stp x11, x12, [x8, #32]
244244
; CHECK-NEXT: fmov x11, d0
245245
; CHECK-NEXT: asr x10, x9, #63
246-
; CHECK-NEXT: stp x12, x12, [x8, #80]
247-
; CHECK-NEXT: stp x10, x10, [x8, #48]
246+
; CHECK-NEXT: stp x12, x12, [x8, #48]
247+
; CHECK-NEXT: stp x10, x10, [x8, #80]
248248
; CHECK-NEXT: asr x12, x11, #63
249-
; CHECK-NEXT: stp x9, x10, [x8, #32]
249+
; CHECK-NEXT: stp x9, x10, [x8, #64]
250250
; CHECK-NEXT: stp x12, x12, [x8, #112]
251251
; CHECK-NEXT: stp x11, x12, [x8, #96]
252252
; CHECK-NEXT: ret

llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-subvector.ll

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
2+
; RUN: llc -mattr=+sve2 -force-streaming-compatible < %s | FileCheck %s
33
; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
44
; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
55

@@ -78,8 +78,8 @@ define <4 x i8> @extract_subvector_v8i8(<8 x i8> %op) {
7878
define <8 x i8> @extract_subvector_v16i8(<16 x i8> %op) {
7979
; CHECK-LABEL: extract_subvector_v16i8:
8080
; CHECK: // %bb.0:
81-
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
82-
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
81+
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0_z1
82+
; CHECK-NEXT: ext z0.b, { z0.b, z1.b }, #8
8383
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
8484
; CHECK-NEXT: ret
8585
;
@@ -119,7 +119,7 @@ define <2 x i16> @extract_subvector_v4i16(<4 x i16> %op) {
119119
; CHECK: // %bb.0:
120120
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
121121
; CHECK-NEXT: uunpklo z0.s, z0.h
122-
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
122+
; CHECK-NEXT: ext z0.b, { z0.b, z1.b }, #8
123123
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
124124
; CHECK-NEXT: ret
125125
;
@@ -138,8 +138,8 @@ define <2 x i16> @extract_subvector_v4i16(<4 x i16> %op) {
138138
define <4 x i16> @extract_subvector_v8i16(<8 x i16> %op) {
139139
; CHECK-LABEL: extract_subvector_v8i16:
140140
; CHECK: // %bb.0:
141-
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
142-
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
141+
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0_z1
142+
; CHECK-NEXT: ext z0.b, { z0.b, z1.b }, #8
143143
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
144144
; CHECK-NEXT: ret
145145
;
@@ -198,8 +198,8 @@ define <1 x i32> @extract_subvector_v2i32(<2 x i32> %op) {
198198
define <2 x i32> @extract_subvector_v4i32(<4 x i32> %op) {
199199
; CHECK-LABEL: extract_subvector_v4i32:
200200
; CHECK: // %bb.0:
201-
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
202-
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
201+
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0_z1
202+
; CHECK-NEXT: ext z0.b, { z0.b, z1.b }, #8
203203
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
204204
; CHECK-NEXT: ret
205205
;
@@ -237,8 +237,8 @@ define void @extract_subvector_v8i32(ptr %a, ptr %b) {
237237
define <1 x i64> @extract_subvector_v2i64(<2 x i64> %op) {
238238
; CHECK-LABEL: extract_subvector_v2i64:
239239
; CHECK: // %bb.0:
240-
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
241-
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
240+
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0_z1
241+
; CHECK-NEXT: ext z0.b, { z0.b, z1.b }, #8
242242
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
243243
; CHECK-NEXT: ret
244244
;
@@ -297,8 +297,8 @@ define <2 x half> @extract_subvector_v4f16(<4 x half> %op) {
297297
define <4 x half> @extract_subvector_v8f16(<8 x half> %op) {
298298
; CHECK-LABEL: extract_subvector_v8f16:
299299
; CHECK: // %bb.0:
300-
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
301-
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
300+
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0_z1
301+
; CHECK-NEXT: ext z0.b, { z0.b, z1.b }, #8
302302
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
303303
; CHECK-NEXT: ret
304304
;
@@ -357,8 +357,8 @@ define <1 x float> @extract_subvector_v2f32(<2 x float> %op) {
357357
define <2 x float> @extract_subvector_v4f32(<4 x float> %op) {
358358
; CHECK-LABEL: extract_subvector_v4f32:
359359
; CHECK: // %bb.0:
360-
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
361-
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
360+
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0_z1
361+
; CHECK-NEXT: ext z0.b, { z0.b, z1.b }, #8
362362
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
363363
; CHECK-NEXT: ret
364364
;
@@ -396,8 +396,8 @@ define void @extract_subvector_v8f32(ptr %a, ptr %b) {
396396
define <1 x double> @extract_subvector_v2f64(<2 x double> %op) {
397397
; CHECK-LABEL: extract_subvector_v2f64:
398398
; CHECK: // %bb.0:
399-
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
400-
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
399+
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0_z1
400+
; CHECK-NEXT: ext z0.b, { z0.b, z1.b }, #8
401401
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
402402
; CHECK-NEXT: ret
403403
;

0 commit comments

Comments
 (0)