@@ -143,15 +143,8 @@ define <4 x float> @extract_v4f32_nxv16f32_12(<vscale x 16 x float> %arg) {
143143define <2 x float > @extract_v2f32_nxv16f32_2 (<vscale x 16 x float > %arg ) {
144144; CHECK-LABEL: extract_v2f32_nxv16f32_2:
145145; CHECK: // %bb.0:
146- ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
147- ; CHECK-NEXT: addvl sp, sp, #-1
148- ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
149- ; CHECK-NEXT: .cfi_offset w29, -16
150- ; CHECK-NEXT: ptrue p0.s
151- ; CHECK-NEXT: st1w { z0.s }, p0, [sp]
152- ; CHECK-NEXT: ldr d0, [sp, #8]
153- ; CHECK-NEXT: addvl sp, sp, #1
154- ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
146+ ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
147+ ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
155148; CHECK-NEXT: ret
156149 %ext = call <2 x float > @llvm.vector.extract.v2f32.nxv16f32 (<vscale x 16 x float > %arg , i64 2 )
157150 ret <2 x float > %ext
@@ -274,15 +267,8 @@ define <4 x i3> @extract_v4i3_nxv32i3_16(<vscale x 32 x i3> %arg) {
274267define <2 x i32 > @extract_v2i32_nxv16i32_2 (<vscale x 16 x i32 > %arg ) {
275268; CHECK-LABEL: extract_v2i32_nxv16i32_2:
276269; CHECK: // %bb.0:
277- ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
278- ; CHECK-NEXT: addvl sp, sp, #-1
279- ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
280- ; CHECK-NEXT: .cfi_offset w29, -16
281- ; CHECK-NEXT: ptrue p0.s
282- ; CHECK-NEXT: st1w { z0.s }, p0, [sp]
283- ; CHECK-NEXT: ldr d0, [sp, #8]
284- ; CHECK-NEXT: addvl sp, sp, #1
285- ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
270+ ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
271+ ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
286272; CHECK-NEXT: ret
287273 %ext = call <2 x i32 > @llvm.vector.extract.v2i32.nxv16i32 (<vscale x 16 x i32 > %arg , i64 2 )
288274 ret <2 x i32 > %ext
@@ -314,16 +300,9 @@ define <4 x half> @extract_v4f16_nxv2f16_0(<vscale x 2 x half> %arg) {
314300; CHECK-NEXT: addvl sp, sp, #-1
315301; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
316302; CHECK-NEXT: .cfi_offset w29, -16
317- ; CHECK-NEXT: cntd x8
318303; CHECK-NEXT: ptrue p0.d
319- ; CHECK-NEXT: addpl x9, sp, #6
320- ; CHECK-NEXT: subs x8, x8, #4
321- ; CHECK-NEXT: csel x8, xzr, x8, lo
322- ; CHECK-NEXT: st1h { z0.d }, p0, [sp, #3, mul vl]
323- ; CHECK-NEXT: cmp x8, #0
324- ; CHECK-NEXT: csel x8, x8, xzr, lo
325- ; CHECK-NEXT: lsl x8, x8, #1
326- ; CHECK-NEXT: ldr d0, [x9, x8]
304+ ; CHECK-NEXT: st1h { z0.d }, p0, [sp]
305+ ; CHECK-NEXT: ldr d0, [sp]
327306; CHECK-NEXT: addvl sp, sp, #1
328307; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
329308; CHECK-NEXT: ret
@@ -338,17 +317,12 @@ define <4 x half> @extract_v4f16_nxv2f16_4(<vscale x 2 x half> %arg) {
338317; CHECK-NEXT: addvl sp, sp, #-1
339318; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
340319; CHECK-NEXT: .cfi_offset w29, -16
341- ; CHECK-NEXT: cntd x8
342- ; CHECK-NEXT: mov w9, #4 // =0x4
343320; CHECK-NEXT: ptrue p0.d
344- ; CHECK-NEXT: subs x8, x8, #4
345- ; CHECK-NEXT: csel x8, xzr, x8, lo
346- ; CHECK-NEXT: st1h { z0.d }, p0, [sp, #3, mul vl]
347- ; CHECK-NEXT: cmp x8, #4
348- ; CHECK-NEXT: csel x8, x8, x9, lo
349- ; CHECK-NEXT: addpl x9, sp, #6
350- ; CHECK-NEXT: lsl x8, x8, #1
351- ; CHECK-NEXT: ldr d0, [x9, x8]
321+ ; CHECK-NEXT: ptrue p1.h
322+ ; CHECK-NEXT: st1h { z0.d }, p0, [sp]
323+ ; CHECK-NEXT: ld1h { z0.h }, p1/z, [sp]
324+ ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
325+ ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
352326; CHECK-NEXT: addvl sp, sp, #1
353327; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
354328; CHECK-NEXT: ret
0 commit comments