@@ -71,15 +71,16 @@ ret {<vscale x 4 x i32>, <vscale x 4 x i32>} %retval
 define {<vscale x 2 x i64>, <vscale x 2 x i64>} @vector_deinterleave_nxv2i64_nxv4i64(<vscale x 4 x i64> %vec) {
 ; CHECK-LABEL: vector_deinterleave_nxv2i64_nxv4i64:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a0, 85
+; CHECK-NEXT:    vsetvli a1, zero, e8, m1, ta, ma
+; CHECK-NEXT:    vmv.v.x v16, a0
+; CHECK-NEXT:    li a0, 170
+; CHECK-NEXT:    vmv.v.x v17, a0
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vid.v v12
-; CHECK-NEXT:    vand.vi v12, v12, 1
-; CHECK-NEXT:    vmseq.vi v16, v12, 0
 ; CHECK-NEXT:    vcompress.vm v12, v8, v16
-; CHECK-NEXT:    vmnot.m v14, v16
-; CHECK-NEXT:    vcompress.vm v16, v8, v14
+; CHECK-NEXT:    vcompress.vm v20, v8, v17
 ; CHECK-NEXT:    vmv2r.v v8, v12
-; CHECK-NEXT:    vmv2r.v v10, v16
+; CHECK-NEXT:    vmv2r.v v10, v20
 ; CHECK-NEXT:    ret
   %retval = call {<vscale x 2 x i64>, <vscale x 2 x i64>} @llvm.vector.deinterleave2.nxv4i64(<vscale x 4 x i64> %vec)
   ret {<vscale x 2 x i64>, <vscale x 2 x i64>} %retval
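
The new lowering builds its masks by splatting the byte constants 85 and 170 instead of computing them with vid.v/vand.vi/vmseq.vi and vmnot.m. Since an RVV mask register holds one bit per element, those two bytes are exactly the even-lane and odd-lane selectors. A minimal sketch of why those constants work (plain Python, independent of any RVV API):

```python
# 85 = 0b01010101 and 170 = 0b10101010: bit i of the mask byte governs
# element i, so the two splats select the even and odd lanes respectively.
even, odd = 85, 170
assert [i for i in range(8) if (even >> i) & 1] == [0, 2, 4, 6]
assert [i for i in range(8) if (odd >> i) & 1] == [1, 3, 5, 7]
assert even ^ odd == 0xFF  # the two masks together cover every lane
```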
@@ -88,15 +89,16 @@ ret {<vscale x 2 x i64>, <vscale x 2 x i64>} %retval
 define {<vscale x 4 x i64>, <vscale x 4 x i64>} @vector_deinterleave_nxv4i64_nxv8i64(<vscale x 8 x i64> %vec) {
 ; CHECK-LABEL: vector_deinterleave_nxv4i64_nxv8i64:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a0, 85
+; CHECK-NEXT:    vsetvli a1, zero, e8, mf8, ta, ma
+; CHECK-NEXT:    vmv.v.x v24, a0
+; CHECK-NEXT:    li a0, 170
+; CHECK-NEXT:    vmv.v.x v25, a0
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vid.v v16
-; CHECK-NEXT:    vand.vi v16, v16, 1
-; CHECK-NEXT:    vmseq.vi v24, v16, 0
 ; CHECK-NEXT:    vcompress.vm v16, v8, v24
-; CHECK-NEXT:    vmnot.m v20, v24
-; CHECK-NEXT:    vcompress.vm v24, v8, v20
+; CHECK-NEXT:    vcompress.vm v0, v8, v25
 ; CHECK-NEXT:    vmv4r.v v8, v16
-; CHECK-NEXT:    vmv4r.v v12, v24
+; CHECK-NEXT:    vmv4r.v v12, v0
 ; CHECK-NEXT:    ret
   %retval = call {<vscale x 4 x i64>, <vscale x 4 x i64>} @llvm.vector.deinterleave2.nxv8i64(<vscale x 8 x i64> %vec)
   ret {<vscale x 4 x i64>, <vscale x 4 x i64>} %retval
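
vcompress.vm packs the source elements whose mask bit is set into the low end of the destination, so driving it with the two alternating masks above performs the deinterleave directly. A small behavioral model (illustrative Python; the helper name is hypothetical):

```python
def vcompress(src, mask_bits):
    # Model of vcompress.vm: keep elements whose mask bit is set, packed low.
    return [x for i, x in enumerate(src) if (mask_bits >> i) & 1]

vec = [10, 11, 20, 21, 30, 31, 40, 41]          # interleaved a0,b0,a1,b1,...
assert vcompress(vec, 85) == [10, 20, 30, 40]   # even lanes -> first result
assert vcompress(vec, 170) == [11, 21, 31, 41]  # odd lanes -> second result
```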
@@ -182,50 +184,41 @@ define {<vscale x 8 x i64>, <vscale x 8 x i64>} @vector_deinterleave_nxv8i64_nxv
 ; CHECK-NEXT:    addi sp, sp, -16
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    li a1, 24
-; CHECK-NEXT:    mul a0, a0, a1
-; CHECK-NEXT:    sub sp, sp, a0
-; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
-; CHECK-NEXT:    csrr a0, vlenb
 ; CHECK-NEXT:    slli a0, a0, 4
-; CHECK-NEXT:    add a0, sp, a0
-; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT:    sub sp, sp, a0
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; CHECK-NEXT:    li a0, 85
+; CHECK-NEXT:    vsetvli a1, zero, e8, mf8, ta, ma
+; CHECK-NEXT:    vmv.v.x v7, a0
+; CHECK-NEXT:    li a0, 170
+; CHECK-NEXT:    vmv.v.x v6, a0
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vid.v v16
-; CHECK-NEXT:    vand.vi v24, v16, 1
-; CHECK-NEXT:    vmseq.vi v16, v24, 0
-; CHECK-NEXT:    vcompress.vm v24, v8, v16
+; CHECK-NEXT:    vcompress.vm v24, v8, v7
+; CHECK-NEXT:    vmv1r.v v28, v7
+; CHECK-NEXT:    vmv1r.v v29, v6
+; CHECK-NEXT:    vcompress.vm v0, v8, v29
+; CHECK-NEXT:    vcompress.vm v8, v16, v28
 ; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
-; CHECK-NEXT:    vmnot.m v17, v16
-; CHECK-NEXT:    vcompress.vm v0, v8, v17
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 4
-; CHECK-NEXT:    add a0, sp, a0
-; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT:    vcompress.vm v24, v8, v16
+; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT:    vcompress.vm v8, v16, v29
 ; CHECK-NEXT:    csrr a0, vlenb
 ; CHECK-NEXT:    slli a0, a0, 3
 ; CHECK-NEXT:    add a0, sp, a0
 ; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
-; CHECK-NEXT:    vcompress.vm v24, v8, v17
+; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    vmv4r.v v28, v8
 ; CHECK-NEXT:    csrr a0, vlenb
 ; CHECK-NEXT:    slli a0, a0, 3
 ; CHECK-NEXT:    add a0, sp, a0
 ; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT:    vmv4r.v v20, v8
-; CHECK-NEXT:    vmv4r.v v4, v24
-; CHECK-NEXT:    vmv8r.v v8, v16
+; CHECK-NEXT:    vmv4r.v v4, v8
+; CHECK-NEXT:    vmv8r.v v8, v24
 ; CHECK-NEXT:    vmv8r.v v16, v0
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    li a1, 24
-; CHECK-NEXT:    mul a0, a0, a1
+; CHECK-NEXT:    slli a0, a0, 4
 ; CHECK-NEXT:    add sp, sp, a0
 ; CHECK-NEXT:    .cfi_def_cfa sp, 16
 ; CHECK-NEXT:    addi sp, sp, 16
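
The prologue and epilogue changes in this hunk track the smaller spill area: the old code reserved 24 * vlenb bytes (three 8-register spill slots, hence the li a1, 24 / mul pair), while the new code needs only 16 * vlenb (two slots, a single slli by 4), since the first vcompress result now stays live in v24 instead of round-tripping through the stack. A quick check of that arithmetic, assuming VLEN = 128 bits so that vlenb = 16 (an illustrative value only):

```python
vlenb = 16            # assumption: VLEN = 128 bits -> vlenb = 16 bytes
slot = 8 * vlenb      # one spill slot holds an 8-register group (vs8r.v)
assert 3 * slot == 24 * vlenb  # old frame: three slots (li a1, 24; mul)
assert 2 * slot == 16 * vlenb  # new frame: two slots (slli a0, a0, 4)
```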
@@ -350,15 +343,16 @@ ret {<vscale x 4 x float>, <vscale x 4 x float>} %retval
 define {<vscale x 2 x double>, <vscale x 2 x double>} @vector_deinterleave_nxv2f64_nxv4f64(<vscale x 4 x double> %vec) {
 ; CHECK-LABEL: vector_deinterleave_nxv2f64_nxv4f64:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a0, 85
+; CHECK-NEXT:    vsetvli a1, zero, e8, m1, ta, ma
+; CHECK-NEXT:    vmv.v.x v16, a0
+; CHECK-NEXT:    li a0, 170
+; CHECK-NEXT:    vmv.v.x v17, a0
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vid.v v12
-; CHECK-NEXT:    vand.vi v12, v12, 1
-; CHECK-NEXT:    vmseq.vi v16, v12, 0
 ; CHECK-NEXT:    vcompress.vm v12, v8, v16
-; CHECK-NEXT:    vmnot.m v14, v16
-; CHECK-NEXT:    vcompress.vm v16, v8, v14
+; CHECK-NEXT:    vcompress.vm v20, v8, v17
 ; CHECK-NEXT:    vmv2r.v v8, v12
-; CHECK-NEXT:    vmv2r.v v10, v16
+; CHECK-NEXT:    vmv2r.v v10, v20
 ; CHECK-NEXT:    ret
   %retval = call {<vscale x 2 x double>, <vscale x 2 x double>} @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %vec)
   ret {<vscale x 2 x double>, <vscale x 2 x double>} %retval
@@ -423,50 +417,41 @@ define {<vscale x 8 x double>, <vscale x 8 x double>} @vector_deinterleave_nxv8f
 ; CHECK-NEXT:    addi sp, sp, -16
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    li a1, 24
-; CHECK-NEXT:    mul a0, a0, a1
-; CHECK-NEXT:    sub sp, sp, a0
-; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
-; CHECK-NEXT:    csrr a0, vlenb
 ; CHECK-NEXT:    slli a0, a0, 4
-; CHECK-NEXT:    add a0, sp, a0
-; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT:    sub sp, sp, a0
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; CHECK-NEXT:    li a0, 85
+; CHECK-NEXT:    vsetvli a1, zero, e8, mf8, ta, ma
+; CHECK-NEXT:    vmv.v.x v7, a0
+; CHECK-NEXT:    li a0, 170
+; CHECK-NEXT:    vmv.v.x v6, a0
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vid.v v16
-; CHECK-NEXT:    vand.vi v24, v16, 1
-; CHECK-NEXT:    vmseq.vi v16, v24, 0
-; CHECK-NEXT:    vcompress.vm v24, v8, v16
+; CHECK-NEXT:    vcompress.vm v24, v8, v7
+; CHECK-NEXT:    vmv1r.v v28, v7
+; CHECK-NEXT:    vmv1r.v v29, v6
+; CHECK-NEXT:    vcompress.vm v0, v8, v29
+; CHECK-NEXT:    vcompress.vm v8, v16, v28
 ; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
-; CHECK-NEXT:    vmnot.m v17, v16
-; CHECK-NEXT:    vcompress.vm v0, v8, v17
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 4
-; CHECK-NEXT:    add a0, sp, a0
-; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT:    vcompress.vm v24, v8, v16
+; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT:    vcompress.vm v8, v16, v29
 ; CHECK-NEXT:    csrr a0, vlenb
 ; CHECK-NEXT:    slli a0, a0, 3
 ; CHECK-NEXT:    add a0, sp, a0
 ; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
-; CHECK-NEXT:    vcompress.vm v24, v8, v17
+; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    vmv4r.v v28, v8
 ; CHECK-NEXT:    csrr a0, vlenb
 ; CHECK-NEXT:    slli a0, a0, 3
 ; CHECK-NEXT:    add a0, sp, a0
 ; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT:    vmv4r.v v20, v8
-; CHECK-NEXT:    vmv4r.v v4, v24
-; CHECK-NEXT:    vmv8r.v v8, v16
+; CHECK-NEXT:    vmv4r.v v4, v8
+; CHECK-NEXT:    vmv8r.v v8, v24
 ; CHECK-NEXT:    vmv8r.v v16, v0
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    li a1, 24
-; CHECK-NEXT:    mul a0, a0, a1
+; CHECK-NEXT:    slli a0, a0, 4
 ; CHECK-NEXT:    add sp, sp, a0
 ; CHECK-NEXT:    .cfi_def_cfa sp, 16
 ; CHECK-NEXT:    addi sp, sp, 16