@@ -73,12 +73,13 @@ define {<vscale x 2 x i64>, <vscale x 2 x i64>} @vector_deinterleave_nxv2i64_nxv
7373; CHECK: # %bb.0:
7474; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma
7575; CHECK-NEXT: vid.v v12
76- ; CHECK-NEXT: vadd.vv v16, v12, v12
77- ; CHECK-NEXT: vrgather.vv v12, v8, v16
78- ; CHECK-NEXT: vadd.vi v16, v16, 1
79- ; CHECK-NEXT: vrgather.vv v20, v8, v16
76+ ; CHECK-NEXT: vand.vi v12, v12, 1
77+ ; CHECK-NEXT: vmseq.vi v16, v12, 0
78+ ; CHECK-NEXT: vcompress.vm v12, v8, v16
79+ ; CHECK-NEXT: vmnot.m v14, v16
80+ ; CHECK-NEXT: vcompress.vm v16, v8, v14
8081; CHECK-NEXT: vmv2r.v v8, v12
81- ; CHECK-NEXT: vmv2r.v v10, v20
82+ ; CHECK-NEXT: vmv2r.v v10, v16
8283; CHECK-NEXT: ret
8384%retval = call {<vscale x 2 x i64 >, <vscale x 2 x i64 >} @llvm.vector.deinterleave2.nxv4i64 (<vscale x 4 x i64 > %vec )
8485ret {<vscale x 2 x i64 >, <vscale x 2 x i64 >} %retval
@@ -89,12 +90,13 @@ define {<vscale x 4 x i64>, <vscale x 4 x i64>} @vector_deinterleave_nxv4i64_nxv
8990; CHECK: # %bb.0:
9091; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma
9192; CHECK-NEXT: vid.v v16
92- ; CHECK-NEXT: vadd.vv v24, v16, v16
93- ; CHECK-NEXT: vrgather.vv v16, v8, v24
94- ; CHECK-NEXT: vadd.vi v24, v24, 1
95- ; CHECK-NEXT: vrgather.vv v0, v8, v24
93+ ; CHECK-NEXT: vand.vi v16, v16, 1
94+ ; CHECK-NEXT: vmseq.vi v24, v16, 0
95+ ; CHECK-NEXT: vcompress.vm v16, v8, v24
96+ ; CHECK-NEXT: vmnot.m v20, v24
97+ ; CHECK-NEXT: vcompress.vm v24, v8, v20
9698; CHECK-NEXT: vmv4r.v v8, v16
97- ; CHECK-NEXT: vmv4r.v v12, v0
99+ ; CHECK-NEXT: vmv4r.v v12, v24
98100; CHECK-NEXT: ret
99101%retval = call {<vscale x 4 x i64 >, <vscale x 4 x i64 >} @llvm.vector.deinterleave2.nxv8i64 (<vscale x 8 x i64 > %vec )
100102ret {<vscale x 4 x i64 >, <vscale x 4 x i64 >} %retval
@@ -180,66 +182,50 @@ define {<vscale x 8 x i64>, <vscale x 8 x i64>} @vector_deinterleave_nxv8i64_nxv
180182; CHECK-NEXT: addi sp, sp, -16
181183; CHECK-NEXT: .cfi_def_cfa_offset 16
182184; CHECK-NEXT: csrr a0, vlenb
183- ; CHECK-NEXT: slli a0, a0, 5
184- ; CHECK-NEXT: sub sp, sp, a0
185- ; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
186- ; CHECK-NEXT: csrr a0, vlenb
187- ; CHECK-NEXT: li a1, 24
188- ; CHECK-NEXT: mul a0, a0, a1
189- ; CHECK-NEXT: add a0, sp, a0
190- ; CHECK-NEXT: addi a0, a0, 16
191- ; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
192- ; CHECK-NEXT: vmv8r.v v24, v8
193- ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma
194- ; CHECK-NEXT: vid.v v8
195- ; CHECK-NEXT: vadd.vv v0, v8, v8
196- ; CHECK-NEXT: vrgather.vv v8, v24, v0
197- ; CHECK-NEXT: csrr a0, vlenb
198- ; CHECK-NEXT: slli a0, a0, 3
199- ; CHECK-NEXT: add a0, sp, a0
200- ; CHECK-NEXT: addi a0, a0, 16
201- ; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
202- ; CHECK-NEXT: csrr a0, vlenb
203185; CHECK-NEXT: li a1, 24
204186; CHECK-NEXT: mul a0, a0, a1
205- ; CHECK-NEXT: add a0, sp, a0
206- ; CHECK-NEXT: addi a0, a0, 16
207- ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
208- ; CHECK-NEXT: vrgather.vv v16, v8, v0
187+ ; CHECK-NEXT: sub sp, sp, a0
188+ ; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
209189; CHECK-NEXT: csrr a0, vlenb
210190; CHECK-NEXT: slli a0, a0, 4
211191; CHECK-NEXT: add a0, sp, a0
212192; CHECK-NEXT: addi a0, a0, 16
213193; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
214- ; CHECK-NEXT: vadd.vi v8, v0, 1
215- ; CHECK-NEXT: vrgather.vv v0, v24, v8
216- ; CHECK-NEXT: csrr a0, vlenb
217- ; CHECK-NEXT: li a1, 24
218- ; CHECK-NEXT: mul a0, a0, a1
219- ; CHECK-NEXT: add a0, sp, a0
220- ; CHECK-NEXT: addi a0, a0, 16
221- ; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
222- ; CHECK-NEXT: vrgather.vv v16, v24, v8
194+ ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma
195+ ; CHECK-NEXT: vid.v v16
196+ ; CHECK-NEXT: vand.vi v24, v16, 1
197+ ; CHECK-NEXT: vmseq.vi v16, v24, 0
198+ ; CHECK-NEXT: vcompress.vm v24, v8, v16
223199; CHECK-NEXT: addi a0, sp, 16
224- ; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
200+ ; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
201+ ; CHECK-NEXT: vmnot.m v17, v16
202+ ; CHECK-NEXT: vcompress.vm v0, v8, v17
225203; CHECK-NEXT: csrr a0, vlenb
226204; CHECK-NEXT: slli a0, a0, 4
227205; CHECK-NEXT: add a0, sp, a0
228206; CHECK-NEXT: addi a0, a0, 16
229207; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
208+ ; CHECK-NEXT: vcompress.vm v24, v8, v16
209+ ; CHECK-NEXT: csrr a0, vlenb
210+ ; CHECK-NEXT: slli a0, a0, 3
211+ ; CHECK-NEXT: add a0, sp, a0
212+ ; CHECK-NEXT: addi a0, a0, 16
213+ ; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
214+ ; CHECK-NEXT: vcompress.vm v24, v8, v17
230215; CHECK-NEXT: csrr a0, vlenb
231216; CHECK-NEXT: slli a0, a0, 3
232217; CHECK-NEXT: add a0, sp, a0
233218; CHECK-NEXT: addi a0, a0, 16
219+ ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
220+ ; CHECK-NEXT: addi a0, sp, 16
234221; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
235222; CHECK-NEXT: vmv4r.v v20, v8
236- ; CHECK-NEXT: addi a0, sp, 16
237- ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
238- ; CHECK-NEXT: vmv4r.v v4, v8
223+ ; CHECK-NEXT: vmv4r.v v4, v24
239224; CHECK-NEXT: vmv8r.v v8, v16
240225; CHECK-NEXT: vmv8r.v v16, v0
241226; CHECK-NEXT: csrr a0, vlenb
242- ; CHECK-NEXT: slli a0, a0, 5
227+ ; CHECK-NEXT: li a1, 24
228+ ; CHECK-NEXT: mul a0, a0, a1
243229; CHECK-NEXT: add sp, sp, a0
244230; CHECK-NEXT: .cfi_def_cfa sp, 16
245231; CHECK-NEXT: addi sp, sp, 16
@@ -366,12 +352,13 @@ define {<vscale x 2 x double>, <vscale x 2 x double>} @vector_deinterleave_nxv2f
366352; CHECK: # %bb.0:
367353; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma
368354; CHECK-NEXT: vid.v v12
369- ; CHECK-NEXT: vadd.vv v16, v12, v12
370- ; CHECK-NEXT: vrgather.vv v12, v8, v16
371- ; CHECK-NEXT: vadd.vi v16, v16, 1
372- ; CHECK-NEXT: vrgather.vv v20, v8, v16
355+ ; CHECK-NEXT: vand.vi v12, v12, 1
356+ ; CHECK-NEXT: vmseq.vi v16, v12, 0
357+ ; CHECK-NEXT: vcompress.vm v12, v8, v16
358+ ; CHECK-NEXT: vmnot.m v14, v16
359+ ; CHECK-NEXT: vcompress.vm v16, v8, v14
373360; CHECK-NEXT: vmv2r.v v8, v12
374- ; CHECK-NEXT: vmv2r.v v10, v20
361+ ; CHECK-NEXT: vmv2r.v v10, v16
375362; CHECK-NEXT: ret
376363%retval = call {<vscale x 2 x double >, <vscale x 2 x double >} @llvm.vector.deinterleave2.nxv4f64 (<vscale x 4 x double > %vec )
377364ret {<vscale x 2 x double >, <vscale x 2 x double >} %retval
@@ -436,66 +423,50 @@ define {<vscale x 8 x double>, <vscale x 8 x double>} @vector_deinterleave_nxv8f
436423; CHECK-NEXT: addi sp, sp, -16
437424; CHECK-NEXT: .cfi_def_cfa_offset 16
438425; CHECK-NEXT: csrr a0, vlenb
439- ; CHECK-NEXT: slli a0, a0, 5
440- ; CHECK-NEXT: sub sp, sp, a0
441- ; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
442- ; CHECK-NEXT: csrr a0, vlenb
443- ; CHECK-NEXT: li a1, 24
444- ; CHECK-NEXT: mul a0, a0, a1
445- ; CHECK-NEXT: add a0, sp, a0
446- ; CHECK-NEXT: addi a0, a0, 16
447- ; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
448- ; CHECK-NEXT: vmv8r.v v24, v8
449- ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma
450- ; CHECK-NEXT: vid.v v8
451- ; CHECK-NEXT: vadd.vv v0, v8, v8
452- ; CHECK-NEXT: vrgather.vv v8, v24, v0
453- ; CHECK-NEXT: csrr a0, vlenb
454- ; CHECK-NEXT: slli a0, a0, 3
455- ; CHECK-NEXT: add a0, sp, a0
456- ; CHECK-NEXT: addi a0, a0, 16
457- ; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
458- ; CHECK-NEXT: csrr a0, vlenb
459426; CHECK-NEXT: li a1, 24
460427; CHECK-NEXT: mul a0, a0, a1
461- ; CHECK-NEXT: add a0, sp, a0
462- ; CHECK-NEXT: addi a0, a0, 16
463- ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
464- ; CHECK-NEXT: vrgather.vv v16, v8, v0
428+ ; CHECK-NEXT: sub sp, sp, a0
429+ ; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
465430; CHECK-NEXT: csrr a0, vlenb
466431; CHECK-NEXT: slli a0, a0, 4
467432; CHECK-NEXT: add a0, sp, a0
468433; CHECK-NEXT: addi a0, a0, 16
469434; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
470- ; CHECK-NEXT: vadd.vi v8, v0, 1
471- ; CHECK-NEXT: vrgather.vv v0, v24, v8
472- ; CHECK-NEXT: csrr a0, vlenb
473- ; CHECK-NEXT: li a1, 24
474- ; CHECK-NEXT: mul a0, a0, a1
475- ; CHECK-NEXT: add a0, sp, a0
476- ; CHECK-NEXT: addi a0, a0, 16
477- ; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
478- ; CHECK-NEXT: vrgather.vv v16, v24, v8
435+ ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma
436+ ; CHECK-NEXT: vid.v v16
437+ ; CHECK-NEXT: vand.vi v24, v16, 1
438+ ; CHECK-NEXT: vmseq.vi v16, v24, 0
439+ ; CHECK-NEXT: vcompress.vm v24, v8, v16
479440; CHECK-NEXT: addi a0, sp, 16
480- ; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
441+ ; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
442+ ; CHECK-NEXT: vmnot.m v17, v16
443+ ; CHECK-NEXT: vcompress.vm v0, v8, v17
481444; CHECK-NEXT: csrr a0, vlenb
482445; CHECK-NEXT: slli a0, a0, 4
483446; CHECK-NEXT: add a0, sp, a0
484447; CHECK-NEXT: addi a0, a0, 16
485448; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
449+ ; CHECK-NEXT: vcompress.vm v24, v8, v16
450+ ; CHECK-NEXT: csrr a0, vlenb
451+ ; CHECK-NEXT: slli a0, a0, 3
452+ ; CHECK-NEXT: add a0, sp, a0
453+ ; CHECK-NEXT: addi a0, a0, 16
454+ ; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
455+ ; CHECK-NEXT: vcompress.vm v24, v8, v17
486456; CHECK-NEXT: csrr a0, vlenb
487457; CHECK-NEXT: slli a0, a0, 3
488458; CHECK-NEXT: add a0, sp, a0
489459; CHECK-NEXT: addi a0, a0, 16
460+ ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
461+ ; CHECK-NEXT: addi a0, sp, 16
490462; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
491463; CHECK-NEXT: vmv4r.v v20, v8
492- ; CHECK-NEXT: addi a0, sp, 16
493- ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
494- ; CHECK-NEXT: vmv4r.v v4, v8
464+ ; CHECK-NEXT: vmv4r.v v4, v24
495465; CHECK-NEXT: vmv8r.v v8, v16
496466; CHECK-NEXT: vmv8r.v v16, v0
497467; CHECK-NEXT: csrr a0, vlenb
498- ; CHECK-NEXT: slli a0, a0, 5
468+ ; CHECK-NEXT: li a1, 24
469+ ; CHECK-NEXT: mul a0, a0, a1
499470; CHECK-NEXT: add sp, sp, a0
500471; CHECK-NEXT: .cfi_def_cfa sp, 16
501472; CHECK-NEXT: addi sp, sp, 16
0 commit comments