@@ -85,11 +85,11 @@ define {<2 x i64>, <2 x i64>} @vector_deinterleave_v2i64_v4i64(<4 x i64> %vec) {
 ;
 ; ZIP-LABEL: vector_deinterleave_v2i64_v4i64:
 ; ZIP: # %bb.0:
-; ZIP-NEXT: vsetivli zero, 2, e64, m2, ta, ma
-; ZIP-NEXT: vslidedown.vi v10, v8, 2
 ; ZIP-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; ZIP-NEXT: ri.vzipodd.vv v9, v8, v10
-; ZIP-NEXT: vslideup.vi v8, v10, 1
+; ZIP-NEXT: ri.vunzip2a.vv v10, v8, v9
+; ZIP-NEXT: ri.vunzip2b.vv v11, v8, v9
+; ZIP-NEXT: vmv.v.v v8, v10
+; ZIP-NEXT: vmv.v.v v9, v11
 ; ZIP-NEXT: ret
   %retval = call {<2 x i64>, <2 x i64>} @llvm.vector.deinterleave2.v4i64(<4 x i64> %vec)
   ret {<2 x i64>, <2 x i64>} %retval
@@ -129,62 +129,51 @@ define {<4 x i64>, <4 x i64>} @vector_deinterleave_v4i64_v8i64(<8 x i64> %vec) {
 ; ZIP-LABEL: vector_deinterleave_v4i64_v8i64:
 ; ZIP: # %bb.0:
 ; ZIP-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; ZIP-NEXT: vslidedown.vi v12, v8, 1
-; ZIP-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
-; ZIP-NEXT: vmv.v.i v0, 2
-; ZIP-NEXT: vmv.v.i v14, 12
-; ZIP-NEXT: vsetivli zero, 4, e64, m4, ta, ma
-; ZIP-NEXT: vslidedown.vi v16, v8, 4
-; ZIP-NEXT: vsetivli zero, 2, e64, m2, ta, ma
-; ZIP-NEXT: vslidedown.vi v10, v8, 2
-; ZIP-NEXT: vsetivli zero, 4, e64, m2, ta, mu
-; ZIP-NEXT: vslidedown.vi v12, v8, 2, v0.t
-; ZIP-NEXT: ri.vzip2a.vv v18, v8, v10
-; ZIP-NEXT: vsetivli zero, 2, e64, m2, ta, ma
-; ZIP-NEXT: vslidedown.vi v8, v16, 2
-; ZIP-NEXT: vmv1r.v v0, v14
-; ZIP-NEXT: vsetivli zero, 4, e64, m2, ta, mu
-; ZIP-NEXT: ri.vzip2a.vv v12, v16, v8, v0.t
-; ZIP-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
-; ZIP-NEXT: vmv.v.i v0, 8
-; ZIP-NEXT: vsetivli zero, 4, e64, m2, ta, mu
-; ZIP-NEXT: vslideup.vi v8, v16, 2
-; ZIP-NEXT: vslideup.vi v8, v16, 1, v0.t
-; ZIP-NEXT: vmv1r.v v0, v14
-; ZIP-NEXT: vmerge.vvm v8, v18, v8, v0
-; ZIP-NEXT: vmv2r.v v10, v12
+; ZIP-NEXT: ri.vunzip2a.vv v12, v8, v10
+; ZIP-NEXT: ri.vunzip2b.vv v14, v8, v10
+; ZIP-NEXT: vmv.v.v v8, v12
+; ZIP-NEXT: vmv.v.v v10, v14
 ; ZIP-NEXT: ret
   %retval = call {<4 x i64>, <4 x i64>} @llvm.vector.deinterleave2.v8i64(<8 x i64> %vec)
   ret {<4 x i64>, <4 x i64>} %retval
 }
 
 define {<8 x i64>, <8 x i64>} @vector_deinterleave_v8i64_v16i64(<16 x i64> %vec) {
-; CHECK-LABEL: vector_deinterleave_v8i64_v16i64:
-; CHECK: # %bb.0:
-; CHECK-NEXT: li a0, 85
-; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT: vmv.v.i v0, -16
-; CHECK-NEXT: vid.v v16
-; CHECK-NEXT: vsetivli zero, 8, e64, m8, ta, ma
-; CHECK-NEXT: vslidedown.vi v24, v8, 8
-; CHECK-NEXT: vmv.s.x v12, a0
-; CHECK-NEXT: li a0, 170
-; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT: vadd.vv v20, v16, v16
-; CHECK-NEXT: vmv.s.x v21, a0
-; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma
-; CHECK-NEXT: vcompress.vm v16, v8, v12
-; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT: vadd.vi v22, v20, -8
-; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma
-; CHECK-NEXT: vcompress.vm v12, v8, v21
-; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT: vadd.vi v8, v20, -7
-; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu
-; CHECK-NEXT: vrgatherei16.vv v16, v24, v22, v0.t
-; CHECK-NEXT: vrgatherei16.vv v12, v24, v8, v0.t
-; CHECK-NEXT: vmv.v.v v8, v16
-; CHECK-NEXT: ret
+; V-LABEL: vector_deinterleave_v8i64_v16i64:
+; V: # %bb.0:
+; V-NEXT: li a0, 85
+; V-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; V-NEXT: vmv.v.i v0, -16
+; V-NEXT: vid.v v16
+; V-NEXT: vsetivli zero, 8, e64, m8, ta, ma
+; V-NEXT: vslidedown.vi v24, v8, 8
+; V-NEXT: vmv.s.x v12, a0
+; V-NEXT: li a0, 170
+; V-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; V-NEXT: vadd.vv v20, v16, v16
+; V-NEXT: vmv.s.x v21, a0
+; V-NEXT: vsetvli zero, zero, e64, m4, ta, ma
+; V-NEXT: vcompress.vm v16, v8, v12
+; V-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; V-NEXT: vadd.vi v22, v20, -8
+; V-NEXT: vsetvli zero, zero, e64, m4, ta, ma
+; V-NEXT: vcompress.vm v12, v8, v21
+; V-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; V-NEXT: vadd.vi v8, v20, -7
+; V-NEXT: vsetvli zero, zero, e64, m4, ta, mu
+; V-NEXT: vrgatherei16.vv v16, v24, v22, v0.t
+; V-NEXT: vrgatherei16.vv v12, v24, v8, v0.t
+; V-NEXT: vmv.v.v v8, v16
+; V-NEXT: ret
+;
+; ZIP-LABEL: vector_deinterleave_v8i64_v16i64:
+; ZIP: # %bb.0:
+; ZIP-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; ZIP-NEXT: ri.vunzip2a.vv v16, v8, v12
+; ZIP-NEXT: ri.vunzip2b.vv v20, v8, v12
+; ZIP-NEXT: vmv.v.v v8, v16
+; ZIP-NEXT: vmv.v.v v12, v20
+; ZIP-NEXT: ret
   %retval = call {<8 x i64>, <8 x i64>} @llvm.vector.deinterleave2.v16i64(<16 x i64> %vec)
   ret {<8 x i64>, <8 x i64>} %retval
 }
@@ -498,11 +487,11 @@ define {<2 x double>, <2 x double>} @vector_deinterleave_v2f64_v4f64(<4 x double
 ;
 ; ZIP-LABEL: vector_deinterleave_v2f64_v4f64:
 ; ZIP: # %bb.0:
-; ZIP-NEXT: vsetivli zero, 2, e64, m2, ta, ma
-; ZIP-NEXT: vslidedown.vi v10, v8, 2
 ; ZIP-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; ZIP-NEXT: ri.vzipodd.vv v9, v8, v10
-; ZIP-NEXT: vslideup.vi v8, v10, 1
+; ZIP-NEXT: ri.vunzip2a.vv v10, v8, v9
+; ZIP-NEXT: ri.vunzip2b.vv v12, v8, v9
+; ZIP-NEXT: vmv.v.v v8, v10
+; ZIP-NEXT: vmv.v.v v9, v12
 ; ZIP-NEXT: ret
   %retval = call {<2 x double>, <2 x double>} @llvm.vector.deinterleave2.v4f64(<4 x double> %vec)
   ret {<2 x double>, <2 x double>} %retval
@@ -541,31 +530,11 @@ define {<4 x double>, <4 x double>} @vector_deinterleave_v4f64_v8f64(<8 x double
 ;
 ; ZIP-LABEL: vector_deinterleave_v4f64_v8f64:
 ; ZIP: # %bb.0:
-; ZIP-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
-; ZIP-NEXT: vmv.v.i v0, 8
-; ZIP-NEXT: vsetivli zero, 4, e64, m4, ta, ma
-; ZIP-NEXT: vslidedown.vi v16, v8, 4
-; ZIP-NEXT: vsetivli zero, 2, e64, m2, ta, ma
-; ZIP-NEXT: vslidedown.vi v12, v8, 2
-; ZIP-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
-; ZIP-NEXT: vmv.v.i v10, 12
-; ZIP-NEXT: vsetivli zero, 4, e64, m2, ta, mu
-; ZIP-NEXT: vslideup.vi v14, v16, 2
-; ZIP-NEXT: vslideup.vi v14, v16, 1, v0.t
-; ZIP-NEXT: ri.vzip2a.vv v18, v8, v12
-; ZIP-NEXT: vmv1r.v v0, v10
-; ZIP-NEXT: vmerge.vvm v12, v18, v14, v0
-; ZIP-NEXT: vsetivli zero, 2, e64, m2, ta, ma
-; ZIP-NEXT: vslidedown.vi v14, v16, 2
-; ZIP-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
-; ZIP-NEXT: vmv.v.i v0, 2
-; ZIP-NEXT: vsetivli zero, 4, e64, m2, ta, mu
-; ZIP-NEXT: ri.vzip2a.vv v18, v16, v14
-; ZIP-NEXT: vslidedown.vi v14, v8, 1
-; ZIP-NEXT: vslidedown.vi v14, v8, 2, v0.t
-; ZIP-NEXT: vmv1r.v v0, v10
-; ZIP-NEXT: vmerge.vvm v10, v14, v18, v0
-; ZIP-NEXT: vmv2r.v v8, v12
+; ZIP-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; ZIP-NEXT: ri.vunzip2a.vv v12, v8, v10
+; ZIP-NEXT: ri.vunzip2b.vv v16, v8, v10
+; ZIP-NEXT: vmv.v.v v8, v12
+; ZIP-NEXT: vmv.v.v v10, v16
 ; ZIP-NEXT: ret
   %retval = call {<4 x double>, <4 x double>} @llvm.vector.deinterleave2.v8f64(<8 x double> %vec)
   ret {<4 x double>, <4 x double>} %retval