@@ -138,13 +138,13 @@ define void @store_i64_stride6_vf2(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
 ; AVX512-NEXT:    vinserti128 $1, (%rcx), %ymm1, %ymm1
 ; AVX512-NEXT:    vinserti128 $1, (%rsi), %ymm0, %ymm0
 ; AVX512-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512-NEXT:    vinserti32x4 $1, (%r9), %zmm2, %zmm1
-; AVX512-NEXT:    vpmovsxbq {{.*#+}} ymm2 = [5,7,9,11]
-; AVX512-NEXT:    vpermi2q %zmm1, %zmm0, %zmm2
-; AVX512-NEXT:    vpmovsxbq {{.*#+}} zmm3 = [0,2,4,6,8,10,1,3]
-; AVX512-NEXT:    vpermi2q %zmm1, %zmm0, %zmm3
-; AVX512-NEXT:    vmovdqa64 %zmm3, (%rax)
-; AVX512-NEXT:    vmovdqa %ymm2, 64(%rax)
+; AVX512-NEXT:    vinserti32x4 $1, (%r9), %zmm2, %zmm2
+; AVX512-NEXT:    vpmovsxbq {{.*#+}} ymm3 = [1,3,9,11]
+; AVX512-NEXT:    vpermi2q %zmm2, %zmm1, %zmm3
+; AVX512-NEXT:    vpmovsxbq {{.*#+}} zmm1 = [0,2,4,6,8,10,1,3]
+; AVX512-NEXT:    vpermi2q %zmm2, %zmm0, %zmm1
+; AVX512-NEXT:    vmovdqa %ymm3, 64(%rax)
+; AVX512-NEXT:    vmovdqa64 %zmm1, (%rax)
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
 ;
@@ -157,13 +157,13 @@ define void @store_i64_stride6_vf2(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
 ; AVX512-FCP-NEXT:    vinserti128 $1, (%rcx), %ymm1, %ymm1
 ; AVX512-FCP-NEXT:    vinserti128 $1, (%rsi), %ymm0, %ymm0
 ; AVX512-FCP-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512-FCP-NEXT:    vinserti32x4 $1, (%r9), %zmm2, %zmm1
-; AVX512-FCP-NEXT:    vpmovsxbq {{.*#+}} ymm2 = [5,7,9,11]
-; AVX512-FCP-NEXT:    vpermi2q %zmm1, %zmm0, %zmm2
-; AVX512-FCP-NEXT:    vpmovsxbq {{.*#+}} zmm3 = [0,2,4,6,8,10,1,3]
-; AVX512-FCP-NEXT:    vpermi2q %zmm1, %zmm0, %zmm3
-; AVX512-FCP-NEXT:    vmovdqa64 %zmm3, (%rax)
-; AVX512-FCP-NEXT:    vmovdqa %ymm2, 64(%rax)
+; AVX512-FCP-NEXT:    vinserti32x4 $1, (%r9), %zmm2, %zmm2
+; AVX512-FCP-NEXT:    vpmovsxbq {{.*#+}} ymm3 = [1,3,9,11]
+; AVX512-FCP-NEXT:    vpermi2q %zmm2, %zmm1, %zmm3
+; AVX512-FCP-NEXT:    vpmovsxbq {{.*#+}} zmm1 = [0,2,4,6,8,10,1,3]
+; AVX512-FCP-NEXT:    vpermi2q %zmm2, %zmm0, %zmm1
+; AVX512-FCP-NEXT:    vmovdqa %ymm3, 64(%rax)
+; AVX512-FCP-NEXT:    vmovdqa64 %zmm1, (%rax)
 ; AVX512-FCP-NEXT:    vzeroupper
 ; AVX512-FCP-NEXT:    retq
 ;
@@ -176,13 +176,13 @@ define void @store_i64_stride6_vf2(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
 ; AVX512DQ-NEXT:    vinserti128 $1, (%rcx), %ymm1, %ymm1
 ; AVX512DQ-NEXT:    vinserti128 $1, (%rsi), %ymm0, %ymm0
 ; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512DQ-NEXT:    vinserti32x4 $1, (%r9), %zmm2, %zmm1
-; AVX512DQ-NEXT:    vpmovsxbq {{.*#+}} ymm2 = [5,7,9,11]
-; AVX512DQ-NEXT:    vpermi2q %zmm1, %zmm0, %zmm2
-; AVX512DQ-NEXT:    vpmovsxbq {{.*#+}} zmm3 = [0,2,4,6,8,10,1,3]
-; AVX512DQ-NEXT:    vpermi2q %zmm1, %zmm0, %zmm3
-; AVX512DQ-NEXT:    vmovdqa64 %zmm3, (%rax)
-; AVX512DQ-NEXT:    vmovdqa %ymm2, 64(%rax)
+; AVX512DQ-NEXT:    vinserti32x4 $1, (%r9), %zmm2, %zmm2
+; AVX512DQ-NEXT:    vpmovsxbq {{.*#+}} ymm3 = [1,3,9,11]
+; AVX512DQ-NEXT:    vpermi2q %zmm2, %zmm1, %zmm3
+; AVX512DQ-NEXT:    vpmovsxbq {{.*#+}} zmm1 = [0,2,4,6,8,10,1,3]
+; AVX512DQ-NEXT:    vpermi2q %zmm2, %zmm0, %zmm1
+; AVX512DQ-NEXT:    vmovdqa %ymm3, 64(%rax)
+; AVX512DQ-NEXT:    vmovdqa64 %zmm1, (%rax)
 ; AVX512DQ-NEXT:    vzeroupper
 ; AVX512DQ-NEXT:    retq
 ;
@@ -195,13 +195,13 @@ define void @store_i64_stride6_vf2(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
 ; AVX512DQ-FCP-NEXT:    vinserti128 $1, (%rcx), %ymm1, %ymm1
 ; AVX512DQ-FCP-NEXT:    vinserti128 $1, (%rsi), %ymm0, %ymm0
 ; AVX512DQ-FCP-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512DQ-FCP-NEXT:    vinserti32x4 $1, (%r9), %zmm2, %zmm1
-; AVX512DQ-FCP-NEXT:    vpmovsxbq {{.*#+}} ymm2 = [5,7,9,11]
-; AVX512DQ-FCP-NEXT:    vpermi2q %zmm1, %zmm0, %zmm2
-; AVX512DQ-FCP-NEXT:    vpmovsxbq {{.*#+}} zmm3 = [0,2,4,6,8,10,1,3]
-; AVX512DQ-FCP-NEXT:    vpermi2q %zmm1, %zmm0, %zmm3
-; AVX512DQ-FCP-NEXT:    vmovdqa64 %zmm3, (%rax)
-; AVX512DQ-FCP-NEXT:    vmovdqa %ymm2, 64(%rax)
+; AVX512DQ-FCP-NEXT:    vinserti32x4 $1, (%r9), %zmm2, %zmm2
+; AVX512DQ-FCP-NEXT:    vpmovsxbq {{.*#+}} ymm3 = [1,3,9,11]
+; AVX512DQ-FCP-NEXT:    vpermi2q %zmm2, %zmm1, %zmm3
+; AVX512DQ-FCP-NEXT:    vpmovsxbq {{.*#+}} zmm1 = [0,2,4,6,8,10,1,3]
+; AVX512DQ-FCP-NEXT:    vpermi2q %zmm2, %zmm0, %zmm1
+; AVX512DQ-FCP-NEXT:    vmovdqa %ymm3, 64(%rax)
+; AVX512DQ-FCP-NEXT:    vmovdqa64 %zmm1, (%rax)
 ; AVX512DQ-FCP-NEXT:    vzeroupper
 ; AVX512DQ-FCP-NEXT:    retq
 ;
@@ -214,13 +214,13 @@ define void @store_i64_stride6_vf2(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
 ; AVX512BW-NEXT:    vinserti128 $1, (%rcx), %ymm1, %ymm1
 ; AVX512BW-NEXT:    vinserti128 $1, (%rsi), %ymm0, %ymm0
 ; AVX512BW-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vinserti32x4 $1, (%r9), %zmm2, %zmm1
-; AVX512BW-NEXT:    vpmovsxbq {{.*#+}} ymm2 = [5,7,9,11]
-; AVX512BW-NEXT:    vpermi2q %zmm1, %zmm0, %zmm2
-; AVX512BW-NEXT:    vpmovsxbq {{.*#+}} zmm3 = [0,2,4,6,8,10,1,3]
-; AVX512BW-NEXT:    vpermi2q %zmm1, %zmm0, %zmm3
-; AVX512BW-NEXT:    vmovdqa64 %zmm3, (%rax)
-; AVX512BW-NEXT:    vmovdqa %ymm2, 64(%rax)
+; AVX512BW-NEXT:    vinserti32x4 $1, (%r9), %zmm2, %zmm2
+; AVX512BW-NEXT:    vpmovsxbq {{.*#+}} ymm3 = [1,3,9,11]
+; AVX512BW-NEXT:    vpermi2q %zmm2, %zmm1, %zmm3
+; AVX512BW-NEXT:    vpmovsxbq {{.*#+}} zmm1 = [0,2,4,6,8,10,1,3]
+; AVX512BW-NEXT:    vpermi2q %zmm2, %zmm0, %zmm1
+; AVX512BW-NEXT:    vmovdqa %ymm3, 64(%rax)
+; AVX512BW-NEXT:    vmovdqa64 %zmm1, (%rax)
 ; AVX512BW-NEXT:    vzeroupper
 ; AVX512BW-NEXT:    retq
 ;
@@ -233,13 +233,13 @@ define void @store_i64_stride6_vf2(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
 ; AVX512BW-FCP-NEXT:    vinserti128 $1, (%rcx), %ymm1, %ymm1
 ; AVX512BW-FCP-NEXT:    vinserti128 $1, (%rsi), %ymm0, %ymm0
 ; AVX512BW-FCP-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512BW-FCP-NEXT:    vinserti32x4 $1, (%r9), %zmm2, %zmm1
-; AVX512BW-FCP-NEXT:    vpmovsxbq {{.*#+}} ymm2 = [5,7,9,11]
-; AVX512BW-FCP-NEXT:    vpermi2q %zmm1, %zmm0, %zmm2
-; AVX512BW-FCP-NEXT:    vpmovsxbq {{.*#+}} zmm3 = [0,2,4,6,8,10,1,3]
-; AVX512BW-FCP-NEXT:    vpermi2q %zmm1, %zmm0, %zmm3
-; AVX512BW-FCP-NEXT:    vmovdqa64 %zmm3, (%rax)
-; AVX512BW-FCP-NEXT:    vmovdqa %ymm2, 64(%rax)
+; AVX512BW-FCP-NEXT:    vinserti32x4 $1, (%r9), %zmm2, %zmm2
+; AVX512BW-FCP-NEXT:    vpmovsxbq {{.*#+}} ymm3 = [1,3,9,11]
+; AVX512BW-FCP-NEXT:    vpermi2q %zmm2, %zmm1, %zmm3
+; AVX512BW-FCP-NEXT:    vpmovsxbq {{.*#+}} zmm1 = [0,2,4,6,8,10,1,3]
+; AVX512BW-FCP-NEXT:    vpermi2q %zmm2, %zmm0, %zmm1
+; AVX512BW-FCP-NEXT:    vmovdqa %ymm3, 64(%rax)
+; AVX512BW-FCP-NEXT:    vmovdqa64 %zmm1, (%rax)
 ; AVX512BW-FCP-NEXT:    vzeroupper
 ; AVX512BW-FCP-NEXT:    retq
 ;
@@ -252,13 +252,13 @@ define void @store_i64_stride6_vf2(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
 ; AVX512DQ-BW-NEXT:    vinserti128 $1, (%rcx), %ymm1, %ymm1
 ; AVX512DQ-BW-NEXT:    vinserti128 $1, (%rsi), %ymm0, %ymm0
 ; AVX512DQ-BW-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512DQ-BW-NEXT:    vinserti32x4 $1, (%r9), %zmm2, %zmm1
-; AVX512DQ-BW-NEXT:    vpmovsxbq {{.*#+}} ymm2 = [5,7,9,11]
-; AVX512DQ-BW-NEXT:    vpermi2q %zmm1, %zmm0, %zmm2
-; AVX512DQ-BW-NEXT:    vpmovsxbq {{.*#+}} zmm3 = [0,2,4,6,8,10,1,3]
-; AVX512DQ-BW-NEXT:    vpermi2q %zmm1, %zmm0, %zmm3
-; AVX512DQ-BW-NEXT:    vmovdqa64 %zmm3, (%rax)
-; AVX512DQ-BW-NEXT:    vmovdqa %ymm2, 64(%rax)
+; AVX512DQ-BW-NEXT:    vinserti32x4 $1, (%r9), %zmm2, %zmm2
+; AVX512DQ-BW-NEXT:    vpmovsxbq {{.*#+}} ymm3 = [1,3,9,11]
+; AVX512DQ-BW-NEXT:    vpermi2q %zmm2, %zmm1, %zmm3
+; AVX512DQ-BW-NEXT:    vpmovsxbq {{.*#+}} zmm1 = [0,2,4,6,8,10,1,3]
+; AVX512DQ-BW-NEXT:    vpermi2q %zmm2, %zmm0, %zmm1
+; AVX512DQ-BW-NEXT:    vmovdqa %ymm3, 64(%rax)
+; AVX512DQ-BW-NEXT:    vmovdqa64 %zmm1, (%rax)
 ; AVX512DQ-BW-NEXT:    vzeroupper
 ; AVX512DQ-BW-NEXT:    retq
 ;
@@ -271,13 +271,13 @@ define void @store_i64_stride6_vf2(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
 ; AVX512DQ-BW-FCP-NEXT:    vinserti128 $1, (%rcx), %ymm1, %ymm1
 ; AVX512DQ-BW-FCP-NEXT:    vinserti128 $1, (%rsi), %ymm0, %ymm0
 ; AVX512DQ-BW-FCP-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512DQ-BW-FCP-NEXT:    vinserti32x4 $1, (%r9), %zmm2, %zmm1
-; AVX512DQ-BW-FCP-NEXT:    vpmovsxbq {{.*#+}} ymm2 = [5,7,9,11]
-; AVX512DQ-BW-FCP-NEXT:    vpermi2q %zmm1, %zmm0, %zmm2
-; AVX512DQ-BW-FCP-NEXT:    vpmovsxbq {{.*#+}} zmm3 = [0,2,4,6,8,10,1,3]
-; AVX512DQ-BW-FCP-NEXT:    vpermi2q %zmm1, %zmm0, %zmm3
-; AVX512DQ-BW-FCP-NEXT:    vmovdqa64 %zmm3, (%rax)
-; AVX512DQ-BW-FCP-NEXT:    vmovdqa %ymm2, 64(%rax)
+; AVX512DQ-BW-FCP-NEXT:    vinserti32x4 $1, (%r9), %zmm2, %zmm2
+; AVX512DQ-BW-FCP-NEXT:    vpmovsxbq {{.*#+}} ymm3 = [1,3,9,11]
+; AVX512DQ-BW-FCP-NEXT:    vpermi2q %zmm2, %zmm1, %zmm3
+; AVX512DQ-BW-FCP-NEXT:    vpmovsxbq {{.*#+}} zmm1 = [0,2,4,6,8,10,1,3]
+; AVX512DQ-BW-FCP-NEXT:    vpermi2q %zmm2, %zmm0, %zmm1
+; AVX512DQ-BW-FCP-NEXT:    vmovdqa %ymm3, 64(%rax)
+; AVX512DQ-BW-FCP-NEXT:    vmovdqa64 %zmm1, (%rax)
 ; AVX512DQ-BW-FCP-NEXT:    vzeroupper
 ; AVX512DQ-BW-FCP-NEXT:    retq
   %in.vec0 = load <2 x i64>, ptr %in.vecptr0, align 64
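The new check lines hinge on vpermi2q's two-table indexing: with the index vector [1,3,9,11], the low four output qwords become a[1], a[3], b[1], b[3] of the two source registers, which the narrower ymm store to 64(%rax) then writes out. Below is a minimal C sketch of the documented 512-bit qword semantics of vpermi2q (an illustration only, not part of this commit; the function name is ours).

#include <stdint.h>

/* On entry, dst holds the 8 qword indices, matching AT&T
 * "vpermi2q %zmm_b, %zmm_a, %zmm_dst"; a is the first source table
 * (index bit 3 clear) and b the second (index bit 3 set). */
static void vpermi2q_qwords(uint64_t dst[8], const uint64_t a[8],
                            const uint64_t b[8]) {
    uint64_t idx[8];
    for (int i = 0; i < 8; i++)
        idx[i] = dst[i];                  /* indices are consumed from dst */
    for (int i = 0; i < 8; i++) {
        unsigned lane = idx[i] & 7;       /* bits [2:0]: lane within a table */
        dst[i] = (idx[i] & 8) ? b[lane] : a[lane];  /* bit 3: table select */
    }
}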