@@ -79,24 +79,24 @@ define void @store_i64_stride7_vf2(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
7979; AVX2-NEXT: vmovaps (%r8), %xmm2
8080; AVX2-NEXT: vmovaps (%r9), %xmm3
8181; AVX2-NEXT: vmovaps (%r10), %xmm4
82- ; AVX2-NEXT: vinsertf128 $1, (%rsi), %ymm0, %ymm0
82+ ; AVX2-NEXT: vinsertf128 $1, (%rsi), %ymm0, %ymm5
8383; AVX2-NEXT: vinsertf128 $1, (%rcx), %ymm1, %ymm1
84- ; AVX2-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm5
85- ; AVX2-NEXT: vblendps {{.*#+}} ymm5 = ymm5 [0,1],ymm0[2,3],ymm5 [4,5,6,7]
86- ; AVX2-NEXT: vpermpd {{.*#+}} ymm5 = ymm5 [0,2,2,1]
84+ ; AVX2-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm6
85+ ; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm6 [0,1],ymm0[2,3],ymm6 [4,5,6,7]
86+ ; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0 [0,2,2,1]
8787; AVX2-NEXT: vbroadcastsd %xmm4, %ymm6
88- ; AVX2-NEXT: vblendps {{.*#+}} ymm5 = ymm5 [0,1,2,3],ymm6[4,5],ymm5 [6,7]
89- ; AVX2-NEXT: vunpckhpd {{.*#+}} ymm6 = ymm0 [1],ymm1[1],ymm0 [3],ymm1[3]
88+ ; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0 [0,1,2,3],ymm6[4,5],ymm0 [6,7]
89+ ; AVX2-NEXT: vunpckhpd {{.*#+}} ymm6 = ymm5 [1],ymm1[1],ymm5 [3],ymm1[3]
9090; AVX2-NEXT: vpermpd {{.*#+}} ymm6 = ymm6[2,1,3,3]
9191; AVX2-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm2
9292; AVX2-NEXT: vblendps {{.*#+}} ymm2 = ymm6[0,1,2,3,4,5],ymm2[6,7]
93- ; AVX2-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0 [0],ymm1[0],ymm0 [2],ymm1[2]
94- ; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0 [0,2,1,3]
95- ; AVX2-NEXT: vunpckhpd {{.*#+}} xmm1 = xmm3[1],xmm4[1]
96- ; AVX2-NEXT: vmovaps %xmm1 , 96(%rax)
97- ; AVX2-NEXT: vmovaps %ymm0 , (%rax)
93+ ; AVX2-NEXT: vunpcklpd {{.*#+}} ymm1 = ymm5 [0],ymm1[0],ymm5 [2],ymm1[2]
94+ ; AVX2-NEXT: vpermpd {{.*#+}} ymm1 = ymm1 [0,2,1,3]
95+ ; AVX2-NEXT: vunpckhpd {{.*#+}} xmm3 = xmm3[1],xmm4[1]
96+ ; AVX2-NEXT: vmovaps %xmm3 , 96(%rax)
97+ ; AVX2-NEXT: vmovaps %ymm1 , (%rax)
9898; AVX2-NEXT: vmovaps %ymm2, 64(%rax)
99- ; AVX2-NEXT: vmovaps %ymm5 , 32(%rax)
99+ ; AVX2-NEXT: vmovaps %ymm0 , 32(%rax)
100100; AVX2-NEXT: vzeroupper
101101; AVX2-NEXT: retq
102102;
@@ -109,24 +109,24 @@ define void @store_i64_stride7_vf2(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
109109; AVX2-FP-NEXT: vmovaps (%r8), %xmm2
110110; AVX2-FP-NEXT: vmovaps (%r9), %xmm3
111111; AVX2-FP-NEXT: vmovaps (%r10), %xmm4
112- ; AVX2-FP-NEXT: vinsertf128 $1, (%rsi), %ymm0, %ymm0
112+ ; AVX2-FP-NEXT: vinsertf128 $1, (%rsi), %ymm0, %ymm5
113113; AVX2-FP-NEXT: vinsertf128 $1, (%rcx), %ymm1, %ymm1
114- ; AVX2-FP-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm5
115- ; AVX2-FP-NEXT: vblendps {{.*#+}} ymm5 = ymm5 [0,1],ymm0[2,3],ymm5 [4,5,6,7]
116- ; AVX2-FP-NEXT: vpermpd {{.*#+}} ymm5 = ymm5 [0,2,2,1]
114+ ; AVX2-FP-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm6
115+ ; AVX2-FP-NEXT: vblendps {{.*#+}} ymm0 = ymm6 [0,1],ymm0[2,3],ymm6 [4,5,6,7]
116+ ; AVX2-FP-NEXT: vpermpd {{.*#+}} ymm0 = ymm0 [0,2,2,1]
117117; AVX2-FP-NEXT: vbroadcastsd %xmm4, %ymm6
118- ; AVX2-FP-NEXT: vblendps {{.*#+}} ymm5 = ymm5 [0,1,2,3],ymm6[4,5],ymm5 [6,7]
119- ; AVX2-FP-NEXT: vunpckhpd {{.*#+}} ymm6 = ymm0 [1],ymm1[1],ymm0 [3],ymm1[3]
118+ ; AVX2-FP-NEXT: vblendps {{.*#+}} ymm0 = ymm0 [0,1,2,3],ymm6[4,5],ymm0 [6,7]
119+ ; AVX2-FP-NEXT: vunpckhpd {{.*#+}} ymm6 = ymm5 [1],ymm1[1],ymm5 [3],ymm1[3]
120120; AVX2-FP-NEXT: vpermpd {{.*#+}} ymm6 = ymm6[2,1,3,3]
121121; AVX2-FP-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm2
122122; AVX2-FP-NEXT: vblendps {{.*#+}} ymm2 = ymm6[0,1,2,3,4,5],ymm2[6,7]
123- ; AVX2-FP-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0 [0],ymm1[0],ymm0 [2],ymm1[2]
124- ; AVX2-FP-NEXT: vpermpd {{.*#+}} ymm0 = ymm0 [0,2,1,3]
125- ; AVX2-FP-NEXT: vunpckhpd {{.*#+}} xmm1 = xmm3[1],xmm4[1]
126- ; AVX2-FP-NEXT: vmovaps %xmm1 , 96(%rax)
127- ; AVX2-FP-NEXT: vmovaps %ymm0 , (%rax)
123+ ; AVX2-FP-NEXT: vunpcklpd {{.*#+}} ymm1 = ymm5 [0],ymm1[0],ymm5 [2],ymm1[2]
124+ ; AVX2-FP-NEXT: vpermpd {{.*#+}} ymm1 = ymm1 [0,2,1,3]
125+ ; AVX2-FP-NEXT: vunpckhpd {{.*#+}} xmm3 = xmm3[1],xmm4[1]
126+ ; AVX2-FP-NEXT: vmovaps %xmm3 , 96(%rax)
127+ ; AVX2-FP-NEXT: vmovaps %ymm1 , (%rax)
128128; AVX2-FP-NEXT: vmovaps %ymm2, 64(%rax)
129- ; AVX2-FP-NEXT: vmovaps %ymm5 , 32(%rax)
129+ ; AVX2-FP-NEXT: vmovaps %ymm0 , 32(%rax)
130130; AVX2-FP-NEXT: vzeroupper
131131; AVX2-FP-NEXT: retq
132132;
@@ -139,24 +139,24 @@ define void @store_i64_stride7_vf2(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
139139; AVX2-FCP-NEXT: vmovaps (%r8), %xmm2
140140; AVX2-FCP-NEXT: vmovaps (%r9), %xmm3
141141; AVX2-FCP-NEXT: vmovaps (%r10), %xmm4
142- ; AVX2-FCP-NEXT: vinsertf128 $1, (%rsi), %ymm0, %ymm0
142+ ; AVX2-FCP-NEXT: vinsertf128 $1, (%rsi), %ymm0, %ymm5
143143; AVX2-FCP-NEXT: vinsertf128 $1, (%rcx), %ymm1, %ymm1
144- ; AVX2-FCP-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm5
145- ; AVX2-FCP-NEXT: vblendps {{.*#+}} ymm5 = ymm5 [0,1],ymm0[2,3],ymm5 [4,5,6,7]
146- ; AVX2-FCP-NEXT: vpermpd {{.*#+}} ymm5 = ymm5 [0,2,2,1]
144+ ; AVX2-FCP-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm6
145+ ; AVX2-FCP-NEXT: vblendps {{.*#+}} ymm0 = ymm6 [0,1],ymm0[2,3],ymm6 [4,5,6,7]
146+ ; AVX2-FCP-NEXT: vpermpd {{.*#+}} ymm0 = ymm0 [0,2,2,1]
147147; AVX2-FCP-NEXT: vbroadcastsd %xmm4, %ymm6
148- ; AVX2-FCP-NEXT: vblendps {{.*#+}} ymm5 = ymm5 [0,1,2,3],ymm6[4,5],ymm5 [6,7]
149- ; AVX2-FCP-NEXT: vunpckhpd {{.*#+}} ymm6 = ymm0 [1],ymm1[1],ymm0 [3],ymm1[3]
148+ ; AVX2-FCP-NEXT: vblendps {{.*#+}} ymm0 = ymm0 [0,1,2,3],ymm6[4,5],ymm0 [6,7]
149+ ; AVX2-FCP-NEXT: vunpckhpd {{.*#+}} ymm6 = ymm5 [1],ymm1[1],ymm5 [3],ymm1[3]
150150; AVX2-FCP-NEXT: vpermpd {{.*#+}} ymm6 = ymm6[2,1,3,3]
151151; AVX2-FCP-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm2
152152; AVX2-FCP-NEXT: vblendps {{.*#+}} ymm2 = ymm6[0,1,2,3,4,5],ymm2[6,7]
153- ; AVX2-FCP-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0 [0],ymm1[0],ymm0 [2],ymm1[2]
154- ; AVX2-FCP-NEXT: vpermpd {{.*#+}} ymm0 = ymm0 [0,2,1,3]
155- ; AVX2-FCP-NEXT: vunpckhpd {{.*#+}} xmm1 = xmm3[1],xmm4[1]
156- ; AVX2-FCP-NEXT: vmovaps %xmm1 , 96(%rax)
157- ; AVX2-FCP-NEXT: vmovaps %ymm0 , (%rax)
153+ ; AVX2-FCP-NEXT: vunpcklpd {{.*#+}} ymm1 = ymm5 [0],ymm1[0],ymm5 [2],ymm1[2]
154+ ; AVX2-FCP-NEXT: vpermpd {{.*#+}} ymm1 = ymm1 [0,2,1,3]
155+ ; AVX2-FCP-NEXT: vunpckhpd {{.*#+}} xmm3 = xmm3[1],xmm4[1]
156+ ; AVX2-FCP-NEXT: vmovaps %xmm3 , 96(%rax)
157+ ; AVX2-FCP-NEXT: vmovaps %ymm1 , (%rax)
158158; AVX2-FCP-NEXT: vmovaps %ymm2, 64(%rax)
159- ; AVX2-FCP-NEXT: vmovaps %ymm5 , 32(%rax)
159+ ; AVX2-FCP-NEXT: vmovaps %ymm0 , 32(%rax)
160160; AVX2-FCP-NEXT: vzeroupper
161161; AVX2-FCP-NEXT: retq
162162;
0 commit comments