@@ -84,7 +84,7 @@ define void @load_i64_stride3_vf2(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr
8484;
8585; AVX512-LABEL: load_i64_stride3_vf2:
8686; AVX512: # %bb.0:
87- ; AVX512-NEXT: vpermpd {{.*#+}} zmm0 = mem[0,3,2,3,4,7,6,7 ]
87+ ; AVX512-NEXT: vpermpd {{.*#+}} ymm0 = mem[0,3,2,3]
8888; AVX512-NEXT: vmovdqa 32(%rdi), %xmm1
8989; AVX512-NEXT: vpalignr {{.*#+}} xmm2 = mem[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
9090; AVX512-NEXT: vpblendd {{.*#+}} xmm1 = mem[0,1],xmm1[2,3]
@@ -97,9 +97,8 @@ define void @load_i64_stride3_vf2(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr
9797; AVX512-FCP-LABEL: load_i64_stride3_vf2:
9898; AVX512-FCP: # %bb.0:
9999; AVX512-FCP-NEXT: vpmovsxbq {{.*#+}} xmm0 = [1,4]
100- ; AVX512-FCP-NEXT: vmovaps (%rdi), %zmm1
101- ; AVX512-FCP-NEXT: vpermpd %zmm1, %zmm0, %zmm0
102- ; AVX512-FCP-NEXT: vpermpd {{.*#+}} zmm1 = zmm1[0,3,2,3,4,7,6,7]
100+ ; AVX512-FCP-NEXT: vpermpd (%rdi), %zmm0, %zmm0
101+ ; AVX512-FCP-NEXT: vpermpd {{.*#+}} ymm1 = mem[0,3,2,3]
103102; AVX512-FCP-NEXT: vmovaps 16(%rdi), %xmm2
104103; AVX512-FCP-NEXT: vblendps {{.*#+}} xmm2 = xmm2[0,1],mem[2,3]
105104; AVX512-FCP-NEXT: vmovaps %xmm1, (%rsi)
@@ -110,7 +109,7 @@ define void @load_i64_stride3_vf2(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr
110109;
111110; AVX512DQ-LABEL: load_i64_stride3_vf2:
112111; AVX512DQ: # %bb.0:
113- ; AVX512DQ-NEXT: vpermpd {{.*#+}} zmm0 = mem[0,3,2,3,4,7,6,7 ]
112+ ; AVX512DQ-NEXT: vpermpd {{.*#+}} ymm0 = mem[0,3,2,3]
114113; AVX512DQ-NEXT: vmovdqa 32(%rdi), %xmm1
115114; AVX512DQ-NEXT: vpalignr {{.*#+}} xmm2 = mem[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
116115; AVX512DQ-NEXT: vpblendd {{.*#+}} xmm1 = mem[0,1],xmm1[2,3]
@@ -123,9 +122,8 @@ define void @load_i64_stride3_vf2(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr
123122; AVX512DQ-FCP-LABEL: load_i64_stride3_vf2:
124123; AVX512DQ-FCP: # %bb.0:
125124; AVX512DQ-FCP-NEXT: vpmovsxbq {{.*#+}} xmm0 = [1,4]
126- ; AVX512DQ-FCP-NEXT: vmovaps (%rdi), %zmm1
127- ; AVX512DQ-FCP-NEXT: vpermpd %zmm1, %zmm0, %zmm0
128- ; AVX512DQ-FCP-NEXT: vpermpd {{.*#+}} zmm1 = zmm1[0,3,2,3,4,7,6,7]
125+ ; AVX512DQ-FCP-NEXT: vpermpd (%rdi), %zmm0, %zmm0
126+ ; AVX512DQ-FCP-NEXT: vpermpd {{.*#+}} ymm1 = mem[0,3,2,3]
129127; AVX512DQ-FCP-NEXT: vmovaps 16(%rdi), %xmm2
130128; AVX512DQ-FCP-NEXT: vblendps {{.*#+}} xmm2 = xmm2[0,1],mem[2,3]
131129; AVX512DQ-FCP-NEXT: vmovaps %xmm1, (%rsi)
@@ -136,7 +134,7 @@ define void @load_i64_stride3_vf2(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr
136134;
137135; AVX512BW-LABEL: load_i64_stride3_vf2:
138136; AVX512BW: # %bb.0:
139- ; AVX512BW-NEXT: vpermpd {{.*#+}} zmm0 = mem[0,3,2,3,4,7,6,7 ]
137+ ; AVX512BW-NEXT: vpermpd {{.*#+}} ymm0 = mem[0,3,2,3]
140138; AVX512BW-NEXT: vmovdqa 32(%rdi), %xmm1
141139; AVX512BW-NEXT: vpalignr {{.*#+}} xmm2 = mem[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
142140; AVX512BW-NEXT: vpblendd {{.*#+}} xmm1 = mem[0,1],xmm1[2,3]
@@ -149,9 +147,8 @@ define void @load_i64_stride3_vf2(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr
149147; AVX512BW-FCP-LABEL: load_i64_stride3_vf2:
150148; AVX512BW-FCP: # %bb.0:
151149; AVX512BW-FCP-NEXT: vpmovsxbq {{.*#+}} xmm0 = [1,4]
152- ; AVX512BW-FCP-NEXT: vmovaps (%rdi), %zmm1
153- ; AVX512BW-FCP-NEXT: vpermpd %zmm1, %zmm0, %zmm0
154- ; AVX512BW-FCP-NEXT: vpermpd {{.*#+}} zmm1 = zmm1[0,3,2,3,4,7,6,7]
150+ ; AVX512BW-FCP-NEXT: vpermpd (%rdi), %zmm0, %zmm0
151+ ; AVX512BW-FCP-NEXT: vpermpd {{.*#+}} ymm1 = mem[0,3,2,3]
155152; AVX512BW-FCP-NEXT: vmovaps 16(%rdi), %xmm2
156153; AVX512BW-FCP-NEXT: vblendps {{.*#+}} xmm2 = xmm2[0,1],mem[2,3]
157154; AVX512BW-FCP-NEXT: vmovaps %xmm1, (%rsi)
@@ -162,7 +159,7 @@ define void @load_i64_stride3_vf2(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr
162159;
163160; AVX512DQ-BW-LABEL: load_i64_stride3_vf2:
164161; AVX512DQ-BW: # %bb.0:
165- ; AVX512DQ-BW-NEXT: vpermpd {{.*#+}} zmm0 = mem[0,3,2,3,4,7,6,7 ]
162+ ; AVX512DQ-BW-NEXT: vpermpd {{.*#+}} ymm0 = mem[0,3,2,3]
166163; AVX512DQ-BW-NEXT: vmovdqa 32(%rdi), %xmm1
167164; AVX512DQ-BW-NEXT: vpalignr {{.*#+}} xmm2 = mem[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
168165; AVX512DQ-BW-NEXT: vpblendd {{.*#+}} xmm1 = mem[0,1],xmm1[2,3]
@@ -175,9 +172,8 @@ define void @load_i64_stride3_vf2(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr
175172; AVX512DQ-BW-FCP-LABEL: load_i64_stride3_vf2:
176173; AVX512DQ-BW-FCP: # %bb.0:
177174; AVX512DQ-BW-FCP-NEXT: vpmovsxbq {{.*#+}} xmm0 = [1,4]
178- ; AVX512DQ-BW-FCP-NEXT: vmovaps (%rdi), %zmm1
179- ; AVX512DQ-BW-FCP-NEXT: vpermpd %zmm1, %zmm0, %zmm0
180- ; AVX512DQ-BW-FCP-NEXT: vpermpd {{.*#+}} zmm1 = zmm1[0,3,2,3,4,7,6,7]
175+ ; AVX512DQ-BW-FCP-NEXT: vpermpd (%rdi), %zmm0, %zmm0
176+ ; AVX512DQ-BW-FCP-NEXT: vpermpd {{.*#+}} ymm1 = mem[0,3,2,3]
181177; AVX512DQ-BW-FCP-NEXT: vmovaps 16(%rdi), %xmm2
182178; AVX512DQ-BW-FCP-NEXT: vblendps {{.*#+}} xmm2 = xmm2[0,1],mem[2,3]
183179; AVX512DQ-BW-FCP-NEXT: vmovaps %xmm1, (%rsi)
0 commit comments