66define <8 x i16 > @zext_8x8mem_to_8x16 (ptr %i , <8 x i1 > %mask ) nounwind readnone {
77; KNL-LABEL: zext_8x8mem_to_8x16:
88; KNL: # %bb.0:
9- ; KNL-NEXT: vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
9+ ; KNL-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
10+ ; KNL-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
1011; KNL-NEXT: vpsllw $15, %xmm0, %xmm0
1112; KNL-NEXT: vpsraw $15, %xmm0, %xmm0
1213; KNL-NEXT: vpand %xmm1, %xmm0, %xmm0
@@ -21,7 +22,8 @@ define <8 x i16> @zext_8x8mem_to_8x16(ptr%i , <8 x i1> %mask) nounwind readnone
2122;
2223; AVX512DQNOBW-LABEL: zext_8x8mem_to_8x16:
2324; AVX512DQNOBW: # %bb.0:
24- ; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
25+ ; AVX512DQNOBW-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
26+ ; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
2527; AVX512DQNOBW-NEXT: vpsllw $15, %xmm0, %xmm0
2628; AVX512DQNOBW-NEXT: vpsraw $15, %xmm0, %xmm0
2729; AVX512DQNOBW-NEXT: vpand %xmm1, %xmm0, %xmm0
@@ -35,7 +37,8 @@ define <8 x i16> @zext_8x8mem_to_8x16(ptr%i , <8 x i1> %mask) nounwind readnone
3537define <8 x i16 > @sext_8x8mem_to_8x16 (ptr %i , <8 x i1 > %mask ) nounwind readnone {
3638; KNL-LABEL: sext_8x8mem_to_8x16:
3739; KNL: # %bb.0:
38- ; KNL-NEXT: vpmovsxbw (%rdi), %xmm1
40+ ; KNL-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
41+ ; KNL-NEXT: vpmovsxbw %xmm1, %xmm1
3942; KNL-NEXT: vpsllw $15, %xmm0, %xmm0
4043; KNL-NEXT: vpsraw $15, %xmm0, %xmm0
4144; KNL-NEXT: vpand %xmm1, %xmm0, %xmm0
@@ -50,7 +53,8 @@ define <8 x i16> @sext_8x8mem_to_8x16(ptr%i , <8 x i1> %mask) nounwind readnone
5053;
5154; AVX512DQNOBW-LABEL: sext_8x8mem_to_8x16:
5255; AVX512DQNOBW: # %bb.0:
53- ; AVX512DQNOBW-NEXT: vpmovsxbw (%rdi), %xmm1
56+ ; AVX512DQNOBW-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
57+ ; AVX512DQNOBW-NEXT: vpmovsxbw %xmm1, %xmm1
5458; AVX512DQNOBW-NEXT: vpsllw $15, %xmm0, %xmm0
5559; AVX512DQNOBW-NEXT: vpsraw $15, %xmm0, %xmm0
5660; AVX512DQNOBW-NEXT: vpand %xmm1, %xmm0, %xmm0
@@ -208,8 +212,10 @@ define <32 x i16> @zext_32x8mem_to_32x16(ptr%i , <32 x i1> %mask) nounwind readn
208212; KNL-NEXT: vextracti128 $1, %ymm0, %xmm1
209213; KNL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
210214; KNL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
211- ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
212- ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm3 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
215+ ; KNL-NEXT: vmovdqu (%rdi), %ymm2
216+ ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm3 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero
217+ ; KNL-NEXT: vextracti128 $1, %ymm2, %xmm2
218+ ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero
213219; KNL-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
214220; KNL-NEXT: vpsllw $15, %ymm0, %ymm0
215221; KNL-NEXT: vpsraw $15, %ymm0, %ymm0
@@ -231,8 +237,10 @@ define <32 x i16> @zext_32x8mem_to_32x16(ptr%i , <32 x i1> %mask) nounwind readn
231237; AVX512DQNOBW-NEXT: vextracti128 $1, %ymm0, %xmm1
232238; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
233239; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
234- ; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
235- ; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm3 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
240+ ; AVX512DQNOBW-NEXT: vmovdqu (%rdi), %ymm2
241+ ; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm3 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero
242+ ; AVX512DQNOBW-NEXT: vextracti128 $1, %ymm2, %xmm2
243+ ; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero
236244; AVX512DQNOBW-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
237245; AVX512DQNOBW-NEXT: vpsllw $15, %ymm0, %ymm0
238246; AVX512DQNOBW-NEXT: vpsraw $15, %ymm0, %ymm0
@@ -253,8 +261,10 @@ define <32 x i16> @sext_32x8mem_to_32x16(ptr%i , <32 x i1> %mask) nounwind readn
253261; KNL-NEXT: vextracti128 $1, %ymm0, %xmm1
254262; KNL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
255263; KNL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
256- ; KNL-NEXT: vpmovsxbw 16(%rdi), %ymm2
257- ; KNL-NEXT: vpmovsxbw (%rdi), %ymm3
264+ ; KNL-NEXT: vmovdqu (%rdi), %ymm2
265+ ; KNL-NEXT: vpmovsxbw %xmm2, %ymm3
266+ ; KNL-NEXT: vextracti128 $1, %ymm2, %xmm2
267+ ; KNL-NEXT: vpmovsxbw %xmm2, %ymm2
258268; KNL-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
259269; KNL-NEXT: vpsllw $15, %ymm0, %ymm0
260270; KNL-NEXT: vpsraw $15, %ymm0, %ymm0
@@ -276,8 +286,10 @@ define <32 x i16> @sext_32x8mem_to_32x16(ptr%i , <32 x i1> %mask) nounwind readn
276286; AVX512DQNOBW-NEXT: vextracti128 $1, %ymm0, %xmm1
277287; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
278288; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
279- ; AVX512DQNOBW-NEXT: vpmovsxbw 16(%rdi), %ymm2
280- ; AVX512DQNOBW-NEXT: vpmovsxbw (%rdi), %ymm3
289+ ; AVX512DQNOBW-NEXT: vmovdqu (%rdi), %ymm2
290+ ; AVX512DQNOBW-NEXT: vpmovsxbw %xmm2, %ymm3
291+ ; AVX512DQNOBW-NEXT: vextracti128 $1, %ymm2, %xmm2
292+ ; AVX512DQNOBW-NEXT: vpmovsxbw %xmm2, %ymm2
281293; AVX512DQNOBW-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
282294; AVX512DQNOBW-NEXT: vpsllw $15, %ymm0, %ymm0
283295; AVX512DQNOBW-NEXT: vpsraw $15, %ymm0, %ymm0
0 commit comments