6
6
define <8 x i16 > @zext_8x8mem_to_8x16 (ptr %i , <8 x i1 > %mask ) nounwind readnone {
7
7
; KNL-LABEL: zext_8x8mem_to_8x16:
8
8
; KNL: # %bb.0:
9
- ; KNL-NEXT: vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
9
+ ; KNL-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
10
+ ; KNL-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
10
11
; KNL-NEXT: vpsllw $15, %xmm0, %xmm0
11
12
; KNL-NEXT: vpsraw $15, %xmm0, %xmm0
12
13
; KNL-NEXT: vpand %xmm1, %xmm0, %xmm0
@@ -21,7 +22,8 @@ define <8 x i16> @zext_8x8mem_to_8x16(ptr%i , <8 x i1> %mask) nounwind readnone
21
22
;
22
23
; AVX512DQNOBW-LABEL: zext_8x8mem_to_8x16:
23
24
; AVX512DQNOBW: # %bb.0:
24
- ; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
25
+ ; AVX512DQNOBW-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
26
+ ; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
25
27
; AVX512DQNOBW-NEXT: vpsllw $15, %xmm0, %xmm0
26
28
; AVX512DQNOBW-NEXT: vpsraw $15, %xmm0, %xmm0
27
29
; AVX512DQNOBW-NEXT: vpand %xmm1, %xmm0, %xmm0
@@ -35,7 +37,8 @@ define <8 x i16> @zext_8x8mem_to_8x16(ptr%i , <8 x i1> %mask) nounwind readnone
35
37
define <8 x i16 > @sext_8x8mem_to_8x16 (ptr %i , <8 x i1 > %mask ) nounwind readnone {
36
38
; KNL-LABEL: sext_8x8mem_to_8x16:
37
39
; KNL: # %bb.0:
38
- ; KNL-NEXT: vpmovsxbw (%rdi), %xmm1
40
+ ; KNL-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
41
+ ; KNL-NEXT: vpmovsxbw %xmm1, %xmm1
39
42
; KNL-NEXT: vpsllw $15, %xmm0, %xmm0
40
43
; KNL-NEXT: vpsraw $15, %xmm0, %xmm0
41
44
; KNL-NEXT: vpand %xmm1, %xmm0, %xmm0
@@ -50,7 +53,8 @@ define <8 x i16> @sext_8x8mem_to_8x16(ptr%i , <8 x i1> %mask) nounwind readnone
50
53
;
51
54
; AVX512DQNOBW-LABEL: sext_8x8mem_to_8x16:
52
55
; AVX512DQNOBW: # %bb.0:
53
- ; AVX512DQNOBW-NEXT: vpmovsxbw (%rdi), %xmm1
56
+ ; AVX512DQNOBW-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
57
+ ; AVX512DQNOBW-NEXT: vpmovsxbw %xmm1, %xmm1
54
58
; AVX512DQNOBW-NEXT: vpsllw $15, %xmm0, %xmm0
55
59
; AVX512DQNOBW-NEXT: vpsraw $15, %xmm0, %xmm0
56
60
; AVX512DQNOBW-NEXT: vpand %xmm1, %xmm0, %xmm0
@@ -208,8 +212,10 @@ define <32 x i16> @zext_32x8mem_to_32x16(ptr%i , <32 x i1> %mask) nounwind readn
208
212
; KNL-NEXT: vextracti128 $1, %ymm0, %xmm1
209
213
; KNL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
210
214
; KNL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
211
- ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
212
- ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm3 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
215
+ ; KNL-NEXT: vmovdqu (%rdi), %ymm2
216
+ ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm3 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero
217
+ ; KNL-NEXT: vextracti128 $1, %ymm2, %xmm2
218
+ ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero
213
219
; KNL-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
214
220
; KNL-NEXT: vpsllw $15, %ymm0, %ymm0
215
221
; KNL-NEXT: vpsraw $15, %ymm0, %ymm0
@@ -231,8 +237,10 @@ define <32 x i16> @zext_32x8mem_to_32x16(ptr%i , <32 x i1> %mask) nounwind readn
231
237
; AVX512DQNOBW-NEXT: vextracti128 $1, %ymm0, %xmm1
232
238
; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
233
239
; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
234
- ; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
235
- ; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm3 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
240
+ ; AVX512DQNOBW-NEXT: vmovdqu (%rdi), %ymm2
241
+ ; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm3 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero
242
+ ; AVX512DQNOBW-NEXT: vextracti128 $1, %ymm2, %xmm2
243
+ ; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero
236
244
; AVX512DQNOBW-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
237
245
; AVX512DQNOBW-NEXT: vpsllw $15, %ymm0, %ymm0
238
246
; AVX512DQNOBW-NEXT: vpsraw $15, %ymm0, %ymm0
@@ -253,8 +261,10 @@ define <32 x i16> @sext_32x8mem_to_32x16(ptr%i , <32 x i1> %mask) nounwind readn
253
261
; KNL-NEXT: vextracti128 $1, %ymm0, %xmm1
254
262
; KNL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
255
263
; KNL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
256
- ; KNL-NEXT: vpmovsxbw 16(%rdi), %ymm2
257
- ; KNL-NEXT: vpmovsxbw (%rdi), %ymm3
264
+ ; KNL-NEXT: vmovdqu (%rdi), %ymm2
265
+ ; KNL-NEXT: vpmovsxbw %xmm2, %ymm3
266
+ ; KNL-NEXT: vextracti128 $1, %ymm2, %xmm2
267
+ ; KNL-NEXT: vpmovsxbw %xmm2, %ymm2
258
268
; KNL-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
259
269
; KNL-NEXT: vpsllw $15, %ymm0, %ymm0
260
270
; KNL-NEXT: vpsraw $15, %ymm0, %ymm0
@@ -276,8 +286,10 @@ define <32 x i16> @sext_32x8mem_to_32x16(ptr%i , <32 x i1> %mask) nounwind readn
276
286
; AVX512DQNOBW-NEXT: vextracti128 $1, %ymm0, %xmm1
277
287
; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
278
288
; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
279
- ; AVX512DQNOBW-NEXT: vpmovsxbw 16(%rdi), %ymm2
280
- ; AVX512DQNOBW-NEXT: vpmovsxbw (%rdi), %ymm3
289
+ ; AVX512DQNOBW-NEXT: vmovdqu (%rdi), %ymm2
290
+ ; AVX512DQNOBW-NEXT: vpmovsxbw %xmm2, %ymm3
291
+ ; AVX512DQNOBW-NEXT: vextracti128 $1, %ymm2, %xmm2
292
+ ; AVX512DQNOBW-NEXT: vpmovsxbw %xmm2, %ymm2
281
293
; AVX512DQNOBW-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
282
294
; AVX512DQNOBW-NEXT: vpsllw $15, %ymm0, %ymm0
283
295
; AVX512DQNOBW-NEXT: vpsraw $15, %ymm0, %ymm0
0 commit comments