Skip to content

Commit b5e3a69

Browse files
committed
[X86] Regenerate shuffle tests with sign-extended masks
1 parent cced2e7 commit b5e3a69

8 files changed

+97 -105 lines changed

llvm/test/CodeGen/X86/shuffle-strided-with-offset-512.ll

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -65,7 +65,7 @@ define void @shuffle_v16i32_to_v8i32_1(ptr %L, ptr %S) nounwind {
6565
;
6666
; AVX512BWVL-FAST-ALL-LABEL: shuffle_v16i32_to_v8i32_1:
6767
; AVX512BWVL-FAST-ALL: # %bb.0:
68-
; AVX512BWVL-FAST-ALL-NEXT: vmovaps {{.*#+}} ymm0 = [1,3,5,7,9,11,13,15]
68+
; AVX512BWVL-FAST-ALL-NEXT: vpmovsxbd {{.*#+}} ymm0 = [1,3,5,7,9,11,13,15]
6969
; AVX512BWVL-FAST-ALL-NEXT: vpermps (%rdi), %zmm0, %zmm0
7070
; AVX512BWVL-FAST-ALL-NEXT: vmovaps %ymm0, (%rsi)
7171
; AVX512BWVL-FAST-ALL-NEXT: vzeroupper

llvm/test/CodeGen/X86/vector-interleaved-load-i32-stride-2.ll

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -363,7 +363,7 @@ define void @load_i32_stride2_vf8(ptr %in.vec, ptr %out.vec0, ptr %out.vec1) nou
363363
; AVX512-FCP-LABEL: load_i32_stride2_vf8:
364364
; AVX512-FCP: # %bb.0:
365365
; AVX512-FCP-NEXT: vmovdqa64 (%rdi), %zmm0
366-
; AVX512-FCP-NEXT: vmovaps {{.*#+}} ymm1 = [1,3,5,7,9,11,13,15]
366+
; AVX512-FCP-NEXT: vpmovsxbd {{.*#+}} ymm1 = [1,3,5,7,9,11,13,15]
367367
; AVX512-FCP-NEXT: vpermps (%rdi), %zmm1, %zmm1
368368
; AVX512-FCP-NEXT: vpmovqd %zmm0, (%rsi)
369369
; AVX512-FCP-NEXT: vmovaps %ymm1, (%rdx)
@@ -384,7 +384,7 @@ define void @load_i32_stride2_vf8(ptr %in.vec, ptr %out.vec0, ptr %out.vec1) nou
384384
; AVX512DQ-FCP-LABEL: load_i32_stride2_vf8:
385385
; AVX512DQ-FCP: # %bb.0:
386386
; AVX512DQ-FCP-NEXT: vmovdqa64 (%rdi), %zmm0
387-
; AVX512DQ-FCP-NEXT: vmovaps {{.*#+}} ymm1 = [1,3,5,7,9,11,13,15]
387+
; AVX512DQ-FCP-NEXT: vpmovsxbd {{.*#+}} ymm1 = [1,3,5,7,9,11,13,15]
388388
; AVX512DQ-FCP-NEXT: vpermps (%rdi), %zmm1, %zmm1
389389
; AVX512DQ-FCP-NEXT: vpmovqd %zmm0, (%rsi)
390390
; AVX512DQ-FCP-NEXT: vmovaps %ymm1, (%rdx)
@@ -405,7 +405,7 @@ define void @load_i32_stride2_vf8(ptr %in.vec, ptr %out.vec0, ptr %out.vec1) nou
405405
; AVX512BW-FCP-LABEL: load_i32_stride2_vf8:
406406
; AVX512BW-FCP: # %bb.0:
407407
; AVX512BW-FCP-NEXT: vmovdqa64 (%rdi), %zmm0
408-
; AVX512BW-FCP-NEXT: vmovaps {{.*#+}} ymm1 = [1,3,5,7,9,11,13,15]
408+
; AVX512BW-FCP-NEXT: vpmovsxbd {{.*#+}} ymm1 = [1,3,5,7,9,11,13,15]
409409
; AVX512BW-FCP-NEXT: vpermps (%rdi), %zmm1, %zmm1
410410
; AVX512BW-FCP-NEXT: vpmovqd %zmm0, (%rsi)
411411
; AVX512BW-FCP-NEXT: vmovaps %ymm1, (%rdx)
@@ -426,7 +426,7 @@ define void @load_i32_stride2_vf8(ptr %in.vec, ptr %out.vec0, ptr %out.vec1) nou
426426
; AVX512DQ-BW-FCP-LABEL: load_i32_stride2_vf8:
427427
; AVX512DQ-BW-FCP: # %bb.0:
428428
; AVX512DQ-BW-FCP-NEXT: vmovdqa64 (%rdi), %zmm0
429-
; AVX512DQ-BW-FCP-NEXT: vmovaps {{.*#+}} ymm1 = [1,3,5,7,9,11,13,15]
429+
; AVX512DQ-BW-FCP-NEXT: vpmovsxbd {{.*#+}} ymm1 = [1,3,5,7,9,11,13,15]
430430
; AVX512DQ-BW-FCP-NEXT: vpermps (%rdi), %zmm1, %zmm1
431431
; AVX512DQ-BW-FCP-NEXT: vpmovqd %zmm0, (%rsi)
432432
; AVX512DQ-BW-FCP-NEXT: vmovaps %ymm1, (%rdx)

llvm/test/CodeGen/X86/vector-interleaved-load-i32-stride-3.ll

Lines changed: 24 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -309,12 +309,12 @@ define void @load_i32_stride3_vf4(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr
309309
;
310310
; AVX512-LABEL: load_i32_stride3_vf4:
311311
; AVX512: # %bb.0:
312-
; AVX512-NEXT: vmovaps {{.*#+}} xmm0 = [0,3,6,9]
312+
; AVX512-NEXT: vpmovsxbd {{.*#+}} xmm0 = [0,3,6,9]
313313
; AVX512-NEXT: vmovaps (%rdi), %zmm1
314314
; AVX512-NEXT: vpermps %zmm1, %zmm0, %zmm0
315-
; AVX512-NEXT: vmovaps {{.*#+}} xmm2 = [1,4,7,10]
315+
; AVX512-NEXT: vpmovsxbd {{.*#+}} xmm2 = [1,4,7,10]
316316
; AVX512-NEXT: vpermps %zmm1, %zmm2, %zmm2
317-
; AVX512-NEXT: vmovaps {{.*#+}} xmm3 = [2,5,8,11]
317+
; AVX512-NEXT: vpmovsxbd {{.*#+}} xmm3 = [2,5,8,11]
318318
; AVX512-NEXT: vpermps %zmm1, %zmm3, %zmm1
319319
; AVX512-NEXT: vmovaps %xmm0, (%rsi)
320320
; AVX512-NEXT: vmovaps %xmm2, (%rdx)
@@ -324,12 +324,12 @@ define void @load_i32_stride3_vf4(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr
324324
;
325325
; AVX512-FCP-LABEL: load_i32_stride3_vf4:
326326
; AVX512-FCP: # %bb.0:
327-
; AVX512-FCP-NEXT: vmovaps {{.*#+}} xmm0 = [0,3,6,9]
327+
; AVX512-FCP-NEXT: vpmovsxbd {{.*#+}} xmm0 = [0,3,6,9]
328328
; AVX512-FCP-NEXT: vmovaps (%rdi), %zmm1
329329
; AVX512-FCP-NEXT: vpermps %zmm1, %zmm0, %zmm0
330-
; AVX512-FCP-NEXT: vmovaps {{.*#+}} xmm2 = [1,4,7,10]
330+
; AVX512-FCP-NEXT: vpmovsxbd {{.*#+}} xmm2 = [1,4,7,10]
331331
; AVX512-FCP-NEXT: vpermps %zmm1, %zmm2, %zmm2
332-
; AVX512-FCP-NEXT: vmovaps {{.*#+}} xmm3 = [2,5,8,11]
332+
; AVX512-FCP-NEXT: vpmovsxbd {{.*#+}} xmm3 = [2,5,8,11]
333333
; AVX512-FCP-NEXT: vpermps %zmm1, %zmm3, %zmm1
334334
; AVX512-FCP-NEXT: vmovaps %xmm0, (%rsi)
335335
; AVX512-FCP-NEXT: vmovaps %xmm2, (%rdx)
@@ -339,12 +339,12 @@ define void @load_i32_stride3_vf4(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr
339339
;
340340
; AVX512DQ-LABEL: load_i32_stride3_vf4:
341341
; AVX512DQ: # %bb.0:
342-
; AVX512DQ-NEXT: vmovaps {{.*#+}} xmm0 = [0,3,6,9]
342+
; AVX512DQ-NEXT: vpmovsxbd {{.*#+}} xmm0 = [0,3,6,9]
343343
; AVX512DQ-NEXT: vmovaps (%rdi), %zmm1
344344
; AVX512DQ-NEXT: vpermps %zmm1, %zmm0, %zmm0
345-
; AVX512DQ-NEXT: vmovaps {{.*#+}} xmm2 = [1,4,7,10]
345+
; AVX512DQ-NEXT: vpmovsxbd {{.*#+}} xmm2 = [1,4,7,10]
346346
; AVX512DQ-NEXT: vpermps %zmm1, %zmm2, %zmm2
347-
; AVX512DQ-NEXT: vmovaps {{.*#+}} xmm3 = [2,5,8,11]
347+
; AVX512DQ-NEXT: vpmovsxbd {{.*#+}} xmm3 = [2,5,8,11]
348348
; AVX512DQ-NEXT: vpermps %zmm1, %zmm3, %zmm1
349349
; AVX512DQ-NEXT: vmovaps %xmm0, (%rsi)
350350
; AVX512DQ-NEXT: vmovaps %xmm2, (%rdx)
@@ -354,12 +354,12 @@ define void @load_i32_stride3_vf4(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr
354354
;
355355
; AVX512DQ-FCP-LABEL: load_i32_stride3_vf4:
356356
; AVX512DQ-FCP: # %bb.0:
357-
; AVX512DQ-FCP-NEXT: vmovaps {{.*#+}} xmm0 = [0,3,6,9]
357+
; AVX512DQ-FCP-NEXT: vpmovsxbd {{.*#+}} xmm0 = [0,3,6,9]
358358
; AVX512DQ-FCP-NEXT: vmovaps (%rdi), %zmm1
359359
; AVX512DQ-FCP-NEXT: vpermps %zmm1, %zmm0, %zmm0
360-
; AVX512DQ-FCP-NEXT: vmovaps {{.*#+}} xmm2 = [1,4,7,10]
360+
; AVX512DQ-FCP-NEXT: vpmovsxbd {{.*#+}} xmm2 = [1,4,7,10]
361361
; AVX512DQ-FCP-NEXT: vpermps %zmm1, %zmm2, %zmm2
362-
; AVX512DQ-FCP-NEXT: vmovaps {{.*#+}} xmm3 = [2,5,8,11]
362+
; AVX512DQ-FCP-NEXT: vpmovsxbd {{.*#+}} xmm3 = [2,5,8,11]
363363
; AVX512DQ-FCP-NEXT: vpermps %zmm1, %zmm3, %zmm1
364364
; AVX512DQ-FCP-NEXT: vmovaps %xmm0, (%rsi)
365365
; AVX512DQ-FCP-NEXT: vmovaps %xmm2, (%rdx)
@@ -369,12 +369,12 @@ define void @load_i32_stride3_vf4(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr
369369
;
370370
; AVX512BW-LABEL: load_i32_stride3_vf4:
371371
; AVX512BW: # %bb.0:
372-
; AVX512BW-NEXT: vmovaps {{.*#+}} xmm0 = [0,3,6,9]
372+
; AVX512BW-NEXT: vpmovsxbd {{.*#+}} xmm0 = [0,3,6,9]
373373
; AVX512BW-NEXT: vmovaps (%rdi), %zmm1
374374
; AVX512BW-NEXT: vpermps %zmm1, %zmm0, %zmm0
375-
; AVX512BW-NEXT: vmovaps {{.*#+}} xmm2 = [1,4,7,10]
375+
; AVX512BW-NEXT: vpmovsxbd {{.*#+}} xmm2 = [1,4,7,10]
376376
; AVX512BW-NEXT: vpermps %zmm1, %zmm2, %zmm2
377-
; AVX512BW-NEXT: vmovaps {{.*#+}} xmm3 = [2,5,8,11]
377+
; AVX512BW-NEXT: vpmovsxbd {{.*#+}} xmm3 = [2,5,8,11]
378378
; AVX512BW-NEXT: vpermps %zmm1, %zmm3, %zmm1
379379
; AVX512BW-NEXT: vmovaps %xmm0, (%rsi)
380380
; AVX512BW-NEXT: vmovaps %xmm2, (%rdx)
@@ -384,12 +384,12 @@ define void @load_i32_stride3_vf4(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr
384384
;
385385
; AVX512BW-FCP-LABEL: load_i32_stride3_vf4:
386386
; AVX512BW-FCP: # %bb.0:
387-
; AVX512BW-FCP-NEXT: vmovaps {{.*#+}} xmm0 = [0,3,6,9]
387+
; AVX512BW-FCP-NEXT: vpmovsxbd {{.*#+}} xmm0 = [0,3,6,9]
388388
; AVX512BW-FCP-NEXT: vmovaps (%rdi), %zmm1
389389
; AVX512BW-FCP-NEXT: vpermps %zmm1, %zmm0, %zmm0
390-
; AVX512BW-FCP-NEXT: vmovaps {{.*#+}} xmm2 = [1,4,7,10]
390+
; AVX512BW-FCP-NEXT: vpmovsxbd {{.*#+}} xmm2 = [1,4,7,10]
391391
; AVX512BW-FCP-NEXT: vpermps %zmm1, %zmm2, %zmm2
392-
; AVX512BW-FCP-NEXT: vmovaps {{.*#+}} xmm3 = [2,5,8,11]
392+
; AVX512BW-FCP-NEXT: vpmovsxbd {{.*#+}} xmm3 = [2,5,8,11]
393393
; AVX512BW-FCP-NEXT: vpermps %zmm1, %zmm3, %zmm1
394394
; AVX512BW-FCP-NEXT: vmovaps %xmm0, (%rsi)
395395
; AVX512BW-FCP-NEXT: vmovaps %xmm2, (%rdx)
@@ -399,12 +399,12 @@ define void @load_i32_stride3_vf4(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr
399399
;
400400
; AVX512DQ-BW-LABEL: load_i32_stride3_vf4:
401401
; AVX512DQ-BW: # %bb.0:
402-
; AVX512DQ-BW-NEXT: vmovaps {{.*#+}} xmm0 = [0,3,6,9]
402+
; AVX512DQ-BW-NEXT: vpmovsxbd {{.*#+}} xmm0 = [0,3,6,9]
403403
; AVX512DQ-BW-NEXT: vmovaps (%rdi), %zmm1
404404
; AVX512DQ-BW-NEXT: vpermps %zmm1, %zmm0, %zmm0
405-
; AVX512DQ-BW-NEXT: vmovaps {{.*#+}} xmm2 = [1,4,7,10]
405+
; AVX512DQ-BW-NEXT: vpmovsxbd {{.*#+}} xmm2 = [1,4,7,10]
406406
; AVX512DQ-BW-NEXT: vpermps %zmm1, %zmm2, %zmm2
407-
; AVX512DQ-BW-NEXT: vmovaps {{.*#+}} xmm3 = [2,5,8,11]
407+
; AVX512DQ-BW-NEXT: vpmovsxbd {{.*#+}} xmm3 = [2,5,8,11]
408408
; AVX512DQ-BW-NEXT: vpermps %zmm1, %zmm3, %zmm1
409409
; AVX512DQ-BW-NEXT: vmovaps %xmm0, (%rsi)
410410
; AVX512DQ-BW-NEXT: vmovaps %xmm2, (%rdx)
@@ -414,12 +414,12 @@ define void @load_i32_stride3_vf4(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr
414414
;
415415
; AVX512DQ-BW-FCP-LABEL: load_i32_stride3_vf4:
416416
; AVX512DQ-BW-FCP: # %bb.0:
417-
; AVX512DQ-BW-FCP-NEXT: vmovaps {{.*#+}} xmm0 = [0,3,6,9]
417+
; AVX512DQ-BW-FCP-NEXT: vpmovsxbd {{.*#+}} xmm0 = [0,3,6,9]
418418
; AVX512DQ-BW-FCP-NEXT: vmovaps (%rdi), %zmm1
419419
; AVX512DQ-BW-FCP-NEXT: vpermps %zmm1, %zmm0, %zmm0
420-
; AVX512DQ-BW-FCP-NEXT: vmovaps {{.*#+}} xmm2 = [1,4,7,10]
420+
; AVX512DQ-BW-FCP-NEXT: vpmovsxbd {{.*#+}} xmm2 = [1,4,7,10]
421421
; AVX512DQ-BW-FCP-NEXT: vpermps %zmm1, %zmm2, %zmm2
422-
; AVX512DQ-BW-FCP-NEXT: vmovaps {{.*#+}} xmm3 = [2,5,8,11]
422+
; AVX512DQ-BW-FCP-NEXT: vpmovsxbd {{.*#+}} xmm3 = [2,5,8,11]
423423
; AVX512DQ-BW-FCP-NEXT: vpermps %zmm1, %zmm3, %zmm1
424424
; AVX512DQ-BW-FCP-NEXT: vmovaps %xmm0, (%rsi)
425425
; AVX512DQ-BW-FCP-NEXT: vmovaps %xmm2, (%rdx)

llvm/test/CodeGen/X86/vector-interleaved-load-i32-stride-4.ll

Lines changed: 36 additions & 36 deletions
Original file line number | Diff line number | Diff line change
@@ -106,7 +106,7 @@ define void @load_i32_stride4_vf2(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr
106106
; AVX512-FCP-NEXT: vmovdqa (%rdi), %xmm0
107107
; AVX512-FCP-NEXT: vmovdqa 16(%rdi), %xmm1
108108
; AVX512-FCP-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
109-
; AVX512-FCP-NEXT: vmovaps {{.*#+}} xmm3 = [1,5,1,1]
109+
; AVX512-FCP-NEXT: vpmovsxbd {{.*#+}} xmm3 = [1,5,1,1]
110110
; AVX512-FCP-NEXT: vpermps (%rdi), %ymm3, %ymm3
111111
; AVX512-FCP-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
112112
; AVX512-FCP-NEXT: vmovq %xmm2, (%rsi)
@@ -135,7 +135,7 @@ define void @load_i32_stride4_vf2(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr
135135
; AVX512DQ-FCP-NEXT: vmovdqa (%rdi), %xmm0
136136
; AVX512DQ-FCP-NEXT: vmovdqa 16(%rdi), %xmm1
137137
; AVX512DQ-FCP-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
138-
; AVX512DQ-FCP-NEXT: vmovaps {{.*#+}} xmm3 = [1,5,1,1]
138+
; AVX512DQ-FCP-NEXT: vpmovsxbd {{.*#+}} xmm3 = [1,5,1,1]
139139
; AVX512DQ-FCP-NEXT: vpermps (%rdi), %ymm3, %ymm3
140140
; AVX512DQ-FCP-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
141141
; AVX512DQ-FCP-NEXT: vmovq %xmm2, (%rsi)
@@ -164,7 +164,7 @@ define void @load_i32_stride4_vf2(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr
164164
; AVX512BW-FCP-NEXT: vmovdqa (%rdi), %xmm0
165165
; AVX512BW-FCP-NEXT: vmovdqa 16(%rdi), %xmm1
166166
; AVX512BW-FCP-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
167-
; AVX512BW-FCP-NEXT: vmovaps {{.*#+}} xmm3 = [1,5,1,1]
167+
; AVX512BW-FCP-NEXT: vpmovsxbd {{.*#+}} xmm3 = [1,5,1,1]
168168
; AVX512BW-FCP-NEXT: vpermps (%rdi), %ymm3, %ymm3
169169
; AVX512BW-FCP-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
170170
; AVX512BW-FCP-NEXT: vmovq %xmm2, (%rsi)
@@ -193,7 +193,7 @@ define void @load_i32_stride4_vf2(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr
193193
; AVX512DQ-BW-FCP-NEXT: vmovdqa (%rdi), %xmm0
194194
; AVX512DQ-BW-FCP-NEXT: vmovdqa 16(%rdi), %xmm1
195195
; AVX512DQ-BW-FCP-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
196-
; AVX512DQ-BW-FCP-NEXT: vmovaps {{.*#+}} xmm3 = [1,5,1,1]
196+
; AVX512DQ-BW-FCP-NEXT: vpmovsxbd {{.*#+}} xmm3 = [1,5,1,1]
197197
; AVX512DQ-BW-FCP-NEXT: vpermps (%rdi), %ymm3, %ymm3
198198
; AVX512DQ-BW-FCP-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
199199
; AVX512DQ-BW-FCP-NEXT: vmovq %xmm2, (%rsi)
@@ -364,14 +364,14 @@ define void @load_i32_stride4_vf4(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr
364364
;
365365
; AVX512-LABEL: load_i32_stride4_vf4:
366366
; AVX512: # %bb.0:
367-
; AVX512-NEXT: vmovaps {{.*#+}} xmm0 = [0,4,8,12]
367+
; AVX512-NEXT: vpmovsxbd {{.*#+}} xmm0 = [0,4,8,12]
368368
; AVX512-NEXT: vmovaps (%rdi), %zmm1
369369
; AVX512-NEXT: vpermps %zmm1, %zmm0, %zmm0
370-
; AVX512-NEXT: vmovaps {{.*#+}} xmm2 = [1,5,9,13]
370+
; AVX512-NEXT: vpmovsxbd {{.*#+}} xmm2 = [1,5,9,13]
371371
; AVX512-NEXT: vpermps %zmm1, %zmm2, %zmm2
372-
; AVX512-NEXT: vmovaps {{.*#+}} xmm3 = [2,6,10,14]
372+
; AVX512-NEXT: vpmovsxbd {{.*#+}} xmm3 = [2,6,10,14]
373373
; AVX512-NEXT: vpermps %zmm1, %zmm3, %zmm3
374-
; AVX512-NEXT: vmovaps {{.*#+}} xmm4 = [3,7,11,15]
374+
; AVX512-NEXT: vpmovsxbd {{.*#+}} xmm4 = [3,7,11,15]
375375
; AVX512-NEXT: vpermps %zmm1, %zmm4, %zmm1
376376
; AVX512-NEXT: vmovaps %xmm0, (%rsi)
377377
; AVX512-NEXT: vmovaps %xmm2, (%rdx)
@@ -382,14 +382,14 @@ define void @load_i32_stride4_vf4(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr
382382
;
383383
; AVX512-FCP-LABEL: load_i32_stride4_vf4:
384384
; AVX512-FCP: # %bb.0:
385-
; AVX512-FCP-NEXT: vmovaps {{.*#+}} xmm0 = [0,4,8,12]
385+
; AVX512-FCP-NEXT: vpmovsxbd {{.*#+}} xmm0 = [0,4,8,12]
386386
; AVX512-FCP-NEXT: vmovaps (%rdi), %zmm1
387387
; AVX512-FCP-NEXT: vpermps %zmm1, %zmm0, %zmm0
388-
; AVX512-FCP-NEXT: vmovaps {{.*#+}} xmm2 = [1,5,9,13]
388+
; AVX512-FCP-NEXT: vpmovsxbd {{.*#+}} xmm2 = [1,5,9,13]
389389
; AVX512-FCP-NEXT: vpermps %zmm1, %zmm2, %zmm2
390-
; AVX512-FCP-NEXT: vmovaps {{.*#+}} xmm3 = [2,6,10,14]
390+
; AVX512-FCP-NEXT: vpmovsxbd {{.*#+}} xmm3 = [2,6,10,14]
391391
; AVX512-FCP-NEXT: vpermps %zmm1, %zmm3, %zmm3
392-
; AVX512-FCP-NEXT: vmovaps {{.*#+}} xmm4 = [3,7,11,15]
392+
; AVX512-FCP-NEXT: vpmovsxbd {{.*#+}} xmm4 = [3,7,11,15]
393393
; AVX512-FCP-NEXT: vpermps %zmm1, %zmm4, %zmm1
394394
; AVX512-FCP-NEXT: vmovaps %xmm0, (%rsi)
395395
; AVX512-FCP-NEXT: vmovaps %xmm2, (%rdx)
@@ -400,14 +400,14 @@ define void @load_i32_stride4_vf4(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr
400400
;
401401
; AVX512DQ-LABEL: load_i32_stride4_vf4:
402402
; AVX512DQ: # %bb.0:
403-
; AVX512DQ-NEXT: vmovaps {{.*#+}} xmm0 = [0,4,8,12]
403+
; AVX512DQ-NEXT: vpmovsxbd {{.*#+}} xmm0 = [0,4,8,12]
404404
; AVX512DQ-NEXT: vmovaps (%rdi), %zmm1
405405
; AVX512DQ-NEXT: vpermps %zmm1, %zmm0, %zmm0
406-
; AVX512DQ-NEXT: vmovaps {{.*#+}} xmm2 = [1,5,9,13]
406+
; AVX512DQ-NEXT: vpmovsxbd {{.*#+}} xmm2 = [1,5,9,13]
407407
; AVX512DQ-NEXT: vpermps %zmm1, %zmm2, %zmm2
408-
; AVX512DQ-NEXT: vmovaps {{.*#+}} xmm3 = [2,6,10,14]
408+
; AVX512DQ-NEXT: vpmovsxbd {{.*#+}} xmm3 = [2,6,10,14]
409409
; AVX512DQ-NEXT: vpermps %zmm1, %zmm3, %zmm3
410-
; AVX512DQ-NEXT: vmovaps {{.*#+}} xmm4 = [3,7,11,15]
410+
; AVX512DQ-NEXT: vpmovsxbd {{.*#+}} xmm4 = [3,7,11,15]
411411
; AVX512DQ-NEXT: vpermps %zmm1, %zmm4, %zmm1
412412
; AVX512DQ-NEXT: vmovaps %xmm0, (%rsi)
413413
; AVX512DQ-NEXT: vmovaps %xmm2, (%rdx)
@@ -418,14 +418,14 @@ define void @load_i32_stride4_vf4(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr
418418
;
419419
; AVX512DQ-FCP-LABEL: load_i32_stride4_vf4:
420420
; AVX512DQ-FCP: # %bb.0:
421-
; AVX512DQ-FCP-NEXT: vmovaps {{.*#+}} xmm0 = [0,4,8,12]
421+
; AVX512DQ-FCP-NEXT: vpmovsxbd {{.*#+}} xmm0 = [0,4,8,12]
422422
; AVX512DQ-FCP-NEXT: vmovaps (%rdi), %zmm1
423423
; AVX512DQ-FCP-NEXT: vpermps %zmm1, %zmm0, %zmm0
424-
; AVX512DQ-FCP-NEXT: vmovaps {{.*#+}} xmm2 = [1,5,9,13]
424+
; AVX512DQ-FCP-NEXT: vpmovsxbd {{.*#+}} xmm2 = [1,5,9,13]
425425
; AVX512DQ-FCP-NEXT: vpermps %zmm1, %zmm2, %zmm2
426-
; AVX512DQ-FCP-NEXT: vmovaps {{.*#+}} xmm3 = [2,6,10,14]
426+
; AVX512DQ-FCP-NEXT: vpmovsxbd {{.*#+}} xmm3 = [2,6,10,14]
427427
; AVX512DQ-FCP-NEXT: vpermps %zmm1, %zmm3, %zmm3
428-
; AVX512DQ-FCP-NEXT: vmovaps {{.*#+}} xmm4 = [3,7,11,15]
428+
; AVX512DQ-FCP-NEXT: vpmovsxbd {{.*#+}} xmm4 = [3,7,11,15]
429429
; AVX512DQ-FCP-NEXT: vpermps %zmm1, %zmm4, %zmm1
430430
; AVX512DQ-FCP-NEXT: vmovaps %xmm0, (%rsi)
431431
; AVX512DQ-FCP-NEXT: vmovaps %xmm2, (%rdx)
@@ -436,14 +436,14 @@ define void @load_i32_stride4_vf4(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr
436436
;
437437
; AVX512BW-LABEL: load_i32_stride4_vf4:
438438
; AVX512BW: # %bb.0:
439-
; AVX512BW-NEXT: vmovaps {{.*#+}} xmm0 = [0,4,8,12]
439+
; AVX512BW-NEXT: vpmovsxbd {{.*#+}} xmm0 = [0,4,8,12]
440440
; AVX512BW-NEXT: vmovaps (%rdi), %zmm1
441441
; AVX512BW-NEXT: vpermps %zmm1, %zmm0, %zmm0
442-
; AVX512BW-NEXT: vmovaps {{.*#+}} xmm2 = [1,5,9,13]
442+
; AVX512BW-NEXT: vpmovsxbd {{.*#+}} xmm2 = [1,5,9,13]
443443
; AVX512BW-NEXT: vpermps %zmm1, %zmm2, %zmm2
444-
; AVX512BW-NEXT: vmovaps {{.*#+}} xmm3 = [2,6,10,14]
444+
; AVX512BW-NEXT: vpmovsxbd {{.*#+}} xmm3 = [2,6,10,14]
445445
; AVX512BW-NEXT: vpermps %zmm1, %zmm3, %zmm3
446-
; AVX512BW-NEXT: vmovaps {{.*#+}} xmm4 = [3,7,11,15]
446+
; AVX512BW-NEXT: vpmovsxbd {{.*#+}} xmm4 = [3,7,11,15]
447447
; AVX512BW-NEXT: vpermps %zmm1, %zmm4, %zmm1
448448
; AVX512BW-NEXT: vmovaps %xmm0, (%rsi)
449449
; AVX512BW-NEXT: vmovaps %xmm2, (%rdx)
@@ -454,14 +454,14 @@ define void @load_i32_stride4_vf4(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr
454454
;
455455
; AVX512BW-FCP-LABEL: load_i32_stride4_vf4:
456456
; AVX512BW-FCP: # %bb.0:
457-
; AVX512BW-FCP-NEXT: vmovaps {{.*#+}} xmm0 = [0,4,8,12]
457+
; AVX512BW-FCP-NEXT: vpmovsxbd {{.*#+}} xmm0 = [0,4,8,12]
458458
; AVX512BW-FCP-NEXT: vmovaps (%rdi), %zmm1
459459
; AVX512BW-FCP-NEXT: vpermps %zmm1, %zmm0, %zmm0
460-
; AVX512BW-FCP-NEXT: vmovaps {{.*#+}} xmm2 = [1,5,9,13]
460+
; AVX512BW-FCP-NEXT: vpmovsxbd {{.*#+}} xmm2 = [1,5,9,13]
461461
; AVX512BW-FCP-NEXT: vpermps %zmm1, %zmm2, %zmm2
462-
; AVX512BW-FCP-NEXT: vmovaps {{.*#+}} xmm3 = [2,6,10,14]
462+
; AVX512BW-FCP-NEXT: vpmovsxbd {{.*#+}} xmm3 = [2,6,10,14]
463463
; AVX512BW-FCP-NEXT: vpermps %zmm1, %zmm3, %zmm3
464-
; AVX512BW-FCP-NEXT: vmovaps {{.*#+}} xmm4 = [3,7,11,15]
464+
; AVX512BW-FCP-NEXT: vpmovsxbd {{.*#+}} xmm4 = [3,7,11,15]
465465
; AVX512BW-FCP-NEXT: vpermps %zmm1, %zmm4, %zmm1
466466
; AVX512BW-FCP-NEXT: vmovaps %xmm0, (%rsi)
467467
; AVX512BW-FCP-NEXT: vmovaps %xmm2, (%rdx)
@@ -472,14 +472,14 @@ define void @load_i32_stride4_vf4(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr
472472
;
473473
; AVX512DQ-BW-LABEL: load_i32_stride4_vf4:
474474
; AVX512DQ-BW: # %bb.0:
475-
; AVX512DQ-BW-NEXT: vmovaps {{.*#+}} xmm0 = [0,4,8,12]
475+
; AVX512DQ-BW-NEXT: vpmovsxbd {{.*#+}} xmm0 = [0,4,8,12]
476476
; AVX512DQ-BW-NEXT: vmovaps (%rdi), %zmm1
477477
; AVX512DQ-BW-NEXT: vpermps %zmm1, %zmm0, %zmm0
478-
; AVX512DQ-BW-NEXT: vmovaps {{.*#+}} xmm2 = [1,5,9,13]
478+
; AVX512DQ-BW-NEXT: vpmovsxbd {{.*#+}} xmm2 = [1,5,9,13]
479479
; AVX512DQ-BW-NEXT: vpermps %zmm1, %zmm2, %zmm2
480-
; AVX512DQ-BW-NEXT: vmovaps {{.*#+}} xmm3 = [2,6,10,14]
480+
; AVX512DQ-BW-NEXT: vpmovsxbd {{.*#+}} xmm3 = [2,6,10,14]
481481
; AVX512DQ-BW-NEXT: vpermps %zmm1, %zmm3, %zmm3
482-
; AVX512DQ-BW-NEXT: vmovaps {{.*#+}} xmm4 = [3,7,11,15]
482+
; AVX512DQ-BW-NEXT: vpmovsxbd {{.*#+}} xmm4 = [3,7,11,15]
483483
; AVX512DQ-BW-NEXT: vpermps %zmm1, %zmm4, %zmm1
484484
; AVX512DQ-BW-NEXT: vmovaps %xmm0, (%rsi)
485485
; AVX512DQ-BW-NEXT: vmovaps %xmm2, (%rdx)
@@ -490,14 +490,14 @@ define void @load_i32_stride4_vf4(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr
490490
;
491491
; AVX512DQ-BW-FCP-LABEL: load_i32_stride4_vf4:
492492
; AVX512DQ-BW-FCP: # %bb.0:
493-
; AVX512DQ-BW-FCP-NEXT: vmovaps {{.*#+}} xmm0 = [0,4,8,12]
493+
; AVX512DQ-BW-FCP-NEXT: vpmovsxbd {{.*#+}} xmm0 = [0,4,8,12]
494494
; AVX512DQ-BW-FCP-NEXT: vmovaps (%rdi), %zmm1
495495
; AVX512DQ-BW-FCP-NEXT: vpermps %zmm1, %zmm0, %zmm0
496-
; AVX512DQ-BW-FCP-NEXT: vmovaps {{.*#+}} xmm2 = [1,5,9,13]
496+
; AVX512DQ-BW-FCP-NEXT: vpmovsxbd {{.*#+}} xmm2 = [1,5,9,13]
497497
; AVX512DQ-BW-FCP-NEXT: vpermps %zmm1, %zmm2, %zmm2
498-
; AVX512DQ-BW-FCP-NEXT: vmovaps {{.*#+}} xmm3 = [2,6,10,14]
498+
; AVX512DQ-BW-FCP-NEXT: vpmovsxbd {{.*#+}} xmm3 = [2,6,10,14]
499499
; AVX512DQ-BW-FCP-NEXT: vpermps %zmm1, %zmm3, %zmm3
500-
; AVX512DQ-BW-FCP-NEXT: vmovaps {{.*#+}} xmm4 = [3,7,11,15]
500+
; AVX512DQ-BW-FCP-NEXT: vpmovsxbd {{.*#+}} xmm4 = [3,7,11,15]
501501
; AVX512DQ-BW-FCP-NEXT: vpermps %zmm1, %zmm4, %zmm1
502502
; AVX512DQ-BW-FCP-NEXT: vmovaps %xmm0, (%rsi)
503503
; AVX512DQ-BW-FCP-NEXT: vmovaps %xmm2, (%rdx)

0 commit comments

Comments
 (0)