 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512F
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512VL,AVX512VL-FAST-ALL
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512VL,AVX512VL-FAST-PERLANE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512BW,AVX512BW-FAST-ALL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512BW,AVX512BW-FAST-PERLANE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512BWVL,AVX512BWVL-FAST-ALL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512BWVL,AVX512BWVL-FAST-PERLANE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512BW
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512BW
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512BWVL
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512BWVL
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vbmi,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512VBMI
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vbmi,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512VBMI
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vbmi,+avx512vl,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512VBMIVL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vbmi,+avx512vl,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512VBMIVL
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vbmi,+avx512vl,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512VBMI
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vbmi,+avx512vl,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512VBMI
 
 ; PR31551
 ; Pairs of shufflevector:trunc functions with functional equivalence.
@@ -74,13 +74,6 @@ define void @shuffle_v64i8_to_v32i8(ptr %L, ptr %S) nounwind {
 ; AVX512VBMI-NEXT:    vpmovwb %zmm0, (%rsi)
 ; AVX512VBMI-NEXT:    vzeroupper
 ; AVX512VBMI-NEXT:    retq
-;
-; AVX512VBMIVL-LABEL: shuffle_v64i8_to_v32i8:
-; AVX512VBMIVL:       # %bb.0:
-; AVX512VBMIVL-NEXT:    vmovdqa64 (%rdi), %zmm0
-; AVX512VBMIVL-NEXT:    vpmovwb %zmm0, (%rsi)
-; AVX512VBMIVL-NEXT:    vzeroupper
-; AVX512VBMIVL-NEXT:    retq
   %vec = load <64 x i8>, ptr %L
   %strided.vec = shufflevector <64 x i8> %vec, <64 x i8> undef, <32 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30, i32 32, i32 34, i32 36, i32 38, i32 40, i32 42, i32 44, i32 46, i32 48, i32 50, i32 52, i32 54, i32 56, i32 58, i32 60, i32 62>
   store <32 x i8> %strided.vec, ptr %S
@@ -126,13 +119,6 @@ define void @trunc_v32i16_to_v32i8(ptr %L, ptr %S) nounwind {
 ; AVX512VBMI-NEXT:    vpmovwb %zmm0, (%rsi)
 ; AVX512VBMI-NEXT:    vzeroupper
 ; AVX512VBMI-NEXT:    retq
-;
-; AVX512VBMIVL-LABEL: trunc_v32i16_to_v32i8:
-; AVX512VBMIVL:       # %bb.0:
-; AVX512VBMIVL-NEXT:    vmovdqa64 (%rdi), %zmm0
-; AVX512VBMIVL-NEXT:    vpmovwb %zmm0, (%rsi)
-; AVX512VBMIVL-NEXT:    vzeroupper
-; AVX512VBMIVL-NEXT:    retq
   %vec = load <64 x i8>, ptr %L
   %bc = bitcast <64 x i8> %vec to <32 x i16>
   %strided.vec = trunc <32 x i16> %bc to <32 x i8>
@@ -346,14 +332,6 @@ define <16 x i8> @trunc_shuffle_v64i8_01_05_09_13_17_21_25_29_33_37_41_45_49_53_
 ; AVX512VBMI-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512VBMI-NEXT:    vzeroupper
 ; AVX512VBMI-NEXT:    retq
-;
-; AVX512VBMIVL-LABEL: trunc_shuffle_v64i8_01_05_09_13_17_21_25_29_33_37_41_45_49_53_57_62:
-; AVX512VBMIVL:       # %bb.0:
-; AVX512VBMIVL-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,5,9,13,17,21,25,29,33,37,41,45,49,53,57,62]
-; AVX512VBMIVL-NEXT:    vpermb %zmm0, %zmm1, %zmm0
-; AVX512VBMIVL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
-; AVX512VBMIVL-NEXT:    vzeroupper
-; AVX512VBMIVL-NEXT:    retq
   %res = shufflevector <64 x i8> %x, <64 x i8> %x, <16 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29, i32 33, i32 37, i32 41, i32 45, i32 49, i32 53, i32 57, i32 62>
   ret <16 x i8> %res
 }
@@ -406,12 +384,6 @@ define <32 x i8> @trunc_shuffle_v32i16_v32i8_ofs1(<32 x i16> %a0) {
 ; AVX512VBMI-NEXT:    vpsrlw $8, %zmm0, %zmm0
 ; AVX512VBMI-NEXT:    vpmovwb %zmm0, %ymm0
 ; AVX512VBMI-NEXT:    retq
-;
-; AVX512VBMIVL-LABEL: trunc_shuffle_v32i16_v32i8_ofs1:
-; AVX512VBMIVL:       # %bb.0:
-; AVX512VBMIVL-NEXT:    vpsrlw $8, %zmm0, %zmm0
-; AVX512VBMIVL-NEXT:    vpmovwb %zmm0, %ymm0
-; AVX512VBMIVL-NEXT:    retq
   %bc = bitcast <32 x i16> %a0 to <64 x i8>
   %res = shufflevector <64 x i8> %bc, <64 x i8> poison, <32 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31, i32 33, i32 35, i32 37, i32 39, i32 41, i32 43, i32 45, i32 47, i32 49, i32 51, i32 53, i32 55, i32 57, i32 59, i32 61, i32 63>
   ret <32 x i8> %res
@@ -442,11 +414,9 @@ define <4 x double> @PR34175(ptr %p) {
 ;
 ; AVX512BW-LABEL: PR34175:
 ; AVX512BW:       # %bb.0:
-; AVX512BW-NEXT:    vmovq {{.*#+}} xmm0 = [0,8,32,40,0,0,0,0]
-; AVX512BW-NEXT:    vmovdqu (%rdi), %ymm1
-; AVX512BW-NEXT:    vmovdqu 32(%rdi), %ymm2
-; AVX512BW-NEXT:    vpermt2w %zmm2, %zmm0, %zmm1
-; AVX512BW-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
+; AVX512BW-NEXT:    vmovq {{.*#+}} xmm0 = [0,8,16,24,0,0,0,0]
+; AVX512BW-NEXT:    vpermw (%rdi), %zmm0, %zmm0
+; AVX512BW-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
 ; AVX512BW-NEXT:    vcvtdq2pd %xmm0, %ymm0
 ; AVX512BW-NEXT:    retq
 ;
@@ -460,21 +430,11 @@ define <4 x double> @PR34175(ptr %p) {
 ;
 ; AVX512VBMI-LABEL: PR34175:
 ; AVX512VBMI:       # %bb.0:
-; AVX512VBMI-NEXT:    vmovq {{.*#+}} xmm0 = [0,8,32,40,0,0,0,0]
-; AVX512VBMI-NEXT:    vmovdqu (%rdi), %ymm1
-; AVX512VBMI-NEXT:    vmovdqu 32(%rdi), %ymm2
-; AVX512VBMI-NEXT:    vpermt2w %zmm2, %zmm0, %zmm1
-; AVX512VBMI-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
+; AVX512VBMI-NEXT:    vmovq {{.*#+}} xmm0 = [0,8,16,24,0,0,0,0]
+; AVX512VBMI-NEXT:    vpermw (%rdi), %zmm0, %zmm0
+; AVX512VBMI-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
 ; AVX512VBMI-NEXT:    vcvtdq2pd %xmm0, %ymm0
 ; AVX512VBMI-NEXT:    retq
-;
-; AVX512VBMIVL-LABEL: PR34175:
-; AVX512VBMIVL:       # %bb.0:
-; AVX512VBMIVL-NEXT:    vmovq {{.*#+}} xmm0 = [0,8,16,24,0,0,0,0]
-; AVX512VBMIVL-NEXT:    vpermw (%rdi), %zmm0, %zmm0
-; AVX512VBMIVL-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
-; AVX512VBMIVL-NEXT:    vcvtdq2pd %xmm0, %ymm0
-; AVX512VBMIVL-NEXT:    retq
   %v = load <32 x i16>, ptr %p, align 2
   %shuf = shufflevector <32 x i16> %v, <32 x i16> undef, <4 x i32> <i32 0, i32 8, i32 16, i32 24>
   %tofp = uitofp <4 x i16> %shuf to <4 x double>
@@ -492,8 +452,3 @@ define <16 x i8> @trunc_v8i64_to_v8i8_return_v16i8(<8 x i64> %vec) nounwind {
   ret <16 x i8> %result
 }
 
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; AVX512BW-FAST-ALL: {{.*}}
-; AVX512BW-FAST-PERLANE: {{.*}}
-; AVX512BWVL-FAST-ALL: {{.*}}
-; AVX512BWVL-FAST-PERLANE: {{.*}}