|
3 | 3 | ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefixes=SSE,SSE2OR3,SSSE3 |
4 | 4 | ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,SSE41 |
5 | 5 | ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1 |
6 | | -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2,AVX2-SLOW |
7 | | -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX,AVX2,AVX2-FAST-ALL |
8 | | -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX,AVX2,AVX2-FAST-PERLANE |
| 6 | +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2 |
| 7 | +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX,AVX2 |
| 8 | +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX,AVX2 |
9 | 9 | ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX,AVX512 |
10 | 10 | ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX,AVX512 |
11 | 11 |
|
@@ -2846,60 +2846,22 @@ define <8 x i32> @test33(<8 x i32> %a0, <8 x i64> %a1) { |
2846 | 2846 | ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 |
2847 | 2847 | ; AVX1-NEXT: retq |
2848 | 2848 | ; |
2849 | | -; AVX2-SLOW-LABEL: test33: |
2850 | | -; AVX2-SLOW: # %bb.0: |
2851 | | -; AVX2-SLOW-NEXT: vpbroadcastq {{.*#+}} ymm3 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] |
2852 | | -; AVX2-SLOW-NEXT: vpxor %ymm3, %ymm2, %ymm4 |
2853 | | -; AVX2-SLOW-NEXT: vpbroadcastq {{.*#+}} ymm5 = [9223372041149743103,9223372041149743103,9223372041149743103,9223372041149743103] |
2854 | | -; AVX2-SLOW-NEXT: vpcmpgtq %ymm4, %ymm5, %ymm4 |
2855 | | -; AVX2-SLOW-NEXT: vbroadcastsd {{.*#+}} ymm6 = [4294967295,4294967295,4294967295,4294967295] |
2856 | | -; AVX2-SLOW-NEXT: vblendvpd %ymm4, %ymm2, %ymm6, %ymm2 |
2857 | | -; AVX2-SLOW-NEXT: vpxor %ymm3, %ymm1, %ymm3 |
2858 | | -; AVX2-SLOW-NEXT: vpcmpgtq %ymm3, %ymm5, %ymm3 |
2859 | | -; AVX2-SLOW-NEXT: vblendvpd %ymm3, %ymm1, %ymm6, %ymm1 |
2860 | | -; AVX2-SLOW-NEXT: vperm2f128 {{.*#+}} ymm3 = ymm1[2,3],ymm2[2,3] |
2861 | | -; AVX2-SLOW-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 |
2862 | | -; AVX2-SLOW-NEXT: vshufps {{.*#+}} ymm1 = ymm1[0,2],ymm3[0,2],ymm1[4,6],ymm3[4,6] |
2863 | | -; AVX2-SLOW-NEXT: vpmaxud %ymm1, %ymm0, %ymm0 |
2864 | | -; AVX2-SLOW-NEXT: vpsubd %ymm1, %ymm0, %ymm0 |
2865 | | -; AVX2-SLOW-NEXT: retq |
2866 | | -; |
2867 | | -; AVX2-FAST-ALL-LABEL: test33: |
2868 | | -; AVX2-FAST-ALL: # %bb.0: |
2869 | | -; AVX2-FAST-ALL-NEXT: vpbroadcastq {{.*#+}} ymm3 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] |
2870 | | -; AVX2-FAST-ALL-NEXT: vpxor %ymm3, %ymm1, %ymm4 |
2871 | | -; AVX2-FAST-ALL-NEXT: vpbroadcastq {{.*#+}} ymm5 = [9223372041149743103,9223372041149743103,9223372041149743103,9223372041149743103] |
2872 | | -; AVX2-FAST-ALL-NEXT: vpcmpgtq %ymm4, %ymm5, %ymm4 |
2873 | | -; AVX2-FAST-ALL-NEXT: vbroadcastsd {{.*#+}} ymm6 = [4294967295,4294967295,4294967295,4294967295] |
2874 | | -; AVX2-FAST-ALL-NEXT: vblendvpd %ymm4, %ymm1, %ymm6, %ymm1 |
2875 | | -; AVX2-FAST-ALL-NEXT: vmovapd {{.*#+}} ymm4 = [0,2,4,6,4,6,6,7] |
2876 | | -; AVX2-FAST-ALL-NEXT: vpermps %ymm1, %ymm4, %ymm1 |
2877 | | -; AVX2-FAST-ALL-NEXT: vpxor %ymm3, %ymm2, %ymm3 |
2878 | | -; AVX2-FAST-ALL-NEXT: vpcmpgtq %ymm3, %ymm5, %ymm3 |
2879 | | -; AVX2-FAST-ALL-NEXT: vblendvpd %ymm3, %ymm2, %ymm6, %ymm2 |
2880 | | -; AVX2-FAST-ALL-NEXT: vpermps %ymm2, %ymm4, %ymm2 |
2881 | | -; AVX2-FAST-ALL-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 |
2882 | | -; AVX2-FAST-ALL-NEXT: vpmaxud %ymm1, %ymm0, %ymm0 |
2883 | | -; AVX2-FAST-ALL-NEXT: vpsubd %ymm1, %ymm0, %ymm0 |
2884 | | -; AVX2-FAST-ALL-NEXT: retq |
2885 | | -; |
2886 | | -; AVX2-FAST-PERLANE-LABEL: test33: |
2887 | | -; AVX2-FAST-PERLANE: # %bb.0: |
2888 | | -; AVX2-FAST-PERLANE-NEXT: vpbroadcastq {{.*#+}} ymm3 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] |
2889 | | -; AVX2-FAST-PERLANE-NEXT: vpxor %ymm3, %ymm2, %ymm4 |
2890 | | -; AVX2-FAST-PERLANE-NEXT: vpbroadcastq {{.*#+}} ymm5 = [9223372041149743103,9223372041149743103,9223372041149743103,9223372041149743103] |
2891 | | -; AVX2-FAST-PERLANE-NEXT: vpcmpgtq %ymm4, %ymm5, %ymm4 |
2892 | | -; AVX2-FAST-PERLANE-NEXT: vbroadcastsd {{.*#+}} ymm6 = [4294967295,4294967295,4294967295,4294967295] |
2893 | | -; AVX2-FAST-PERLANE-NEXT: vblendvpd %ymm4, %ymm2, %ymm6, %ymm2 |
2894 | | -; AVX2-FAST-PERLANE-NEXT: vpxor %ymm3, %ymm1, %ymm3 |
2895 | | -; AVX2-FAST-PERLANE-NEXT: vpcmpgtq %ymm3, %ymm5, %ymm3 |
2896 | | -; AVX2-FAST-PERLANE-NEXT: vblendvpd %ymm3, %ymm1, %ymm6, %ymm1 |
2897 | | -; AVX2-FAST-PERLANE-NEXT: vperm2f128 {{.*#+}} ymm3 = ymm1[2,3],ymm2[2,3] |
2898 | | -; AVX2-FAST-PERLANE-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 |
2899 | | -; AVX2-FAST-PERLANE-NEXT: vshufps {{.*#+}} ymm1 = ymm1[0,2],ymm3[0,2],ymm1[4,6],ymm3[4,6] |
2900 | | -; AVX2-FAST-PERLANE-NEXT: vpmaxud %ymm1, %ymm0, %ymm0 |
2901 | | -; AVX2-FAST-PERLANE-NEXT: vpsubd %ymm1, %ymm0, %ymm0 |
2902 | | -; AVX2-FAST-PERLANE-NEXT: retq |
| 2849 | +; AVX2-LABEL: test33: |
| 2850 | +; AVX2: # %bb.0: |
| 2851 | +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm3 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] |
| 2852 | +; AVX2-NEXT: vpxor %ymm3, %ymm2, %ymm4 |
| 2853 | +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm5 = [9223372041149743103,9223372041149743103,9223372041149743103,9223372041149743103] |
| 2854 | +; AVX2-NEXT: vpcmpgtq %ymm4, %ymm5, %ymm4 |
| 2855 | +; AVX2-NEXT: vbroadcastsd {{.*#+}} ymm6 = [4294967295,4294967295,4294967295,4294967295] |
| 2856 | +; AVX2-NEXT: vblendvpd %ymm4, %ymm2, %ymm6, %ymm2 |
| 2857 | +; AVX2-NEXT: vpxor %ymm3, %ymm1, %ymm3 |
| 2858 | +; AVX2-NEXT: vpcmpgtq %ymm3, %ymm5, %ymm3 |
| 2859 | +; AVX2-NEXT: vblendvpd %ymm3, %ymm1, %ymm6, %ymm1 |
| 2860 | +; AVX2-NEXT: vshufps {{.*#+}} ymm1 = ymm1[0,2],ymm2[0,2],ymm1[4,6],ymm2[4,6] |
| 2861 | +; AVX2-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[0,2,1,3] |
| 2862 | +; AVX2-NEXT: vpmaxud %ymm1, %ymm0, %ymm0 |
| 2863 | +; AVX2-NEXT: vpsubd %ymm1, %ymm0, %ymm0 |
| 2864 | +; AVX2-NEXT: retq |
2903 | 2865 | ; |
2904 | 2866 | ; AVX512-LABEL: test33: |
2905 | 2867 | ; AVX512: # %bb.0: |
@@ -3070,66 +3032,24 @@ define <8 x i32> @test34(<8 x i32> %a0, <8 x i64> %a1) { |
3070 | 3032 | ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 |
3071 | 3033 | ; AVX1-NEXT: retq |
3072 | 3034 | ; |
3073 | | -; AVX2-SLOW-LABEL: test34: |
3074 | | -; AVX2-SLOW: # %bb.0: |
3075 | | -; AVX2-SLOW-NEXT: vpbroadcastd {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1] |
3076 | | -; AVX2-SLOW-NEXT: vpand %ymm3, %ymm0, %ymm0 |
3077 | | -; AVX2-SLOW-NEXT: vpbroadcastq {{.*#+}} ymm3 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] |
3078 | | -; AVX2-SLOW-NEXT: vpxor %ymm3, %ymm2, %ymm4 |
3079 | | -; AVX2-SLOW-NEXT: vpbroadcastq {{.*#+}} ymm5 = [9223372041149743103,9223372041149743103,9223372041149743103,9223372041149743103] |
3080 | | -; AVX2-SLOW-NEXT: vpcmpgtq %ymm4, %ymm5, %ymm4 |
3081 | | -; AVX2-SLOW-NEXT: vbroadcastsd {{.*#+}} ymm6 = [4294967295,4294967295,4294967295,4294967295] |
3082 | | -; AVX2-SLOW-NEXT: vblendvpd %ymm4, %ymm2, %ymm6, %ymm2 |
3083 | | -; AVX2-SLOW-NEXT: vpxor %ymm3, %ymm1, %ymm3 |
3084 | | -; AVX2-SLOW-NEXT: vpcmpgtq %ymm3, %ymm5, %ymm3 |
3085 | | -; AVX2-SLOW-NEXT: vblendvpd %ymm3, %ymm1, %ymm6, %ymm1 |
3086 | | -; AVX2-SLOW-NEXT: vperm2f128 {{.*#+}} ymm3 = ymm1[2,3],ymm2[2,3] |
3087 | | -; AVX2-SLOW-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 |
3088 | | -; AVX2-SLOW-NEXT: vshufps {{.*#+}} ymm1 = ymm1[0,2],ymm3[0,2],ymm1[4,6],ymm3[4,6] |
3089 | | -; AVX2-SLOW-NEXT: vpmaxud %ymm1, %ymm0, %ymm0 |
3090 | | -; AVX2-SLOW-NEXT: vpsubd %ymm1, %ymm0, %ymm0 |
3091 | | -; AVX2-SLOW-NEXT: retq |
3092 | | -; |
3093 | | -; AVX2-FAST-ALL-LABEL: test34: |
3094 | | -; AVX2-FAST-ALL: # %bb.0: |
3095 | | -; AVX2-FAST-ALL-NEXT: vpbroadcastd {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1] |
3096 | | -; AVX2-FAST-ALL-NEXT: vpand %ymm3, %ymm0, %ymm0 |
3097 | | -; AVX2-FAST-ALL-NEXT: vpbroadcastq {{.*#+}} ymm3 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] |
3098 | | -; AVX2-FAST-ALL-NEXT: vpxor %ymm3, %ymm1, %ymm4 |
3099 | | -; AVX2-FAST-ALL-NEXT: vpbroadcastq {{.*#+}} ymm5 = [9223372041149743103,9223372041149743103,9223372041149743103,9223372041149743103] |
3100 | | -; AVX2-FAST-ALL-NEXT: vpcmpgtq %ymm4, %ymm5, %ymm4 |
3101 | | -; AVX2-FAST-ALL-NEXT: vbroadcastsd {{.*#+}} ymm6 = [4294967295,4294967295,4294967295,4294967295] |
3102 | | -; AVX2-FAST-ALL-NEXT: vblendvpd %ymm4, %ymm1, %ymm6, %ymm1 |
3103 | | -; AVX2-FAST-ALL-NEXT: vmovapd {{.*#+}} ymm4 = [0,2,4,6,4,6,6,7] |
3104 | | -; AVX2-FAST-ALL-NEXT: vpermps %ymm1, %ymm4, %ymm1 |
3105 | | -; AVX2-FAST-ALL-NEXT: vpxor %ymm3, %ymm2, %ymm3 |
3106 | | -; AVX2-FAST-ALL-NEXT: vpcmpgtq %ymm3, %ymm5, %ymm3 |
3107 | | -; AVX2-FAST-ALL-NEXT: vblendvpd %ymm3, %ymm2, %ymm6, %ymm2 |
3108 | | -; AVX2-FAST-ALL-NEXT: vpermps %ymm2, %ymm4, %ymm2 |
3109 | | -; AVX2-FAST-ALL-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 |
3110 | | -; AVX2-FAST-ALL-NEXT: vpmaxud %ymm1, %ymm0, %ymm0 |
3111 | | -; AVX2-FAST-ALL-NEXT: vpsubd %ymm1, %ymm0, %ymm0 |
3112 | | -; AVX2-FAST-ALL-NEXT: retq |
3113 | | -; |
3114 | | -; AVX2-FAST-PERLANE-LABEL: test34: |
3115 | | -; AVX2-FAST-PERLANE: # %bb.0: |
3116 | | -; AVX2-FAST-PERLANE-NEXT: vpbroadcastd {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1] |
3117 | | -; AVX2-FAST-PERLANE-NEXT: vpand %ymm3, %ymm0, %ymm0 |
3118 | | -; AVX2-FAST-PERLANE-NEXT: vpbroadcastq {{.*#+}} ymm3 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] |
3119 | | -; AVX2-FAST-PERLANE-NEXT: vpxor %ymm3, %ymm2, %ymm4 |
3120 | | -; AVX2-FAST-PERLANE-NEXT: vpbroadcastq {{.*#+}} ymm5 = [9223372041149743103,9223372041149743103,9223372041149743103,9223372041149743103] |
3121 | | -; AVX2-FAST-PERLANE-NEXT: vpcmpgtq %ymm4, %ymm5, %ymm4 |
3122 | | -; AVX2-FAST-PERLANE-NEXT: vbroadcastsd {{.*#+}} ymm6 = [4294967295,4294967295,4294967295,4294967295] |
3123 | | -; AVX2-FAST-PERLANE-NEXT: vblendvpd %ymm4, %ymm2, %ymm6, %ymm2 |
3124 | | -; AVX2-FAST-PERLANE-NEXT: vpxor %ymm3, %ymm1, %ymm3 |
3125 | | -; AVX2-FAST-PERLANE-NEXT: vpcmpgtq %ymm3, %ymm5, %ymm3 |
3126 | | -; AVX2-FAST-PERLANE-NEXT: vblendvpd %ymm3, %ymm1, %ymm6, %ymm1 |
3127 | | -; AVX2-FAST-PERLANE-NEXT: vperm2f128 {{.*#+}} ymm3 = ymm1[2,3],ymm2[2,3] |
3128 | | -; AVX2-FAST-PERLANE-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 |
3129 | | -; AVX2-FAST-PERLANE-NEXT: vshufps {{.*#+}} ymm1 = ymm1[0,2],ymm3[0,2],ymm1[4,6],ymm3[4,6] |
3130 | | -; AVX2-FAST-PERLANE-NEXT: vpmaxud %ymm1, %ymm0, %ymm0 |
3131 | | -; AVX2-FAST-PERLANE-NEXT: vpsubd %ymm1, %ymm0, %ymm0 |
3132 | | -; AVX2-FAST-PERLANE-NEXT: retq |
| 3035 | +; AVX2-LABEL: test34: |
| 3036 | +; AVX2: # %bb.0: |
| 3037 | +; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1] |
| 3038 | +; AVX2-NEXT: vpand %ymm3, %ymm0, %ymm0 |
| 3039 | +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm3 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] |
| 3040 | +; AVX2-NEXT: vpxor %ymm3, %ymm2, %ymm4 |
| 3041 | +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm5 = [9223372041149743103,9223372041149743103,9223372041149743103,9223372041149743103] |
| 3042 | +; AVX2-NEXT: vpcmpgtq %ymm4, %ymm5, %ymm4 |
| 3043 | +; AVX2-NEXT: vbroadcastsd {{.*#+}} ymm6 = [4294967295,4294967295,4294967295,4294967295] |
| 3044 | +; AVX2-NEXT: vblendvpd %ymm4, %ymm2, %ymm6, %ymm2 |
| 3045 | +; AVX2-NEXT: vpxor %ymm3, %ymm1, %ymm3 |
| 3046 | +; AVX2-NEXT: vpcmpgtq %ymm3, %ymm5, %ymm3 |
| 3047 | +; AVX2-NEXT: vblendvpd %ymm3, %ymm1, %ymm6, %ymm1 |
| 3048 | +; AVX2-NEXT: vshufps {{.*#+}} ymm1 = ymm1[0,2],ymm2[0,2],ymm1[4,6],ymm2[4,6] |
| 3049 | +; AVX2-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[0,2,1,3] |
| 3050 | +; AVX2-NEXT: vpmaxud %ymm1, %ymm0, %ymm0 |
| 3051 | +; AVX2-NEXT: vpsubd %ymm1, %ymm0, %ymm0 |
| 3052 | +; AVX2-NEXT: retq |
3133 | 3053 | ; |
3134 | 3054 | ; AVX512-LABEL: test34: |
3135 | 3055 | ; AVX512: # %bb.0: |
|
0 commit comments