|
1 | 1 | ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
2 | | -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=skylake-avx512 -mattr=prefer-256-bit | FileCheck %s --check-prefixes=CHECK,CHECK-SKX |
| 2 | +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=skylake-avx512 -mattr=prefer-256-bit | FileCheck %s --check-prefixes=CHECK,CHECK-SKX,CHECK-SKX-NOVBMI |
3 | 3 | ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=skylake-avx512 -mattr=prefer-256-bit,avx512vbmi | FileCheck %s --check-prefixes=CHECK,CHECK-SKX,CHECK-SKX-VBMI |
4 | 4 | ; Make sure CPUs default to prefer-256-bit. avx512vnni isn't interesting here, as it only adds an isel peephole for vpmaddwd+vpaddd. |
5 | 5 | ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=skylake-avx512 | FileCheck %s --check-prefixes=CHECK,CHECK-AVX512 |
@@ -883,6 +883,30 @@ define <16 x i16> @test_16f32tosb_512(ptr %ptr, <16 x i16> %passthru) "min-legal |
883 | 883 | } |
884 | 884 |
|
885 | 885 | define dso_local void @mul256(ptr %a, ptr %b, ptr %c) "min-legal-vector-width"="256" { |
| 886 | +; CHECK-SKX-NOVBMI-LABEL: mul256: |
| 887 | +; CHECK-SKX-NOVBMI: # %bb.0: |
| 888 | +; CHECK-SKX-NOVBMI-NEXT: vmovdqa (%rdi), %ymm0 |
| 889 | +; CHECK-SKX-NOVBMI-NEXT: vmovdqa 32(%rdi), %ymm1 |
| 890 | +; CHECK-SKX-NOVBMI-NEXT: vmovdqa (%rsi), %ymm2 |
| 891 | +; CHECK-SKX-NOVBMI-NEXT: vmovdqa 32(%rsi), %ymm3 |
| 892 | +; CHECK-SKX-NOVBMI-NEXT: vpbroadcastd {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] |
| 893 | +; CHECK-SKX-NOVBMI-NEXT: vpand %ymm4, %ymm3, %ymm5 |
| 894 | +; CHECK-SKX-NOVBMI-NEXT: vpmaddubsw %ymm5, %ymm1, %ymm5 |
| 895 | +; CHECK-SKX-NOVBMI-NEXT: vpandn %ymm3, %ymm4, %ymm3 |
| 896 | +; CHECK-SKX-NOVBMI-NEXT: vpmaddubsw %ymm3, %ymm1, %ymm1 |
| 897 | +; CHECK-SKX-NOVBMI-NEXT: vpsllw $8, %ymm1, %ymm1 |
| 898 | +; CHECK-SKX-NOVBMI-NEXT: vpternlogq {{.*#+}} ymm1 = ymm1 | (ymm5 & ymm4) |
| 899 | +; CHECK-SKX-NOVBMI-NEXT: vpand %ymm4, %ymm2, %ymm3 |
| 900 | +; CHECK-SKX-NOVBMI-NEXT: vpmaddubsw %ymm3, %ymm0, %ymm3 |
| 901 | +; CHECK-SKX-NOVBMI-NEXT: vpandn %ymm2, %ymm4, %ymm2 |
| 902 | +; CHECK-SKX-NOVBMI-NEXT: vpmaddubsw %ymm2, %ymm0, %ymm0 |
| 903 | +; CHECK-SKX-NOVBMI-NEXT: vpsllw $8, %ymm0, %ymm0 |
| 904 | +; CHECK-SKX-NOVBMI-NEXT: vpternlogq {{.*#+}} ymm0 = ymm0 | (ymm3 & ymm4) |
| 905 | +; CHECK-SKX-NOVBMI-NEXT: vmovdqa %ymm0, (%rdx) |
| 906 | +; CHECK-SKX-NOVBMI-NEXT: vmovdqa %ymm1, 32(%rdx) |
| 907 | +; CHECK-SKX-NOVBMI-NEXT: vzeroupper |
| 908 | +; CHECK-SKX-NOVBMI-NEXT: retq |
| 909 | +; |
886 | 910 | ; CHECK-SKX-VBMI-LABEL: mul256: |
887 | 911 | ; CHECK-SKX-VBMI: # %bb.0: |
888 | 912 | ; CHECK-SKX-VBMI-NEXT: vmovdqa (%rdi), %ymm0 |
@@ -960,6 +984,21 @@ define dso_local void @mul256(ptr %a, ptr %b, ptr %c) "min-legal-vector-width"=" |
960 | 984 | } |
961 | 985 |
|
962 | 986 | define dso_local void @mul512(ptr %a, ptr %b, ptr %c) "min-legal-vector-width"="512" { |
| 987 | +; CHECK-SKX-NOVBMI-LABEL: mul512: |
| 988 | +; CHECK-SKX-NOVBMI: # %bb.0: |
| 989 | +; CHECK-SKX-NOVBMI-NEXT: vmovdqa64 (%rdi), %zmm0 |
| 990 | +; CHECK-SKX-NOVBMI-NEXT: vmovdqa64 (%rsi), %zmm1 |
| 991 | +; CHECK-SKX-NOVBMI-NEXT: vpbroadcastd {{.*#+}} zmm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] |
| 992 | +; CHECK-SKX-NOVBMI-NEXT: vpandq %zmm2, %zmm1, %zmm3 |
| 993 | +; CHECK-SKX-NOVBMI-NEXT: vpmaddubsw %zmm3, %zmm0, %zmm3 |
| 994 | +; CHECK-SKX-NOVBMI-NEXT: vpandnq %zmm1, %zmm2, %zmm1 |
| 995 | +; CHECK-SKX-NOVBMI-NEXT: vpmaddubsw %zmm1, %zmm0, %zmm0 |
| 996 | +; CHECK-SKX-NOVBMI-NEXT: vpsllw $8, %zmm0, %zmm0 |
| 997 | +; CHECK-SKX-NOVBMI-NEXT: vpternlogq {{.*#+}} zmm0 = zmm0 | (zmm3 & zmm2) |
| 998 | +; CHECK-SKX-NOVBMI-NEXT: vmovdqa64 %zmm0, (%rdx) |
| 999 | +; CHECK-SKX-NOVBMI-NEXT: vzeroupper |
| 1000 | +; CHECK-SKX-NOVBMI-NEXT: retq |
| 1001 | +; |
963 | 1002 | ; CHECK-SKX-VBMI-LABEL: mul512: |
964 | 1003 | ; CHECK-SKX-VBMI: # %bb.0: |
965 | 1004 | ; CHECK-SKX-VBMI-NEXT: vmovdqa64 (%rdi), %zmm0 |
@@ -1137,6 +1176,14 @@ define <16 x i16> @trunc_v16i32_v16i16_zeroes(ptr %x) nounwind "min-legal-vector |
1137 | 1176 | } |
1138 | 1177 |
|
1139 | 1178 | define <32 x i8> @trunc_v32i16_v32i8_zeroes(ptr %x) nounwind "min-legal-vector-width"="256" { |
| 1179 | +; CHECK-SKX-NOVBMI-LABEL: trunc_v32i16_v32i8_zeroes: |
| 1180 | +; CHECK-SKX-NOVBMI: # %bb.0: |
| 1181 | +; CHECK-SKX-NOVBMI-NEXT: vpsrlw $8, 32(%rdi), %ymm0 |
| 1182 | +; CHECK-SKX-NOVBMI-NEXT: vpsrlw $8, (%rdi), %ymm1 |
| 1183 | +; CHECK-SKX-NOVBMI-NEXT: vpackuswb %ymm0, %ymm1, %ymm0 |
| 1184 | +; CHECK-SKX-NOVBMI-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] |
| 1185 | +; CHECK-SKX-NOVBMI-NEXT: retq |
| 1186 | +; |
1140 | 1187 | ; CHECK-SKX-VBMI-LABEL: trunc_v32i16_v32i8_zeroes: |
1141 | 1188 | ; CHECK-SKX-VBMI: # %bb.0: |
1142 | 1189 | ; CHECK-SKX-VBMI-NEXT: vmovdqa (%rdi), %ymm1 |
@@ -1192,6 +1239,14 @@ define <16 x i16> @trunc_v16i32_v16i16_sign(ptr %x) nounwind "min-legal-vector-w |
1192 | 1239 | } |
1193 | 1240 |
|
1194 | 1241 | define <32 x i8> @trunc_v32i16_v32i8_sign(ptr %x) nounwind "min-legal-vector-width"="256" { |
| 1242 | +; CHECK-SKX-NOVBMI-LABEL: trunc_v32i16_v32i8_sign: |
| 1243 | +; CHECK-SKX-NOVBMI: # %bb.0: |
| 1244 | +; CHECK-SKX-NOVBMI-NEXT: vpsrlw $8, 32(%rdi), %ymm0 |
| 1245 | +; CHECK-SKX-NOVBMI-NEXT: vpsrlw $8, (%rdi), %ymm1 |
| 1246 | +; CHECK-SKX-NOVBMI-NEXT: vpackuswb %ymm0, %ymm1, %ymm0 |
| 1247 | +; CHECK-SKX-NOVBMI-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] |
| 1248 | +; CHECK-SKX-NOVBMI-NEXT: retq |
| 1249 | +; |
1195 | 1250 | ; CHECK-SKX-VBMI-LABEL: trunc_v32i16_v32i8_sign: |
1196 | 1251 | ; CHECK-SKX-VBMI: # %bb.0: |
1197 | 1252 | ; CHECK-SKX-VBMI-NEXT: vmovdqa (%rdi), %ymm1 |
|
0 commit comments