|
4 | 4 | ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx | FileCheck %s --check-prefixes=AVX,AVX1 |
5 | 5 | ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx2 | FileCheck %s --check-prefixes=AVX,AVX2 |
6 | 6 | ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512f | FileCheck %s --check-prefixes=AVX512,AVX512F |
7 | | -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512vl | FileCheck %s --check-prefixes=AVX512VL,AVX512FVL |
| 7 | +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512vl | FileCheck %s --check-prefixes=AVX512FVL |
8 | 8 | ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512BW |
9 | | -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512vl,avx512bw | FileCheck %s --check-prefixes=AVX512VL,AVX512BWVL |
| 9 | +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512vl,avx512bw | FileCheck %s --check-prefixes=AVX512BWVL |
10 | 10 |
|
11 | 11 | define void @truncstore_v8i64_v8i32(<8 x i64> %x, ptr %p, <8 x i32> %mask) { |
12 | 12 | ; SSE2-LABEL: truncstore_v8i64_v8i32: |
@@ -350,14 +350,21 @@ define void @truncstore_v8i64_v8i32(<8 x i64> %x, ptr %p, <8 x i32> %mask) { |
350 | 350 | ; AVX512-NEXT: vzeroupper |
351 | 351 | ; AVX512-NEXT: retq |
352 | 352 | ; |
353 | | -; AVX512VL-LABEL: truncstore_v8i64_v8i32: |
354 | | -; AVX512VL: # %bb.0: |
355 | | -; AVX512VL-NEXT: vptestmd %ymm1, %ymm1, %k1 |
356 | | -; AVX512VL-NEXT: vpminsq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0 |
357 | | -; AVX512VL-NEXT: vpmaxsq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0 |
358 | | -; AVX512VL-NEXT: vpmovqd %zmm0, (%rdi) {%k1} |
359 | | -; AVX512VL-NEXT: vzeroupper |
360 | | -; AVX512VL-NEXT: retq |
| 353 | +; AVX512FVL-LABEL: truncstore_v8i64_v8i32: |
| 354 | +; AVX512FVL: # %bb.0: |
| 355 | +; AVX512FVL-NEXT: vptestmd %ymm1, %ymm1, %k1 |
| 356 | +; AVX512FVL-NEXT: vpminsq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0 |
| 357 | +; AVX512FVL-NEXT: vpmaxsq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0 |
| 358 | +; AVX512FVL-NEXT: vpmovqd %zmm0, (%rdi) {%k1} |
| 359 | +; AVX512FVL-NEXT: vzeroupper |
| 360 | +; AVX512FVL-NEXT: retq |
| 361 | +; |
| 362 | +; AVX512BWVL-LABEL: truncstore_v8i64_v8i32: |
| 363 | +; AVX512BWVL: # %bb.0: |
| 364 | +; AVX512BWVL-NEXT: vptestmd %ymm1, %ymm1, %k1 |
| 365 | +; AVX512BWVL-NEXT: vpmovsqd %zmm0, (%rdi) {%k1} |
| 366 | +; AVX512BWVL-NEXT: vzeroupper |
| 367 | +; AVX512BWVL-NEXT: retq |
361 | 368 | %a = icmp ne <8 x i32> %mask, zeroinitializer |
362 | 369 | %b = icmp slt <8 x i64> %x, <i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647> |
363 | 370 | %c = select <8 x i1> %b, <8 x i64> %x, <8 x i64> <i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647> |
@@ -964,9 +971,7 @@ define void @truncstore_v8i64_v8i16(<8 x i64> %x, ptr %p, <8 x i32> %mask) { |
964 | 971 | ; AVX512BWVL-LABEL: truncstore_v8i64_v8i16: |
965 | 972 | ; AVX512BWVL: # %bb.0: |
966 | 973 | ; AVX512BWVL-NEXT: vptestmd %ymm1, %ymm1, %k1 |
967 | | -; AVX512BWVL-NEXT: vpminsq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0 |
968 | | -; AVX512BWVL-NEXT: vpmaxsq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0 |
969 | | -; AVX512BWVL-NEXT: vpmovqw %zmm0, (%rdi) {%k1} |
| 974 | +; AVX512BWVL-NEXT: vpmovsqw %zmm0, (%rdi) {%k1} |
970 | 975 | ; AVX512BWVL-NEXT: vzeroupper |
971 | 976 | ; AVX512BWVL-NEXT: retq |
972 | 977 | %a = icmp ne <8 x i32> %mask, zeroinitializer |
@@ -1572,9 +1577,7 @@ define void @truncstore_v8i64_v8i8(<8 x i64> %x, ptr %p, <8 x i32> %mask) { |
1572 | 1577 | ; AVX512BWVL-LABEL: truncstore_v8i64_v8i8: |
1573 | 1578 | ; AVX512BWVL: # %bb.0: |
1574 | 1579 | ; AVX512BWVL-NEXT: vptestmd %ymm1, %ymm1, %k1 |
1575 | | -; AVX512BWVL-NEXT: vpminsq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0 |
1576 | | -; AVX512BWVL-NEXT: vpmaxsq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0 |
1577 | | -; AVX512BWVL-NEXT: vpmovqb %zmm0, (%rdi) {%k1} |
| 1580 | +; AVX512BWVL-NEXT: vpmovsqb %zmm0, (%rdi) {%k1} |
1578 | 1581 | ; AVX512BWVL-NEXT: vzeroupper |
1579 | 1582 | ; AVX512BWVL-NEXT: retq |
1580 | 1583 | %a = icmp ne <8 x i32> %mask, zeroinitializer |
@@ -1788,14 +1791,21 @@ define void @truncstore_v4i64_v4i32(<4 x i64> %x, ptr %p, <4 x i32> %mask) { |
1788 | 1791 | ; AVX512-NEXT: vzeroupper |
1789 | 1792 | ; AVX512-NEXT: retq |
1790 | 1793 | ; |
1791 | | -; AVX512VL-LABEL: truncstore_v4i64_v4i32: |
1792 | | -; AVX512VL: # %bb.0: |
1793 | | -; AVX512VL-NEXT: vptestmd %xmm1, %xmm1, %k1 |
1794 | | -; AVX512VL-NEXT: vpminsq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %ymm0 |
1795 | | -; AVX512VL-NEXT: vpmaxsq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %ymm0 |
1796 | | -; AVX512VL-NEXT: vpmovqd %ymm0, (%rdi) {%k1} |
1797 | | -; AVX512VL-NEXT: vzeroupper |
1798 | | -; AVX512VL-NEXT: retq |
| 1794 | +; AVX512FVL-LABEL: truncstore_v4i64_v4i32: |
| 1795 | +; AVX512FVL: # %bb.0: |
| 1796 | +; AVX512FVL-NEXT: vptestmd %xmm1, %xmm1, %k1 |
| 1797 | +; AVX512FVL-NEXT: vpminsq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %ymm0 |
| 1798 | +; AVX512FVL-NEXT: vpmaxsq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %ymm0 |
| 1799 | +; AVX512FVL-NEXT: vpmovqd %ymm0, (%rdi) {%k1} |
| 1800 | +; AVX512FVL-NEXT: vzeroupper |
| 1801 | +; AVX512FVL-NEXT: retq |
| 1802 | +; |
| 1803 | +; AVX512BWVL-LABEL: truncstore_v4i64_v4i32: |
| 1804 | +; AVX512BWVL: # %bb.0: |
| 1805 | +; AVX512BWVL-NEXT: vptestmd %xmm1, %xmm1, %k1 |
| 1806 | +; AVX512BWVL-NEXT: vpmovsqd %ymm0, (%rdi) {%k1} |
| 1807 | +; AVX512BWVL-NEXT: vzeroupper |
| 1808 | +; AVX512BWVL-NEXT: retq |
1799 | 1809 | %a = icmp ne <4 x i32> %mask, zeroinitializer |
1800 | 1810 | %b = icmp slt <4 x i64> %x, <i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647> |
1801 | 1811 | %c = select <4 x i1> %b, <4 x i64> %x, <4 x i64> <i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647> |
@@ -2141,9 +2151,7 @@ define void @truncstore_v4i64_v4i16(<4 x i64> %x, ptr %p, <4 x i32> %mask) { |
2141 | 2151 | ; AVX512BWVL-LABEL: truncstore_v4i64_v4i16: |
2142 | 2152 | ; AVX512BWVL: # %bb.0: |
2143 | 2153 | ; AVX512BWVL-NEXT: vptestmd %xmm1, %xmm1, %k1 |
2144 | | -; AVX512BWVL-NEXT: vpminsq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %ymm0 |
2145 | | -; AVX512BWVL-NEXT: vpmaxsq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %ymm0 |
2146 | | -; AVX512BWVL-NEXT: vpmovqw %ymm0, (%rdi) {%k1} |
| 2154 | +; AVX512BWVL-NEXT: vpmovsqw %ymm0, (%rdi) {%k1} |
2147 | 2155 | ; AVX512BWVL-NEXT: vzeroupper |
2148 | 2156 | ; AVX512BWVL-NEXT: retq |
2149 | 2157 | %a = icmp ne <4 x i32> %mask, zeroinitializer |
@@ -2495,9 +2503,7 @@ define void @truncstore_v4i64_v4i8(<4 x i64> %x, ptr %p, <4 x i32> %mask) { |
2495 | 2503 | ; AVX512BWVL-LABEL: truncstore_v4i64_v4i8: |
2496 | 2504 | ; AVX512BWVL: # %bb.0: |
2497 | 2505 | ; AVX512BWVL-NEXT: vptestmd %xmm1, %xmm1, %k1 |
2498 | | -; AVX512BWVL-NEXT: vpminsq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %ymm0 |
2499 | | -; AVX512BWVL-NEXT: vpmaxsq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %ymm0 |
2500 | | -; AVX512BWVL-NEXT: vpmovqb %ymm0, (%rdi) {%k1} |
| 2506 | +; AVX512BWVL-NEXT: vpmovsqb %ymm0, (%rdi) {%k1} |
2501 | 2507 | ; AVX512BWVL-NEXT: vzeroupper |
2502 | 2508 | ; AVX512BWVL-NEXT: retq |
2503 | 2509 | %a = icmp ne <4 x i32> %mask, zeroinitializer |
@@ -2641,13 +2647,19 @@ define void @truncstore_v2i64_v2i32(<2 x i64> %x, ptr %p, <2 x i64> %mask) { |
2641 | 2647 | ; AVX512-NEXT: vzeroupper |
2642 | 2648 | ; AVX512-NEXT: retq |
2643 | 2649 | ; |
2644 | | -; AVX512VL-LABEL: truncstore_v2i64_v2i32: |
2645 | | -; AVX512VL: # %bb.0: |
2646 | | -; AVX512VL-NEXT: vptestmq %xmm1, %xmm1, %k1 |
2647 | | -; AVX512VL-NEXT: vpminsq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0 |
2648 | | -; AVX512VL-NEXT: vpmaxsq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0 |
2649 | | -; AVX512VL-NEXT: vpmovqd %xmm0, (%rdi) {%k1} |
2650 | | -; AVX512VL-NEXT: retq |
| 2650 | +; AVX512FVL-LABEL: truncstore_v2i64_v2i32: |
| 2651 | +; AVX512FVL: # %bb.0: |
| 2652 | +; AVX512FVL-NEXT: vptestmq %xmm1, %xmm1, %k1 |
| 2653 | +; AVX512FVL-NEXT: vpminsq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0 |
| 2654 | +; AVX512FVL-NEXT: vpmaxsq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0 |
| 2655 | +; AVX512FVL-NEXT: vpmovqd %xmm0, (%rdi) {%k1} |
| 2656 | +; AVX512FVL-NEXT: retq |
| 2657 | +; |
| 2658 | +; AVX512BWVL-LABEL: truncstore_v2i64_v2i32: |
| 2659 | +; AVX512BWVL: # %bb.0: |
| 2660 | +; AVX512BWVL-NEXT: vptestmq %xmm1, %xmm1, %k1 |
| 2661 | +; AVX512BWVL-NEXT: vpmovsqd %xmm0, (%rdi) {%k1} |
| 2662 | +; AVX512BWVL-NEXT: retq |
2651 | 2663 | %a = icmp ne <2 x i64> %mask, zeroinitializer |
2652 | 2664 | %b = icmp slt <2 x i64> %x, <i64 2147483647, i64 2147483647> |
2653 | 2665 | %c = select <2 x i1> %b, <2 x i64> %x, <2 x i64> <i64 2147483647, i64 2147483647> |
@@ -2832,9 +2844,7 @@ define void @truncstore_v2i64_v2i16(<2 x i64> %x, ptr %p, <2 x i64> %mask) { |
2832 | 2844 | ; AVX512BWVL-LABEL: truncstore_v2i64_v2i16: |
2833 | 2845 | ; AVX512BWVL: # %bb.0: |
2834 | 2846 | ; AVX512BWVL-NEXT: vptestmq %xmm1, %xmm1, %k1 |
2835 | | -; AVX512BWVL-NEXT: vpminsq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0 |
2836 | | -; AVX512BWVL-NEXT: vpmaxsq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0 |
2837 | | -; AVX512BWVL-NEXT: vpmovqw %xmm0, (%rdi) {%k1} |
| 2847 | +; AVX512BWVL-NEXT: vpmovsqw %xmm0, (%rdi) {%k1} |
2838 | 2848 | ; AVX512BWVL-NEXT: retq |
2839 | 2849 | %a = icmp ne <2 x i64> %mask, zeroinitializer |
2840 | 2850 | %b = icmp slt <2 x i64> %x, <i64 32767, i64 32767> |
@@ -3018,9 +3028,7 @@ define void @truncstore_v2i64_v2i8(<2 x i64> %x, ptr %p, <2 x i64> %mask) { |
3018 | 3028 | ; AVX512BWVL-LABEL: truncstore_v2i64_v2i8: |
3019 | 3029 | ; AVX512BWVL: # %bb.0: |
3020 | 3030 | ; AVX512BWVL-NEXT: vptestmq %xmm1, %xmm1, %k1 |
3021 | | -; AVX512BWVL-NEXT: vpminsq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0 |
3022 | | -; AVX512BWVL-NEXT: vpmaxsq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0 |
3023 | | -; AVX512BWVL-NEXT: vpmovqb %xmm0, (%rdi) {%k1} |
| 3031 | +; AVX512BWVL-NEXT: vpmovsqb %xmm0, (%rdi) {%k1} |
3024 | 3032 | ; AVX512BWVL-NEXT: retq |
3025 | 3033 | %a = icmp ne <2 x i64> %mask, zeroinitializer |
3026 | 3034 | %b = icmp slt <2 x i64> %x, <i64 127, i64 127> |
@@ -3816,9 +3824,7 @@ define void @truncstore_v16i32_v16i16(<16 x i32> %x, ptr %p, <16 x i32> %mask) { |
3816 | 3824 | ; AVX512BWVL-LABEL: truncstore_v16i32_v16i16: |
3817 | 3825 | ; AVX512BWVL: # %bb.0: |
3818 | 3826 | ; AVX512BWVL-NEXT: vptestmd %zmm1, %zmm1, %k1 |
3819 | | -; AVX512BWVL-NEXT: vpminsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0 |
3820 | | -; AVX512BWVL-NEXT: vpmaxsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0 |
3821 | | -; AVX512BWVL-NEXT: vpmovdw %zmm0, (%rdi) {%k1} |
| 3827 | +; AVX512BWVL-NEXT: vpmovsdw %zmm0, (%rdi) {%k1} |
3822 | 3828 | ; AVX512BWVL-NEXT: vzeroupper |
3823 | 3829 | ; AVX512BWVL-NEXT: retq |
3824 | 3830 | %a = icmp ne <16 x i32> %mask, zeroinitializer |
@@ -4594,9 +4600,7 @@ define void @truncstore_v16i32_v16i8(<16 x i32> %x, ptr %p, <16 x i32> %mask) { |
4594 | 4600 | ; AVX512BWVL-LABEL: truncstore_v16i32_v16i8: |
4595 | 4601 | ; AVX512BWVL: # %bb.0: |
4596 | 4602 | ; AVX512BWVL-NEXT: vptestmd %zmm1, %zmm1, %k1 |
4597 | | -; AVX512BWVL-NEXT: vpminsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0 |
4598 | | -; AVX512BWVL-NEXT: vpmaxsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0 |
4599 | | -; AVX512BWVL-NEXT: vpmovdb %zmm0, (%rdi) {%k1} |
| 4603 | +; AVX512BWVL-NEXT: vpmovsdb %zmm0, (%rdi) {%k1} |
4600 | 4604 | ; AVX512BWVL-NEXT: vzeroupper |
4601 | 4605 | ; AVX512BWVL-NEXT: retq |
4602 | 4606 | %a = icmp ne <16 x i32> %mask, zeroinitializer |
@@ -5034,9 +5038,7 @@ define void @truncstore_v8i32_v8i16(<8 x i32> %x, ptr %p, <8 x i32> %mask) { |
5034 | 5038 | ; AVX512BWVL-LABEL: truncstore_v8i32_v8i16: |
5035 | 5039 | ; AVX512BWVL: # %bb.0: |
5036 | 5040 | ; AVX512BWVL-NEXT: vptestmd %ymm1, %ymm1, %k1 |
5037 | | -; AVX512BWVL-NEXT: vpminsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0 |
5038 | | -; AVX512BWVL-NEXT: vpmaxsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0 |
5039 | | -; AVX512BWVL-NEXT: vpmovdw %ymm0, (%rdi) {%k1} |
| 5041 | +; AVX512BWVL-NEXT: vpmovsdw %ymm0, (%rdi) {%k1} |
5040 | 5042 | ; AVX512BWVL-NEXT: vzeroupper |
5041 | 5043 | ; AVX512BWVL-NEXT: retq |
5042 | 5044 | %a = icmp ne <8 x i32> %mask, zeroinitializer |
@@ -5473,9 +5475,7 @@ define void @truncstore_v8i32_v8i8(<8 x i32> %x, ptr %p, <8 x i32> %mask) { |
5473 | 5475 | ; AVX512BWVL-LABEL: truncstore_v8i32_v8i8: |
5474 | 5476 | ; AVX512BWVL: # %bb.0: |
5475 | 5477 | ; AVX512BWVL-NEXT: vptestmd %ymm1, %ymm1, %k1 |
5476 | | -; AVX512BWVL-NEXT: vpminsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0 |
5477 | | -; AVX512BWVL-NEXT: vpmaxsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0 |
5478 | | -; AVX512BWVL-NEXT: vpmovdb %ymm0, (%rdi) {%k1} |
| 5478 | +; AVX512BWVL-NEXT: vpmovsdb %ymm0, (%rdi) {%k1} |
5479 | 5479 | ; AVX512BWVL-NEXT: vzeroupper |
5480 | 5480 | ; AVX512BWVL-NEXT: retq |
5481 | 5481 | %a = icmp ne <8 x i32> %mask, zeroinitializer |
@@ -5686,9 +5686,7 @@ define void @truncstore_v4i32_v4i16(<4 x i32> %x, ptr %p, <4 x i32> %mask) { |
5686 | 5686 | ; AVX512BWVL-LABEL: truncstore_v4i32_v4i16: |
5687 | 5687 | ; AVX512BWVL: # %bb.0: |
5688 | 5688 | ; AVX512BWVL-NEXT: vptestmd %xmm1, %xmm1, %k1 |
5689 | | -; AVX512BWVL-NEXT: vpminsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0 |
5690 | | -; AVX512BWVL-NEXT: vpmaxsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0 |
5691 | | -; AVX512BWVL-NEXT: vpmovdw %xmm0, (%rdi) {%k1} |
| 5689 | +; AVX512BWVL-NEXT: vpmovsdw %xmm0, (%rdi) {%k1} |
5692 | 5690 | ; AVX512BWVL-NEXT: retq |
5693 | 5691 | %a = icmp ne <4 x i32> %mask, zeroinitializer |
5694 | 5692 | %b = icmp slt <4 x i32> %x, <i32 32767, i32 32767, i32 32767, i32 32767> |
@@ -5904,9 +5902,7 @@ define void @truncstore_v4i32_v4i8(<4 x i32> %x, ptr %p, <4 x i32> %mask) { |
5904 | 5902 | ; AVX512BWVL-LABEL: truncstore_v4i32_v4i8: |
5905 | 5903 | ; AVX512BWVL: # %bb.0: |
5906 | 5904 | ; AVX512BWVL-NEXT: vptestmd %xmm1, %xmm1, %k1 |
5907 | | -; AVX512BWVL-NEXT: vpminsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0 |
5908 | | -; AVX512BWVL-NEXT: vpmaxsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0 |
5909 | | -; AVX512BWVL-NEXT: vpmovdb %xmm0, (%rdi) {%k1} |
| 5905 | +; AVX512BWVL-NEXT: vpmovsdb %xmm0, (%rdi) {%k1} |
5910 | 5906 | ; AVX512BWVL-NEXT: retq |
5911 | 5907 | %a = icmp ne <4 x i32> %mask, zeroinitializer |
5912 | 5908 | %b = icmp slt <4 x i32> %x, <i32 127, i32 127, i32 127, i32 127> |
@@ -7332,9 +7328,7 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, ptr %p, <32 x i8> %mask) { |
7332 | 7328 | ; AVX512BWVL-LABEL: truncstore_v32i16_v32i8: |
7333 | 7329 | ; AVX512BWVL: # %bb.0: |
7334 | 7330 | ; AVX512BWVL-NEXT: vptestmb %ymm1, %ymm1, %k1 |
7335 | | -; AVX512BWVL-NEXT: vpminsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0 |
7336 | | -; AVX512BWVL-NEXT: vpmaxsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0 |
7337 | | -; AVX512BWVL-NEXT: vpmovwb %zmm0, (%rdi) {%k1} |
| 7331 | +; AVX512BWVL-NEXT: vpmovswb %zmm0, (%rdi) {%k1} |
7338 | 7332 | ; AVX512BWVL-NEXT: vzeroupper |
7339 | 7333 | ; AVX512BWVL-NEXT: retq |
7340 | 7334 | %a = icmp ne <32 x i8> %mask, zeroinitializer |
@@ -8083,9 +8077,7 @@ define void @truncstore_v16i16_v16i8(<16 x i16> %x, ptr %p, <16 x i8> %mask) { |
8083 | 8077 | ; AVX512BWVL-LABEL: truncstore_v16i16_v16i8: |
8084 | 8078 | ; AVX512BWVL: # %bb.0: |
8085 | 8079 | ; AVX512BWVL-NEXT: vptestmb %xmm1, %xmm1, %k1 |
8086 | | -; AVX512BWVL-NEXT: vpminsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 |
8087 | | -; AVX512BWVL-NEXT: vpmaxsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 |
8088 | | -; AVX512BWVL-NEXT: vpmovwb %ymm0, (%rdi) {%k1} |
| 8080 | +; AVX512BWVL-NEXT: vpmovswb %ymm0, (%rdi) {%k1} |
8089 | 8081 | ; AVX512BWVL-NEXT: vzeroupper |
8090 | 8082 | ; AVX512BWVL-NEXT: retq |
8091 | 8083 | %a = icmp ne <16 x i8> %mask, zeroinitializer |
@@ -8445,9 +8437,7 @@ define void @truncstore_v8i16_v8i8(<8 x i16> %x, ptr %p, <8 x i16> %mask) { |
8445 | 8437 | ; AVX512BWVL-LABEL: truncstore_v8i16_v8i8: |
8446 | 8438 | ; AVX512BWVL: # %bb.0: |
8447 | 8439 | ; AVX512BWVL-NEXT: vptestmw %xmm1, %xmm1, %k1 |
8448 | | -; AVX512BWVL-NEXT: vpminsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 |
8449 | | -; AVX512BWVL-NEXT: vpmaxsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 |
8450 | | -; AVX512BWVL-NEXT: vpmovwb %xmm0, (%rdi) {%k1} |
| 8440 | +; AVX512BWVL-NEXT: vpmovswb %xmm0, (%rdi) {%k1} |
8451 | 8441 | ; AVX512BWVL-NEXT: retq |
8452 | 8442 | %a = icmp ne <8 x i16> %mask, zeroinitializer |
8453 | 8443 | %b = icmp slt <8 x i16> %x, <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127> |
|
0 commit comments