Commit bec726f
authored
[X86] optimize ssse3 horizontal saturating add/sub (#169591)
Currently, LLVM fails to recognize a manual implementation of `phaddsw` (the SSSE3 horizontal saturating add):
https://godbolt.org/z/zozrssaWb
```llvm
declare <8 x i16> @llvm.x86.ssse3.phadd.sw.128(<8 x i16>, <8 x i16>)
declare <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16>, <8 x i16>)
define <8 x i16> @phaddsw_v8i16_intrinsic(<8 x i16> %a, <8 x i16> %b) {
entry:
%res = call <8 x i16> @llvm.x86.ssse3.phadd.sw.128(<8 x i16> %a, <8 x i16> %b)
ret <8 x i16> %res
}
define <8 x i16> @phaddsw_v8i16_generic(<8 x i16> %a, <8 x i16> %b) {
entry:
%even = shufflevector <8 x i16> %a, <8 x i16> %b,
<8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
%odd = shufflevector <8 x i16> %a, <8 x i16> %b,
<8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
%sum = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %even, <8 x i16> %odd)
ret <8 x i16> %sum
}
```
```asm
phaddsw_v8i16_intrinsic: # @phaddsw_v8i16_intrinsic
phaddsw xmm0, xmm1
ret
phaddsw_v8i16_generic: # @phaddsw_v8i16_generic
movdqa xmm2, xmmword ptr [rip + .LCPI1_0] # xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
movdqa xmm3, xmm1
pshufb xmm3, xmm2
movdqa xmm4, xmm0
pshufb xmm4, xmm2
punpcklqdq xmm4, xmm3 # xmm4 = xmm4[0],xmm3[0]
psrad xmm1, 16
psrad xmm0, 16
packssdw xmm0, xmm1
paddsw xmm0, xmm4
ret
```
With this PR, LLVM recognizes the pattern.

1 parent 0dbedd1 commit bec726f
File tree
6 files changed
+150
-24
lines changed
- llvm
- lib/Target/X86
- test/CodeGen/X86
6 files changed
+150
-24
lines changed

| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
2668 | 2668 | | |
2669 | 2669 | | |
2670 | 2670 | | |
| 2671 | + | |
| 2672 | + | |
2671 | 2673 | | |
2672 | 2674 | | |
2673 | 2675 | | |
| |||
8151 | 8153 | | |
8152 | 8154 | | |
8153 | 8155 | | |
| 8156 | + | |
| 8157 | + | |
8154 | 8158 | | |
8155 | 8159 | | |
8156 | 8160 | | |
| |||
35121 | 35125 | | |
35122 | 35126 | | |
35123 | 35127 | | |
| 35128 | + | |
| 35129 | + | |
35124 | 35130 | | |
35125 | 35131 | | |
35126 | 35132 | | |
| |||
40897 | 40903 | | |
40898 | 40904 | | |
40899 | 40905 | | |
40900 | | - | |
40901 | | - | |
| 40906 | + | |
| 40907 | + | |
| 40908 | + | |
40902 | 40909 | | |
40903 | 40910 | | |
40904 | 40911 | | |
| |||
54231 | 54238 | | |
54232 | 54239 | | |
54233 | 54240 | | |
54234 | | - | |
| 54241 | + | |
| 54242 | + | |
| 54243 | + | |
54235 | 54244 | | |
54236 | 54245 | | |
54237 | 54246 | | |
| |||
54261 | 54270 | | |
54262 | 54271 | | |
54263 | 54272 | | |
54264 | | - | |
54265 | | - | |
| 54273 | + | |
| 54274 | + | |
| 54275 | + | |
| 54276 | + | |
| 54277 | + | |
| 54278 | + | |
| 54279 | + | |
54266 | 54280 | | |
54267 | 54281 | | |
54268 | | - | |
| 54282 | + | |
| 54283 | + | |
54269 | 54284 | | |
54270 | 54285 | | |
54271 | 54286 | | |
| |||
61052 | 61067 | | |
61053 | 61068 | | |
61054 | 61069 | | |
| 61070 | + | |
| 61071 | + | |
61055 | 61072 | | |
61056 | 61073 | | |
61057 | 61074 | | |
| |||
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
270 | 270 | | |
271 | 271 | | |
272 | 272 | | |
| 273 | + | |
| 274 | + | |
| 275 | + | |
| 276 | + | |
273 | 277 | | |
274 | 278 | | |
275 | 279 | | |
| |||
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
71 | 71 | | |
72 | 72 | | |
73 | 73 | | |
| 74 | + | |
| 75 | + | |
74 | 76 | | |
75 | 77 | | |
76 | 78 | | |
| |||
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
4864 | 4864 | | |
4865 | 4865 | | |
4866 | 4866 | | |
4867 | | - | |
4868 | | - | |
4869 | | - | |
4870 | | - | |
4871 | | - | |
4872 | | - | |
| 4867 | + | |
| 4868 | + | |
| 4869 | + | |
| 4870 | + | |
| 4871 | + | |
| 4872 | + | |
4873 | 4873 | | |
4874 | 4874 | | |
4875 | 4875 | | |
| |||
4907 | 4907 | | |
4908 | 4908 | | |
4909 | 4909 | | |
4910 | | - | |
4911 | | - | |
4912 | | - | |
4913 | | - | |
4914 | | - | |
4915 | | - | |
| 4910 | + | |
| 4911 | + | |
| 4912 | + | |
| 4913 | + | |
| 4914 | + | |
| 4915 | + | |
4916 | 4916 | | |
4917 | 4917 | | |
4918 | 4918 | | |
| |||
4935 | 4935 | | |
4936 | 4936 | | |
4937 | 4937 | | |
4938 | | - | |
4939 | | - | |
4940 | | - | |
4941 | | - | |
4942 | | - | |
4943 | | - | |
| 4938 | + | |
| 4939 | + | |
| 4940 | + | |
| 4941 | + | |
4944 | 4942 | | |
4945 | 4943 | | |
4946 | 4944 | | |
| |||
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
724 | 724 | | |
725 | 725 | | |
726 | 726 | | |
| 727 | + | |
727 | 728 | | |
728 | 729 | | |
| 730 | + | |
729 | 731 | | |
730 | 732 | | |
731 | 733 | | |
| |||
2017 | 2019 | | |
2018 | 2020 | | |
2019 | 2021 | | |
| 2022 | + | |
2020 | 2023 | | |
2021 | 2024 | | |
2022 | 2025 | | |
2023 | 2026 | | |
2024 | 2027 | | |
| 2028 | + | |
2025 | 2029 | | |
2026 | 2030 | | |
2027 | 2031 | | |
| |||
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
| 1 | + | |
| 2 | + | |
| 3 | + | |
| 4 | + | |
| 5 | + | |
| 6 | + | |
| 7 | + | |
| 8 | + | |
| 9 | + | |
| 10 | + | |
| 11 | + | |
| 12 | + | |
| 13 | + | |
| 14 | + | |
| 15 | + | |
| 16 | + | |
| 17 | + | |
| 18 | + | |
| 19 | + | |
| 20 | + | |
| 21 | + | |
| 22 | + | |
| 23 | + | |
| 24 | + | |
| 25 | + | |
| 26 | + | |
| 27 | + | |
| 28 | + | |
| 29 | + | |
| 30 | + | |
| 31 | + | |
| 32 | + | |
| 33 | + | |
| 34 | + | |
| 35 | + | |
| 36 | + | |
| 37 | + | |
| 38 | + | |
| 39 | + | |
| 40 | + | |
| 41 | + | |
| 42 | + | |
| 43 | + | |
| 44 | + | |
| 45 | + | |
| 46 | + | |
| 47 | + | |
| 48 | + | |
| 49 | + | |
| 50 | + | |
| 51 | + | |
| 52 | + | |
| 53 | + | |
| 54 | + | |
| 55 | + | |
| 56 | + | |
| 57 | + | |
| 58 | + | |
| 59 | + | |
| 60 | + | |
| 61 | + | |
| 62 | + | |
| 63 | + | |
| 64 | + | |
| 65 | + | |
| 66 | + | |
| 67 | + | |
| 68 | + | |
| 69 | + | |
| 70 | + | |
| 71 | + | |
| 72 | + | |
| 73 | + | |
| 74 | + | |
| 75 | + | |
| 76 | + | |
| 77 | + | |
| 78 | + | |
| 79 | + | |
| 80 | + | |
| 81 | + | |
| 82 | + | |
| 83 | + | |
| 84 | + | |
| 85 | + | |
| 86 | + | |
| 87 | + | |
| 88 | + | |
| 89 | + | |
| 90 | + | |
| 91 | + | |
| 92 | + | |
| 93 | + | |
| 94 | + | |
| 95 | + | |
| 96 | + | |
| 97 | + | |
| 98 | + | |
| 99 | + | |
| 100 | + | |
| 101 | + | |
0 commit comments