|
14 | 14 |
|
15 | 15 | #define _CLC_UNARY_VECTORIZE(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE) \ |
16 | 16 | DECLSPEC RET_TYPE##2 FUNCTION(ARG1_TYPE##2 x) { \ |
17 | | - return (RET_TYPE##2)(FUNCTION(x.x), FUNCTION(x.y)); \ |
| 17 | + return (RET_TYPE##2)(FUNCTION(x.s0), FUNCTION(x.s1)); \ |
18 | 18 | } \ |
19 | 19 | \ |
20 | 20 | DECLSPEC RET_TYPE##3 FUNCTION(ARG1_TYPE##3 x) { \ |
21 | | - return (RET_TYPE##3)(FUNCTION(x.x), FUNCTION(x.y), FUNCTION(x.z)); \ |
| 21 | + return (RET_TYPE##3)(FUNCTION(x.s0), FUNCTION(x.s1), FUNCTION(x.s2)); \ |
22 | 22 | } \ |
23 | 23 | \ |
24 | 24 | DECLSPEC RET_TYPE##4 FUNCTION(ARG1_TYPE##4 x) { \ |
25 | | - return (RET_TYPE##4)(FUNCTION(x.lo), FUNCTION(x.hi)); \ |
| 25 | + return (RET_TYPE##4)(FUNCTION(x.s0), FUNCTION(x.s1), FUNCTION(x.s2), \ |
| 26 | + FUNCTION(x.s3)); \ |
26 | 27 | } \ |
27 | 28 | \ |
28 | 29 | DECLSPEC RET_TYPE##8 FUNCTION(ARG1_TYPE##8 x) { \ |
29 | | - return (RET_TYPE##8)(FUNCTION(x.lo), FUNCTION(x.hi)); \ |
| 30 | + return (RET_TYPE##8)(FUNCTION(x.s0), FUNCTION(x.s1), FUNCTION(x.s2), \ |
| 31 | + FUNCTION(x.s3), FUNCTION(x.s4), FUNCTION(x.s5), \ |
| 32 | + FUNCTION(x.s6), FUNCTION(x.s7)); \ |
30 | 33 | } \ |
31 | 34 | \ |
32 | 35 | DECLSPEC RET_TYPE##16 FUNCTION(ARG1_TYPE##16 x) { \ |
33 | | - return (RET_TYPE##16)(FUNCTION(x.lo), FUNCTION(x.hi)); \ |
| 36 | + return (RET_TYPE##16)( \ |
| 37 | + FUNCTION(x.s0), FUNCTION(x.s1), FUNCTION(x.s2), FUNCTION(x.s3), \ |
| 38 | + FUNCTION(x.s4), FUNCTION(x.s5), FUNCTION(x.s6), FUNCTION(x.s7), \ |
| 39 | + FUNCTION(x.s8), FUNCTION(x.s9), FUNCTION(x.sa), FUNCTION(x.sb), \ |
| 40 | + FUNCTION(x.sc), FUNCTION(x.sd), FUNCTION(x.se), FUNCTION(x.sf)); \ |
34 | 41 | } |
35 | 42 |
|
36 | 43 | #define _CLC_BINARY_VECTORIZE(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE, \ |
37 | 44 | ARG2_TYPE) \ |
38 | 45 | DECLSPEC RET_TYPE##2 FUNCTION(ARG1_TYPE##2 x, ARG2_TYPE##2 y) { \ |
39 | | - return (RET_TYPE##2)(FUNCTION(x.x, y.x), FUNCTION(x.y, y.y)); \ |
| 46 | + return (RET_TYPE##2)(FUNCTION(x.s0, y.s0), FUNCTION(x.s1, y.s1)); \ |
40 | 47 | } \ |
41 | 48 | \ |
42 | 49 | DECLSPEC RET_TYPE##3 FUNCTION(ARG1_TYPE##3 x, ARG2_TYPE##3 y) { \ |
43 | | - return (RET_TYPE##3)(FUNCTION(x.x, y.x), FUNCTION(x.y, y.y), \ |
44 | | - FUNCTION(x.z, y.z)); \ |
| 50 | + return (RET_TYPE##3)(FUNCTION(x.s0, y.s0), FUNCTION(x.s1, y.s1), \ |
| 51 | + FUNCTION(x.s2, y.s2)); \ |
45 | 52 | } \ |
46 | 53 | \ |
47 | 54 | DECLSPEC RET_TYPE##4 FUNCTION(ARG1_TYPE##4 x, ARG2_TYPE##4 y) { \ |
48 | | - return (RET_TYPE##4)(FUNCTION(x.lo, y.lo), FUNCTION(x.hi, y.hi)); \ |
| 55 | + return (RET_TYPE##4)(FUNCTION(x.s0, y.s0), FUNCTION(x.s1, y.s1), \ |
| 56 | + FUNCTION(x.s2, y.s2), FUNCTION(x.s3, y.s3)); \ |
49 | 57 | } \ |
50 | 58 | \ |
51 | 59 | DECLSPEC RET_TYPE##8 FUNCTION(ARG1_TYPE##8 x, ARG2_TYPE##8 y) { \ |
52 | | - return (RET_TYPE##8)(FUNCTION(x.lo, y.lo), FUNCTION(x.hi, y.hi)); \ |
| 60 | + return (RET_TYPE##8)(FUNCTION(x.s0, y.s0), FUNCTION(x.s1, y.s1), \ |
| 61 | + FUNCTION(x.s2, y.s2), FUNCTION(x.s3, y.s3), \ |
| 62 | + FUNCTION(x.s4, y.s4), FUNCTION(x.s5, y.s5), \ |
| 63 | + FUNCTION(x.s6, y.s6), FUNCTION(x.s7, y.s7)); \ |
53 | 64 | } \ |
54 | 65 | \ |
55 | 66 | DECLSPEC RET_TYPE##16 FUNCTION(ARG1_TYPE##16 x, ARG2_TYPE##16 y) { \ |
56 | | - return (RET_TYPE##16)(FUNCTION(x.lo, y.lo), FUNCTION(x.hi, y.hi)); \ |
| 67 | + return (RET_TYPE##16)( \ |
| 68 | + FUNCTION(x.s0, y.s0), FUNCTION(x.s1, y.s1), FUNCTION(x.s2, y.s2), \ |
| 69 | + FUNCTION(x.s3, y.s3), FUNCTION(x.s4, y.s4), FUNCTION(x.s5, y.s5), \ |
| 70 | + FUNCTION(x.s6, y.s6), FUNCTION(x.s7, y.s7), FUNCTION(x.s8, y.s8), \ |
| 71 | + FUNCTION(x.s9, y.s9), FUNCTION(x.sa, y.sa), FUNCTION(x.sb, y.sb), \ |
| 72 | + FUNCTION(x.sc, y.sc), FUNCTION(x.sd, y.sd), FUNCTION(x.se, y.se), \ |
| 73 | + FUNCTION(x.sf, y.sf)); \ |
57 | 74 | } |
58 | 75 |
|
59 | 76 | #define _CLC_V_S_V_VECTORIZE(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE, \ |
60 | 77 | ARG2_TYPE) \ |
61 | 78 | DECLSPEC RET_TYPE##2 FUNCTION(ARG1_TYPE x, ARG2_TYPE##2 y) { \ |
62 | | - return (RET_TYPE##2)(FUNCTION(x, y.lo), FUNCTION(x, y.hi)); \ |
| 79 | + return (RET_TYPE##2)(FUNCTION(x, y.s0), FUNCTION(x, y.s1)); \ |
63 | 80 | } \ |
64 | 81 | \ |
65 | 82 | DECLSPEC RET_TYPE##3 FUNCTION(ARG1_TYPE x, ARG2_TYPE##3 y) { \ |
66 | | - return (RET_TYPE##3)(FUNCTION(x, y.x), FUNCTION(x, y.y), \ |
67 | | - FUNCTION(x, y.z)); \ |
| 83 | + return (RET_TYPE##3)(FUNCTION(x, y.s0), FUNCTION(x, y.s1), \ |
| 84 | + FUNCTION(x, y.s2)); \ |
68 | 85 | } \ |
69 | 86 | \ |
70 | 87 | DECLSPEC RET_TYPE##4 FUNCTION(ARG1_TYPE x, ARG2_TYPE##4 y) { \ |
71 | | - return (RET_TYPE##4)(FUNCTION(x, y.lo), FUNCTION(x, y.hi)); \ |
| 88 | + return (RET_TYPE##4)(FUNCTION(x, y.s0), FUNCTION(x, y.s1), \ |
| 89 | + FUNCTION(x, y.s2), FUNCTION(x, y.s3)); \ |
72 | 90 | } \ |
73 | 91 | \ |
74 | 92 | DECLSPEC RET_TYPE##8 FUNCTION(ARG1_TYPE x, ARG2_TYPE##8 y) { \ |
75 | | - return (RET_TYPE##8)(FUNCTION(x, y.lo), FUNCTION(x, y.hi)); \ |
| 93 | + return (RET_TYPE##8)(FUNCTION(x, y.s0), FUNCTION(x, y.s1), \ |
| 94 | + FUNCTION(x, y.s2), FUNCTION(x, y.s3), \ |
| 95 | + FUNCTION(x, y.s4), FUNCTION(x, y.s5), \ |
| 96 | + FUNCTION(x, y.s6), FUNCTION(x, y.s7)); \ |
76 | 97 | } \ |
77 | 98 | \ |
78 | 99 | DECLSPEC RET_TYPE##16 FUNCTION(ARG1_TYPE x, ARG2_TYPE##16 y) { \ |
79 | | - return (RET_TYPE##16)(FUNCTION(x, y.lo), FUNCTION(x, y.hi)); \ |
| 100 | + return (RET_TYPE##16)( \ |
| 101 | + FUNCTION(x, y.s0), FUNCTION(x, y.s1), FUNCTION(x, y.s2), \ |
| 102 | + FUNCTION(x, y.s3), FUNCTION(x, y.s4), FUNCTION(x, y.s5), \ |
| 103 | + FUNCTION(x, y.s6), FUNCTION(x, y.s7), FUNCTION(x, y.s8), \ |
| 104 | + FUNCTION(x, y.s9), FUNCTION(x, y.sa), FUNCTION(x, y.sb), \ |
| 105 | + FUNCTION(x, y.sc), FUNCTION(x, y.sd), FUNCTION(x, y.se), \ |
| 106 | + FUNCTION(x, y.sf)); \ |
80 | 107 | } |
81 | 108 |
|
82 | 109 | #define _CLC_TERNARY_VECTORIZE(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE, \ |
83 | 110 | ARG2_TYPE, ARG3_TYPE) \ |
84 | 111 | DECLSPEC RET_TYPE##2 FUNCTION(ARG1_TYPE##2 x, ARG2_TYPE##2 y, \ |
85 | 112 | ARG3_TYPE##2 z) { \ |
86 | | - return (RET_TYPE##2)(FUNCTION(x.x, y.x, z.x), FUNCTION(x.y, y.y, z.y)); \ |
| 113 | + return (RET_TYPE##2)(FUNCTION(x.s0, y.s0, z.s0), \ |
| 114 | + FUNCTION(x.s1, y.s1, z.s1)); \ |
87 | 115 | } \ |
88 | 116 | \ |
89 | 117 | DECLSPEC RET_TYPE##3 FUNCTION(ARG1_TYPE##3 x, ARG2_TYPE##3 y, \ |
90 | 118 | ARG3_TYPE##3 z) { \ |
91 | | - return (RET_TYPE##3)(FUNCTION(x.x, y.x, z.x), FUNCTION(x.y, y.y, z.y), \ |
92 | | - FUNCTION(x.z, y.z, z.z)); \ |
| 119 | + return (RET_TYPE##3)(FUNCTION(x.s0, y.s0, z.s0), \ |
| 120 | + FUNCTION(x.s1, y.s1, z.s1), \ |
| 121 | + FUNCTION(x.s2, y.s2, z.s2)); \ |
93 | 122 | } \ |
94 | 123 | \ |
95 | 124 | DECLSPEC RET_TYPE##4 FUNCTION(ARG1_TYPE##4 x, ARG2_TYPE##4 y, \ |
96 | 125 | ARG3_TYPE##4 z) { \ |
97 | | - return (RET_TYPE##4)(FUNCTION(x.lo, y.lo, z.lo), \ |
98 | | - FUNCTION(x.hi, y.hi, z.hi)); \ |
| 126 | + return (RET_TYPE##4)( \ |
| 127 | + FUNCTION(x.s0, y.s0, z.s0), FUNCTION(x.s1, y.s1, z.s1), \ |
| 128 | + FUNCTION(x.s2, y.s2, z.s2), FUNCTION(x.s3, y.s3, z.s3)); \ |
99 | 129 | } \ |
100 | 130 | \ |
101 | 131 | DECLSPEC RET_TYPE##8 FUNCTION(ARG1_TYPE##8 x, ARG2_TYPE##8 y, \ |
102 | 132 | ARG3_TYPE##8 z) { \ |
103 | | - return (RET_TYPE##8)(FUNCTION(x.lo, y.lo, z.lo), \ |
104 | | - FUNCTION(x.hi, y.hi, z.hi)); \ |
| 133 | + return (RET_TYPE##8)( \ |
| 134 | + FUNCTION(x.s0, y.s0, z.s0), FUNCTION(x.s1, y.s1, z.s1), \ |
| 135 | + FUNCTION(x.s2, y.s2, z.s2), FUNCTION(x.s3, y.s3, z.s3), \ |
| 136 | + FUNCTION(x.s4, y.s4, z.s4), FUNCTION(x.s5, y.s5, z.s5), \ |
| 137 | + FUNCTION(x.s6, y.s6, z.s6), FUNCTION(x.s7, y.s7, z.s7)); \ |
105 | 138 | } \ |
106 | 139 | \ |
107 | 140 | DECLSPEC RET_TYPE##16 FUNCTION(ARG1_TYPE##16 x, ARG2_TYPE##16 y, \ |
108 | 141 | ARG3_TYPE##16 z) { \ |
109 | | - return (RET_TYPE##16)(FUNCTION(x.lo, y.lo, z.lo), \ |
110 | | - FUNCTION(x.hi, y.hi, z.hi)); \ |
| 142 | + return (RET_TYPE##16)( \ |
| 143 | + FUNCTION(x.s0, y.s0, z.s0), FUNCTION(x.s1, y.s1, z.s1), \ |
| 144 | + FUNCTION(x.s2, y.s2, z.s2), FUNCTION(x.s3, y.s3, z.s3), \ |
| 145 | + FUNCTION(x.s4, y.s4, z.s4), FUNCTION(x.s5, y.s5, z.s5), \ |
| 146 | + FUNCTION(x.s6, y.s6, z.s6), FUNCTION(x.s7, y.s7, z.s7), \ |
| 147 | + FUNCTION(x.s8, y.s8, z.s8), FUNCTION(x.s9, y.s9, z.s9), \ |
| 148 | + FUNCTION(x.sa, y.sa, z.sa), FUNCTION(x.sb, y.sb, z.sb), \ |
| 149 | + FUNCTION(x.sc, y.sc, z.sc), FUNCTION(x.sd, y.sd, z.sd), \ |
| 150 | + FUNCTION(x.se, y.se, z.se), FUNCTION(x.sf, y.sf, z.sf)); \ |
111 | 151 | } |
112 | 152 |
|
113 | 153 | #define _CLC_V_V_VP_VECTORIZE(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE, \ |
114 | 154 | ADDR_SPACE, ARG2_TYPE) \ |
115 | 155 | DECLSPEC __CLC_XCONCAT(RET_TYPE, 2) \ |
116 | 156 | FUNCTION(__CLC_XCONCAT(ARG1_TYPE, 2) x, \ |
117 | 157 | ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 2) * y) { \ |
118 | | - return (__CLC_XCONCAT(RET_TYPE, 2))( \ |
119 | | - FUNCTION(x.x, (ADDR_SPACE ARG2_TYPE *)y), \ |
120 | | - FUNCTION(x.y, \ |
121 | | - (ADDR_SPACE ARG2_TYPE *)((ADDR_SPACE ARG2_TYPE *)y + 1))); \ |
| 158 | + ADDR_SPACE ARG2_TYPE *ptr = (ADDR_SPACE ARG2_TYPE *)y; \ |
| 159 | + return (__CLC_XCONCAT(RET_TYPE, 2))(FUNCTION(x.s0, ptr), \ |
| 160 | + FUNCTION(x.s1, ptr + 1)); \ |
122 | 161 | } \ |
123 | 162 | \ |
124 | 163 | DECLSPEC __CLC_XCONCAT(RET_TYPE, 3) \ |
125 | 164 | FUNCTION(__CLC_XCONCAT(ARG1_TYPE, 3) x, \ |
126 | 165 | ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 3) * y) { \ |
127 | | - return (__CLC_XCONCAT(RET_TYPE, 3))( \ |
128 | | - FUNCTION(x.x, (ADDR_SPACE ARG2_TYPE *)y), \ |
129 | | - FUNCTION(x.y, \ |
130 | | - (ADDR_SPACE ARG2_TYPE *)((ADDR_SPACE ARG2_TYPE *)y + 1)), \ |
131 | | - FUNCTION(x.z, \ |
132 | | - (ADDR_SPACE ARG2_TYPE *)((ADDR_SPACE ARG2_TYPE *)y + 2))); \ |
| 166 | + ADDR_SPACE ARG2_TYPE *ptr = (ADDR_SPACE ARG2_TYPE *)y; \ |
| 167 | + return (__CLC_XCONCAT(RET_TYPE, 3))(FUNCTION(x.s0, ptr), \ |
| 168 | + FUNCTION(x.s1, ptr + 1), \ |
| 169 | + FUNCTION(x.s2, ptr + 2)); \ |
133 | 170 | } \ |
134 | 171 | \ |
135 | 172 | DECLSPEC __CLC_XCONCAT(RET_TYPE, 4) \ |
136 | 173 | FUNCTION(__CLC_XCONCAT(ARG1_TYPE, 4) x, \ |
137 | 174 | ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 4) * y) { \ |
| 175 | + ADDR_SPACE ARG2_TYPE *ptr = (ADDR_SPACE ARG2_TYPE *)y; \ |
138 | 176 | return (__CLC_XCONCAT(RET_TYPE, 4))( \ |
139 | | - FUNCTION(x.lo, (ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 2) *)y), \ |
140 | | - FUNCTION(x.hi, (ADDR_SPACE __CLC_XCONCAT( \ |
141 | | - ARG2_TYPE, 2) *)((ADDR_SPACE ARG2_TYPE *)y + 2))); \ |
| 177 | + FUNCTION(x.s0, ptr), FUNCTION(x.s1, ptr + 1), FUNCTION(x.s2, ptr + 2), \ |
| 178 | + FUNCTION(x.s3, ptr + 3)); \ |
142 | 179 | } \ |
143 | 180 | \ |
144 | 181 | DECLSPEC __CLC_XCONCAT(RET_TYPE, 8) \ |
145 | 182 | FUNCTION(__CLC_XCONCAT(ARG1_TYPE, 8) x, \ |
146 | 183 | ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 8) * y) { \ |
| 184 | + ADDR_SPACE ARG2_TYPE *ptr = (ADDR_SPACE ARG2_TYPE *)y; \ |
147 | 185 | return (__CLC_XCONCAT(RET_TYPE, 8))( \ |
148 | | - FUNCTION(x.lo, (ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 4) *)y), \ |
149 | | - FUNCTION(x.hi, (ADDR_SPACE __CLC_XCONCAT( \ |
150 | | - ARG2_TYPE, 4) *)((ADDR_SPACE ARG2_TYPE *)y + 4))); \ |
| 186 | + FUNCTION(x.s0, ptr), FUNCTION(x.s1, ptr + 1), FUNCTION(x.s2, ptr + 2), \ |
| 187 | + FUNCTION(x.s3, ptr + 3), FUNCTION(x.s4, ptr + 4), \ |
| 188 | + FUNCTION(x.s5, ptr + 5), FUNCTION(x.s6, ptr + 6), \ |
| 189 | + FUNCTION(x.s7, ptr + 7)); \ |
151 | 190 | } \ |
152 | 191 | \ |
153 | 192 | DECLSPEC __CLC_XCONCAT(RET_TYPE, 16) \ |
154 | 193 | FUNCTION(__CLC_XCONCAT(ARG1_TYPE, 16) x, \ |
155 | 194 | ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 16) * y) { \ |
| 195 | + ADDR_SPACE ARG2_TYPE *ptr = (ADDR_SPACE ARG2_TYPE *)y; \ |
156 | 196 | return (__CLC_XCONCAT(RET_TYPE, 16))( \ |
157 | | - FUNCTION(x.lo, (ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 8) *)y), \ |
158 | | - FUNCTION(x.hi, (ADDR_SPACE __CLC_XCONCAT( \ |
159 | | - ARG2_TYPE, 8) *)((ADDR_SPACE ARG2_TYPE *)y + 8))); \ |
| 197 | + FUNCTION(x.s0, ptr), FUNCTION(x.s1, ptr + 1), FUNCTION(x.s2, ptr + 2), \ |
| 198 | + FUNCTION(x.s3, ptr + 3), FUNCTION(x.s4, ptr + 4), \ |
| 199 | + FUNCTION(x.s5, ptr + 5), FUNCTION(x.s6, ptr + 6), \ |
| 200 | + FUNCTION(x.s7, ptr + 7), FUNCTION(x.s8, ptr + 8), \ |
| 201 | + FUNCTION(x.s9, ptr + 9), FUNCTION(x.sa, ptr + 10), \ |
| 202 | + FUNCTION(x.sb, ptr + 11), FUNCTION(x.sc, ptr + 12), \ |
| 203 | + FUNCTION(x.sd, ptr + 13), FUNCTION(x.se, ptr + 14), \ |
| 204 | + FUNCTION(x.sf, ptr + 15)); \ |
160 | 205 | } |
161 | 206 |
|
162 | 207 | #define _CLC_DEFINE_BINARY_BUILTIN(RET_TYPE, FUNCTION, BUILTIN, ARG1_TYPE, \ |
|
0 commit comments