|
15 | 15 | #ifndef __AVXVNNIINT16INTRIN_H |
16 | 16 | #define __AVXVNNIINT16INTRIN_H |
17 | 17 |
|
18 | | -// clang-format off |
19 | 18 | /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in \a __A with |
20 | | -/// corresponding unsigned 16-bit integers in \a __B, producing 2 intermediate |
21 | | -/// signed 16-bit results. Sum these 2 results with the corresponding |
22 | | -/// 32-bit integer in \a __W, and store the packed 32-bit results in \a dst. |
| 19 | +/// corresponding unsigned 16-bit integers in \a __B, producing 2 |
| 20 | +/// intermediate signed 32-bit results. Sum these 2 results with the |
| 21 | +/// corresponding 32-bit integer in \a __W, and store the packed 32-bit |
| 22 | +/// results in \a dst. |
23 | 23 | /// |
24 | 24 | /// \headerfile <immintrin.h> |
25 | 25 | /// |
|
41 | 41 | /// \code{.operation} |
42 | 42 | /// FOR j := 0 to 3 |
43 | 43 | /// tmp1.dword := SignExtend32(__A.word[2*j]) * ZeroExtend32(__B.word[2*j]) |
44 | | -/// tmp2.dword := SignExtend32(__A.word[2*j+1]) * ZeroExtend32(__B.word[2*j+1]) |
| 44 | +/// tmp2.dword := |
| 45 | +/// SignExtend32(__A.word[2*j+1]) * ZeroExtend32(__B.word[2*j+1]) |
45 | 46 | /// dst.dword[j] := __W.dword[j] + tmp1 + tmp2 |
46 | 47 | /// ENDFOR |
47 | 48 | /// dst[MAX:128] := 0 |
48 | 49 | /// \endcode |
49 | | -// clang-format on |
50 | 50 | #define _mm_dpwsud_epi32(__W, __A, __B) \ |
51 | 51 | ((__m128i)__builtin_ia32_vpdpwsud128((__v4si)(__W), (__v8hi)(__A), \ |
52 | 52 | (__v8hu)(__B))) |
53 | 53 |
|
54 | | -// clang-format off |
55 | 54 | /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in \a __A with |
56 | | -/// corresponding unsigned 16-bit integers in \a __B, producing 2 intermediate |
57 | | -/// signed 16-bit results. Sum these 2 results with the corresponding |
58 | | -/// 32-bit integer in \a __W, and store the packed 32-bit results in \a dst. |
| 55 | +/// corresponding unsigned 16-bit integers in \a __B, producing 2 |
| 56 | +/// intermediate signed 32-bit results. Sum these 2 results with the |
| 57 | +/// corresponding 32-bit integer in \a __W, and store the packed 32-bit |
| 58 | +/// results in \a dst. |
59 | 59 | /// |
60 | 60 | /// \headerfile <immintrin.h> |
61 | 61 | /// |
|
77 | 77 | /// \code{.operation} |
78 | 78 | /// FOR j := 0 to 7 |
79 | 79 | /// tmp1.dword := SignExtend32(__A.word[2*j]) * ZeroExtend32(__B.word[2*j]) |
80 | | -/// tmp2.dword := SignExtend32(__A.word[2*j+1]) * ZeroExtend32(__B.word[2*j+1]) |
81 | | -/// dst.dword[j] := __W.dword[j] + tmp1 + tmp2 |
| 80 | +/// tmp2.dword := |
| 81 | +/// SignExtend32(__A.word[2*j+1]) * ZeroExtend32(__B.word[2*j+1]) |
| 82 | +/// dst.dword[j] := __W.dword[j] + tmp1 + tmp2 |
82 | 83 | /// ENDFOR |
83 | 84 | /// dst[MAX:256] := 0 |
84 | 85 | /// \endcode |
85 | | -// clang-format on |
86 | 86 | #define _mm256_dpwsud_epi32(__W, __A, __B) \ |
87 | 87 | ((__m256i)__builtin_ia32_vpdpwsud256((__v8si)(__W), (__v16hi)(__A), \ |
88 | 88 | (__v16hu)(__B))) |
89 | 89 |
|
90 | | -// clang-format off |
91 | 90 | /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in \a __A with |
92 | | -/// corresponding unsigned 16-bit integers in \a __B, producing 2 intermediate |
93 | | -/// signed 16-bit results. Sum these 2 results with the corresponding |
94 | | -/// 32-bit integer in \a __W with signed saturation, and store the packed |
95 | | -/// 32-bit results in \a dst. |
| 91 | +/// corresponding unsigned 16-bit integers in \a __B, producing 2 |
| 92 | +/// intermediate signed 32-bit results. Sum these 2 results with the |
| 93 | +/// corresponding 32-bit integer in \a __W with signed saturation, and store |
| 94 | +/// the packed 32-bit results in \a dst. |
96 | 95 | /// |
97 | 96 | /// \headerfile <immintrin.h> |
98 | 97 | /// |
|
114 | 113 | /// \code{.operation} |
115 | 114 | /// FOR j := 0 to 3 |
116 | 115 | /// tmp1.dword := SignExtend32(__A.word[2*j]) * ZeroExtend32(__B.word[2*j]) |
117 | | -/// tmp2.dword := SignExtend32(__A.word[2*j+1]) * ZeroExtend32(__B.word[2*j+1]) |
| 116 | +/// tmp2.dword := |
| 117 | +/// SignExtend32(__A.word[2*j+1]) * ZeroExtend32(__B.word[2*j+1]) |
118 | 118 | /// dst.dword[j] := SIGNED_DWORD_SATURATE(__W.dword[j] + tmp1 + tmp2) |
119 | 119 | /// ENDFOR |
120 | 120 | /// dst[MAX:128] := 0 |
121 | 121 | /// \endcode |
122 | | -// clang-format on |
123 | 122 | /// |
124 | 123 | #define _mm_dpwsuds_epi32(__W, __A, __B) \ |
125 | 124 | ((__m128i)__builtin_ia32_vpdpwsuds128((__v4si)(__W), (__v8hi)(__A), \ |
126 | 125 | (__v8hu)(__B))) |
127 | 126 |
|
128 | | -// clang-format off |
129 | 127 | /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in \a __A with |
130 | | -/// corresponding unsigned 16-bit integers in \a __B, producing 2 intermediate |
131 | | -/// signed 16-bit results. Sum these 2 results with the corresponding |
132 | | -/// 32-bit integer in \a __W with signed saturation, and store the packed |
133 | | -/// 32-bit results in \a dst. |
| 128 | +/// corresponding unsigned 16-bit integers in \a __B, producing 2 |
| 129 | +/// intermediate signed 32-bit results. Sum these 2 results with the |
| 130 | +/// corresponding 32-bit integer in \a __W with signed saturation, and store |
| 131 | +/// the packed 32-bit results in \a dst. |
134 | 132 | /// |
135 | 133 | /// \headerfile <immintrin.h> |
136 | 134 | /// |
|
152 | 150 | /// \code{.operation} |
153 | 151 | /// FOR j := 0 to 7 |
154 | 152 | /// tmp1.dword := SignExtend32(__A.word[2*j]) * ZeroExtend32(__B.word[2*j]) |
155 | | -/// tmp2.dword := SignExtend32(__A.word[2*j+1]) * ZeroExtend32(__B.word[2*j+1]) |
| 153 | +/// tmp2.dword := |
| 154 | +/// SignExtend32(__A.word[2*j+1]) * ZeroExtend32(__B.word[2*j+1]) |
156 | 155 | /// dst.dword[j] := SIGNED_DWORD_SATURATE(__W.dword[j] + tmp1 + tmp2) |
157 | 156 | /// ENDFOR |
158 | 157 | /// dst[MAX:256] := 0 |
159 | 158 | /// \endcode |
160 | | -// clang-format on |
161 | 159 | #define _mm256_dpwsuds_epi32(__W, __A, __B) \ |
162 | 160 | ((__m256i)__builtin_ia32_vpdpwsuds256((__v8si)(__W), (__v16hi)(__A), \ |
163 | 161 | (__v16hu)(__B))) |
164 | 162 |
|
165 | | -// clang-format off |
166 | | -/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in \a __A with |
167 | | -/// corresponding signed 16-bit integers in \a __B, producing 2 intermediate |
168 | | -/// signed 16-bit results. Sum these 2 results with the corresponding |
169 | | -/// 32-bit integer in \a __W, and store the packed 32-bit results in \a dst. |
| 163 | +/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in \a __A |
| 164 | +/// with corresponding signed 16-bit integers in \a __B, producing 2 |
| 165 | +/// intermediate signed 32-bit results. Sum these 2 results with the |
| 166 | +/// corresponding 32-bit integer in \a __W, and store the packed 32-bit |
| 167 | +/// results in \a dst. |
170 | 168 | /// |
171 | 169 | /// \headerfile <immintrin.h> |
172 | 170 | /// |
|
188 | 186 | /// \code{.operation} |
189 | 187 | /// FOR j := 0 to 3 |
190 | 188 | /// tmp1.dword := ZeroExtend32(__A.word[2*j]) * SignExtend32(__B.word[2*j]) |
191 | | -/// tmp2.dword := ZeroExtend32(__A.word[2*j+1]) * SignExtend32(__B.word[2*j+1]) |
| 189 | +/// tmp2.dword := |
| 190 | +/// ZeroExtend32(__A.word[2*j+1]) * SignExtend32(__B.word[2*j+1]) |
192 | 191 | /// dst.dword[j] := __W.dword[j] + tmp1 + tmp2 |
193 | 192 | /// ENDFOR |
194 | 193 | /// dst[MAX:128] := 0 |
195 | 194 | /// \endcode |
196 | | -// clang-format on |
197 | 195 | #define _mm_dpwusd_epi32(__W, __A, __B) \ |
198 | 196 | ((__m128i)__builtin_ia32_vpdpwusd128((__v4si)(__W), (__v8hu)(__A), \ |
199 | 197 | (__v8hi)(__B))) |
200 | 198 |
|
201 | | -// clang-format off |
202 | | -/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in \a __A with |
203 | | -/// corresponding signed 16-bit integers in \a __B, producing 2 intermediate |
204 | | -/// signed 16-bit results. Sum these 2 results with the corresponding |
205 | | -/// 32-bit integer in \a __W, and store the packed 32-bit results in \a dst. |
| 199 | +/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in \a __A |
| 200 | +/// with corresponding signed 16-bit integers in \a __B, producing 2 |
| 201 | +/// intermediate signed 32-bit results. Sum these 2 results with the |
| 202 | +/// corresponding 32-bit integer in \a __W, and store the packed 32-bit |
| 203 | +/// results in \a dst. |
206 | 204 | /// |
207 | 205 | /// \headerfile <immintrin.h> |
208 | 206 | /// |
|
224 | 222 | /// \code{.operation} |
225 | 223 | /// FOR j := 0 to 7 |
226 | 224 | /// tmp1.dword := ZeroExtend32(__A.word[2*j]) * SignExtend32(__B.word[2*j]) |
227 | | -/// tmp2.dword := ZeroExtend32(__A.word[2*j+1]) * SignExtend32(__B.word[2*j+1]) |
| 225 | +/// tmp2.dword := |
| 226 | +/// ZeroExtend32(__A.word[2*j+1]) * SignExtend32(__B.word[2*j+1]) |
228 | 227 | /// dst.dword[j] := __W.dword[j] + tmp1 + tmp2 |
229 | 228 | /// ENDFOR |
230 | 229 | /// dst[MAX:256] := 0 |
231 | 230 | /// \endcode |
232 | | -// clang-format on |
233 | 231 | #define _mm256_dpwusd_epi32(__W, __A, __B) \ |
234 | 232 | ((__m256i)__builtin_ia32_vpdpwusd256((__v8si)(__W), (__v16hu)(__A), \ |
235 | 233 | (__v16hi)(__B))) |
236 | 234 |
|
237 | | -// clang-format off |
238 | | -/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in \a __A with |
239 | | -/// corresponding signed 16-bit integers in \a __B, producing 2 intermediate |
240 | | -/// signed 16-bit results. Sum these 2 results with the corresponding |
241 | | -/// 32-bit integer in \a __W with signed saturation, and store the packed |
242 | | -/// 32-bit results in \a dst. |
| 235 | +/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in \a __A |
| 236 | +/// with corresponding signed 16-bit integers in \a __B, producing 2 |
| 237 | +/// intermediate signed 32-bit results. Sum these 2 results with the |
| 238 | +/// corresponding 32-bit integer in \a __W with signed saturation, and |
| 239 | +/// store the packed 32-bit results in \a dst. |
243 | 240 | /// |
244 | 241 | /// \headerfile <immintrin.h> |
245 | 242 | /// |
|
261 | 258 | /// \code{.operation} |
262 | 259 | /// FOR j := 0 to 3 |
263 | 260 | /// tmp1.dword := ZeroExtend32(__A.word[2*j]) * SignExtend32(__B.word[2*j]) |
264 | | -/// tmp2.dword := ZeroExtend32(__A.word[2*j+1]) * SignExtend32(__B.word[2*j+1]) |
| 261 | +/// tmp2.dword := |
| 262 | +/// ZeroExtend32(__A.word[2*j+1]) * SignExtend32(__B.word[2*j+1]) |
265 | 263 | /// dst.dword[j] := SIGNED_DWORD_SATURATE(__W.dword[j] + tmp1 + tmp2) |
266 | 264 | /// ENDFOR |
267 | 265 | /// dst[MAX:128] := 0 |
268 | 266 | /// \endcode |
269 | | -// clang-format on |
270 | 267 | #define _mm_dpwusds_epi32(__W, __A, __B) \ |
271 | 268 | ((__m128i)__builtin_ia32_vpdpwusds128((__v4si)(__W), (__v8hu)(__A), \ |
272 | 269 | (__v8hi)(__B))) |
273 | 270 |
|
274 | | -// clang-format off |
275 | | -/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in \a __A with |
276 | | -/// corresponding signed 16-bit integers in \a __B, producing 2 intermediate |
277 | | -/// signed 16-bit results. Sum these 2 results with the corresponding |
278 | | -/// 32-bit integer in \a __W with signed saturation, and store the packed |
279 | | -/// 32-bit results in \a dst. |
| 271 | +/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in \a __A |
| 272 | +/// with corresponding signed 16-bit integers in \a __B, producing 2 |
| 273 | +/// intermediate signed 32-bit results. Sum these 2 results with the |
| 274 | +/// corresponding 32-bit integer in \a __W with signed saturation, and |
| 275 | +/// store the packed 32-bit results in \a dst. |
280 | 276 | /// |
281 | 277 | /// \headerfile <immintrin.h> |
282 | 278 | /// |
|
298 | 294 | /// \code{.operation} |
299 | 295 | /// FOR j := 0 to 7 |
300 | 296 | /// tmp1.dword := ZeroExtend32(__A.word[2*j]) * SignExtend32(__B.word[2*j]) |
301 | | -/// tmp2.dword := ZeroExtend32(__A.word[2*j+1]) * SignExtend32(__B.word[2*j+1]) |
| 297 | +/// tmp2.dword := |
| 298 | +/// ZeroExtend32(__A.word[2*j+1]) * SignExtend32(__B.word[2*j+1]) |
302 | 299 | /// dst.dword[j] := SIGNED_DWORD_SATURATE(__W.dword[j] + tmp1 + tmp2) |
303 | 300 | /// ENDFOR |
304 | 301 | /// dst[MAX:256] := 0 |
305 | 302 | /// \endcode |
306 | | -// clang-format on |
307 | 303 | #define _mm256_dpwusds_epi32(__W, __A, __B) \ |
308 | 304 | ((__m256i)__builtin_ia32_vpdpwusds256((__v8si)(__W), (__v16hu)(__A), \ |
309 | 305 | (__v16hi)(__B))) |
310 | 306 |
|
311 | | -// clang-format off |
312 | | -/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in \a __A with |
313 | | -/// corresponding unsigned 16-bit integers in \a __B, producing 2 intermediate |
314 | | -/// signed 16-bit results. Sum these 2 results with the corresponding |
315 | | -/// 32-bit integer in \a __W, and store the packed 32-bit results in \a dst. |
| 307 | +/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in \a __A |
| 308 | +/// with corresponding unsigned 16-bit integers in \a __B, producing 2 |
| 309 | +/// intermediate unsigned 32-bit results. Sum these 2 results with the |
| 310 | +/// corresponding 32-bit integer in \a __W, and store the packed 32-bit |
| 311 | +/// results in \a dst. |
316 | 312 | /// |
317 | 313 | /// \headerfile <immintrin.h> |
318 | 314 | /// |
|
334 | 330 | /// \code{.operation} |
335 | 331 | /// FOR j := 0 to 3 |
336 | 332 | /// tmp1.dword := ZeroExtend32(__A.word[2*j]) * ZeroExtend32(__B.word[2*j]) |
337 | | -/// tmp2.dword := ZeroExtend32(__A.word[2*j+1]) * ZeroExtend32(__B.word[2*j+1]) |
| 333 | +/// tmp2.dword := |
| 334 | +/// ZeroExtend32(__A.word[2*j+1]) * ZeroExtend32(__B.word[2*j+1]) |
338 | 335 | /// dst.dword[j] := __W.dword[j] + tmp1 + tmp2 |
339 | 336 | /// ENDFOR |
340 | 337 | /// dst[MAX:128] := 0 |
341 | 338 | /// \endcode |
342 | | -// clang-format on |
343 | 339 | #define _mm_dpwuud_epi32(__W, __A, __B) \ |
344 | 340 | ((__m128i)__builtin_ia32_vpdpwuud128((__v4si)(__W), (__v8hu)(__A), \ |
345 | 341 | (__v8hu)(__B))) |
346 | 342 |
|
347 | | -// clang-format off |
348 | | -/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in \a __A with |
349 | | -/// corresponding unsigned 16-bit integers in \a __B, producing 2 intermediate |
350 | | -/// signed 16-bit results. Sum these 2 results with the corresponding |
351 | | -/// 32-bit integer in \a __W, and store the packed 32-bit results in \a dst. |
| 343 | +/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in \a __A |
| 344 | +/// with corresponding unsigned 16-bit integers in \a __B, producing 2 |
| 345 | +/// intermediate unsigned 32-bit results. Sum these 2 results with the |
| 346 | +/// corresponding 32-bit integer in \a __W, and store the packed 32-bit |
| 347 | +/// results in \a dst. |
352 | 348 | /// |
353 | 349 | /// \headerfile <immintrin.h> |
354 | 350 | /// |
|
370 | 366 | /// \code{.operation} |
371 | 367 | /// FOR j := 0 to 7 |
372 | 368 | /// tmp1.dword := ZeroExtend32(__A.word[2*j]) * ZeroExtend32(__B.word[2*j]) |
373 | | -/// tmp2.dword := ZeroExtend32(__A.word[2*j+1]) * ZeroExtend32(__B.word[2*j+1]) |
| 369 | +/// tmp2.dword := |
| 370 | +/// ZeroExtend32(__A.word[2*j+1]) * ZeroExtend32(__B.word[2*j+1]) |
374 | 371 | /// dst.dword[j] := __W.dword[j] + tmp1 + tmp2 |
375 | 372 | /// ENDFOR |
376 | 373 | /// dst[MAX:256] := 0 |
377 | 374 | /// \endcode |
378 | | -// clang-format on |
379 | 375 | #define _mm256_dpwuud_epi32(__W, __A, __B) \ |
380 | 376 | ((__m256i)__builtin_ia32_vpdpwuud256((__v8si)(__W), (__v16hu)(__A), \ |
381 | 377 | (__v16hu)(__B))) |
382 | 378 |
|
383 | | -// clang-format off |
384 | | -/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in \a __A with |
385 | | -/// corresponding unsigned 16-bit integers in \a __B, producing 2 intermediate |
386 | | -/// signed 16-bit results. Sum these 2 results with the corresponding |
387 | | -/// 32-bit integer in \a __W with signed saturation, and store the packed |
388 | | -/// 32-bit results in \a dst. |
| 379 | +/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in \a __A |
| 380 | +/// with corresponding unsigned 16-bit integers in \a __B, producing 2 |
| 381 | +/// intermediate unsigned 32-bit results. Sum these 2 results with the |
| 382 | +/// corresponding 32-bit integer in \a __W with unsigned saturation, and store |
| 383 | +/// the packed 32-bit results in \a dst. |
389 | 384 | /// |
390 | 385 | /// \headerfile <immintrin.h> |
391 | 386 | /// |
|
407 | 402 | /// \code{.operation} |
408 | 403 | /// FOR j := 0 to 3 |
409 | 404 | /// tmp1.dword := ZeroExtend32(__A.word[2*j]) * ZeroExtend32(__B.word[2*j]) |
410 | | -/// tmp2.dword := ZeroExtend32(__A.word[2*j+1]) * ZeroExtend32(__B.word[2*j+1]) |
| 405 | +/// tmp2.dword := |
| 406 | +/// ZeroExtend32(__A.word[2*j+1]) * ZeroExtend32(__B.word[2*j+1]) |
411 | 407 | /// dst.dword[j] := UNSIGNED_DWORD_SATURATE(__W.dword[j] + tmp1 + tmp2) |
412 | 408 | /// ENDFOR |
413 | 409 | /// dst[MAX:128] := 0 |
414 | 410 | /// \endcode |
415 | | -// clang-format on |
416 | 411 | #define _mm_dpwuuds_epi32(__W, __A, __B) \ |
417 | 412 | ((__m128i)__builtin_ia32_vpdpwuuds128((__v4si)(__W), (__v8hu)(__A), \ |
418 | 413 | (__v8hu)(__B))) |
419 | 414 |
|
420 | | -// clang-format off |
421 | | -/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in \a __A with |
422 | | -/// corresponding unsigned 16-bit integers in \a __B, producing 2 intermediate |
423 | | -/// signed 16-bit results. Sum these 2 results with the corresponding |
424 | | -/// 32-bit integer in \a __W with signed saturation, and store the packed |
425 | | -/// 32-bit results in \a dst. |
| 415 | +/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in \a __A |
| 416 | +/// with corresponding unsigned 16-bit integers in \a __B, producing 2 |
| 417 | +/// intermediate unsigned 32-bit results. Sum these 2 results with the |
| 418 | +/// corresponding 32-bit integer in \a __W with unsigned saturation, and store |
| 419 | +/// the packed 32-bit results in \a dst. |
426 | 420 | /// |
427 | 421 | /// \headerfile <immintrin.h> |
428 | 422 | /// |
|
444 | 438 | /// \code{.operation} |
445 | 439 | /// FOR j := 0 to 7 |
446 | 440 | /// tmp1.dword := ZeroExtend32(__A.word[2*j]) * ZeroExtend32(__B.word[2*j]) |
447 | | -/// tmp2.dword := ZeroExtend32(__A.word[2*j+1]) * ZeroExtend32(__B.word[2*j+1]) |
| 441 | +/// tmp2.dword := |
| 442 | +/// ZeroExtend32(__A.word[2*j+1]) * ZeroExtend32(__B.word[2*j+1]) |
448 | 443 | /// dst.dword[j] := UNSIGNED_DWORD_SATURATE(__W.dword[j] + tmp1 + tmp2) |
449 | 444 | /// ENDFOR |
450 | 445 | /// dst[MAX:256] := 0 |
451 | 446 | /// \endcode |
452 | | -// clang-format on |
453 | 447 | #define _mm256_dpwuuds_epi32(__W, __A, __B) \ |
454 | 448 | ((__m256i)__builtin_ia32_vpdpwuuds256((__v8si)(__W), (__v16hu)(__A), \ |
455 | 449 | (__v16hu)(__B))) |
|
0 commit comments