|
2231 | 2231 | return F32x4(v0, v1, v2, v3); |
2232 | 2232 | #endif |
2233 | 2233 | } |
| 2234 | + inline F32x4 abs(const F32x4& value) { |
| 2235 | + #if defined(USE_SSE2) |
| 2236 | + // Mask out the negation bit to make the value positive. |
| 2237 | + return value & F32x4(DSR_FLOAT_INF); |
| 2238 | + #elif defined(USE_NEON) |
| 2239 | + return F32x4(vabsq_f32(value.v)); |
| 2240 | + #else |
| 2241 | + float v0 = value.scalars[0]; |
| 2242 | + float v1 = value.scalars[1]; |
| 2243 | + float v2 = value.scalars[2]; |
| 2244 | + float v3 = value.scalars[3]; |
| 2245 | + return F32x4( |
| 2246 | + v0 < 0.0f ? -v0 : v0, |
| 2247 | + v1 < 0.0f ? -v1 : v1, |
| 2248 | + v2 < 0.0f ? -v2 : v2, |
| 2249 | + v3 < 0.0f ? -v3 : v3 |
| 2250 | + ); |
| 2251 | + #endif |
| 2252 | + } |
2234 | 2253 | inline I32x4 operator+(const I32x4& left, const I32x4& right) { |
2235 | 2254 | #if defined(USE_BASIC_SIMD) |
2236 | 2255 | return I32x4(ADD_I32_SIMD(left.v, right.v)); |
|
2257 | 2276 | IMPL_SCALAR_REFERENCE_INFIX_4_LANES(left, right, I32x4, int32_t, *) |
2258 | 2277 | #endif |
2259 | 2278 | } |
| 2279 | + // Behaviour is undefined if taking the absolute value of the most negative value that has no corresponding positive value. |
| 2280 | + inline I32x4 abs(const I32x4& value) { |
| 2281 | + #if defined(USE_SSE2) |
| 2282 | + return I32x4(_mm_abs_epi32(value.v)); |
| 2283 | + #elif defined(USE_NEON) |
| 2284 | + return I32x4(vabsq_s32(value.v)); |
| 2285 | + #else |
| 2286 | + int32_t v0 = value.scalars[0]; |
| 2287 | + int32_t v1 = value.scalars[1]; |
| 2288 | + int32_t v2 = value.scalars[2]; |
| 2289 | + int32_t v3 = value.scalars[3]; |
| 2290 | + return I32x4( |
| 2291 | + v0 < 0.0f ? -v0 : v0, |
| 2292 | + v1 < 0.0f ? -v1 : v1, |
| 2293 | + v2 < 0.0f ? -v2 : v2, |
| 2294 | + v3 < 0.0f ? -v3 : v3 |
| 2295 | + ); |
| 2296 | + #endif |
| 2297 | + } |
2260 | 2298 | // TODO: Specify the behavior of truncated unsigned integer overflow and add it to the tests. |
2261 | 2299 | inline U32x4 operator+(const U32x4& left, const U32x4& right) { |
2262 | 2300 | #if defined(USE_BASIC_SIMD) |
|
3055 | 3093 | return F32x8(v0, v1, v2, v3, v4, v5, v6, v7); |
3056 | 3094 | #endif |
3057 | 3095 | } |
| 3096 | + inline F32x8 abs(const F32x8& value) { |
| 3097 | + #if defined(USE_SSE2) |
| 3098 | + return F32x8(_mm_abs_epi32(value.v)); |
| 3099 | + #else |
| 3100 | + float v0 = value.scalars[0]; |
| 3101 | + float v1 = value.scalars[1]; |
| 3102 | + float v2 = value.scalars[2]; |
| 3103 | + float v3 = value.scalars[3]; |
| 3104 | + float v4 = value.scalars[4]; |
| 3105 | + float v5 = value.scalars[5]; |
| 3106 | + float v6 = value.scalars[6]; |
| 3107 | + float v7 = value.scalars[7]; |
| 3108 | + return F32x8( |
| 3109 | + v0 < 0.0f ? -v0 : v0, |
| 3110 | + v1 < 0.0f ? -v1 : v1, |
| 3111 | + v2 < 0.0f ? -v2 : v2, |
| 3112 | + v3 < 0.0f ? -v3 : v3, |
| 3113 | + v4 < 0.0f ? -v4 : v4, |
| 3114 | + v5 < 0.0f ? -v5 : v5, |
| 3115 | + v6 < 0.0f ? -v6 : v6, |
| 3116 | + v7 < 0.0f ? -v7 : v7 |
| 3117 | + ); |
| 3118 | + #endif |
| 3119 | + } |
3058 | 3120 | inline I32x8 operator+(const I32x8& left, const I32x8& right) { |
3059 | 3121 | #if defined(USE_256BIT_X_SIMD) |
3060 | 3122 | return I32x8(ADD_I32_SIMD256(left.v, right.v)); |
|
3076 | 3138 | IMPL_SCALAR_REFERENCE_INFIX_8_LANES(left, right, I32x8, int32_t, *) |
3077 | 3139 | #endif |
3078 | 3140 | } |
| 3141 | + // Behaviour is undefined if taking the absolute value of the most negative value that has no corresponding positive value. |
| 3142 | + inline I32x8 abs(const I32x8& value) { |
| 3143 | + #if defined(USE_AVX2) |
| 3144 | + return I32x8(_mm256_abs_epi32(value.v)); |
| 3145 | + #else |
| 3146 | + int32_t v0 = value.scalars[0]; |
| 3147 | + int32_t v1 = value.scalars[1]; |
| 3148 | + int32_t v2 = value.scalars[2]; |
| 3149 | + int32_t v3 = value.scalars[3]; |
| 3150 | + int32_t v4 = value.scalars[4]; |
| 3151 | + int32_t v5 = value.scalars[5]; |
| 3152 | + int32_t v6 = value.scalars[6]; |
| 3153 | + int32_t v7 = value.scalars[7]; |
| 3154 | + return I32x8( |
| 3155 | + v0 < 0 ? -v0 : v0, |
| 3156 | + v1 < 0 ? -v1 : v1, |
| 3157 | + v2 < 0 ? -v2 : v2, |
| 3158 | + v3 < 0 ? -v3 : v3, |
| 3159 | + v4 < 0 ? -v4 : v4, |
| 3160 | + v5 < 0 ? -v5 : v5, |
| 3161 | + v6 < 0 ? -v6 : v6, |
| 3162 | + v7 < 0 ? -v7 : v7 |
| 3163 | + ); |
| 3164 | + #endif |
| 3165 | + } |
3079 | 3166 | inline U32x8 operator+(const U32x8& left, const U32x8& right) { |
3080 | 3167 | #if defined(USE_256BIT_X_SIMD) |
3081 | 3168 | return U32x8(ADD_U32_SIMD256(left.v, right.v)); |
|
0 commit comments