Skip to content

Commit b30eaf5

Browse files
committed
Implemented and tested a SIMD implementation of an absolute function.
1 parent 865253f commit b30eaf5

File tree

2 files changed

+105
-2
lines changed

2 files changed

+105
-2
lines changed

Source/DFPSR/base/simd.h

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2231,6 +2231,25 @@
22312231
return F32x4(v0, v1, v2, v3);
22322232
#endif
22332233
}
2234+
// Returns a vector where each of the four lanes holds the absolute value of the corresponding lane in value.
inline F32x4 abs(const F32x4& value) {
	#if defined(USE_SSE2)
		// -0.0f has only the sign bit set, so and-not with it clears the sign bit in every lane
		//   while leaving the exponent and mantissa bits untouched.
		// Assumes that value.v is a __m128 when USE_SSE2 is defined.
		return F32x4(_mm_andnot_ps(_mm_set1_ps(-0.0f), value.v));
	#elif defined(USE_NEON)
		return F32x4(vabsq_f32(value.v));
	#else
		// Scalar reference implementation negating the lanes that compare less than zero.
		float v0 = value.scalars[0];
		float v1 = value.scalars[1];
		float v2 = value.scalars[2];
		float v3 = value.scalars[3];
		return F32x4(
		  v0 < 0.0f ? -v0 : v0,
		  v1 < 0.0f ? -v1 : v1,
		  v2 < 0.0f ? -v2 : v2,
		  v3 < 0.0f ? -v3 : v3
		);
	#endif
}
22342253
inline I32x4 operator+(const I32x4& left, const I32x4& right) {
22352254
#if defined(USE_BASIC_SIMD)
22362255
return I32x4(ADD_I32_SIMD(left.v, right.v));
@@ -2257,6 +2276,25 @@
22572276
IMPL_SCALAR_REFERENCE_INFIX_4_LANES(left, right, I32x4, int32_t, *)
22582277
#endif
22592278
}
2279+
// Behaviour is undefined if taking the absolute value of the most negative value that has no corresponding positive value.
2280+
inline I32x4 abs(const I32x4& value) {
2281+
#if defined(USE_SSE2)
2282+
return I32x4(_mm_abs_epi32(value.v));
2283+
#elif defined(USE_NEON)
2284+
return I32x4(vabsq_s32(value.v));
2285+
#else
2286+
int32_t v0 = value.scalars[0];
2287+
int32_t v1 = value.scalars[1];
2288+
int32_t v2 = value.scalars[2];
2289+
int32_t v3 = value.scalars[3];
2290+
return I32x4(
2291+
v0 < 0.0f ? -v0 : v0,
2292+
v1 < 0.0f ? -v1 : v1,
2293+
v2 < 0.0f ? -v2 : v2,
2294+
v3 < 0.0f ? -v3 : v3
2295+
);
2296+
#endif
2297+
}
22602298
// TODO: Specify the behavior of truncated unsigned integer overflow and add it to the tests.
22612299
inline U32x4 operator+(const U32x4& left, const U32x4& right) {
22622300
#if defined(USE_BASIC_SIMD)
@@ -3055,6 +3093,30 @@
30553093
return F32x8(v0, v1, v2, v3, v4, v5, v6, v7);
30563094
#endif
30573095
}
3096+
// Returns a vector where each of the eight lanes holds the absolute value of the corresponding lane in value.
inline F32x8 abs(const F32x8& value) {
	#if defined(USE_AVX2)
		// A 256-bit float vector needs a 256-bit float operation; the 128-bit integer _mm_abs_epi32 does not apply here.
		// -0.0f has only the sign bit set, so and-not with it clears the sign bit in every lane
		//   while leaving the exponent and mantissa bits untouched.
		// Assumes that value.v is a __m256 when USE_AVX2 is defined.
		return F32x8(_mm256_andnot_ps(_mm256_set1_ps(-0.0f), value.v));
	#else
		// Scalar reference implementation negating the lanes that compare less than zero.
		float v0 = value.scalars[0];
		float v1 = value.scalars[1];
		float v2 = value.scalars[2];
		float v3 = value.scalars[3];
		float v4 = value.scalars[4];
		float v5 = value.scalars[5];
		float v6 = value.scalars[6];
		float v7 = value.scalars[7];
		return F32x8(
		  v0 < 0.0f ? -v0 : v0,
		  v1 < 0.0f ? -v1 : v1,
		  v2 < 0.0f ? -v2 : v2,
		  v3 < 0.0f ? -v3 : v3,
		  v4 < 0.0f ? -v4 : v4,
		  v5 < 0.0f ? -v5 : v5,
		  v6 < 0.0f ? -v6 : v6,
		  v7 < 0.0f ? -v7 : v7
		);
	#endif
}
30583120
inline I32x8 operator+(const I32x8& left, const I32x8& right) {
30593121
#if defined(USE_256BIT_X_SIMD)
30603122
return I32x8(ADD_I32_SIMD256(left.v, right.v));
@@ -3076,6 +3138,31 @@
30763138
IMPL_SCALAR_REFERENCE_INFIX_8_LANES(left, right, I32x8, int32_t, *)
30773139
#endif
30783140
}
3141+
// Behaviour is undefined if taking the absolute value of the most negative value that has no corresponding positive value.
3142+
inline I32x8 abs(const I32x8& value) {
3143+
#if defined(USE_AVX2)
3144+
return I32x8(_mm256_abs_epi32(value.v));
3145+
#else
3146+
int32_t v0 = value.scalars[0];
3147+
int32_t v1 = value.scalars[1];
3148+
int32_t v2 = value.scalars[2];
3149+
int32_t v3 = value.scalars[3];
3150+
int32_t v4 = value.scalars[4];
3151+
int32_t v5 = value.scalars[5];
3152+
int32_t v6 = value.scalars[6];
3153+
int32_t v7 = value.scalars[7];
3154+
return I32x8(
3155+
v0 < 0 ? -v0 : v0,
3156+
v1 < 0 ? -v1 : v1,
3157+
v2 < 0 ? -v2 : v2,
3158+
v3 < 0 ? -v3 : v3,
3159+
v4 < 0 ? -v4 : v4,
3160+
v5 < 0 ? -v5 : v5,
3161+
v6 < 0 ? -v6 : v6,
3162+
v7 < 0 ? -v7 : v7
3163+
);
3164+
#endif
3165+
}
30793166
inline U32x8 operator+(const U32x8& left, const U32x8& right) {
30803167
#if defined(USE_256BIT_X_SIMD)
30813168
return U32x8(ADD_U32_SIMD256(left.v, right.v));

Source/test/tests/SimdTest.cpp

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,6 @@
33
#include "../../DFPSR/base/simd.h"
44
#include "../../DFPSR/base/endian.h"
55

6-
// TODO: Write tests for the abs function in noSimd.h, using SIMD vectors.
7-
// Implement the abs function directly to override the template functoin when hardware is available for the vector type.
86
// TODO: Set up a test where SIMD is disabled to force using the reference implementation.
97
// TODO: Keep the reference implementation alongside the SIMD types during brute-force testing with millions of random inputs.
108

@@ -1076,6 +1074,24 @@ START_TEST(Simd)
10761074
ASSERT_EQUAL_SIMD(max(F32x4(1.1f, 2.2f, 3.3f, 4.4f), F32x4(5.0f, 3.0f, 1.0f, -1.0f)), F32x4(5.0f, 3.0f, 3.3f, 4.4f));
10771075
ASSERT_EQUAL_SIMD(max(F32x8(1.1f, 2.2f, 3.3f, 4.4f, 5.5f, 6.6f, 7.7f, 8.8f), F32x8(5.0f, 3.0f, 1.0f, -1.0f, 4.0f, 5.0f, -2.5f, 10.0f)), F32x8(5.0f, 3.0f, 3.3f, 4.4f, 5.5f, 6.6f, 7.7f, 10.0f));
10781076

1077+
// Absolute
1078+
ASSERT_EQUAL_SIMD(
1079+
abs(F32x4(1.1f,-2.2f, 3.3f,-4.4f)),
1080+
F32x4(1.1f, 2.2f, 3.3f, 4.4f)
1081+
);
1082+
ASSERT_EQUAL_SIMD(
1083+
abs(F32x8(1.1f,-2.2f,-3.3f, 4.4f, 5.5f,-6.6f,-7.7f,-8.8f)),
1084+
F32x8(1.1f, 2.2f, 3.3f, 4.4f, 5.5f, 6.6f, 7.7f, 8.8f)
1085+
);
1086+
ASSERT_EQUAL_SIMD(
1087+
abs(I32x4(1,-2, 3,-4)),
1088+
I32x4(1, 2, 3, 4)
1089+
);
1090+
ASSERT_EQUAL_SIMD(
1091+
abs(I32x8(1,-2,-3, 4, 5,-6,-7,-8)),
1092+
I32x8(1, 2, 3, 4, 5, 6, 7, 8)
1093+
);
1094+
10791095
// Clamp
10801096
ASSERT_EQUAL_SIMD(clamp(F32x4(-1.5f), F32x4(-35.1f, 1.0f, 2.0f, 45.7f), F32x4(1.5f)), F32x4(-1.5f, 1.0f, 1.5f, 1.5f));
10811097
ASSERT_EQUAL_SIMD(clampUpper(F32x4(-35.1f, 1.0f, 2.0f, 45.7f), F32x4(1.5f)), F32x4(-35.1f, 1.0f, 1.5f, 1.5f));

0 commit comments

Comments
 (0)