Skip to content

Commit ac1634d

Browse files
committed
Extended bruteforce SIMD test to basic floating-point operations.
1 parent 12b9dfb commit ac1634d

File tree

1 file changed

+43
-15
lines changed

1 file changed

+43
-15
lines changed

Source/test/tests/BruteSimdTest.cpp

Lines changed: 43 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,24 @@
66

77
static const intptr_t ITERATIONS = 1000000;
88

9+
template<typename T>
10+
T generate(RandomGenerator &generator) {
11+
return (T)random_generate_U64(generator);
12+
}
13+
template<>
14+
float generate<float>(RandomGenerator &generator) {
15+
// Too big floats will fail from not having enough precision, so this random generator is limited within -1000.0f to 1000.0f.
16+
int32_t fractions = random_generate_range(generator, -1000000, 1000000);
17+
return float(fractions) * 0.001f;
18+
}
19+
20+
// Tolerant comparison between a scalar reference result and a vector lane result.
//   a, b: The two values to compare, of any type that supports == and conversion to double.
//   Returns true when a and b are exactly equal, or when they differ by less than 0.0001
//   after both have been converted to double.
// Any comparison involving NaN returns false.
template<typename T>
bool somewhatEqual(const T &a, const T &b) {
	// Exact matches are accepted directly. This is needed for infinities, because
	// inf < inf + 0.0001 is false, so the margin test below would reject two equal infinities.
	if (a == b) {
		return true;
	}
	// Absolute tolerance margin, evaluated in double precision.
	double da = double(a);
	double db = double(b);
	return (da < db + 0.0001) && (da > db - 0.0001);
}
26+
927
template<typename S_IN, typename S_OUT, typename V_IN, typename V_OUT>
1028
void unaryEquivalent (
1129
const TemporaryCallback<S_OUT(const S_IN &a)> &scalarOp,
@@ -23,7 +41,7 @@ void unaryEquivalent (
2341
// Generate random input.
2442
ALIGN_BYTES(sizeof(V_IN)) S_IN inputA[laneCount];
2543
for (intptr_t lane = 0; lane < laneCount; lane++) {
26-
inputA[lane] = (S_IN)random_generate_U64(generator);
44+
inputA[lane] = generate<S_IN>(generator);
2745
}
2846
// Execute scalar operation for all lanes.
2947
ALIGN_BYTES(sizeof(V_OUT)) S_OUT scalarResult[laneCount];
@@ -33,16 +51,17 @@ void unaryEquivalent (
3351
// Execute SIMD operation with all lanes at the same time.
3452
V_IN simdInputA = V_IN::readAlignedUnsafe(inputA);
3553
V_OUT simdOutput = simdOp(simdInputA);
36-
ALIGN_BYTES(sizeof(V_OUT)) S_OUT simdResult[laneCount];
37-
simdOutput.writeAlignedUnsafe(simdResult);
54+
ALIGN_BYTES(sizeof(V_OUT)) S_OUT vectorResult[laneCount];
55+
simdOutput.writeAlignedUnsafe(vectorResult);
3856
// Compare results.
3957
for (intptr_t lane = 0; lane < laneCount; lane++) {
40-
// TODO: Handle tolerance margins for floating-point elements.
41-
if (scalarResult[lane] != simdResult[lane]) {
42-
printText(U"\nWrong result at lane ", lane, U" in 0..", laneCount - 1, U" at iteration ", iteration, U" of ", testName, U"!\n");
58+
if (!somewhatEqual(scalarResult[lane], vectorResult[lane])) {
59+
printText(U"\n_______________________________ FAIL _______________________________\n");
60+
printText(U"Wrong result at lane ", lane, U" of 0..", laneCount - 1, U" at iteration ", iteration, U" of ", testName, U"!\n");
4361
printText(U"Input: ", inputA[lane], U"\n");
4462
printText(U"Scalar result: ", scalarResult[lane], U"\n");
45-
printText(U"SIMD result: ", simdResult[lane], U"\n");
63+
printText(U"Vector result: ", vectorResult[lane], U"\n");
64+
printText(U"\n____________________________________________________________________\n");
4665
failed = true;
4766
return;
4867
}
@@ -69,8 +88,8 @@ void binaryEquivalent (
6988
ALIGN_BYTES(sizeof(V_OUT)) S_IN inputA[laneCount];
7089
ALIGN_BYTES(sizeof(V_OUT)) S_IN inputB[laneCount];
7190
for (intptr_t lane = 0; lane < laneCount; lane++) {
72-
inputA[lane] = (S_IN)random_generate_U64(generator);
73-
inputB[lane] = (S_IN)random_generate_U64(generator);
91+
inputA[lane] = generate<S_IN>(generator);
92+
inputB[lane] = generate<S_IN>(generator);
7493
}
7594
// Execute scalar operation for all lanes.
7695
ALIGN_BYTES(sizeof(V_OUT)) S_OUT scalarResult[laneCount];
@@ -81,16 +100,17 @@ void binaryEquivalent (
81100
V_IN simdInputA = V_IN::readAlignedUnsafe(inputA);
82101
V_IN simdInputB = V_IN::readAlignedUnsafe(inputB);
83102
V_OUT simdOutput = simdOp(simdInputA, simdInputB);
84-
ALIGN_BYTES(sizeof(V_OUT)) S_OUT simdResult[laneCount];
85-
simdOutput.writeAlignedUnsafe(simdResult);
103+
ALIGN_BYTES(sizeof(V_OUT)) S_OUT vectorResult[laneCount];
104+
simdOutput.writeAlignedUnsafe(vectorResult);
86105
// Compare results.
87106
for (intptr_t lane = 0; lane < laneCount; lane++) {
88-
// TODO: Handle tolerance margins for floating-point elements.
89-
if (scalarResult[lane] != simdResult[lane]) {
90-
printText(U"\nWrong result at lane ", lane, U" in 0..", laneCount - 1, U" at iteration ", iteration, U" of ", testName, U"!\n");
107+
if (!somewhatEqual(scalarResult[lane], vectorResult[lane])) {
108+
printText(U"\n_______________________________ FAIL _______________________________\n");
109+
printText(U"\nWrong result at lane ", lane, U" of 0..", laneCount - 1, U" at iteration ", iteration, U" of ", testName, U"!\n");
91110
printText(U"Input: ", inputA[lane], U", ", inputB[lane], U"\n");
92111
printText(U"Scalar result: ", scalarResult[lane], U"\n");
93-
printText(U"SIMD result: ", simdResult[lane], U"\n");
112+
printText(U"Vector result: ", vectorResult[lane], U"\n");
113+
printText(U"\n____________________________________________________________________\n");
94114
failed = true;
95115
return;
96116
}
@@ -129,6 +149,8 @@ START_TEST(BruteSimd)
129149
BINARY_POINT_EQUIVALENCE(uint32_t, U32x8 , a + b);
130150
BINARY_POINT_EQUIVALENCE(int32_t , I32x4 , a + b);
131151
BINARY_POINT_EQUIVALENCE(int32_t , I32x4 , a + b);
152+
BINARY_POINT_EQUIVALENCE(float , F32x4 , a + b);
153+
BINARY_POINT_EQUIVALENCE(float , F32x4 , a + b);
132154

133155
// Subtraction
134156
BINARY_POINT_EQUIVALENCE(uint8_t , U8x16 , a - b);
@@ -139,10 +161,14 @@ START_TEST(BruteSimd)
139161
BINARY_POINT_EQUIVALENCE(uint32_t, U32x8 , a - b);
140162
BINARY_POINT_EQUIVALENCE(int32_t , I32x4 , a - b);
141163
BINARY_POINT_EQUIVALENCE(int32_t , I32x4 , a - b);
164+
BINARY_POINT_EQUIVALENCE(float , F32x4 , a - b);
165+
BINARY_POINT_EQUIVALENCE(float , F32x4 , a - b);
142166

143167
// Negation
144168
UNARY_POINT_EQUIVALENCE(int32_t , I32x4 , -a);
145169
UNARY_POINT_EQUIVALENCE(int32_t , I32x4 , -a);
170+
UNARY_POINT_EQUIVALENCE(float , F32x4 , -a);
171+
UNARY_POINT_EQUIVALENCE(float , F32x4 , -a);
146172

147173
// Multiplication
148174
//BINARY_POINT_EQUIVALENCE(uint8_t , U8x16 , a * b); // Missing
@@ -153,6 +179,8 @@ START_TEST(BruteSimd)
153179
BINARY_POINT_EQUIVALENCE(uint32_t, U32x8 , a * b);
154180
BINARY_POINT_EQUIVALENCE(int32_t , I32x4 , a * b);
155181
BINARY_POINT_EQUIVALENCE(int32_t , I32x4 , a * b);
182+
BINARY_POINT_EQUIVALENCE(float , F32x4 , a * b);
183+
BINARY_POINT_EQUIVALENCE(float , F32x4 , a * b);
156184

157185
// Bitwise and
158186
//BINARY_POINT_EQUIVALENCE(uint8_t , U8x16 , a & b); // Missing

0 commit comments

Comments
 (0)