66
77static const intptr_t ITERATIONS = 1000000 ;
88
9+ template <typename T>
10+ T generate (RandomGenerator &generator) {
11+ return (T)random_generate_U64 (generator);
12+ }
13+ template <>
14+ float generate<float >(RandomGenerator &generator) {
15+ // Too big floats will fail from not having enough precision, so this random generator is limited within -1000.0f to 1000.0f.
16+ int32_t fractions = random_generate_range (generator, -1000000 , 1000000 );
17+ return float (fractions) * 0 .001f ;
18+ }
19+
// Returns true when lhs and rhs differ by less than 0.0001.
//   The difference is compared against the margin instead of adding the margin to one side,
//   because a margin added to a large value is rounded away in double precision,
//   which would make two identical large values look different.
//   NaN on either side gives false, because every comparison with NaN is false.
static bool withinTolerance(double lhs, double rhs) {
	const double difference = lhs - rhs;
	return (difference < 0.0001) && (difference > -0.0001);
}

// Decides if a scalar reference result and a vector result agree.
//   a and b are the two results to compare.
//   Types without a specialization below are compared exactly, so that integers of any width
//   are never rounded through double before the comparison.
template <typename T>
bool somewhatEqual(const T &a, const T &b) {
	return a == b;
}

// Floats are allowed a small absolute error margin, evaluated in double precision.
template <>
bool somewhatEqual<float>(const float &a, const float &b) {
	return withinTolerance(double(a), double(b));
}

// Doubles are allowed the same absolute error margin as floats.
template <>
bool somewhatEqual<double>(const double &a, const double &b) {
	return withinTolerance(a, b);
}
26+
927template <typename S_IN, typename S_OUT, typename V_IN, typename V_OUT>
1028void unaryEquivalent (
1129 const TemporaryCallback<S_OUT(const S_IN &a)> &scalarOp,
@@ -23,7 +41,7 @@ void unaryEquivalent (
2341 // Generate random input.
2442 ALIGN_BYTES (sizeof (V_IN)) S_IN inputA[laneCount];
2543 for (intptr_t lane = 0 ; lane < laneCount; lane++) {
26- inputA[lane] = ( S_IN) random_generate_U64 (generator);
44+ inputA[lane] = generate< S_IN> (generator);
2745 }
2846 // Execute scalar operation for all lanes.
2947 ALIGN_BYTES (sizeof (V_OUT)) S_OUT scalarResult[laneCount];
@@ -33,16 +51,17 @@ void unaryEquivalent (
3351 // Execute SIMD operation with all lanes at the same time.
3452 V_IN simdInputA = V_IN::readAlignedUnsafe (inputA);
3553 V_OUT simdOutput = simdOp (simdInputA);
36- ALIGN_BYTES (sizeof (V_OUT)) S_OUT simdResult [laneCount];
37- simdOutput.writeAlignedUnsafe (simdResult );
54+ ALIGN_BYTES (sizeof (V_OUT)) S_OUT vectorResult [laneCount];
55+ simdOutput.writeAlignedUnsafe (vectorResult );
3856 // Compare results.
3957 for (intptr_t lane = 0 ; lane < laneCount; lane++) {
40- // TODO: Handle tolerance margins for floating-point elements.
41- if (scalarResult[lane] != simdResult[lane]) {
42- printText (U" \n Wrong result at lane " , lane, U" in 0.." , laneCount - 1 , U" at iteration " , iteration, U" of " , testName, U" !\n " );
58+ if (! somewhatEqual (scalarResult[lane], vectorResult[lane])) {
59+ printText ( U" \n _______________________________ FAIL _______________________________ \n " );
60+ printText (U" Wrong result at lane " , lane, U" of 0.." , laneCount - 1 , U" at iteration " , iteration, U" of " , testName, U" !\n " );
4361 printText (U" Input: " , inputA[lane], U" \n " );
4462 printText (U" Scalar result: " , scalarResult[lane], U" \n " );
45- printText (U" SIMD result: " , simdResult[lane], U" \n " );
63+ printText (U" Vector result: " , vectorResult[lane], U" \n " );
64+ printText (U" \n ____________________________________________________________________\n " );
4665 failed = true ;
4766 return ;
4867 }
@@ -69,8 +88,8 @@ void binaryEquivalent (
6988 ALIGN_BYTES (sizeof (V_OUT)) S_IN inputA[laneCount];
7089 ALIGN_BYTES (sizeof (V_OUT)) S_IN inputB[laneCount];
7190 for (intptr_t lane = 0 ; lane < laneCount; lane++) {
72- inputA[lane] = ( S_IN) random_generate_U64 (generator);
73- inputB[lane] = ( S_IN) random_generate_U64 (generator);
91+ inputA[lane] = generate< S_IN> (generator);
92+ inputB[lane] = generate< S_IN> (generator);
7493 }
7594 // Execute scalar operation for all lanes.
7695 ALIGN_BYTES (sizeof (V_OUT)) S_OUT scalarResult[laneCount];
@@ -81,16 +100,17 @@ void binaryEquivalent (
81100 V_IN simdInputA = V_IN::readAlignedUnsafe (inputA);
82101 V_IN simdInputB = V_IN::readAlignedUnsafe (inputB);
83102 V_OUT simdOutput = simdOp (simdInputA, simdInputB);
84- ALIGN_BYTES (sizeof (V_OUT)) S_OUT simdResult [laneCount];
85- simdOutput.writeAlignedUnsafe (simdResult );
103+ ALIGN_BYTES (sizeof (V_OUT)) S_OUT vectorResult [laneCount];
104+ simdOutput.writeAlignedUnsafe (vectorResult );
86105 // Compare results.
87106 for (intptr_t lane = 0 ; lane < laneCount; lane++) {
88- // TODO: Handle tolerance margins for floating-point elements.
89- if (scalarResult[lane] != simdResult[lane]) {
90- printText (U" \n Wrong result at lane " , lane, U" in 0.." , laneCount - 1 , U" at iteration " , iteration, U" of " , testName, U" !\n " );
107+ if (! somewhatEqual (scalarResult[lane], vectorResult[lane])) {
108+ printText ( U" \n _______________________________ FAIL _______________________________ \n " );
109+ printText (U" \n Wrong result at lane " , lane, U" of 0.." , laneCount - 1 , U" at iteration " , iteration, U" of " , testName, U" !\n " );
91110 printText (U" Input: " , inputA[lane], U" , " , inputB[lane], U" \n " );
92111 printText (U" Scalar result: " , scalarResult[lane], U" \n " );
93- printText (U" SIMD result: " , simdResult[lane], U" \n " );
112+ printText (U" Vector result: " , vectorResult[lane], U" \n " );
113+ printText (U" \n ____________________________________________________________________\n " );
94114 failed = true ;
95115 return ;
96116 }
@@ -129,6 +149,8 @@ START_TEST(BruteSimd)
129149 BINARY_POINT_EQUIVALENCE (uint32_t , U32x8 , a + b);
130150 BINARY_POINT_EQUIVALENCE (int32_t , I32x4 , a + b);
131151 BINARY_POINT_EQUIVALENCE (int32_t , I32x4 , a + b);
152+ BINARY_POINT_EQUIVALENCE (float , F32x4 , a + b);
153+ BINARY_POINT_EQUIVALENCE (float , F32x4 , a + b);
132154
133155 // Subtraction
134156 BINARY_POINT_EQUIVALENCE (uint8_t , U8x16 , a - b);
@@ -139,10 +161,14 @@ START_TEST(BruteSimd)
139161 BINARY_POINT_EQUIVALENCE (uint32_t , U32x8 , a - b);
140162 BINARY_POINT_EQUIVALENCE (int32_t , I32x4 , a - b);
141163 BINARY_POINT_EQUIVALENCE (int32_t , I32x4 , a - b);
164+ BINARY_POINT_EQUIVALENCE (float , F32x4 , a - b);
165+ BINARY_POINT_EQUIVALENCE (float , F32x4 , a - b);
142166
143167 // Negation
144168 UNARY_POINT_EQUIVALENCE (int32_t , I32x4 , -a);
145169 UNARY_POINT_EQUIVALENCE (int32_t , I32x4 , -a);
170+ UNARY_POINT_EQUIVALENCE (float , F32x4 , -a);
171+ UNARY_POINT_EQUIVALENCE (float , F32x4 , -a);
146172
147173 // Multiplication
148174 // BINARY_POINT_EQUIVALENCE(uint8_t , U8x16 , a * b); // Missing
@@ -153,6 +179,8 @@ START_TEST(BruteSimd)
153179 BINARY_POINT_EQUIVALENCE (uint32_t , U32x8 , a * b);
154180 BINARY_POINT_EQUIVALENCE (int32_t , I32x4 , a * b);
155181 BINARY_POINT_EQUIVALENCE (int32_t , I32x4 , a * b);
182+ BINARY_POINT_EQUIVALENCE (float , F32x4 , a * b);
183+ BINARY_POINT_EQUIVALENCE (float , F32x4 , a * b);
156184
157185 // Bitwise and
158186 // BINARY_POINT_EQUIVALENCE(uint8_t , U8x16 , a & b); // Missing
0 commit comments