Skip to content

Commit 32b369e

Browse files
authored
Merge pull request #564 from cppalliance/gcc_bench
Add GCC/libdfp benchmarks
2 parents 685b337 + f0cd135 commit 32b369e

File tree

2 files changed

+332
-0
lines changed

2 files changed

+332
-0
lines changed

test/benchmark_libdfp.c

Lines changed: 326 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,326 @@
1+
// Copyright 2024 Matt Borland
2+
// Distributed under the Boost Software License, Version 1.0.
3+
// https://www.boost.org/LICENSE_1_0.txt
4+
5+
#include <stdio.h>
6+
#include <stdlib.h>
7+
#include <stdint.h>
8+
#include <time.h>
9+
#include <inttypes.h>
10+
11+
#define K 2000000
12+
#define N 5
13+
14+
// Returns a pseudo-random value obtained by mapping one rand() draw
// linearly onto the interval between min and max.
//
// min - value produced when rand() returns 0
// max - value produced when rand() returns RAND_MAX
//
// The sequence depends on the current rand() seed (see srand()).
double float_rand(double min, double max)
{
    // Keep the scale factor in double: storing it in a float would throw
    // away most of the bits of the quotient before it is scaled.
    const double scale = rand() / (double) RAND_MAX;
    return min + scale * (max - min);
}
19+
20+
// Fills buffer[0] .. buffer[buffer_len - 1] with float_rand(0.0, 1.0)
// draws, each converted to _Decimal32 on assignment.
//
// buffer     - destination array, must hold at least buffer_len elements
// buffer_len - number of elements to write
__attribute__ ((__noinline__)) void generate_vector_32(_Decimal32* buffer, size_t buffer_len)
{
    for (size_t idx = 0; idx < buffer_len; ++idx)
    {
        buffer[idx] = float_rand(0.0, 1.0);
    }
}
29+
30+
__attribute__ ((__noinline__)) void test_comparisons_32(_Decimal32* data, const char* label)
31+
{
32+
struct timespec t1, t2;
33+
clock_gettime(CLOCK_MONOTONIC, &t1);
34+
35+
size_t s = 0;
36+
37+
for (size_t n = 0; n < N; ++n)
38+
{
39+
for (size_t k = 0; k < K - 1; ++k)
40+
{
41+
_Decimal32 val1 = data[k];
42+
_Decimal32 val2 = data[k + 1];
43+
44+
s += (size_t)(val1 > val2);
45+
s += (size_t)(val1 >= val2);
46+
s += (size_t)(val1 < val2);
47+
s += (size_t)(val1 <= val2);
48+
s += (size_t)(val1 == val2);
49+
s += (size_t)(val1 != val2);
50+
}
51+
}
52+
53+
clock_gettime(CLOCK_MONOTONIC, &t2);
54+
55+
uint64_t elapsed_time_us = (uint64_t)((t2.tv_sec - t1.tv_sec) * 1000000 + (t2.tv_nsec - t1.tv_nsec) / 1000);
56+
printf("Comparisons <%-10s >: %-10" PRIu64 " us (s=%zu)\n", label, elapsed_time_us, s);
57+
}
58+
59+
// Fills buffer[0] .. buffer[buffer_len - 1] with float_rand(0.0, 1.0)
// draws, each converted to _Decimal64 on assignment.
//
// buffer     - destination array, must hold at least buffer_len elements
// buffer_len - number of elements to write
__attribute__ ((__noinline__)) void generate_vector_64(_Decimal64* buffer, size_t buffer_len)
{
    for (size_t idx = 0; idx < buffer_len; ++idx)
    {
        buffer[idx] = float_rand(0.0, 1.0);
    }
}
68+
69+
__attribute__ ((__noinline__)) void test_comparisons_64(_Decimal64* data, const char* label)
70+
{
71+
struct timespec t1, t2;
72+
clock_gettime(CLOCK_MONOTONIC, &t1);
73+
74+
size_t s = 0;
75+
76+
for (size_t n = 0; n < N; ++n)
77+
{
78+
for (size_t k = 0; k < K - 1; ++k)
79+
{
80+
_Decimal64 val1 = data[k];
81+
_Decimal64 val2 = data[k + 1];
82+
83+
s += (size_t)(val1 > val2);
84+
s += (size_t)(val1 >= val2);
85+
s += (size_t)(val1 < val2);
86+
s += (size_t)(val1 <= val2);
87+
s += (size_t)(val1 == val2);
88+
s += (size_t)(val1 != val2);
89+
}
90+
}
91+
92+
clock_gettime(CLOCK_MONOTONIC, &t2);
93+
94+
uint64_t elapsed_time_us = (uint64_t)((t2.tv_sec - t1.tv_sec) * 1000000 + (t2.tv_nsec - t1.tv_nsec) / 1000);
95+
printf("Comparisons <%-10s >: %-10" PRIu64 " us (s=%zu)\n", label, elapsed_time_us, s);
96+
}
97+
98+
// Fills buffer[0] .. buffer[buffer_len - 1] with float_rand(0.0, 1.0)
// draws, each converted to _Decimal128 on assignment.
//
// buffer     - destination array, must hold at least buffer_len elements
// buffer_len - number of elements to write
__attribute__ ((__noinline__)) void generate_vector_128(_Decimal128* buffer, size_t buffer_len)
{
    for (size_t idx = 0; idx < buffer_len; ++idx)
    {
        buffer[idx] = float_rand(0.0, 1.0);
    }
}
107+
108+
__attribute__ ((__noinline__)) void test_comparisons_128(_Decimal128* data, const char* label)
109+
{
110+
struct timespec t1, t2;
111+
clock_gettime(CLOCK_MONOTONIC, &t1);
112+
113+
size_t s = 0;
114+
115+
for (size_t n = 0; n < N; ++n)
116+
{
117+
for (size_t k = 0; k < K - 1; ++k)
118+
{
119+
_Decimal128 val1 = data[k];
120+
_Decimal128 val2 = data[k + 1];
121+
122+
s += (size_t)(val1 > val2);
123+
s += (size_t)(val1 >= val2);
124+
s += (size_t)(val1 < val2);
125+
s += (size_t)(val1 <= val2);
126+
s += (size_t)(val1 == val2);
127+
s += (size_t)(val1 != val2);
128+
}
129+
}
130+
131+
clock_gettime(CLOCK_MONOTONIC, &t2);
132+
133+
uint64_t elapsed_time_us = (uint64_t)((t2.tv_sec - t1.tv_sec) * 1000000 + (t2.tv_nsec - t1.tv_nsec) / 1000);
134+
printf("Comparisons <%-10s>: %-10" PRIu64 " us (s=%zu)\n", label, elapsed_time_us, s);
135+
}
136+
137+
// Pointer to a binary _Decimal32 operation: takes two _Decimal32 operands
// and returns a _Decimal32 result. This is the callback type accepted by
// test_two_element_operation_32.
typedef _Decimal32 (*operation_32)(_Decimal32, _Decimal32);
138+
139+
// Returns the _Decimal32 sum a + b.
_Decimal32 add_32(_Decimal32 a, _Decimal32 b)
{
    const _Decimal32 sum = a + b;
    return sum;
}
143+
// Returns the _Decimal32 difference a - b.
_Decimal32 sub_32(_Decimal32 a, _Decimal32 b)
{
    const _Decimal32 difference = a - b;
    return difference;
}
147+
148+
// Returns the _Decimal32 product a * b.
_Decimal32 mul_32(_Decimal32 a, _Decimal32 b)
{
    const _Decimal32 product = a * b;
    return product;
}
152+
153+
// Returns the _Decimal32 quotient a / b.
_Decimal32 div_32(_Decimal32 a, _Decimal32 b)
{
    const _Decimal32 quotient = a / b;
    return quotient;
}
157+
158+
// Times op applied to every pair of adjacent _Decimal32 elements in data,
// sweeping the array N times, then prints the elapsed CLOCK_MONOTONIC time
// in microseconds together with the sum of the results after each one is
// converted to size_t.
//
// data     - array read at indices 0 .. K - 1
// op       - binary operation under test
// label    - type name inserted into the report line
// op_label - operation name inserted into the report line
__attribute__ ((__noinline__)) void test_two_element_operation_32(_Decimal32* data, operation_32 op, const char* label, const char* op_label)
{
    struct timespec start, stop;
    clock_gettime(CLOCK_MONOTONIC, &start);

    // Accumulated results; printed so the calls stay observable.
    size_t s = 0;

    for (size_t pass = 0; pass < N; ++pass)
    {
        for (size_t next = 1; next < K; ++next)
        {
            const _Decimal32 lhs = data[next - 1];
            const _Decimal32 rhs = data[next];
            const _Decimal32 result = op(lhs, rhs);

            s += (size_t)result;
        }
    }

    clock_gettime(CLOCK_MONOTONIC, &stop);

    const time_t whole_seconds = stop.tv_sec - start.tv_sec;
    const long leftover_ns = stop.tv_nsec - start.tv_nsec;
    const uint64_t elapsed_time_us = (uint64_t)(whole_seconds * 1000000 + leftover_ns / 1000);

    printf("%-15s<%-10s >: %-10" PRIu64 " us (s=%zu)\n", op_label, label, elapsed_time_us, s);
}
181+
182+
// Pointer to a binary _Decimal64 operation: takes two _Decimal64 operands
// and returns a _Decimal64 result. This is the callback type accepted by
// test_two_element_operation_64.
typedef _Decimal64 (*operation_64)(_Decimal64, _Decimal64);
183+
184+
// Returns the _Decimal64 sum a + b.
_Decimal64 add_64(_Decimal64 a, _Decimal64 b)
{
    const _Decimal64 sum = a + b;
    return sum;
}
188+
// Returns the _Decimal64 difference a - b.
_Decimal64 sub_64(_Decimal64 a, _Decimal64 b)
{
    const _Decimal64 difference = a - b;
    return difference;
}
192+
193+
// Returns the _Decimal64 product a * b.
_Decimal64 mul_64(_Decimal64 a, _Decimal64 b)
{
    const _Decimal64 product = a * b;
    return product;
}
197+
198+
// Returns the _Decimal64 quotient a / b.
_Decimal64 div_64(_Decimal64 a, _Decimal64 b)
{
    const _Decimal64 quotient = a / b;
    return quotient;
}
202+
203+
// Times op applied to every pair of adjacent _Decimal64 elements in data,
// sweeping the array N times, then prints the elapsed CLOCK_MONOTONIC time
// in microseconds together with the sum of the results after each one is
// converted to size_t.
//
// data     - array read at indices 0 .. K - 1
// op       - binary operation under test
// label    - type name inserted into the report line
// op_label - operation name inserted into the report line
__attribute__ ((__noinline__)) void test_two_element_operation_64(_Decimal64* data, operation_64 op, const char* label, const char* op_label)
{
    struct timespec start, stop;
    clock_gettime(CLOCK_MONOTONIC, &start);

    // Accumulated results; printed so the calls stay observable.
    size_t s = 0;

    for (size_t pass = 0; pass < N; ++pass)
    {
        for (size_t next = 1; next < K; ++next)
        {
            const _Decimal64 lhs = data[next - 1];
            const _Decimal64 rhs = data[next];
            const _Decimal64 result = op(lhs, rhs);

            s += (size_t)result;
        }
    }

    clock_gettime(CLOCK_MONOTONIC, &stop);

    const time_t whole_seconds = stop.tv_sec - start.tv_sec;
    const long leftover_ns = stop.tv_nsec - start.tv_nsec;
    const uint64_t elapsed_time_us = (uint64_t)(whole_seconds * 1000000 + leftover_ns / 1000);

    printf("%-15s<%-10s >: %-10" PRIu64 " us (s=%zu)\n", op_label, label, elapsed_time_us, s);
}
226+
227+
// Pointer to a binary _Decimal128 operation: takes two _Decimal128 operands
// and returns a _Decimal128 result. This is the callback type accepted by
// test_two_element_operation_128.
typedef _Decimal128 (*operation_128)(_Decimal128, _Decimal128);
228+
229+
// Returns the _Decimal128 sum a + b.
_Decimal128 add_128(_Decimal128 a, _Decimal128 b)
{
    const _Decimal128 sum = a + b;
    return sum;
}
233+
// Returns the _Decimal128 difference a - b.
_Decimal128 sub_128(_Decimal128 a, _Decimal128 b)
{
    const _Decimal128 difference = a - b;
    return difference;
}
237+
238+
// Returns the _Decimal128 product a * b.
_Decimal128 mul_128(_Decimal128 a, _Decimal128 b)
{
    const _Decimal128 product = a * b;
    return product;
}
242+
243+
// Returns the _Decimal128 quotient a / b.
_Decimal128 div_128(_Decimal128 a, _Decimal128 b)
{
    const _Decimal128 quotient = a / b;
    return quotient;
}
247+
248+
// Times op applied to every pair of adjacent _Decimal128 elements in data,
// sweeping the array N times, then prints the elapsed CLOCK_MONOTONIC time
// in microseconds together with the sum of the results after each one is
// converted to size_t.
//
// data     - array read at indices 0 .. K - 1
// op       - binary operation under test
// label    - type name inserted into the report line
// op_label - operation name inserted into the report line
__attribute__ ((__noinline__)) void test_two_element_operation_128(_Decimal128* data, operation_128 op, const char* label, const char* op_label)
{
    struct timespec start, stop;
    clock_gettime(CLOCK_MONOTONIC, &start);

    // Accumulated results; printed so the calls stay observable.
    size_t s = 0;

    for (size_t pass = 0; pass < N; ++pass)
    {
        for (size_t next = 1; next < K; ++next)
        {
            const _Decimal128 lhs = data[next - 1];
            const _Decimal128 rhs = data[next];
            const _Decimal128 result = op(lhs, rhs);

            s += (size_t)result;
        }
    }

    clock_gettime(CLOCK_MONOTONIC, &stop);

    const time_t whole_seconds = stop.tv_sec - start.tv_sec;
    const long leftover_ns = stop.tv_nsec - start.tv_nsec;
    const uint64_t elapsed_time_us = (uint64_t)(whole_seconds * 1000000 + leftover_ns / 1000);

    printf("%-15s<%-10s>: %-10" PRIu64 " us (s=%zu)\n", op_label, label, elapsed_time_us, s);
}
271+
272+
int main()
273+
{
274+
// One time init of random number generator
275+
srand(time(NULL));
276+
277+
_Decimal32* d32_array = malloc(K * sizeof(_Decimal32));
278+
_Decimal64* d64_array = malloc(K * sizeof(_Decimal64));
279+
_Decimal128* d128_array = malloc(K * sizeof(_Decimal128));
280+
281+
if (d32_array == NULL || d64_array == NULL || d128_array == NULL)
282+
{
283+
return 1;
284+
}
285+
286+
printf("===== Comparisons =====\n");
287+
288+
generate_vector_32(d32_array, K);
289+
test_comparisons_32(d32_array, "_Decimal32");
290+
291+
generate_vector_64(d64_array, K);
292+
test_comparisons_64(d64_array, "_Decimal64");
293+
294+
generate_vector_128(d128_array, K);
295+
test_comparisons_128(d128_array, "_Decimal128");
296+
297+
printf("\n===== Addition =====\n");
298+
299+
test_two_element_operation_32(d32_array, add_32, "_Decimal32", "Addition");
300+
test_two_element_operation_64(d64_array, add_64, "_Decimal64", "Addition");
301+
test_two_element_operation_128(d128_array, add_128, "_Decimal128", "Addition");
302+
303+
printf("\n===== Subtraction =====\n");
304+
305+
test_two_element_operation_32(d32_array, sub_32, "_Decimal32", "Subtraction");
306+
test_two_element_operation_64(d64_array, sub_64, "_Decimal64", "Subtraction");
307+
test_two_element_operation_128(d128_array, sub_128, "_Decimal128", "Subtraction");
308+
309+
printf("\n===== Multiplication =====\n");
310+
311+
test_two_element_operation_32(d32_array, mul_32, "_Decimal32", "Multiplication");
312+
test_two_element_operation_64(d64_array, mul_64, "_Decimal64", "Multiplication");
313+
test_two_element_operation_128(d128_array, mul_128, "_Decimal128", "Multiplication");
314+
315+
printf("\n===== Division =====\n");
316+
317+
test_two_element_operation_32(d32_array, div_32, "_Decimal32", "Division");
318+
test_two_element_operation_64(d64_array, div_64, "_Decimal64", "Division");
319+
test_two_element_operation_128(d128_array, div_128, "_Decimal128", "Division");
320+
321+
free(d32_array);
322+
free(d64_array);
323+
free(d128_array);
324+
325+
return 0;
326+
}

test/benchmarks.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,12 @@ using namespace std::chrono_literals;
2727
# define BOOST_DECIMAL_NO_INLINE __attribute__ ((__noinline__))
2828
#elif defined(_MSC_VER)
2929
# define BOOST_DECIMAL_NO_INLINE __declspec(noinline)
30+
#elif defined(__GNUC__)
31+
# pragma GCC diagnostic push
32+
# pragma GCC diagnostic ignored "-Wfloat-equal"
33+
# pragma GCC diagnostic ignored "-Wold-style-cast"
34+
# pragma GCC diagnostic ignored "-Wstringop-overread"
35+
# define BOOST_DECIMAL_NO_INLINE __attribute__ ((__noinline__))
3036
#endif
3137

3238
constexpr unsigned N = 2'000'000U;

0 commit comments

Comments
 (0)