Skip to content

Commit 4f83ea0

Browse files
authored
Add MSVC based benchmarks of Intel libbid on x64 (#1284)
1 parent fc8e8d7 commit 4f83ea0

File tree

2 files changed

+121
-27
lines changed

2 files changed

+121
-27
lines changed

doc/modules/ROOT/pages/benchmarks.adoc

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ To run the GCC benchmarks you can use the following command: `gcc benchmark_libd
2929
To run the Intel benchmarks you will need both the https://www.intel.com/content/www/us/en/developer/tools/oneapi/overview.html[Intel Compiler], and the https://www.intel.com/content/www/us/en/developer/articles/tool/intel-decimal-floating-point-math-library.html[library].
3030
You can then use the following command: `icx benchmark_libbid.c -O3 $PATH_TO_LIBBID/libbid.a -std=c17` followed by: `./a.out`
3131
You can also use `gcc` instead of `icx`.
32+
On Windows, the command is similar: `cl benchmark_libbid.c /O2 /std:c17 ..\PATH_TO_LIBBID\cl000libbid.lib`, followed by: `.\benchmark_libbid.exe`.
3233

3334
NOTE: The Intel benchmarks can only be run on one of their supported architectures: IA-32, IA-64, and Intel x64
3435

@@ -725,6 +726,15 @@ Run using an Intel i9-11900k chipset running Windows 11 and Visual Studio 17.14.
725726
| `decimal_fast128_t`
726727
| 801,708
727728
| 4.300
729+
| Intel `BID_UINT32`
730+
| 4,372,973
731+
| 23.457
732+
| Intel `BID_UINT64`
733+
| 9,345,300
734+
| 50.129
735+
| Intel `BID_UINT128`
736+
| 11,504,914
737+
| 61.714
728738
|===
729739

730740
=== Addition
@@ -755,6 +765,15 @@ Run using an Intel i9-11900k chipset running Windows 11 and Visual Studio 17.14.
755765
| `decimal_fast128_t`
756766
| 3,109,101
757767
| 38.914
768+
| Intel `BID_UINT32`
769+
| 4,967,728
770+
| 62.177
771+
| Intel `BID_UINT64`
772+
| 6,268,077
773+
| 78.452
774+
| Intel `BID_UINT128`
775+
| 4,847,330
776+
| 60.670
758777
|===
759778

760779
=== Subtraction
@@ -785,6 +804,15 @@ Run using an Intel i9-11900k chipset running Windows 11 and Visual Studio 17.14.
785804
| `decimal_fast128_t`
786805
| 2,963,570
787806
| 9.167
807+
| Intel `BID_UINT32`
808+
| 4,603,462
809+
| 14.240
810+
| Intel `BID_UINT64`
811+
| 5,627,305
812+
| 17.407
813+
| Intel `BID_UINT128`
814+
| 5,824,263
815+
| 18.016
788816
|===
789817

790818
=== Multiplication
@@ -815,6 +843,15 @@ Run using an Intel i9-11900k chipset running Windows 11 and Visual Studio 17.14.
815843
| `decimal_fast128_t`
816844
| 9,236,110
817845
| 117.434
846+
| Intel `BID_UINT32`
847+
| 3,833,363
848+
| 48.740
849+
| Intel `BID_UINT64`
850+
| 11,671,369
851+
| 148.398
852+
| Intel `BID_UINT128`
853+
| 62,036,577
854+
| 788.778
818855
|===
819856

820857
=== Division
@@ -845,6 +882,15 @@ Run using an Intel i9-11900k chipset running Windows 11 and Visual Studio 17.14.
845882
| `decimal_fast128_t`
846883
| 11,587,763
847884
| 129.737
885+
| Intel `BID_UINT32`
886+
| 5,037,576
887+
| 56.401
888+
| Intel `BID_UINT64`
889+
| 8,768,259
890+
| 98.170
891+
| Intel `BID_UINT128`
892+
| 38,519,644
893+
| 431.269
848894
|===
849895

850896
=== `from_chars`

test/benchmark_libbid.c

Lines changed: 75 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -2,25 +2,71 @@
22
// Distributed under the Boost Software License, Version 1.0.
33
// https://www.boost.org/LICENSE_1_0.txt
44

5-
#define _POSIX_C_SOURCE 199309L
5+
#ifdef _WIN32
6+
# define WIN32_LEAN_AND_MEAN
7+
# include <windows.h>
8+
#else
9+
# define _POSIX_C_SOURCE 199309L
10+
#endif
611

712
#include <stdio.h>
813
#include <stdlib.h>
914
#include <stdint.h>
10-
#include <time.h>
1115
#include <inttypes.h>
1216
#include <float.h>
1317
#include <fenv.h>
1418

19+
#include "..\LIBRARY\src\bid_conf.h"
20+
#include "..\LIBRARY\src\bid_functions.h"
21+
1522
typedef BID_UINT32 Decimal32;
1623
typedef BID_UINT64 Decimal64;
17-
#include "../LIBRARY/src/bid_conf.h"
18-
#include "../LIBRARY/src/bid_functions.h"
1924
typedef BID_UINT128 Decimal128;
2025

2126
#define K 20000000
2227
#define N 5
2328

29+
// Function attribute that forbids inlining, spelled for the active compiler:
// MSVC takes __declspec(noinline); every other compiler is given the GNU-style
// attribute.
#if defined(_MSC_VER)
#define BOOST_DECIMAL_NOINLINE __declspec(noinline)
#else
#define BOOST_DECIMAL_NOINLINE __attribute__((noinline))
#endif
34+
35+
#if defined(_WIN32)
#include <windows.h>

// Placeholder clock id for the replacement below, which ignores it.
#define CLOCK_MONOTONIC 1

// Local stand-in for the POSIX timespec used by the timing code.
struct timespec
{
    long tv_sec;  // whole seconds
    long tv_nsec; // remaining nanoseconds
};

// Windows replacement for clock_gettime() built on the performance counter.
//
// clock_id is ignored; the reading always comes from QueryPerformanceCounter.
// tp receives the counter value split into seconds and nanoseconds.
// Always returns 0.
int clock_gettime(int clock_id, struct timespec* tp)
{
    static LARGE_INTEGER ticks_per_second = { 0 };
    LARGE_INTEGER now;
    LONGLONG whole_seconds;
    LONGLONG leftover_ticks;

    (void)clock_id;

    // The counter frequency is queried on the first call and cached.
    if (ticks_per_second.QuadPart == 0)
    {
        QueryPerformanceFrequency(&ticks_per_second);
    }

    QueryPerformanceCounter(&now);

    // Split before scaling so only the sub-second remainder is multiplied
    // by 1e9.
    whole_seconds = now.QuadPart / ticks_per_second.QuadPart;
    leftover_ticks = now.QuadPart % ticks_per_second.QuadPart;

    tp->tv_sec = (long)whole_seconds;
    tp->tv_nsec = (long)((leftover_ticks * 1000000000LL) / ticks_per_second.QuadPart);

    return 0;
}

#else
#include <time.h>
#endif
69+
2470
uint32_t flag = 0;
2571

2672
uint32_t random_uint32(void)
@@ -45,15 +91,15 @@ uint64_t random_uint64(void)
4591
return r;
4692
}
4793

48-
__attribute__ ((noinline)) void generate_vector_32(Decimal32* buffer, size_t buffer_len)
94+
// Fills buffer[0 .. buffer_len) with Decimal32 values, each converted from a
// fresh random_uint32() result using BID_ROUNDING_TO_NEAREST. The file-scope
// `flag` is passed to libbid as the status-flag output.
BOOST_DECIMAL_NOINLINE void generate_vector_32(Decimal32* buffer, size_t buffer_len)
{
    size_t filled = 0;
    while (filled < buffer_len)
    {
        const uint32_t raw = random_uint32();
        buffer[filled] = bid32_from_uint32(raw, BID_ROUNDING_TO_NEAREST, &flag);
        ++filled;
    }
}
55101

56-
__attribute__ ((noinline)) void test_comparisons_32(Decimal32* data, const char* label)
102+
BOOST_DECIMAL_NOINLINE void test_comparisons_32(Decimal32* data, const char* label)
57103
{
58104
struct timespec t1, t2;
59105
clock_gettime(CLOCK_MONOTONIC, &t1);
@@ -82,15 +128,15 @@ __attribute__ ((noinline)) void test_comparisons_32(Decimal32* data, const char*
82128
printf("Comparisons <%-10s >: %-10" PRIu64 " us (s=%zu)\n", label, elapsed_time_us, s);
83129
}
84130

85-
__attribute__ ((noinline)) void generate_vector_64(Decimal64* buffer, size_t buffer_len)
131+
// Fills buffer[0 .. buffer_len) with Decimal64 values, each converted from a
// fresh random_uint64() result using BID_ROUNDING_TO_NEAREST. The file-scope
// `flag` is passed to libbid as the status-flag output.
BOOST_DECIMAL_NOINLINE void generate_vector_64(Decimal64* buffer, size_t buffer_len)
{
    size_t filled = 0;
    while (filled < buffer_len)
    {
        const uint64_t raw = random_uint64();
        buffer[filled] = bid64_from_uint64(raw, BID_ROUNDING_TO_NEAREST, &flag);
        ++filled;
    }
}
92138

93-
__attribute__ ((noinline)) void test_comparisons_64(Decimal64* data, const char* label)
139+
BOOST_DECIMAL_NOINLINE void test_comparisons_64(Decimal64* data, const char* label)
94140
{
95141
struct timespec t1, t2;
96142
clock_gettime(CLOCK_MONOTONIC, &t1);
@@ -153,12 +199,12 @@ Decimal128 random_decimal128(void)
153199

154200
// 5. Parse to decimal128
155201
_IDEC_flags flags = 0;
156-
Decimal128 result = bid128_from_string(str, &flags);
202+
Decimal128 result = bid128_from_string(str, BID_ROUNDING_TO_NEAREST, &flags);
157203

158204
return result;
159205
}
160206

161-
__attribute__ ((__noinline__)) void generate_vector_128(Decimal128* buffer, size_t buffer_len)
207+
BOOST_DECIMAL_NOINLINE void generate_vector_128(Decimal128* buffer, size_t buffer_len)
162208
{
163209
size_t i = 0;
164210
while (i < buffer_len)
@@ -168,7 +214,7 @@ __attribute__ ((__noinline__)) void generate_vector_128(Decimal128* buffer, size
168214
}
169215
}
170216

171-
__attribute__ ((__noinline__)) void test_comparisons_128(Decimal128* data, const char* label)
217+
BOOST_DECIMAL_NOINLINE void test_comparisons_128(Decimal128* data, const char* label)
172218
{
173219
struct timespec t1, t2;
174220
clock_gettime(CLOCK_MONOTONIC, &t1);
@@ -200,26 +246,26 @@ __attribute__ ((__noinline__)) void test_comparisons_128(Decimal128* data, const
200246

201247
typedef Decimal32 (*operation_32)(Decimal32, Decimal32);
202248

203-
__attribute__ ((noinline)) Decimal32 add_32(Decimal32 a, Decimal32 b)
249+
// operation_32-compatible wrapper: returns bid32_add(a, b) computed with
// BID_ROUNDING_TO_NEAREST; the file-scope `flag` is the status-flag output.
BOOST_DECIMAL_NOINLINE Decimal32 add_32(Decimal32 a, Decimal32 b)
{
    const Decimal32 sum = bid32_add(a, b, BID_ROUNDING_TO_NEAREST, &flag);
    return sum;
}
207-
__attribute__ ((noinline)) Decimal32 sub_32(Decimal32 a, Decimal32 b)
253+
// operation_32-compatible wrapper: returns bid32_sub(a, b) computed with
// BID_ROUNDING_TO_NEAREST; the file-scope `flag` is the status-flag output.
BOOST_DECIMAL_NOINLINE Decimal32 sub_32(Decimal32 a, Decimal32 b)
{
    const Decimal32 difference = bid32_sub(a, b, BID_ROUNDING_TO_NEAREST, &flag);
    return difference;
}
211257

212-
__attribute__ ((noinline)) Decimal32 mul_32(Decimal32 a, Decimal32 b)
258+
// operation_32-compatible wrapper: returns bid32_mul(a, b) computed with
// BID_ROUNDING_TO_NEAREST; the file-scope `flag` is the status-flag output.
BOOST_DECIMAL_NOINLINE Decimal32 mul_32(Decimal32 a, Decimal32 b)
{
    const Decimal32 product = bid32_mul(a, b, BID_ROUNDING_TO_NEAREST, &flag);
    return product;
}
216262

217-
__attribute__ ((noinline)) Decimal32 div_32(Decimal32 a, Decimal32 b)
263+
// operation_32-compatible wrapper: returns bid32_div(a, b) computed with
// BID_ROUNDING_TO_NEAREST; the file-scope `flag` is the status-flag output.
BOOST_DECIMAL_NOINLINE Decimal32 div_32(Decimal32 a, Decimal32 b)
{
    const Decimal32 quotient = bid32_div(a, b, BID_ROUNDING_TO_NEAREST, &flag);
    return quotient;
}
221267

222-
__attribute__ ((noinline)) void test_two_element_operation_32(Decimal32* data, operation_32 op, const char* label, const char* op_label)
268+
BOOST_DECIMAL_NOINLINE void test_two_element_operation_32(Decimal32* data, operation_32 op, const char* label, const char* op_label)
223269
{
224270
struct timespec t1, t2;
225271
clock_gettime(CLOCK_MONOTONIC, &t1);
@@ -245,27 +291,27 @@ __attribute__ ((noinline)) void test_two_element_operation_32(Decimal32* data, o
245291

246292
typedef Decimal64 (*operation_64)(Decimal64, Decimal64);
247293

248-
__attribute__ ((noinline)) Decimal64 add_64(Decimal64 a, Decimal64 b)
294+
// operation_64-compatible wrapper: returns bid64_add(a, b) computed with
// BID_ROUNDING_TO_NEAREST; the file-scope `flag` is the status-flag output.
BOOST_DECIMAL_NOINLINE Decimal64 add_64(Decimal64 a, Decimal64 b)
{
    const Decimal64 sum = bid64_add(a, b, BID_ROUNDING_TO_NEAREST, &flag);
    return sum;
}
252298

253-
__attribute__ ((noinline)) Decimal64 sub_64(Decimal64 a, Decimal64 b)
299+
// operation_64-compatible wrapper: returns bid64_sub(a, b) computed with
// BID_ROUNDING_TO_NEAREST; the file-scope `flag` is the status-flag output.
BOOST_DECIMAL_NOINLINE Decimal64 sub_64(Decimal64 a, Decimal64 b)
{
    const Decimal64 difference = bid64_sub(a, b, BID_ROUNDING_TO_NEAREST, &flag);
    return difference;
}
257303

258-
__attribute__ ((noinline)) Decimal64 mul_64(Decimal64 a, Decimal64 b)
304+
// operation_64-compatible wrapper: returns bid64_mul(a, b) computed with
// BID_ROUNDING_TO_NEAREST; the file-scope `flag` is the status-flag output.
BOOST_DECIMAL_NOINLINE Decimal64 mul_64(Decimal64 a, Decimal64 b)
{
    const Decimal64 product = bid64_mul(a, b, BID_ROUNDING_TO_NEAREST, &flag);
    return product;
}
262308

263-
__attribute__ ((noinline)) Decimal64 div_64(Decimal64 a, Decimal64 b)
309+
// operation_64-compatible wrapper: returns bid64_div(a, b) computed with
// BID_ROUNDING_TO_NEAREST; the file-scope `flag` is the status-flag output.
BOOST_DECIMAL_NOINLINE Decimal64 div_64(Decimal64 a, Decimal64 b)
{
    const Decimal64 quotient = bid64_div(a, b, BID_ROUNDING_TO_NEAREST, &flag);
    return quotient;
}
267313

268-
__attribute__ ((noinline)) void test_two_element_operation_64(Decimal64* data, operation_64 op, const char* label, const char* op_label)
314+
BOOST_DECIMAL_NOINLINE void test_two_element_operation_64(Decimal64* data, operation_64 op, const char* label, const char* op_label)
269315
{
270316
struct timespec t1, t2;
271317
clock_gettime(CLOCK_MONOTONIC, &t1);
@@ -292,27 +338,27 @@ __attribute__ ((noinline)) void test_two_element_operation_64(Decimal64* data, o
292338

293339
typedef Decimal128 (*operation_128)(Decimal128, Decimal128);
294340

295-
__attribute__ ((__noinline__)) Decimal128 add_128(Decimal128 a, Decimal128 b)
341+
// operation_128-compatible wrapper: returns bid128_add(a, b) computed with
// BID_ROUNDING_TO_NEAREST; the file-scope `flag` is the status-flag output.
BOOST_DECIMAL_NOINLINE Decimal128 add_128(Decimal128 a, Decimal128 b)
{
    const Decimal128 sum = bid128_add(a, b, BID_ROUNDING_TO_NEAREST, &flag);
    return sum;
}
299345

300-
__attribute__ ((__noinline__)) Decimal128 sub_128(Decimal128 a, Decimal128 b)
346+
// operation_128-compatible wrapper: returns bid128_sub(a, b) computed with
// BID_ROUNDING_TO_NEAREST; the file-scope `flag` is the status-flag output.
BOOST_DECIMAL_NOINLINE Decimal128 sub_128(Decimal128 a, Decimal128 b)
{
    const Decimal128 difference = bid128_sub(a, b, BID_ROUNDING_TO_NEAREST, &flag);
    return difference;
}
304350

305-
__attribute__ ((__noinline__)) Decimal128 mul_128(Decimal128 a, Decimal128 b)
351+
// operation_128-compatible wrapper: returns bid128_mul(a, b) computed with
// BID_ROUNDING_TO_NEAREST; the file-scope `flag` is the status-flag output.
BOOST_DECIMAL_NOINLINE Decimal128 mul_128(Decimal128 a, Decimal128 b)
{
    const Decimal128 product = bid128_mul(a, b, BID_ROUNDING_TO_NEAREST, &flag);
    return product;
}
309355

310-
__attribute__ ((__noinline__)) Decimal128 div_128(Decimal128 a, Decimal128 b)
356+
// operation_128-compatible wrapper: returns bid128_div(a, b) computed with
// BID_ROUNDING_TO_NEAREST; the file-scope `flag` is the status-flag output.
BOOST_DECIMAL_NOINLINE Decimal128 div_128(Decimal128 a, Decimal128 b)
{
    const Decimal128 quotient = bid128_div(a, b, BID_ROUNDING_TO_NEAREST, &flag);
    return quotient;
}
314360

315-
__attribute__ ((__noinline__)) void test_two_element_operation_128(Decimal128* data, operation_128 op, const char* label, const char* op_label)
361+
BOOST_DECIMAL_NOINLINE void test_two_element_operation_128(Decimal128* data, operation_128 op, const char* label, const char* op_label)
316362
{
317363
struct timespec t1, t2;
318364
clock_gettime(CLOCK_MONOTONIC, &t1);
@@ -342,8 +388,10 @@ int main()
342388
// One time init of random number generator
343389
srand(time(NULL));
344390

391+
#ifndef _WIN32
345392
fedisableexcept(FE_ALL_EXCEPT);
346-
393+
#endif
394+
347395
Decimal32* d32_array = malloc(K * sizeof(Decimal32));
348396
Decimal64* d64_array = malloc(K * sizeof(Decimal64));
349397
Decimal128* d128_array = malloc(K * sizeof(Decimal128));

0 commit comments

Comments
 (0)