Add MSVC based benchmarks of Intel libbid on x64 (#1284)

mborland · web-flow · commit 4f83ea00a5db · 2025-12-16T23:18:40.000+03:00
diff --git a/doc/modules/ROOT/pages/benchmarks.adoc b/doc/modules/ROOT/pages/benchmarks.adoc
@@ -29,6 +29,7 @@ To run the GCC benchmarks you can use the following command: `gcc benchmark_libd
 To run the Intel benchmarks you will need both the https://www.intel.com/content/www/us/en/developer/tools/oneapi/overview.html[Intel Compiler], and the https://www.intel.com/content/www/us/en/developer/articles/tool/intel-decimal-floating-point-math-library.html[library].
 You can then use the following command: `icx benchmark_libbid.c -O3 $PATH_TO_LIBBID/libbid.a -std=c17` followed by: `./a.out`
 You can also use `gcc` instead of `icx`.
+On Windows the command is similar: `cl benchmark_libbid.c /O2 /std:c17 ..\PATH_TO_LIBBID\cl000libbid.lib`, followed by: `.\benchmark_libbid.exe`.
 
 NOTE: The Intel benchmarks can only be run on one of their supported architectures: IA-32, IA-64, and Intel x64
 
@@ -725,6 +726,15 @@ Run using an Intel i9-11900k chipset running Windows 11 and Visual Studio 17.14.
 | `decimal_fast128_t`
 | 801,708
 | 4.300
+| Intel `BID_UINT32`
+| 4,372,973
+| 23.457
+| Intel `BID_UINT64`
+| 9,345,300
+| 50.129
+| Intel `BID_UINT128`
+| 11,504,914
+| 61.714
 |===
 
 === Addition
@@ -755,6 +765,15 @@ Run using an Intel i9-11900k chipset running Windows 11 and Visual Studio 17.14.
 | `decimal_fast128_t`
 | 3,109,101
 | 38.914
+| Intel `BID_UINT32`
+| 4,967,728
+| 62.177
+| Intel `BID_UINT64`
+| 6,268,077
+| 78.452
+| Intel `BID_UINT128`
+| 4,847,330
+| 60.670
 |===
 
 === Subtraction
@@ -785,6 +804,15 @@ Run using an Intel i9-11900k chipset running Windows 11 and Visual Studio 17.14.
 | `decimal_fast128_t`
 | 2,963,570
 | 9.167
+| Intel `BID_UINT32`
+| 4,603,462
+| 14.240
+| Intel `BID_UINT64`
+| 5,627,305
+| 17.407
+| Intel `BID_UINT128`
+| 5,824,263
+| 18.016
 |===
 
 === Multiplication
@@ -815,6 +843,15 @@ Run using an Intel i9-11900k chipset running Windows 11 and Visual Studio 17.14.
 | `decimal_fast128_t`
 | 9,236,110
 | 117.434
+| Intel `BID_UINT32`
+| 3,833,363
+| 48.740
+| Intel `BID_UINT64`
+| 11,671,369
+| 148.398
+| Intel `BID_UINT128`
+| 62,036,577
+| 788.778
 |===
 
 === Division
@@ -845,6 +882,15 @@ Run using an Intel i9-11900k chipset running Windows 11 and Visual Studio 17.14.
 | `decimal_fast128_t`
 | 11,587,763
 | 129.737
+| Intel `BID_UINT32`
+| 5,037,576
+| 46.401
+| Intel `BID_UINT64`
+| 8,768,259
+| 98.170
+| Intel `BID_UINT128`
+| 38,519,644
+| 431.269
 |===
 
 === `from_chars`
diff --git a/test/benchmark_libbid.c b/test/benchmark_libbid.c
@@ -2,25 +2,71 @@
 // Distributed under the Boost Software License, Version 1.0.
 // https://www.boost.org/LICENSE_1_0.txt
 
-#define _POSIX_C_SOURCE 199309L
+#ifdef _WIN32
+#  define WIN32_LEAN_AND_MEAN
+#  include <windows.h>
+#else
+#  define _POSIX_C_SOURCE 199309L
+#endif
 
 #include <stdio.h>
 #include <stdlib.h>
 #include <stdint.h>
-#include <time.h>
 #include <inttypes.h>
 #include <float.h>
 #include <fenv.h>
 
+// Intel libbid headers. Forward slashes are used on purpose: MSVC accepts them,
+// whereas a backslash inside a header name is not portable and breaks the
+// gcc/icx builds on POSIX hosts.
+#include "../LIBRARY/src/bid_conf.h"
+#include "../LIBRARY/src/bid_functions.h"
+
 typedef BID_UINT32 Decimal32;
 typedef BID_UINT64 Decimal64;
-#include "../LIBRARY/src/bid_conf.h"
-#include "../LIBRARY/src/bid_functions.h"
 typedef BID_UINT128 Decimal128;
 
 #define K 20000000
 #define N 5
 
+#ifdef _MSC_VER
+#  define BOOST_DECIMAL_NOINLINE  __declspec(noinline)
+#else
+#  define BOOST_DECIMAL_NOINLINE __attribute__ ((noinline))
+#endif
+
+#ifdef _WIN32
+#include <windows.h>
+
+#define CLOCK_MONOTONIC 1
+
+struct timespec
+{
+    long tv_sec;
+    long tv_nsec;
+};
+
+// Windows stand-in for POSIX clock_gettime(), backed by the performance counter.
+//   clock_id - accepted for source compatibility and otherwise unused
+//   tp       - receives the counter reading split into seconds and nanoseconds
+// Always returns 0.
+int clock_gettime(int clock_id, struct timespec* tp)
+{
+    // Counter frequency in ticks per second; queried on the first call and cached
+    static LARGE_INTEGER ticks_per_sec = { 0 };
+    LARGE_INTEGER now;
+
+    (void)clock_id;  // Every clock id maps onto the performance counter
+
+    if (ticks_per_sec.QuadPart == 0)
+    {
+        QueryPerformanceFrequency(&ticks_per_sec);
+    }
+
+    QueryPerformanceCounter(&now);
+
+    // Split the reading into whole seconds and the leftover ticks, so that only
+    // the sub-second remainder is scaled up to nanoseconds
+    const LONGLONG whole_seconds = now.QuadPart / ticks_per_sec.QuadPart;
+    const LONGLONG leftover_ticks = now.QuadPart % ticks_per_sec.QuadPart;
+
+    tp->tv_sec = (long)whole_seconds;
+    tp->tv_nsec = (long)((leftover_ticks * 1000000000LL) / ticks_per_sec.QuadPart);
+
+    return 0;
+}
+
+#else
+#include <time.h>
+#endif
+
 uint32_t flag = 0;
 
 uint32_t random_uint32(void) 
@@ -45,15 +91,15 @@ uint64_t random_uint64(void)
     return r;
 }
 
-__attribute__ ((noinline)) void generate_vector_32(Decimal32* buffer, size_t buffer_len)
+BOOST_DECIMAL_NOINLINE void generate_vector_32(Decimal32* buffer, size_t buffer_len)
 {
     for (size_t i = 0; i < buffer_len; ++i)
     {
         buffer[i] = bid32_from_uint32(random_uint32(), BID_ROUNDING_TO_NEAREST, &flag);
     }
 }
 
-__attribute__ ((noinline)) void test_comparisons_32(Decimal32* data, const char* label)
+BOOST_DECIMAL_NOINLINE void test_comparisons_32(Decimal32* data, const char* label)
 {
     struct timespec t1, t2;
     clock_gettime(CLOCK_MONOTONIC, &t1);
@@ -82,15 +128,15 @@ __attribute__ ((noinline)) void test_comparisons_32(Decimal32* data, const char*
     printf("Comparisons    <%-10s >: %-10" PRIu64 " us (s=%zu)\n", label, elapsed_time_us, s);
 }
 
-__attribute__ ((noinline)) void generate_vector_64(Decimal64* buffer, size_t buffer_len)
+BOOST_DECIMAL_NOINLINE void generate_vector_64(Decimal64* buffer, size_t buffer_len)
 {
     for (size_t i = 0; i < buffer_len; ++i)
     {
         buffer[i] = bid64_from_uint64(random_uint64(), BID_ROUNDING_TO_NEAREST, &flag);
     }
 }
 
-__attribute__ ((noinline)) void test_comparisons_64(Decimal64* data, const char* label)
+BOOST_DECIMAL_NOINLINE void test_comparisons_64(Decimal64* data, const char* label)
 {
     struct timespec t1, t2;
     clock_gettime(CLOCK_MONOTONIC, &t1);
@@ -153,12 +199,12 @@ Decimal128 random_decimal128(void)
 
     // 5. Parse to decimal128
     _IDEC_flags flags = 0;
-    Decimal128 result = bid128_from_string(str, &flags);
+    Decimal128 result = bid128_from_string(str, BID_ROUNDING_TO_NEAREST, &flags);
 
     return result;
 }
 
-__attribute__ ((__noinline__)) void generate_vector_128(Decimal128* buffer, size_t buffer_len)
+BOOST_DECIMAL_NOINLINE void generate_vector_128(Decimal128* buffer, size_t buffer_len)
 {
     size_t i = 0;
     while (i < buffer_len)
@@ -168,7 +214,7 @@ __attribute__ ((__noinline__)) void generate_vector_128(Decimal128* buffer, size
     }
 }
 
-__attribute__ ((__noinline__)) void test_comparisons_128(Decimal128* data, const char* label)
+BOOST_DECIMAL_NOINLINE void test_comparisons_128(Decimal128* data, const char* label)
 {
     struct timespec t1, t2;
     clock_gettime(CLOCK_MONOTONIC, &t1);
@@ -200,26 +246,26 @@ __attribute__ ((__noinline__)) void test_comparisons_128(Decimal128* data, const
 
 typedef Decimal32 (*operation_32)(Decimal32, Decimal32);
 
-__attribute__ ((noinline)) Decimal32 add_32(Decimal32 a, Decimal32 b)
+BOOST_DECIMAL_NOINLINE Decimal32 add_32(Decimal32 a, Decimal32 b)
 {
     return bid32_add(a, b, BID_ROUNDING_TO_NEAREST, &flag);
 }
-__attribute__ ((noinline)) Decimal32 sub_32(Decimal32 a, Decimal32 b)
+BOOST_DECIMAL_NOINLINE Decimal32 sub_32(Decimal32 a, Decimal32 b)
 {
     return bid32_sub(a, b, BID_ROUNDING_TO_NEAREST, &flag);
 }
 
-__attribute__ ((noinline)) Decimal32 mul_32(Decimal32 a, Decimal32 b)
+BOOST_DECIMAL_NOINLINE Decimal32 mul_32(Decimal32 a, Decimal32 b)
 {
     return bid32_mul(a, b, BID_ROUNDING_TO_NEAREST, &flag);
 }
 
-__attribute__ ((noinline)) Decimal32 div_32(Decimal32 a, Decimal32 b)
+BOOST_DECIMAL_NOINLINE Decimal32 div_32(Decimal32 a, Decimal32 b)
 {
     return bid32_div(a, b, BID_ROUNDING_TO_NEAREST, &flag);
 }
 
-__attribute__ ((noinline)) void test_two_element_operation_32(Decimal32* data, operation_32 op, const char* label, const char* op_label)
+BOOST_DECIMAL_NOINLINE void test_two_element_operation_32(Decimal32* data, operation_32 op, const char* label, const char* op_label)
 {
     struct timespec t1, t2;
     clock_gettime(CLOCK_MONOTONIC, &t1);
@@ -245,27 +291,27 @@ __attribute__ ((noinline)) void test_two_element_operation_32(Decimal32* data, o
 
 typedef Decimal64 (*operation_64)(Decimal64, Decimal64);
 
-__attribute__ ((noinline)) Decimal64 add_64(Decimal64 a, Decimal64 b)
+BOOST_DECIMAL_NOINLINE Decimal64 add_64(Decimal64 a, Decimal64 b)
 {
     return bid64_add(a, b, BID_ROUNDING_TO_NEAREST, &flag);
 }
 
-__attribute__ ((noinline)) Decimal64 sub_64(Decimal64 a, Decimal64 b)
+BOOST_DECIMAL_NOINLINE Decimal64 sub_64(Decimal64 a, Decimal64 b)
 {
     return bid64_sub(a, b, BID_ROUNDING_TO_NEAREST, &flag);
 }
 
-__attribute__ ((noinline)) Decimal64 mul_64(Decimal64 a, Decimal64 b)
+BOOST_DECIMAL_NOINLINE Decimal64 mul_64(Decimal64 a, Decimal64 b)
 {
     return bid64_mul(a, b, BID_ROUNDING_TO_NEAREST, &flag);
 }
 
-__attribute__ ((noinline)) Decimal64 div_64(Decimal64 a, Decimal64 b)
+BOOST_DECIMAL_NOINLINE Decimal64 div_64(Decimal64 a, Decimal64 b)
 {
     return bid64_div(a, b, BID_ROUNDING_TO_NEAREST, &flag);
 }
 
-__attribute__ ((noinline)) void test_two_element_operation_64(Decimal64* data, operation_64 op, const char* label, const char* op_label)
+BOOST_DECIMAL_NOINLINE void test_two_element_operation_64(Decimal64* data, operation_64 op, const char* label, const char* op_label)
 {
     struct timespec t1, t2;
     clock_gettime(CLOCK_MONOTONIC, &t1);
@@ -292,27 +338,27 @@ __attribute__ ((noinline)) void test_two_element_operation_64(Decimal64* data, o
 
 typedef Decimal128 (*operation_128)(Decimal128, Decimal128);
 
-__attribute__ ((__noinline__)) Decimal128 add_128(Decimal128 a, Decimal128 b)
+BOOST_DECIMAL_NOINLINE Decimal128 add_128(Decimal128 a, Decimal128 b)
 {
     return bid128_add(a, b, BID_ROUNDING_TO_NEAREST, &flag);
 }
 
-__attribute__ ((__noinline__)) Decimal128 sub_128(Decimal128 a, Decimal128 b)
+BOOST_DECIMAL_NOINLINE Decimal128 sub_128(Decimal128 a, Decimal128 b)
 {
     return bid128_sub(a, b, BID_ROUNDING_TO_NEAREST, &flag);
 }
 
-__attribute__ ((__noinline__)) Decimal128 mul_128(Decimal128 a, Decimal128 b)
+BOOST_DECIMAL_NOINLINE Decimal128 mul_128(Decimal128 a, Decimal128 b)
 {
     return bid128_mul(a, b, BID_ROUNDING_TO_NEAREST, &flag);
 }
 
-__attribute__ ((__noinline__)) Decimal128 div_128(Decimal128 a, Decimal128 b)
+BOOST_DECIMAL_NOINLINE Decimal128 div_128(Decimal128 a, Decimal128 b)
 {
     return bid128_div(a, b, BID_ROUNDING_TO_NEAREST, &flag);
 }
 
-__attribute__ ((__noinline__)) void test_two_element_operation_128(Decimal128* data, operation_128 op, const char* label, const char* op_label)
+BOOST_DECIMAL_NOINLINE void test_two_element_operation_128(Decimal128* data, operation_128 op, const char* label, const char* op_label)
 {
     struct timespec t1, t2;
     clock_gettime(CLOCK_MONOTONIC, &t1);
@@ -342,8 +388,10 @@ int main()
     // One time init of random number generator
     srand(time(NULL));
 
+    #ifndef _WIN32
     fedisableexcept(FE_ALL_EXCEPT);
-    
+    #endif
+
     Decimal32* d32_array = malloc(K * sizeof(Decimal32));
     Decimal64* d64_array = malloc(K * sizeof(Decimal64));
     Decimal128* d128_array = malloc(K * sizeof(Decimal128));

Original file line number	Diff line number	Diff line change
`@@ -2,25 +2,71 @@`
`2`	`2`	`// Distributed under the Boost Software License, Version 1.0.`
`3`	`3`	`// https://www.boost.org/LICENSE_1_0.txt`
`4`	`4`
`5`		`-#define _POSIX_C_SOURCE 199309L`
	`5`	`+#ifdef _WIN32`
	`6`	`+# define WIN32_LEAN_AND_MEAN`
	`7`	`+# include <windows.h>`
	`8`	`+#else`
	`9`	`+# define _POSIX_C_SOURCE 199309L`
	`10`	`+#endif`
`6`	`11`
`7`	`12`	`#include <stdio.h>`
`8`	`13`	`#include <stdlib.h>`
`9`	`14`	`#include <stdint.h>`
`10`		`-#include <time.h>`
`11`	`15`	`#include <inttypes.h>`
`12`	`16`	`#include <float.h>`
`13`	`17`	`#include <fenv.h>`
`14`	`18`
	`19`	`+#include "..\LIBRARY\src\bid_conf.h"`
	`20`	`+#include "..\LIBRARY\src\bid_functions.h"`
	`21`	`+`
`15`	`22`	`typedef BID_UINT32 Decimal32;`
`16`	`23`	`typedef BID_UINT64 Decimal64;`
`17`		`-#include "../LIBRARY/src/bid_conf.h"`
`18`		`-#include "../LIBRARY/src/bid_functions.h"`
`19`	`24`	`typedef BID_UINT128 Decimal128;`
`20`	`25`
`21`	`26`	`#define K 20000000`
`22`	`27`	`#define N 5`
`23`	`28`
	`29`	`+#ifdef _MSC_VER`
	`30`	`+# define BOOST_DECIMAL_NOINLINE __declspec(noinline)`
	`31`	`+#else`
	`32`	`+# define BOOST_DECIMAL_NOINLINE __attribute__ ((noinline))`
	`33`	`+#endif`
	`34`	`+`
	`35`	`+#ifdef _WIN32`
	`36`	`+#include <windows.h>`
	`37`	`+`
	`38`	`+#define CLOCK_MONOTONIC 1`
	`39`	`+`
	`40`	`+struct timespec`
	`41`	`+{`
	`42`	`+ long tv_sec;`
	`43`	`+ long tv_nsec;`
	`44`	`+};`
	`45`	`+`
	`46`	`+int clock_gettime(int clock_id, struct timespec* tp)`
	`47`	`+{`
	`48`	`+ (void)clock_id; // Ignore clock_id, always use QPC`
	`49`	`+`
	`50`	`+ static LARGE_INTEGER frequency = { 0 };`
	`51`	`+ LARGE_INTEGER counter;`
	`52`	`+`
	`53`	`+ if (frequency.QuadPart == 0)`
	`54`	`+ {`
	`55`	`+ QueryPerformanceFrequency(&frequency);`
	`56`	`+ }`
	`57`	`+`
	`58`	`+ QueryPerformanceCounter(&counter);`
	`59`	`+`
	`60`	`+ tp->tv_sec = (long)(counter.QuadPart / frequency.QuadPart);`
	`61`	`+ tp->tv_nsec = (long)(((counter.QuadPart % frequency.QuadPart) * 1000000000LL) / frequency.QuadPart);`
	`62`	`+`
	`63`	`+ return 0;`
	`64`	`+}`
	`65`	`+`
	`66`	`+#else`
	`67`	`+#include <time.h>`
	`68`	`+#endif`
	`69`	`+`
`24`	`70`	`uint32_t flag = 0;`
`25`	`71`
`26`	`72`	`uint32_t random_uint32(void)`
`@@ -45,15 +91,15 @@ uint64_t random_uint64(void)`
`45`	`91`	`return r;`
`46`	`92`	`}`
`47`	`93`
`48`		`-__attribute__ ((noinline)) void generate_vector_32(Decimal32* buffer, size_t buffer_len)`
	`94`	`+BOOST_DECIMAL_NOINLINE void generate_vector_32(Decimal32* buffer, size_t buffer_len)`
`49`	`95`	`{`
`50`	`96`	`for (size_t i = 0; i < buffer_len; ++i)`
`51`	`97`	`{`
`52`	`98`	`buffer[i] = bid32_from_uint32(random_uint32(), BID_ROUNDING_TO_NEAREST, &flag);`
`53`	`99`	`}`
`54`	`100`	`}`
`55`	`101`
`56`		`-__attribute__ ((noinline)) void test_comparisons_32(Decimal32* data, const char* label)`
	`102`	`+BOOST_DECIMAL_NOINLINE void test_comparisons_32(Decimal32* data, const char* label)`
`57`	`103`	`{`
`58`	`104`	`struct timespec t1, t2;`
`59`	`105`	`clock_gettime(CLOCK_MONOTONIC, &t1);`
`@@ -82,15 +128,15 @@ __attribute__ ((noinline)) void test_comparisons_32(Decimal32* data, const char*`
`82`	`128`	`printf("Comparisons <%-10s >: %-10" PRIu64 " us (s=%zu)\n", label, elapsed_time_us, s);`
`83`	`129`	`}`
`84`	`130`
`85`		`-__attribute__ ((noinline)) void generate_vector_64(Decimal64* buffer, size_t buffer_len)`
	`131`	`+BOOST_DECIMAL_NOINLINE void generate_vector_64(Decimal64* buffer, size_t buffer_len)`
`86`	`132`	`{`
`87`	`133`	`for (size_t i = 0; i < buffer_len; ++i)`
`88`	`134`	`{`
`89`	`135`	`buffer[i] = bid64_from_uint64(random_uint64(), BID_ROUNDING_TO_NEAREST, &flag);`
`90`	`136`	`}`
`91`	`137`	`}`
`92`	`138`
`93`		`-__attribute__ ((noinline)) void test_comparisons_64(Decimal64* data, const char* label)`
	`139`	`+BOOST_DECIMAL_NOINLINE void test_comparisons_64(Decimal64* data, const char* label)`
`94`	`140`	`{`
`95`	`141`	`struct timespec t1, t2;`
`96`	`142`	`clock_gettime(CLOCK_MONOTONIC, &t1);`
`@@ -153,12 +199,12 @@ Decimal128 random_decimal128(void)`
`153`	`199`
`154`	`200`	`// 5. Parse to decimal128`
`155`	`201`	`_IDEC_flags flags = 0;`
`156`		`- Decimal128 result = bid128_from_string(str, &flags);`
	`202`	`+ Decimal128 result = bid128_from_string(str, BID_ROUNDING_TO_NEAREST, &flags);`
`157`	`203`
`158`	`204`	`return result;`
`159`	`205`	`}`
`160`	`206`
`161`		`-__attribute__ ((__noinline__)) void generate_vector_128(Decimal128* buffer, size_t buffer_len)`
	`207`	`+BOOST_DECIMAL_NOINLINE void generate_vector_128(Decimal128* buffer, size_t buffer_len)`
`162`	`208`	`{`
`163`	`209`	`size_t i = 0;`
`164`	`210`	`while (i < buffer_len)`
`@@ -168,7 +214,7 @@ __attribute__ ((__noinline__)) void generate_vector_128(Decimal128* buffer, size`
`168`	`214`	`}`
`169`	`215`	`}`
`170`	`216`
`171`		`-__attribute__ ((__noinline__)) void test_comparisons_128(Decimal128* data, const char* label)`
	`217`	`+BOOST_DECIMAL_NOINLINE void test_comparisons_128(Decimal128* data, const char* label)`
`172`	`218`	`{`
`173`	`219`	`struct timespec t1, t2;`
`174`	`220`	`clock_gettime(CLOCK_MONOTONIC, &t1);`
`@@ -200,26 +246,26 @@ __attribute__ ((__noinline__)) void test_comparisons_128(Decimal128* data, const`
`200`	`246`
`201`	`247`	`typedef Decimal32 (*operation_32)(Decimal32, Decimal32);`
`202`	`248`
`203`		`-__attribute__ ((noinline)) Decimal32 add_32(Decimal32 a, Decimal32 b)`
	`249`	`+BOOST_DECIMAL_NOINLINE Decimal32 add_32(Decimal32 a, Decimal32 b)`
`204`	`250`	`{`
`205`	`251`	`return bid32_add(a, b, BID_ROUNDING_TO_NEAREST, &flag);`
`206`	`252`	`}`
`207`		`-__attribute__ ((noinline)) Decimal32 sub_32(Decimal32 a, Decimal32 b)`
	`253`	`+BOOST_DECIMAL_NOINLINE Decimal32 sub_32(Decimal32 a, Decimal32 b)`
`208`	`254`	`{`
`209`	`255`	`return bid32_sub(a, b, BID_ROUNDING_TO_NEAREST, &flag);`
`210`	`256`	`}`
`211`	`257`
`212`		`-__attribute__ ((noinline)) Decimal32 mul_32(Decimal32 a, Decimal32 b)`
	`258`	`+BOOST_DECIMAL_NOINLINE Decimal32 mul_32(Decimal32 a, Decimal32 b)`
`213`	`259`	`{`
`214`	`260`	`return bid32_mul(a, b, BID_ROUNDING_TO_NEAREST, &flag);`
`215`	`261`	`}`
`216`	`262`
`217`		`-__attribute__ ((noinline)) Decimal32 div_32(Decimal32 a, Decimal32 b)`
	`263`	`+BOOST_DECIMAL_NOINLINE Decimal32 div_32(Decimal32 a, Decimal32 b)`
`218`	`264`	`{`
`219`	`265`	`return bid32_div(a, b, BID_ROUNDING_TO_NEAREST, &flag);`
`220`	`266`	`}`
`221`	`267`
`222`		`-__attribute__ ((noinline)) void test_two_element_operation_32(Decimal32* data, operation_32 op, const char* label, const char* op_label)`
	`268`	`+BOOST_DECIMAL_NOINLINE void test_two_element_operation_32(Decimal32* data, operation_32 op, const char* label, const char* op_label)`
`223`	`269`	`{`
`224`	`270`	`struct timespec t1, t2;`
`225`	`271`	`clock_gettime(CLOCK_MONOTONIC, &t1);`
`@@ -245,27 +291,27 @@ __attribute__ ((noinline)) void test_two_element_operation_32(Decimal32* data, o`
`245`	`291`
`246`	`292`	`typedef Decimal64 (*operation_64)(Decimal64, Decimal64);`
`247`	`293`
`248`		`-__attribute__ ((noinline)) Decimal64 add_64(Decimal64 a, Decimal64 b)`
	`294`	`+BOOST_DECIMAL_NOINLINE Decimal64 add_64(Decimal64 a, Decimal64 b)`
`249`	`295`	`{`
`250`	`296`	`return bid64_add(a, b, BID_ROUNDING_TO_NEAREST, &flag);`
`251`	`297`	`}`
`252`	`298`
`253`		`-__attribute__ ((noinline)) Decimal64 sub_64(Decimal64 a, Decimal64 b)`
	`299`	`+BOOST_DECIMAL_NOINLINE Decimal64 sub_64(Decimal64 a, Decimal64 b)`
`254`	`300`	`{`
`255`	`301`	`return bid64_sub(a, b, BID_ROUNDING_TO_NEAREST, &flag);`
`256`	`302`	`}`
`257`	`303`
`258`		`-__attribute__ ((noinline)) Decimal64 mul_64(Decimal64 a, Decimal64 b)`
	`304`	`+BOOST_DECIMAL_NOINLINE Decimal64 mul_64(Decimal64 a, Decimal64 b)`
`259`	`305`	`{`
`260`	`306`	`return bid64_mul(a, b, BID_ROUNDING_TO_NEAREST, &flag);`
`261`	`307`	`}`
`262`	`308`
`263`		`-__attribute__ ((noinline)) Decimal64 div_64(Decimal64 a, Decimal64 b)`
	`309`	`+BOOST_DECIMAL_NOINLINE Decimal64 div_64(Decimal64 a, Decimal64 b)`
`264`	`310`	`{`
`265`	`311`	`return bid64_div(a, b, BID_ROUNDING_TO_NEAREST, &flag);`
`266`	`312`	`}`
`267`	`313`
`268`		`-__attribute__ ((noinline)) void test_two_element_operation_64(Decimal64* data, operation_64 op, const char* label, const char* op_label)`
	`314`	`+BOOST_DECIMAL_NOINLINE void test_two_element_operation_64(Decimal64* data, operation_64 op, const char* label, const char* op_label)`
`269`	`315`	`{`
`270`	`316`	`struct timespec t1, t2;`
`271`	`317`	`clock_gettime(CLOCK_MONOTONIC, &t1);`
`@@ -292,27 +338,27 @@ __attribute__ ((noinline)) void test_two_element_operation_64(Decimal64* data, o`
`292`	`338`
`293`	`339`	`typedef Decimal128 (*operation_128)(Decimal128, Decimal128);`
`294`	`340`
`295`		`-__attribute__ ((__noinline__)) Decimal128 add_128(Decimal128 a, Decimal128 b)`
	`341`	`+BOOST_DECIMAL_NOINLINE Decimal128 add_128(Decimal128 a, Decimal128 b)`
`296`	`342`	`{`
`297`	`343`	`return bid128_add(a, b, BID_ROUNDING_TO_NEAREST, &flag);`
`298`	`344`	`}`
`299`	`345`
`300`		`-__attribute__ ((__noinline__)) Decimal128 sub_128(Decimal128 a, Decimal128 b)`
	`346`	`+BOOST_DECIMAL_NOINLINE Decimal128 sub_128(Decimal128 a, Decimal128 b)`
`301`	`347`	`{`
`302`	`348`	`return bid128_sub(a, b, BID_ROUNDING_TO_NEAREST, &flag);`
`303`	`349`	`}`
`304`	`350`
`305`		`-__attribute__ ((__noinline__)) Decimal128 mul_128(Decimal128 a, Decimal128 b)`
	`351`	`+BOOST_DECIMAL_NOINLINE Decimal128 mul_128(Decimal128 a, Decimal128 b)`
`306`	`352`	`{`
`307`	`353`	`return bid128_mul(a, b, BID_ROUNDING_TO_NEAREST, &flag);`
`308`	`354`	`}`
`309`	`355`
`310`		`-__attribute__ ((__noinline__)) Decimal128 div_128(Decimal128 a, Decimal128 b)`
	`356`	`+BOOST_DECIMAL_NOINLINE Decimal128 div_128(Decimal128 a, Decimal128 b)`
`311`	`357`	`{`
`312`	`358`	`return bid128_div(a, b, BID_ROUNDING_TO_NEAREST, &flag);`
`313`	`359`	`}`
`314`	`360`
`315`		`-__attribute__ ((__noinline__)) void test_two_element_operation_128(Decimal128* data, operation_128 op, const char* label, const char* op_label)`
	`361`	`+BOOST_DECIMAL_NOINLINE void test_two_element_operation_128(Decimal128* data, operation_128 op, const char* label, const char* op_label)`
`316`	`362`	`{`
`317`	`363`	`struct timespec t1, t2;`
`318`	`364`	`clock_gettime(CLOCK_MONOTONIC, &t1);`
`@@ -342,8 +388,10 @@ int main()`
`342`	`388`	`// One time init of random number generator`
`343`	`389`	`srand(time(NULL));`
`344`	`390`
	`391`	`+ #ifndef _WIN32`
`345`	`392`	`fedisableexcept(FE_ALL_EXCEPT);`
`346`		`-`
	`393`	`+ #endif`
	`394`	`+`
`347`	`395`	`Decimal32* d32_array = malloc(K * sizeof(Decimal32));`
`348`	`396`	`Decimal64* d64_array = malloc(K * sizeof(Decimal64));`
`349`	`397`	`Decimal128* d128_array = malloc(K * sizeof(Decimal128));`