Skip to content
Open
132 changes: 132 additions & 0 deletions include/boost/math/special_functions/fast_float_distance.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
// (C) Copyright Matt Borland 2022.
// Use, modification and distribution are subject to the
// Boost Software License, Version 1.0. (See accompanying file
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

#ifndef BOOST_MATH_SF_FAST_FLOAT_DISTANCE
#define BOOST_MATH_SF_FAST_FLOAT_DISTANCE

#include <boost/math/special_functions/next.hpp>
#include <boost/math/tools/throw_exception.hpp>
#include <stdexcept>
#include <limits>

#if defined(BOOST_MATH_USE_FLOAT128) && !defined(BOOST_MATH_STANDALONE)
#include <boost/multiprecision/float128.hpp>
#include <boost/multiprecision/detail/standalone_config.hpp>
#define BOOST_MATH_USE_FAST_FLOAT128
#elif defined(BOOST_MATH_USE_FLOAT128) && defined(BOOST_MATH_STANDALONE)
# if __has_include(<quadmath.h>)
# include <quadmath.h>
# define BOOST_MATH_USE_FAST_STANDALONE_FLOAT128
# endif
#endif

namespace boost { namespace math {

// https://randomascii.wordpress.com/2012/01/23/stupid-float-tricks-2/
// https://blog.regehr.org/archives/959
inline std::int32_t fast_float_distance(float a, float b)
{
return boost::math::float_distance(a, b);
}

inline std::int64_t fast_float_distance(double a, double b)
{
return boost::math::float_distance(a, b);
}

#ifdef BOOST_MATH_USE_FAST_FLOAT128
boost::multiprecision::int128_type fast_float_distance(boost::multiprecision::float128_type a, boost::multiprecision::float128_type b)
{
using std::abs;
using std::isfinite;

constexpr boost::multiprecision::float128_type tol = 2 * BOOST_MP_QUAD_MIN;

// 0, very small, and large magnitude distances all need special handling
if (abs(a) == 0 || abs(b) == 0)
{
return 0;
}
else if (abs(a) < tol || abs(b) < tol)
{
BOOST_MATH_THROW_EXCEPTION(std::domain_error("special handling is required for tiny distances. Please use boost::math::float_distance for a slower but safe solution"));
}

if (!(isfinite)(a))
{
BOOST_MATH_THROW_EXCEPTION(std::domain_error("Both arguments to fast_float_distnace must be finite"));
}
else if (!(isfinite)(b))
{
BOOST_MATH_THROW_EXCEPTION(std::domain_error("Both arguments to fast_float_distnace must be finite"));
}

static_assert(sizeof(boost::multiprecision::int128_type) == sizeof(boost::multiprecision::float128_type), "float128 is the wrong size");

boost::multiprecision::int128_type ai;
boost::multiprecision::int128_type bi;
std::memcpy(&ai, &a, sizeof(boost::multiprecision::float128_type));
std::memcpy(&bi, &b, sizeof(boost::multiprecision::float128_type));

boost::multiprecision::int128_type result = bi - ai;

if (ai < 0 || bi < 0)
{
result = -result;
}

return result;
}

#elif defined(BOOST_MATH_USE_FAST_STANDALONE_FLOAT128)
__int128 fast_float_distance(__float128 a, __float128 b)
{
constexpr __float128 tol = 2 * static_cast<__float128>(1) * static_cast<__float128>(DBL_MIN) * static_cast<__float128>(DBL_MIN) *
static_cast<__float128>(DBL_MIN) * static_cast<__float128>(DBL_MIN) * static_cast<__float128>(DBL_MIN) *
static_cast<__float128>(DBL_MIN) * static_cast<__float128>(DBL_MIN) * static_cast<__float128>(DBL_MIN) *
static_cast<__float128>(DBL_MIN) * static_cast<__float128>(DBL_MIN) * static_cast<__float128>(DBL_MIN) *
static_cast<__float128>(DBL_MIN) * static_cast<__float128>(DBL_MIN) * static_cast<__float128>(DBL_MIN) *
static_cast<__float128>(DBL_MIN) * static_cast<__float128>(DBL_MIN) / 1073741824;

// 0, very small, and large magnitude distances all need special handling
if (::fabsq(a) == 0 || ::fabsq(b) == 0)
{
return 0;
}
else if (::fabsq(a) < tol || ::fabsq(b) < tol)
{
BOOST_MATH_THROW_EXCEPTION(std::domain_error("special handling is required for tiny distances. Please use boost::math::float_distance for a slower but safe solution"));
}

if (!(::isinfq)(a) && !(::isnanq)(a))
{
BOOST_MATH_THROW_EXCEPTION(std::domain_error("Both arguments to fast_float_distnace must be finite"));
}
else if (!(::isinfq)(b) && !(::isnanq)(b))
{
BOOST_MATH_THROW_EXCEPTION(std::domain_error("Both arguments to fast_float_distnace must be finite"));
}

static_assert(sizeof(__int128) == sizeof(__float128));

__int128 ai;
__int128 bi;
std::memcpy(&ai, &a, sizeof(__float128));
std::memcpy(&bi, &b, sizeof(__float128));

__int128 result = bi - ai;

if (ai < 0 || bi < 0)
{
result = -result;
}

return result;
}
#endif

}} // Namespaces

#endif
101 changes: 100 additions & 1 deletion include/boost/math/special_functions/next.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,11 @@
#include <boost/math/special_functions/sign.hpp>
#include <boost/math/special_functions/trunc.hpp>
#include <boost/math/tools/traits.hpp>
#include <boost/math/tools/config.hpp>
#include <type_traits>
#include <cfloat>

#include <cstdint>
#include <cstring>

#if !defined(_CRAYC) && !defined(__CUDACC__) && (!defined(__GNUC__) || (__GNUC__ > 3) || ((__GNUC__ == 3) && (__GNUC_MINOR__ > 3)))
#if (defined(_M_IX86_FP) && (_M_IX86_FP >= 2)) || defined(__SSE2__)
Expand Down Expand Up @@ -717,6 +719,103 @@ typename tools::promote_args<T, U>::type float_distance(const T& a, const U& b)
return boost::math::float_distance(a, b, policies::policy<>());
}

// https://randomascii.wordpress.com/2012/01/23/stupid-float-tricks-2/
// https://blog.regehr.org/archives/959
inline std::int32_t float_distance(float a, float b)
{
using std::abs;
using std::isfinite;
constexpr auto tol = 2 * (std::numeric_limits<float>::min)();

// 0, very small, and large magnitude distances all need special handling
if (abs(a) == 0 || abs(b) == 0)
{
return static_cast<std::int32_t>(float_distance(a, b, policies::policy<>()));
}
else if (abs(a) < tol || abs(b) < tol)
{
return static_cast<std::int32_t>(float_distance(a, b, policies::policy<>()));
}

static const char* function = "float_distance<%1%>(%1%, %1%)";
if(!(boost::math::isfinite)(a))
{
return policies::raise_domain_error<float>(
function,
"Argument a must be finite, but got %1%", a, policies::policy<>());
}
if(!(boost::math::isfinite)(b))
{
return policies::raise_domain_error<float>(
function,
"Argument b must be finite, but got %1%", b, policies::policy<>());
}

static_assert(sizeof(float) == sizeof(std::int32_t), "float is incorrect size.");

std::int32_t ai;
std::int32_t bi;
std::memcpy(&ai, &a, sizeof(float));
std::memcpy(&bi, &b, sizeof(float));

auto result = bi - ai;

if (ai < 0 || bi < 0)
{
result = -result;
}

return result;
}

inline std::int64_t float_distance(double a, double b)
{
using std::abs;
using std::isfinite;
constexpr auto tol = 2 * (std::numeric_limits<double>::min)();

// 0, very small, and large magnitude distances all need special handling
if (abs(a) == 0 || abs(b) == 0)
{
return static_cast<std::int64_t>(float_distance(a, b, policies::policy<>()));
}
else if (abs(a) < tol || abs(b) < tol)
{
return static_cast<std::int64_t>(float_distance(a, b, policies::policy<>()));
}

static const char* function = "float_distance<%1%>(%1%, %1%)";
if(!(boost::math::isfinite)(a))
{
return policies::raise_domain_error<double>(
function,
"Argument a must be finite, but got %1%", a, policies::policy<>());
}
if(!(boost::math::isfinite)(b))
{
return policies::raise_domain_error<double>(
function,
"Argument b must be finite, but got %1%", b, policies::policy<>());
}


static_assert(sizeof(double) == sizeof(std::int64_t), "double is incorrect size.");

std::int64_t ai;
std::int64_t bi;
std::memcpy(&ai, &a, sizeof(double));
std::memcpy(&bi, &b, sizeof(double));

auto result = bi - ai;

if (ai < 0 || bi < 0)
{
result = -result;
}

return result;
}

namespace detail{

template <class T, class Policy>
Expand Down
126 changes: 126 additions & 0 deletions reporting/performance/new_next_performance.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
// (C) Copyright Matt Borland 2022.
// Use, modification and distribution are subject to the
// Boost Software License, Version 1.0. (See accompanying file
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

#include <boost/math/special_functions/next.hpp>
#include <benchmark/benchmark.h>

template <typename T>
void float_distance(benchmark::State& state)
{
const auto difference = static_cast<int>(state.range(0));
T left = 2;
T right = boost::math::float_advance(left, difference);

for (auto _ : state)
{
benchmark::DoNotOptimize(boost::math::float_distance(left, right));
}
state.SetComplexityN(state.range(0));
}

BENCHMARK_TEMPLATE(float_distance, float)->RangeMultiplier(2)->Range(1 << 1, 1 << 14)->Complexity()->UseRealTime();
BENCHMARK_TEMPLATE(float_distance, double)->RangeMultiplier(2)->Range(1 << 1, 1 << 14)->Complexity()->UseRealTime();

BENCHMARK_MAIN();

/*
Run on Apple M1 Pro Arch using Apple Clang 14.0.0 (15OCT22)

Original performance (Boost 1.80.0):

Unable to determine clock rate from sysctl: hw.cpufrequency: No such file or directory
This does not affect benchmark measurements, only the metadata output.
2022-10-15T15:24:07-07:00
Running ./new_next_performance
Run on (10 X 24.0916 MHz CPU s)
CPU Caches:
L1 Data 64 KiB
L1 Instruction 128 KiB
L2 Unified 4096 KiB (x10)
Load Average: 1.86, 2.53, 5.83
---------------------------------------------------------------------------------
Benchmark Time CPU Iterations
---------------------------------------------------------------------------------
float_distance<float>/2/real_time 61.4 ns 61.4 ns 9074469
float_distance<float>/4/real_time 61.7 ns 61.7 ns 11384150
float_distance<float>/8/real_time 61.4 ns 61.4 ns 10814604
float_distance<float>/16/real_time 61.7 ns 61.7 ns 11348376
float_distance<float>/32/real_time 61.4 ns 61.4 ns 11387167
float_distance<float>/64/real_time 61.6 ns 61.6 ns 11131932
float_distance<float>/128/real_time 61.4 ns 61.4 ns 11382029
float_distance<float>/256/real_time 61.4 ns 61.4 ns 11307649
float_distance<float>/512/real_time 61.4 ns 61.4 ns 11376048
float_distance<float>/1024/real_time 61.4 ns 61.4 ns 11355748
float_distance<float>/2048/real_time 61.8 ns 61.8 ns 11373776
float_distance<float>/4096/real_time 61.4 ns 61.4 ns 11382368
float_distance<float>/8192/real_time 61.4 ns 61.4 ns 11353453
float_distance<float>/16384/real_time 61.4 ns 61.4 ns 11378298
float_distance<float>/real_time_BigO 61.48 (1) 61.47 (1)
float_distance<float>/real_time_RMS 0 % 0 %
float_distance<double>/2/real_time 55.6 ns 55.6 ns 12580218
float_distance<double>/4/real_time 55.6 ns 55.6 ns 12577835
float_distance<double>/8/real_time 55.6 ns 55.6 ns 12564909
float_distance<double>/16/real_time 56.2 ns 56.2 ns 12554909
float_distance<double>/32/real_time 56.0 ns 56.0 ns 12544381
float_distance<double>/64/real_time 55.6 ns 55.6 ns 12566488
float_distance<double>/128/real_time 55.6 ns 55.6 ns 12499581
float_distance<double>/256/real_time 55.6 ns 55.6 ns 12565661
float_distance<double>/512/real_time 56.1 ns 56.1 ns 12550023
float_distance<double>/1024/real_time 55.8 ns 55.8 ns 12568603
float_distance<double>/2048/real_time 55.6 ns 55.6 ns 12546049
float_distance<double>/4096/real_time 55.6 ns 55.6 ns 12528525
float_distance<double>/8192/real_time 55.9 ns 55.9 ns 12563030
float_distance<double>/16384/real_time 56.0 ns 56.0 ns 12447644
float_distance<double>/real_time_BigO 55.78 (1) 55.78 (1)
float_distance<double>/real_time_RMS 0 % 0 %

New performance:

Unable to determine clock rate from sysctl: hw.cpufrequency: No such file or directory
This does not affect benchmark measurements, only the metadata output.
2022-10-15T15:31:37-07:00
Running ./new_next_performance
Run on (10 X 24.122 MHz CPU s)
CPU Caches:
L1 Data 64 KiB
L1 Instruction 128 KiB
L2 Unified 4096 KiB (x10)
Load Average: 2.12, 2.17, 4.26
---------------------------------------------------------------------------------
Benchmark Time CPU Iterations
---------------------------------------------------------------------------------
float_distance<float>/2/real_time 15.8 ns 15.8 ns 42162717
float_distance<float>/4/real_time 15.9 ns 15.9 ns 44213877
float_distance<float>/8/real_time 15.8 ns 15.8 ns 43972542
float_distance<float>/16/real_time 15.8 ns 15.8 ns 44209456
float_distance<float>/32/real_time 15.8 ns 15.8 ns 44200244
float_distance<float>/64/real_time 15.8 ns 15.8 ns 44239293
float_distance<float>/128/real_time 15.8 ns 15.8 ns 44171202
float_distance<float>/256/real_time 15.8 ns 15.8 ns 44241507
float_distance<float>/512/real_time 15.9 ns 15.8 ns 44230034
float_distance<float>/1024/real_time 15.8 ns 15.8 ns 44241554
float_distance<float>/2048/real_time 15.8 ns 15.8 ns 44220802
float_distance<float>/4096/real_time 15.8 ns 15.8 ns 44220441
float_distance<float>/8192/real_time 15.9 ns 15.9 ns 44213994
float_distance<float>/16384/real_time 15.8 ns 15.8 ns 44215413
float_distance<float>/real_time_BigO 15.83 (1) 15.83 (1)
float_distance<float>/real_time_RMS 0 % 0 %
float_distance<double>/2/real_time 15.5 ns 15.5 ns 45098165
float_distance<double>/4/real_time 15.6 ns 15.6 ns 45065465
float_distance<double>/8/real_time 15.5 ns 15.5 ns 45058733
float_distance<double>/16/real_time 15.8 ns 15.7 ns 45078404
float_distance<double>/32/real_time 15.5 ns 15.5 ns 44832734
float_distance<double>/64/real_time 15.5 ns 15.5 ns 45077303
float_distance<double>/128/real_time 15.5 ns 15.5 ns 45067255
float_distance<double>/256/real_time 15.5 ns 15.5 ns 45073844
float_distance<double>/512/real_time 15.6 ns 15.6 ns 45109342
float_distance<double>/1024/real_time 15.5 ns 15.5 ns 44845180
float_distance<double>/2048/real_time 15.5 ns 15.5 ns 45051846
float_distance<double>/4096/real_time 15.5 ns 15.5 ns 45064317
float_distance<double>/8192/real_time 15.5 ns 15.5 ns 45115653
float_distance<double>/16384/real_time 15.5 ns 15.5 ns 45067642
float_distance<double>/real_time_BigO 15.54 (1) 15.54 (1)
float_distance<double>/real_time_RMS 0 % 0 %
*/
1 change: 1 addition & 0 deletions test/Jamfile.v2
Original file line number Diff line number Diff line change
Expand Up @@ -527,6 +527,7 @@ test-suite special_fun :
[ run test_ldouble_simple.cpp ../../test/build//boost_unit_test_framework ]
# Needs to run in release mode, as it's rather slow:
[ run test_next.cpp pch ../../test/build//boost_unit_test_framework : : : release ]
[ run test_fast_float_distance.cpp ../../test/build//boost_unit_test_framework : : : release [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : <linkflags>"-Bstatic -lquadmath -Bdynamic" : <build>no ] ]
[ run test_next_decimal.cpp pch ../../test/build//boost_unit_test_framework : : : release ]
[ run test_owens_t.cpp ../../test/build//boost_unit_test_framework ]
[ run test_polygamma.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework ]
Expand Down
Loading