Skip to content

Commit f5b3407

Browse files
fast-math support
1 parent e13f082 commit f5b3407

File tree

10 files changed

+125
-44
lines changed

10 files changed

+125
-44
lines changed

include/xsimd/arch/common/xsimd_common_complex.hpp

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -78,11 +78,15 @@ namespace xsimd
7878
using batch_type = complex_batch_type_t<batch<T, A>>;
7979
using real_batch = typename batch_type::real_batch;
8080
using real_value_type = typename real_batch::value_type;
81+
#ifdef __FAST_MATH__
82+
return {self};
83+
#else
8184
auto cond = xsimd::isinf(real(self)) || xsimd::isinf(imag(self));
8285
return select(cond,
8386
batch_type(constants::infinity<real_batch>(),
8487
copysign(real_batch(real_value_type(0)), imag(self))),
8588
batch_type(self));
89+
#endif
8690
}
8791

8892
template <class A, class T>

include/xsimd/arch/common/xsimd_common_logical.hpp

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -124,12 +124,22 @@ namespace xsimd
124124
template <class A>
125125
XSIMD_INLINE batch_bool<float, A> isinf(batch<float, A> const& self, requires_arch<common>) noexcept
126126
{
127+
#ifdef __FAST_MATH__
128+
(void)self;
129+
return {false};
130+
#else
127131
return abs(self) == std::numeric_limits<float>::infinity();
132+
#endif
128133
}
129134
template <class A>
130135
XSIMD_INLINE batch_bool<double, A> isinf(batch<double, A> const& self, requires_arch<common>) noexcept
131136
{
137+
#ifdef __FAST_MATH__
138+
(void)self;
139+
return {false};
140+
#else
132141
return abs(self) == std::numeric_limits<double>::infinity();
142+
#endif
133143
}
134144

135145
// isfinite

include/xsimd/arch/common/xsimd_common_math.hpp

Lines changed: 65 additions & 35 deletions
Original file line number | Diff line number | Diff line change
@@ -896,7 +896,9 @@ namespace xsimd
896896
batch_type k = reducer_t::reduce(self, x);
897897
x = reducer_t::approx(x);
898898
x = select(self <= reducer_t::minlog(), batch_type(0.), ldexp(x, to_int(k)));
899+
#ifndef __FAST_MATH__
899900
x = select(self >= reducer_t::maxlog(), constants::infinity<batch_type>(), x);
901+
#endif
900902
return x;
901903
}
902904

@@ -910,7 +912,9 @@ namespace xsimd
910912
batch_type c = reducer_t::approx(x);
911913
c = reducer_t::finalize(x, c, hi, lo);
912914
c = select(self <= reducer_t::minlog(), batch_type(0.), ldexp(c, to_int(k)));
915+
#ifndef __FAST_MATH__
913916
c = select(self >= reducer_t::maxlog(), constants::infinity<batch_type>(), c);
917+
#endif
914918
return c;
915919
}
916920
}
@@ -1014,11 +1018,11 @@ namespace xsimd
10141018
XSIMD_INLINE batch<T, A> expm1(batch<T, A> const& self, requires_arch<common>) noexcept
10151019
{
10161020
using batch_type = batch<T, A>;
1017-
return select(self < constants::logeps<batch_type>(),
1018-
batch_type(-1.),
1019-
select(self > constants::maxlog<batch_type>(),
1020-
constants::infinity<batch_type>(),
1021-
detail::expm1(self)));
1021+
auto x = detail::expm1(self);
1022+
#ifndef __FAST_MATH__
1023+
x = select(self > constants::maxlog<batch_type>(), constants::infinity<batch_type>(), x);
1024+
#endif
1025+
return select(self < constants::logeps<batch_type>(), batch_type(-1.), x);
10221026
}
10231027

10241028
template <class A, class T>
@@ -1245,12 +1249,20 @@ namespace xsimd
12451249
batch_type r1 = other(q);
12461250
if (any(ltza))
12471251
{
1252+
#ifdef __FAST_MATH__
1253+
r = negative(q, r1);
1254+
#else
12481255
r = select(inf_result, constants::infinity<batch_type>(), negative(q, r1));
1256+
#endif
12491257
if (all(ltza))
12501258
return r;
12511259
}
12521260
batch_type r2 = select(ltza, r, r1);
1261+
#ifdef __FAST_MATH__
1262+
return r2;
1263+
#else
12531264
return select(a == constants::minusinfinity<batch_type>(), constants::nan<batch_type>(), select(inf_result, constants::infinity<batch_type>(), r2));
1265+
#endif
12541266
}
12551267

12561268
private:
@@ -1371,7 +1383,11 @@ namespace xsimd
13711383
}
13721384
batch_type r1 = other(a);
13731385
batch_type r2 = select(test, r, r1);
1386+
#ifdef __FAST_MATH__
1387+
return r2;
1388+
#else
13741389
return select(a == constants::minusinfinity<batch_type>(), constants::nan<batch_type>(), select(inf_result, constants::infinity<batch_type>(), r2));
1390+
#endif
13751391
}
13761392

13771393
private:
@@ -1479,12 +1495,12 @@ namespace xsimd
14791495
batch_type hfsq = batch_type(0.5) * f * f;
14801496
batch_type dk = to_float(k);
14811497
batch_type r = fma(dk, constants::log_2hi<batch_type>(), fma(s, (hfsq + R), dk * constants::log_2lo<batch_type>()) - hfsq + f);
1482-
#ifndef XSIMD_NO_INFINITIES
1483-
batch_type zz = select(isnez, select(self == constants::infinity<batch_type>(), constants::infinity<batch_type>(), r), constants::minusinfinity<batch_type>());
1498+
#ifdef __FAST_MATH__
1499+
return r;
14841500
#else
1485-
batch_type zz = select(isnez, r, constants::minusinfinity<batch_type>());
1486-
#endif
1501+
batch_type zz = select(isnez, select(self == constants::infinity<batch_type>(), constants::infinity<batch_type>(), r), constants::minusinfinity<batch_type>());
14871502
return select(!(self >= batch_type(0.)), constants::nan<batch_type>(), zz);
1503+
#endif
14881504
}
14891505

14901506
template <class A>
@@ -1522,12 +1538,12 @@ namespace xsimd
15221538
batch_type t2 = z * detail::horner<batch_type, 0x3fe5555555555593ll, 0x3fd2492494229359ll, 0x3fc7466496cb03dell, 0x3fc2f112df3e5244ll>(w);
15231539
batch_type R = t2 + t1;
15241540
batch_type r = fma(dk, constants::log_2hi<batch_type>(), fma(s, (hfsq + R), dk * constants::log_2lo<batch_type>()) - hfsq + f);
1525-
#ifndef XSIMD_NO_INFINITIES
1526-
batch_type zz = select(isnez, select(self == constants::infinity<batch_type>(), constants::infinity<batch_type>(), r), constants::minusinfinity<batch_type>());
1541+
#ifdef __FAST_MATH__
1542+
return r;
15271543
#else
1528-
batch_type zz = select(isnez, r, constants::minusinfinity<batch_type>());
1529-
#endif
1544+
batch_type zz = select(isnez, select(self == constants::infinity<batch_type>(), constants::infinity<batch_type>(), r), constants::minusinfinity<batch_type>());
15301545
return select(!(self >= batch_type(0.)), constants::nan<batch_type>(), zz);
1546+
#endif
15311547
}
15321548

15331549
template <class A, class T>
@@ -1569,12 +1585,12 @@ namespace xsimd
15691585
batch_type hfsq = batch_type(0.5) * f * f;
15701586
batch_type dk = to_float(k);
15711587
batch_type r = fma(fms(s, hfsq + R, hfsq) + f, constants::invlog_2<batch_type>(), dk);
1572-
#ifndef XSIMD_NO_INFINITIES
1573-
batch_type zz = select(isnez, select(self == constants::infinity<batch_type>(), constants::infinity<batch_type>(), r), constants::minusinfinity<batch_type>());
1588+
#ifdef __FAST_MATH__
1589+
return r;
15741590
#else
1575-
batch_type zz = select(isnez, r, constants::minusinfinity<batch_type>());
1576-
#endif
1591+
batch_type zz = select(isnez, select(self == constants::infinity<batch_type>(), constants::infinity<batch_type>(), r), constants::minusinfinity<batch_type>());
15771592
return select(!(self >= batch_type(0.)), constants::nan<batch_type>(), zz);
1593+
#endif
15781594
}
15791595

15801596
template <class A>
@@ -1617,12 +1633,12 @@ namespace xsimd
16171633
val_lo += (dk - w1) + val_hi;
16181634
val_hi = w1;
16191635
batch_type r = val_lo + val_hi;
1620-
#ifndef XSIMD_NO_INFINITIES
1621-
batch_type zz = select(isnez, select(self == constants::infinity<batch_type>(), constants::infinity<batch_type>(), r), constants::minusinfinity<batch_type>());
1636+
#ifdef __FAST_MATH__
1637+
return r;
16221638
#else
1623-
batch_type zz = select(isnez, r, constants::minusinfinity<batch_type>());
1624-
#endif
1639+
batch_type zz = select(isnez, select(self == constants::infinity<batch_type>(), constants::infinity<batch_type>(), r), constants::minusinfinity<batch_type>());
16251640
return select(!(self >= batch_type(0.)), constants::nan<batch_type>(), zz);
1641+
#endif
16261642
}
16271643

16281644
namespace detail
@@ -1757,12 +1773,12 @@ namespace xsimd
17571773
val_lo += (y - w1) + val_hi;
17581774
val_hi = w1;
17591775
batch_type r = val_lo + val_hi;
1760-
#ifndef XSIMD_NO_INFINITIES
1761-
batch_type zz = select(isnez, select(self == constants::infinity<batch_type>(), constants::infinity<batch_type>(), r), constants::minusinfinity<batch_type>());
1776+
#ifdef __FAST_MATH__
1777+
return r;
17621778
#else
1763-
batch_type zz = select(isnez, r, constants::minusinfinity<batch_type>());
1764-
#endif
1779+
batch_type zz = select(isnez, select(self == constants::infinity<batch_type>(), constants::infinity<batch_type>(), r), constants::minusinfinity<batch_type>());
17651780
return select(!(self >= batch_type(0.)), constants::nan<batch_type>(), zz);
1781+
#endif
17661782
}
17671783

17681784
template <class A, class T>
@@ -1805,12 +1821,12 @@ namespace xsimd
18051821
/* correction term ~ log(1+x)-log(u), avoid underflow in c/u */
18061822
batch_type c = select(batch_bool_cast<float>(k >= i_type(2)), batch_type(1.) - (uf - self), self - (uf - batch_type(1.))) / uf;
18071823
batch_type r = fma(dk, constants::log_2hi<batch_type>(), fma(s, (hfsq + R), dk * constants::log_2lo<batch_type>() + c) - hfsq + f);
1808-
#ifndef XSIMD_NO_INFINITIES
1809-
batch_type zz = select(isnez, select(self == constants::infinity<batch_type>(), constants::infinity<batch_type>(), r), constants::minusinfinity<batch_type>());
1824+
#ifdef __FAST_MATH__
1825+
return r;
18101826
#else
1811-
batch_type zz = select(isnez, r, constants::minusinfinity<batch_type>());
1812-
#endif
1827+
batch_type zz = select(isnez, select(self == constants::infinity<batch_type>(), constants::infinity<batch_type>(), r), constants::minusinfinity<batch_type>());
18131828
return select(!(uf >= batch_type(0.)), constants::nan<batch_type>(), zz);
1829+
#endif
18141830
}
18151831

18161832
template <class A>
@@ -1838,12 +1854,12 @@ namespace xsimd
18381854
batch_type R = t2 + t1;
18391855
batch_type dk = to_float(k);
18401856
batch_type r = fma(dk, constants::log_2hi<batch_type>(), fma(s, hfsq + R, dk * constants::log_2lo<batch_type>() + c) - hfsq + f);
1841-
#ifndef XSIMD_NO_INFINITIES
1842-
batch_type zz = select(isnez, select(self == constants::infinity<batch_type>(), constants::infinity<batch_type>(), r), constants::minusinfinity<batch_type>());
1857+
#ifdef __FAST_MATH__
1858+
return r;
18431859
#else
1844-
batch_type zz = select(isnez, r, constants::minusinfinity<batch_type>());
1845-
#endif
1860+
batch_type zz = select(isnez, select(self == constants::infinity<batch_type>(), constants::infinity<batch_type>(), r), constants::minusinfinity<batch_type>());
18461861
return select(!(uf >= batch_type(0.)), constants::nan<batch_type>(), zz);
1862+
#endif
18471863
}
18481864

18491865
template <class A, class T>
@@ -1980,13 +1996,21 @@ namespace xsimd
19801996
static XSIMD_INLINE batch_type next(const batch_type& b) noexcept
19811997
{
19821998
batch_type n = ::xsimd::bitwise_cast<T>(::xsimd::bitwise_cast<int_type>(b) + int_type(1));
1999+
#ifdef __FAST_MATH__
2000+
return n;
2001+
#else
19832002
return select(b == constants::infinity<batch_type>(), b, n);
2003+
#endif
19842004
}
19852005

19862006
static XSIMD_INLINE batch_type prev(const batch_type& b) noexcept
19872007
{
19882008
batch_type p = ::xsimd::bitwise_cast<T>(::xsimd::bitwise_cast<int_type>(b) - int_type(1));
2009+
#ifdef __FAST_MATH__
2010+
return p;
2011+
#else
19892012
return select(b == constants::minusinfinity<batch_type>(), b, p);
2013+
#endif
19902014
}
19912015
};
19922016
}
@@ -2355,10 +2379,12 @@ namespace xsimd
23552379
y *= v;
23562380
y = select(test, y, y * v);
23572381
y *= constants::sqrt_2pi<batch_type>() * w;
2358-
#ifndef XSIMD_NO_INFINITIES
2382+
#ifdef __FAST_MATH__
2383+
return y;
2384+
#else
23592385
y = select(isinf(x), x, y);
2360-
#endif
23612386
return select(x > stirlinglargelim, constants::infinity<batch_type>(), y);
2387+
#endif
23622388
}
23632389

23642390
/* origin: boost/simd/arch/common/detail/common/gamma_kernel.hpp */
@@ -2501,7 +2527,11 @@ namespace xsimd
25012527
}
25022528
batch_type r1 = detail::tgamma_other(self, test);
25032529
batch_type r2 = select(test, r, r1);
2530+
#ifdef __FAST_MATH__
2531+
return r2;
2532+
#else
25042533
return select(self == batch_type(0.), copysign(constants::infinity<batch_type>(), self), select(nan_result, constants::nan<batch_type>(), r2));
2534+
#endif
25052535
}
25062536

25072537
}

include/xsimd/arch/common/xsimd_common_trigo.hpp

Lines changed: 12 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -310,10 +310,13 @@ namespace xsimd
310310
num = x2 + num * num;
311311
real_batch den = y - one;
312312
den = x2 + den * den;
313-
batch_type res = select((x == real_batch(0.)) && (y == real_batch(1.)),
313+
#ifdef __FAST_MATH__
314+
return batch_type(w, 0.25 * log(num / den));
315+
#else
316+
return select((x == real_batch(0.)) && (y == real_batch(1.)),
314317
batch_type(real_batch(0.), constants::infinity<real_batch>()),
315318
batch_type(w, 0.25 * log(num / den)));
316-
return res;
319+
#endif
317320
}
318321

319322
// atanh
@@ -583,12 +586,14 @@ namespace xsimd
583586
for (std::size_t i = 0; i < size; ++i)
584587
{
585588
double arg = args[i];
589+
#ifndef __FAST_MATH__
586590
if (arg == std::numeric_limits<value_type>::infinity())
587591
{
588592
tmp[i] = 0.;
589593
txr[i] = std::numeric_limits<value_type>::quiet_NaN();
590594
}
591595
else
596+
#endif
592597
{
593598
double y[2];
594599
std::int32_t n = ::xsimd::detail::__ieee754_rem_pio2(arg, y);
@@ -841,11 +846,15 @@ namespace xsimd
841846
using batch_type = batch<std::complex<T>, A>;
842847
using real_batch = typename batch_type::real_batch;
843848
real_batch d = cos(2 * z.real()) + cosh(2 * z.imag());
844-
batch_type winf(constants::infinity<real_batch>(), constants::infinity<real_batch>());
845849
real_batch wreal = sin(2 * z.real()) / d;
846850
real_batch wimag = sinh(2 * z.imag());
851+
#ifdef __FAST_MATH__
852+
return batch_type(wreal, real_batch(1.)), batch_type(wreal, wimag / d);
853+
#else
854+
batch_type winf(constants::infinity<real_batch>(), constants::infinity<real_batch>());
847855
batch_type wres = select(isinf(wimag), batch_type(wreal, real_batch(1.)), batch_type(wreal, wimag / d));
848856
return select(d == real_batch(0.), winf, wres);
857+
#endif
849858
}
850859

851860
// tanh

include/xsimd/arch/xsimd_constants.hpp

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -61,7 +61,10 @@ namespace xsimd
6161
#pragma GCC push_options
6262
#pragma GCC optimize("signed-zeros")
6363
#endif
64+
#ifndef __FAST_MATH__
6465
XSIMD_DEFINE_CONSTANT(infinity, (std::numeric_limits<float>::infinity()), (std::numeric_limits<double>::infinity()))
66+
XSIMD_DEFINE_CONSTANT(minusinfinity, (-infinity<float>()), (-infinity<double>()))
67+
#endif
6568
XSIMD_DEFINE_CONSTANT(invlog_2, 1.442695040888963407359924681001892137426645954152986f, 1.442695040888963407359924681001892137426645954152986)
6669
XSIMD_DEFINE_CONSTANT_HEX(invlog_2hi, 0x3fb8b000, 0x3ff7154765200000)
6770
XSIMD_DEFINE_CONSTANT_HEX(invlog_2lo, 0xb9389ad4, 0x3de705fc2eefa200)
@@ -83,7 +86,6 @@ namespace xsimd
8386
XSIMD_DEFINE_CONSTANT(minlog, -88.3762626647949f, -708.3964185322641)
8487
XSIMD_DEFINE_CONSTANT(minlog2, -127.0f, -1023.)
8588
XSIMD_DEFINE_CONSTANT(minlog10, -37.89999771118164f, -308.2547155599167)
86-
XSIMD_DEFINE_CONSTANT(minusinfinity, (-infinity<float>()), (-infinity<double>()))
8789
XSIMD_DEFINE_CONSTANT_HEX(nan, 0xffffffff, 0xffffffffffffffff)
8890
XSIMD_DEFINE_CONSTANT_HEX(oneosqrteps, 0x453504f3, 0x4190000000000000)
8991
XSIMD_DEFINE_CONSTANT_HEX(oneotwoeps, 0x4a800000, 0x4320000000000000)

include/xsimd/types/xsimd_api.hpp

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1114,6 +1114,7 @@ namespace xsimd
11141114
return kernel::incr_if<A>(x, mask, A {});
11151115
}
11161116

1117+
#ifndef __FAST_MATH__
11171118
/**
11181119
* @ingroup batch_constant
11191120
*
@@ -1128,6 +1129,7 @@ namespace xsimd
11281129
detail::static_check_supported_config<T, A>();
11291130
return B(std::numeric_limits<T>::infinity());
11301131
}
1132+
#endif
11311133

11321134
/**
11331135
* @ingroup batch_data_transfer

0 commit comments

Comments
 (0)