Skip to content

Commit f40e89f

Browse files
committed
[Math] Inline VecorizedTMath to be instantiated with any SIMD type
1 parent de9f047 commit f40e89f

File tree

3 files changed

+237
-264
lines changed

3 files changed

+237
-264
lines changed

math/mathcore/CMakeLists.txt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -177,7 +177,6 @@ ROOT_STANDARD_LIBRARY_PACKAGE(MathCore
177177
src/TStatistic.cxx
178178
src/UnBinData.cxx
179179
src/Util.cxx
180-
src/VectorizedTMath.cxx
181180
DICTIONARY_OPTIONS
182181
-writeEmptyRootPCM
183182
${dictoptions}

math/mathcore/inc/VectorizedTMath.h

Lines changed: 237 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -5,27 +5,246 @@
55
#include "Math/Types.h"
66
#include "TMath.h"
77

8+
#include <cmath>
9+
810
#ifdef R__HAS_STD_SIMD
911

1012
namespace TMath {
11-
::ROOT::Double_v Log2(::ROOT::Double_v &x);
12-
::ROOT::Double_v BreitWigner(::ROOT::Double_v &x, Double_t mean = 0, Double_t gamma = 1);
13-
::ROOT::Double_v Gaus(::ROOT::Double_v &x, Double_t mean = 0, Double_t sigma = 1, Bool_t norm = kFALSE);
14-
::ROOT::Double_v LaplaceDist(::ROOT::Double_v &x, Double_t alpha = 0, Double_t beta = 1);
15-
::ROOT::Double_v LaplaceDistI(::ROOT::Double_v &x, Double_t alpha = 0, Double_t beta = 1);
16-
::ROOT::Double_v Freq(::ROOT::Double_v &x);
17-
::ROOT::Double_v BesselI0_Split_More(::ROOT::Double_v &ax);
18-
::ROOT::Double_v BesselI0_Split_Less(::ROOT::Double_v &x);
19-
::ROOT::Double_v BesselI0(::ROOT::Double_v &x);
20-
::ROOT::Double_v BesselI1_Split_More(::ROOT::Double_v &ax, ::ROOT::Double_v &x);
21-
::ROOT::Double_v BesselI1_Split_Less(::ROOT::Double_v &x);
22-
::ROOT::Double_v BesselI1(::ROOT::Double_v &x);
23-
::ROOT::Double_v BesselJ0_Split1_More(::ROOT::Double_v &ax);
24-
::ROOT::Double_v BesselJ0_Split1_Less(::ROOT::Double_v &x);
25-
::ROOT::Double_v BesselJ0(::ROOT::Double_v &x);
26-
::ROOT::Double_v BesselJ1_Split1_More(::ROOT::Double_v &ax, ::ROOT::Double_v &x);
27-
::ROOT::Double_v BesselJ1_Split1_Less(::ROOT::Double_v &x);
28-
::ROOT::Double_v BesselJ1(::ROOT::Double_v &x);
13+
::ROOT::Double_v Log2(::ROOT::Double_v &x)
14+
{
15+
return log2(x);
16+
}
17+
18+
/// Calculate a Breit Wigner function with mean and gamma.
19+
::ROOT::Double_v BreitWigner(::ROOT::Double_v &x, Double_t mean = 0, Double_t gamma = 1)
20+
{
21+
return 0.5 * M_1_PI * (gamma / (0.25 * gamma * gamma + (x - mean) * (x - mean)));
22+
}
23+
24+
/// Calculate a gaussian function with mean and sigma.
25+
/// If norm=kTRUE (default is kFALSE) the result is divided
26+
/// by sqrt(2*Pi)*sigma.
27+
::ROOT::Double_v Gaus(::ROOT::Double_v &x, Double_t mean = 0, Double_t sigma = 1, Bool_t norm = false)
28+
{
29+
if (sigma == 0)
30+
return 1.e30;
31+
32+
auto inv_sigma = 1.0 / ::ROOT::Double_v(sigma);
33+
auto arg = (x - ::ROOT::Double_v(mean)) * inv_sigma;
34+
35+
// For those entries of |arg| > 39 result is zero in double precision
36+
::ROOT::Double_v out{};
37+
where(abs(arg) < 39.0, out) = exp(::ROOT::Double_v(-0.5) * arg * arg);
38+
39+
if (norm)
40+
out *= 0.3989422804014327 * inv_sigma; // 1/sqrt(2*Pi)=0.3989422804014327
41+
return out;
42+
}
43+
44+
/// Computes the probability density function of Laplace distribution
45+
/// at point x, with location parameter alpha and shape parameter beta.
46+
/// By default, alpha=0, beta=1
47+
/// This distribution is known under different names, most common is
48+
/// double exponential distribution, but it also appears as
49+
/// the two-tailed exponential or the bilateral exponential distribution
50+
::ROOT::Double_v LaplaceDist(::ROOT::Double_v &x, Double_t alpha = 0, Double_t beta = 1)
51+
{
52+
auto beta_v_inv = ::ROOT::Double_v(1.0 / beta);
53+
auto out = exp(-abs((x - alpha) * beta_v_inv));
54+
out *= 0.5 * beta_v_inv;
55+
return out;
56+
}
57+
58+
/// Computes the distribution function of Laplace distribution
59+
/// at point x, with location parameter alpha and shape parameter beta.
60+
/// By default, alpha=0, beta=1
61+
/// This distribution is known under different names, most common is
62+
/// double exponential distribution, but it also appears as
63+
/// the two-tailed exponential or the bilateral exponential distribution
64+
::ROOT::Double_v LaplaceDistI(::ROOT::Double_v &x, Double_t alpha = 0, Double_t beta = 1)
65+
{
66+
auto alpha_v = ::ROOT::Double_v(alpha);
67+
auto beta_v_inv = ::ROOT::Double_v(1.0) / ::ROOT::Double_v(beta);
68+
auto mask = x <= alpha_v;
69+
::ROOT::Double_v out{};
70+
where(mask, out) = 0.5 * exp(-abs((x - alpha_v) * beta_v_inv));
71+
where(!mask, out) = 1 - 0.5 * exp(-abs((x - alpha_v) * beta_v_inv));
72+
return out;
73+
}
74+
75+
/// Computation of the normal frequency function freq(x).
76+
/// Freq(x) = (1/sqrt(2pi)) Integral(exp(-t^2/2))dt between -infinity and x.
77+
///
78+
/// Translated from CERNLIB C300 by Rene Brun.
79+
::ROOT::Double_v Freq(::ROOT::Double_v &x)
80+
{
81+
Double_t c1 = 0.56418958354775629;
82+
Double_t w2 = 1.41421356237309505;
83+
84+
Double_t p10 = 2.4266795523053175e+2, q10 = 2.1505887586986120e+2, p11 = 2.1979261618294152e+1,
85+
q11 = 9.1164905404514901e+1, p12 = 6.9963834886191355e+0, q12 = 1.5082797630407787e+1,
86+
p13 = -3.5609843701815385e-2;
87+
88+
Double_t p20 = 3.00459261020161601e+2, q20 = 3.00459260956983293e+2, p21 = 4.51918953711872942e+2,
89+
q21 = 7.90950925327898027e+2, p22 = 3.39320816734343687e+2, q22 = 9.31354094850609621e+2,
90+
p23 = 1.52989285046940404e+2, q23 = 6.38980264465631167e+2, p24 = 4.31622272220567353e+1,
91+
q24 = 2.77585444743987643e+2, p25 = 7.21175825088309366e+0, q25 = 7.70001529352294730e+1,
92+
p26 = 5.64195517478973971e-1, q26 = 1.27827273196294235e+1, p27 = -1.36864857382716707e-7;
93+
94+
Double_t p30 = -2.99610707703542174e-3, q30 = 1.06209230528467918e-2, p31 = -4.94730910623250734e-2,
95+
q31 = 1.91308926107829841e-1, p32 = -2.26956593539686930e-1, q32 = 1.05167510706793207e+0,
96+
p33 = -2.78661308609647788e-1, q33 = 1.98733201817135256e+0, p34 = -2.23192459734184686e-2, q34 = 1;
97+
98+
auto v = abs(x) / w2;
99+
100+
::ROOT::Double_v result{};
101+
102+
auto mask1 = v < 0.5;
103+
auto mask2 = !mask1 && v < 4.0;
104+
auto mask3 = !(mask1 || mask2);
105+
106+
auto v2 = v * v;
107+
auto v3 = v2 * v;
108+
auto v4 = v3 * v;
109+
auto v5 = v4 * v;
110+
auto v6 = v5 * v;
111+
auto v7 = v6 * v;
112+
auto v8 = v7 * v;
113+
114+
where(mask1, result) = v * (p10 + p11 * v2 + p12 * v4 + p13 * v6) / (q10 + q11 * v2 + q12 * v4 + v6);
115+
where(mask2, result) =
116+
1.0 - (p20 + p21 * v + p22 * v2 + p23 * v3 + p24 * v4 + p25 * v5 + p26 * v6 + p27 * v7) /
117+
(exp(v2) * (q20 + q21 * v + q22 * v2 + q23 * v3 + q24 * v4 + q25 * v5 + q26 * v6 + v7));
118+
where(mask3, result) = 1.0 - (c1 + (p30 * v8 + p31 * v6 + p32 * v4 + p33 * v2 + p34) /
119+
((q30 * v8 + q31 * v6 + q32 * v4 + q33 * v2 + q34) * v2)) /
120+
(v * exp(v2));
121+
122+
auto out = 0.5 * (1 - result);
123+
where(x > 0, out) = 0.5 + 0.5 * result;
124+
return out;
125+
}
126+
127+
/// Vectorized implementation of Bessel function I_0(x) for a vector x.
128+
::ROOT::Double_v BesselI0_Split_More(::ROOT::Double_v &ax)
129+
{
130+
auto y = 3.75 / ax;
131+
return (exp(ax) / sqrt(ax)) *
132+
(0.39894228 +
133+
y * (1.328592e-2 +
134+
y * (2.25319e-3 +
135+
y * (-1.57565e-3 +
136+
y * (9.16281e-3 +
137+
y * (-2.057706e-2 + y * (2.635537e-2 + y * (-1.647633e-2 + y * 3.92377e-3))))))));
138+
}
139+
140+
::ROOT::Double_v BesselI0_Split_Less(::ROOT::Double_v &x)
141+
{
142+
auto y = x * x * 0.071111111;
143+
144+
return 1.0 +
145+
y * (3.5156229 + y * (3.0899424 + y * (1.2067492 + y * (0.2659732 + y * (3.60768e-2 + y * 4.5813e-3)))));
146+
}
147+
148+
::ROOT::Double_v BesselI0(::ROOT::Double_v &x)
149+
{
150+
auto ax = abs(x);
151+
152+
auto out = BesselI0_Split_More(ax);
153+
where(ax <= 3.75, out) = BesselI0_Split_Less(x);
154+
return out;
155+
}
156+
157+
/// Vectorized implementation of modified Bessel function I_1(x) for a vector x.
158+
::ROOT::Double_v BesselI1_Split_More(::ROOT::Double_v &ax, ::ROOT::Double_v &x)
159+
{
160+
auto y = 3.75 / ax;
161+
auto result = (exp(ax) / sqrt(ax)) *
162+
(0.39894228 +
163+
y * (-3.988024e-2 +
164+
y * (-3.62018e-3 +
165+
y * (1.63801e-3 +
166+
y * (-1.031555e-2 +
167+
y * (2.282967e-2 + y * (-2.895312e-2 + y * (1.787654e-2 + y * -4.20059e-3))))))));
168+
where(x < 0, result) = -result;
169+
return result;
170+
}
171+
172+
::ROOT::Double_v BesselI1_Split_Less(::ROOT::Double_v &x)
173+
{
174+
auto y = x * x * 0.071111111;
175+
176+
return x * (0.5 + y * (0.87890594 +
177+
y * (0.51498869 + y * (0.15084934 + y * (2.658733e-2 + y * (3.01532e-3 + y * 3.2411e-4))))));
178+
}
179+
180+
::ROOT::Double_v BesselI1(::ROOT::Double_v &x)
181+
{
182+
auto ax = abs(x);
183+
184+
auto out = BesselI1_Split_More(ax, x);
185+
where(ax <= 3.75, out) = BesselI1_Split_Less(x);
186+
return out;
187+
}
188+
189+
/// Vectorized implementation of Bessel function J0(x) for a vector x.
190+
::ROOT::Double_v BesselJ0_Split1_More(::ROOT::Double_v &ax)
191+
{
192+
auto z = 8 / ax;
193+
auto y = z * z;
194+
auto xx = ax - 0.785398164;
195+
auto result1 = 1 + y * (-0.1098628627e-2 + y * (0.2734510407e-4 + y * (-0.2073370639e-5 + y * 0.2093887211e-6)));
196+
auto result2 =
197+
-0.1562499995e-1 + y * (0.1430488765e-3 + y * (-0.6911147651e-5 + y * (0.7621095161e-6 - y * 0.934935152e-7)));
198+
return sqrt(0.636619772 / ax) * (cos(xx) * result1 - z * sin(xx) * result2);
199+
}
200+
201+
::ROOT::Double_v BesselJ0_Split1_Less(::ROOT::Double_v &x)
202+
{
203+
auto y = x * x;
204+
return (57568490574.0 +
205+
y * (-13362590354.0 + y * (651619640.7 + y * (-11214424.18 + y * (77392.33017 + y * -184.9052456))))) /
206+
(57568490411.0 + y * (1029532985.0 + y * (9494680.718 + y * (59272.64853 + y * (267.8532712 + y)))));
207+
}
208+
209+
::ROOT::Double_v BesselJ0(::ROOT::Double_v &x)
210+
{
211+
auto ax = abs(x);
212+
auto out = BesselJ0_Split1_More(ax);
213+
where(ax < 8, out) = BesselJ0_Split1_Less(x);
214+
return out;
215+
}
216+
217+
/// Vectorized implementation of Bessel function J1(x) for a vector x.
218+
::ROOT::Double_v BesselJ1_Split1_More(::ROOT::Double_v &ax, ::ROOT::Double_v &x)
219+
{
220+
auto z = 8 / ax;
221+
auto y = z * z;
222+
auto xx = ax - 2.356194491;
223+
auto result1 = 1 + y * (0.183105e-2 + y * (-0.3516396496e-4 + y * (0.2457520174e-5 + y * -0.240337019e-6)));
224+
auto result2 =
225+
0.04687499995 + y * (-0.2002690873e-3 + y * (0.8449199096e-5 + y * (-0.88228987e-6 - y * 0.105787412e-6)));
226+
auto result = sqrt(0.636619772 / ax) * (cos(xx) * result1 - z * sin(xx) * result2);
227+
where(x < 0, result) = -result;
228+
return result;
229+
}
230+
231+
::ROOT::Double_v BesselJ1_Split1_Less(::ROOT::Double_v &x)
232+
{
233+
auto y = x * x;
234+
return x *
235+
(72362614232.0 +
236+
y * (-7895059235.0 + y * (242396853.1 + y * (-2972611.439 + y * (15704.48260 + y * -30.16036606))))) /
237+
(144725228442.0 + y * (2300535178.0 + y * (18583304.74 + y * (99447.43394 + y * (376.9991397 + y)))));
238+
}
239+
240+
::ROOT::Double_v BesselJ1(::ROOT::Double_v &x)
241+
{
242+
auto ax = abs(x);
243+
auto out = BesselJ1_Split1_More(ax, x);
244+
where(ax < 8, out) = BesselJ1_Split1_Less(x);
245+
return out;
246+
}
247+
29248
} // namespace TMath
30249

31250
#endif // R__HAS_STD_SIMD

0 commit comments

Comments
 (0)