Skip to content

Commit 9a3bacf

Browse files
committed
Simplify kernel constructs
1 parent 6e6cf88 commit 9a3bacf

File tree

4 files changed

+309
-327
lines changed

4 files changed

+309
-327
lines changed

src/exp.jl

Lines changed: 53 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -14,39 +14,36 @@ const max_exp2(::Type{Float32}) = 128f0
1414
const min_exp2(::Type{Float64}) = -1075
1515
const min_exp2(::Type{Float32}) = -150f0
1616

17+
@inline function exp2_kernel(x::Float64)
18+
c11d = 0.4434359082926529454e-9
19+
c10d = 0.7073164598085707425e-8
20+
c9d = 0.1017819260921760451e-6
21+
c8d = 0.1321543872511327615e-5
22+
c7d = 0.1525273353517584730e-4
23+
c6d = 0.1540353045101147808e-3
24+
c5d = 0.1333355814670499073e-2
25+
c4d = 0.9618129107597600536e-2
26+
c3d = 0.5550410866482046596e-1
27+
c2d = 0.2402265069591012214
28+
c1d = 0.6931471805599452862
29+
return @horner x c1d c2d c3d c4d c5d c6d c7d c8d c9d c10d c11d
30+
end
31+
32+
@inline function exp2_kernel(x::Float32)
33+
c6f = 0.1535920892f-3
34+
c5f = 0.1339262701f-2
35+
c4f = 0.9618384764f-2
36+
c3f = 0.5550347269f-1
37+
c2f = 0.2402264476f0
38+
c1f = 0.6931471825f0
39+
return @horner x c1f c2f c3f c4f c5f c6f
40+
end
41+
1742
"""
1843
exp2(x)
1944
2045
Compute the base-`2` exponential of `x`, that is `2ˣ`.
2146
"""
22-
function exp2 end
23-
24-
let
25-
global exp2
26-
27-
28-
c11d = 0.4434359082926529454e-9
29-
c10d = 0.7073164598085707425e-8
30-
c9d = 0.1017819260921760451e-6
31-
c8d = 0.1321543872511327615e-5
32-
c7d = 0.1525273353517584730e-4
33-
c6d = 0.1540353045101147808e-3
34-
c5d = 0.1333355814670499073e-2
35-
c4d = 0.9618129107597600536e-2
36-
c3d = 0.5550410866482046596e-1
37-
c2d = 0.2402265069591012214
38-
c1d = 0.6931471805599452862
39-
40-
c6f = 0.1535920892f-3
41-
c5f = 0.1339262701f-2
42-
c4f = 0.9618384764f-2
43-
c3f = 0.5550347269f-1
44-
c2f = 0.2402264476f0
45-
c1f = 0.6931471825f0
46-
47-
global @inline exp2_kernel(x::Float64) = @horner x c1d c2d c3d c4d c5d c6d c7d c8d c9d c10d c11d
48-
global @inline exp2_kernel(x::Float32) = @horner x c1f c2f c3f c4f c5f c6f
49-
5047
function exp2(d::T) where {T<:Union{Float32,Float64}}
5148
q = unsafe_trunc(Int, round(d))
5249
s = d - q
@@ -60,7 +57,7 @@ function exp2(d::T) where {T<:Union{Float32,Float64}}
6057
d < min_exp2(T) && (u = T(0.0))
6158
return u
6259
end
63-
end
60+
6461

6562
const max_exp10(::Type{Float64}) = 3.08254715559916743851e2 # log10 2^1023*(2-2^-52)
6663
const max_exp10(::Type{Float32}) = 38.531839419103626f0 # log10 2^127 *(2-2^-23)
@@ -97,44 +94,43 @@ function expm1(x::T) where {T<:Union{Float32,Float64}}
9794
return u
9895
end
9996

97+
10098
const max_exp(::Type{Float64}) = 709.78271114955742909217217426 # log 2^1023*(2-2^-52)
10199
const max_exp(::Type{Float32}) = 88.72283905206835f0 # log 2^127 *(2-2^-23)
102100

103101
const min_exp(::Type{Float64}) = -7.451332191019412076235e2 # log 2^-1075
104102
const min_exp(::Type{Float32}) = -103.97208f0 # ≈ log 2^-150
105103

104+
@inline function exp_kernel(x::Float64)
105+
c11d = 2.08860621107283687536341e-09
106+
c10d = 2.51112930892876518610661e-08
107+
c9d = 2.75573911234900471893338e-07
108+
c8d = 2.75572362911928827629423e-06
109+
c7d = 2.4801587159235472998791e-05
110+
c6d = 0.000198412698960509205564975
111+
c5d = 0.00138888888889774492207962
112+
c4d = 0.00833333333331652721664984
113+
c3d = 0.0416666666666665047591422
114+
c2d = 0.166666666666666851703837
115+
c1d = 0.50
116+
return @horner x c1d c2d c3d c4d c5d c6d c7d c8d c9d c10d c11d
117+
end
118+
119+
@inline function exp_kernel(x::Float32)
120+
c6f = 0.000198527617612853646278381f0
121+
c5f = 0.00139304355252534151077271f0
122+
c4f = 0.00833336077630519866943359f0
123+
c3f = 0.0416664853692054748535156f0
124+
c2f = 0.166666671633720397949219f0
125+
c1f = 0.5f0
126+
return @horner x c1f c2f c3f c4f c5f c6f
127+
end
128+
106129
"""
107130
exp(x)
108131
109132
Compute the base-`e` exponential of `x`, that is `eˣ`.
110133
"""
111-
function exp end
112-
113-
let
114-
global exp
115-
116-
c11d = 2.08860621107283687536341e-09
117-
c10d = 2.51112930892876518610661e-08
118-
c9d = 2.75573911234900471893338e-07
119-
c8d = 2.75572362911928827629423e-06
120-
c7d = 2.4801587159235472998791e-05
121-
c6d = 0.000198412698960509205564975
122-
c5d = 0.00138888888889774492207962
123-
c4d = 0.00833333333331652721664984
124-
c3d = 0.0416666666666665047591422
125-
c2d = 0.166666666666666851703837
126-
c1d = 0.50
127-
128-
c6f = 0.000198527617612853646278381f0
129-
c5f = 0.00139304355252534151077271f0
130-
c4f = 0.00833336077630519866943359f0
131-
c3f = 0.0416664853692054748535156f0
132-
c2f = 0.166666671633720397949219f0
133-
c1f = 0.5f0
134-
135-
global @inline exp_kernel(x::Float64) = @horner x c1d c2d c3d c4d c5d c6d c7d c8d c9d c10d c11d
136-
global @inline exp_kernel(x::Float32) = @horner x c1f c2f c3f c4f c5f c6f
137-
138134
function exp(d::T) where {T<:Union{Float32,Float64}}
139135
q = unsafe_trunc(Int, round(T(MLN2E) * d))
140136
s = muladd(q, -L2U(T), d)
@@ -145,9 +141,8 @@ function exp(d::T) where {T<:Union{Float32,Float64}}
145141
u = s * s * u + s + 1
146142
u = ldexp2k(u, q)
147143

148-
d < min_exp(T) && (u = T(0))
149144
d > max_exp(T) && (u = T(Inf))
145+
d < min_exp(T) && (u = T(0))
150146

151147
return u
152148
end
153-
end

src/log.jl

Lines changed: 22 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -111,35 +111,34 @@ end
111111
# That being said, since this converges faster when the argument is close to
112112
# 1, we multiply `m` by `2` and subtract 1 for the exponent `e` when `m` is
113113
# less than `sqrt(2)/2`
114+
115+
@inline function log_fast_kernel(x::Float64)
116+
c8d = 0.153487338491425068243146
117+
c7d = 0.152519917006351951593857
118+
c6d = 0.181863266251982985677316
119+
c5d = 0.222221366518767365905163
120+
c4d = 0.285714294746548025383248
121+
c3d = 0.399999999950799600689777
122+
c2d = 0.6666666666667778740063
123+
c1d = 2.0
124+
return @horner x c1d c2d c3d c4d c5d c6d c7d c8d
125+
end
126+
127+
@inline function log_fast_kernel(x::Float32)
128+
c5f = 0.2392828464508056640625f0
129+
c4f = 0.28518211841583251953125f0
130+
c3f = 0.400005877017974853515625f0
131+
c2f = 0.666666686534881591796875f0
132+
c1f = 2f0
133+
return @horner x c1f c2f c3f c4f c5f
134+
end
135+
114136
"""
115137
log_fast(x)
116138
117139
Compute the natural logarithm of `x`. The inverse of the natural logarithm is
118140
the natural exponential function `exp(x)`.
119141
"""
120-
function log_fast end
121-
122-
let
123-
global log_fast
124-
125-
c8d = 0.153487338491425068243146
126-
c7d = 0.152519917006351951593857
127-
c6d = 0.181863266251982985677316
128-
c5d = 0.222221366518767365905163
129-
c4d = 0.285714294746548025383248
130-
c3d = 0.399999999950799600689777
131-
c2d = 0.6666666666667778740063
132-
c1d = 2.0
133-
134-
c5f = 0.2392828464508056640625f0
135-
c4f = 0.28518211841583251953125f0
136-
c3f = 0.400005877017974853515625f0
137-
c2f = 0.666666686534881591796875f0
138-
c1f = 2f0
139-
140-
global @inline log_fast_kernel(x::Float64) = @horner x c1d c2d c3d c4d c5d c6d c7d c8d
141-
global @inline log_fast_kernel(x::Float32) = @horner x c1f c2f c3f c4f c5f
142-
143142
function log_fast(d::T) where {T<:Union{Float32,Float64}}
144143
o = d < realmin(T)
145144
o && (d *= T(Int64(1) << 32) * T(Int64(1) << 32))
@@ -161,4 +160,3 @@ function log_fast(d::T) where {T<:Union{Float32,Float64}}
161160

162161
return x
163162
end
164-
end

src/priv.jl

Lines changed: 65 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -170,32 +170,31 @@ end
170170
end
171171

172172

173-
let
174-
global expk
175-
global expk2
176-
177-
c10d = 2.51069683420950419527139e-08
178-
c9d = 2.76286166770270649116855e-07
179-
c8d = 2.75572496725023574143864e-06
180-
c7d = 2.48014973989819794114153e-05
181-
c6d = 0.000198412698809069797676111
182-
c5d = 0.0013888888939977128960529
183-
c4d = 0.00833333333332371417601081
184-
c3d = 0.0416666666665409524128449
185-
c2d = 0.166666666666666740681535
186-
c1d = 0.500000000000000999200722
187-
188-
c5f = 0.00136324646882712841033936f0
189-
c4f = 0.00836596917361021041870117f0
190-
c3f = 0.0416710823774337768554688f0
191-
c2f = 0.166665524244308471679688f0
192-
c1f = 0.499999850988388061523438f0
193-
194-
global @inline expk_kernel(x::Float64) = @horner x c1d c2d c3d c4d c5d c6d c7d c8d c9d c10d
195-
global @inline expk_kernel(x::Float32) = @horner x c1f c2f c3f c4f c5f
196-
197-
global under_expk(::Type{Float64}) = -1000.0
198-
global under_expk(::Type{Float32}) = -104f0
173+
const under_expk(::Type{Float64}) = -1000.0
174+
const under_expk(::Type{Float32}) = -104f0
175+
176+
@inline function expk_kernel(x::Float64)
177+
c10d = 2.51069683420950419527139e-08
178+
c9d = 2.76286166770270649116855e-07
179+
c8d = 2.75572496725023574143864e-06
180+
c7d = 2.48014973989819794114153e-05
181+
c6d = 0.000198412698809069797676111
182+
c5d = 0.0013888888939977128960529
183+
c4d = 0.00833333333332371417601081
184+
c3d = 0.0416666666665409524128449
185+
c2d = 0.166666666666666740681535
186+
c1d = 0.500000000000000999200722
187+
return @horner x c1d c2d c3d c4d c5d c6d c7d c8d c9d c10d
188+
end
189+
190+
@inline function expk_kernel(x::Float32)
191+
c5f = 0.00136324646882712841033936f0
192+
c4f = 0.00836596917361021041870117f0
193+
c3f = 0.0416710823774337768554688f0
194+
c2f = 0.166665524244308471679688f0
195+
c1f = 0.499999850988388061523438f0
196+
return @horner x c1f c2f c3f c4f c5f
197+
end
199198

200199
@inline function expk(d::Double{T}) where {T<:Union{Float32,Float64}}
201200
q = round(T(d) * T(MLN2E))
@@ -232,28 +231,28 @@ end
232231
t = dadd(T(1.0), t)
233232
return scale(scale(t, T(2.0)), pow2i(T, unsafe_trunc(Int, q - 1)))
234233
end
235-
end
236234

237235

238-
let
239-
global logk2
240-
241-
c8d = 0.13860436390467167910856
242-
c7d = 0.131699838841615374240845
243-
c6d = 0.153914168346271945653214
244-
c5d = 0.181816523941564611721589
245-
c4d = 0.22222224632662035403996
246-
c3d = 0.285714285511134091777308
247-
c2d = 0.400000000000914013309483
248-
c1d = 0.666666666666664853302393
249236

250-
c4f = 0.240320354700088500976562f0
251-
c3f = 0.285112679004669189453125f0
252-
c2f = 0.400007992982864379882812f0
253-
c1f = 0.666666686534881591796875f0
237+
@inline function logk2_kernel(x::Float64)
238+
c8d = 0.13860436390467167910856
239+
c7d = 0.131699838841615374240845
240+
c6d = 0.153914168346271945653214
241+
c5d = 0.181816523941564611721589
242+
c4d = 0.22222224632662035403996
243+
c3d = 0.285714285511134091777308
244+
c2d = 0.400000000000914013309483
245+
c1d = 0.666666666666664853302393
246+
return @horner x c1d c2d c3d c4d c5d c6d c7d c8d
247+
end
254248

255-
global @inline logk2_kernel(x::Float64) = @horner x c1d c2d c3d c4d c5d c6d c7d c8d
256-
global @inline logk2_kernel(x::Float32) = @horner x c1f c2f c3f c4f
249+
@inline function logk2_kernel(x::Float32)
250+
c4f = 0.240320354700088500976562f0
251+
c3f = 0.285112679004669189453125f0
252+
c2f = 0.400007992982864379882812f0
253+
c1f = 0.666666686534881591796875f0
254+
return @horner x c1f c2f c3f c4f
255+
end
257256

258257
@inline function logk2(d::Double{T}) where {T<:Union{Float32,Float64}}
259258
e = ilogbk(d.hi * T(1.0/0.75))
@@ -266,28 +265,30 @@ global @inline logk2_kernel(x::Float32) = @horner x c1f c2f c3f c4f
266265

267266
dadd(dmul(MDLN2(T), T(e)), dadd(scale(x, T(2.0)), dmul(dmul(x2, x), t)))
268267
end
268+
269+
270+
271+
@inline function logk_kernel(x::Double{Float64})
272+
c10d = 0.116255524079935043668677
273+
c9d = 0.103239680901072952701192
274+
c8d = 0.117754809412463995466069
275+
c7d = 0.13332981086846273921509
276+
c6d = 0.153846227114512262845736
277+
c5d = 0.181818180850050775676507
278+
c4d = 0.222222222230083560345903
279+
c3d = 0.285714285714249172087875
280+
c2d = 0.400000000000000077715612
281+
c1dd = Double(0.666666666666666629659233, 3.80554962542412056336616e-17)
282+
dadd2(c1dd, dmul(x, @horner x.hi c2d c3d c4d c5d c6d c7d c8d c9d c10d))
269283
end
270284

271-
let
272-
global logk
273-
c10d = 0.116255524079935043668677
274-
c9d = 0.103239680901072952701192
275-
c8d = 0.117754809412463995466069
276-
c7d = 0.13332981086846273921509
277-
c6d = 0.153846227114512262845736
278-
c5d = 0.181818180850050775676507
279-
c4d = 0.222222222230083560345903
280-
c3d = 0.285714285714249172087875
281-
c2d = 0.400000000000000077715612
282-
c1dd = Double(0.666666666666666629659233, 3.80554962542412056336616e-17);
283-
284-
c4f = 0.240320354700088500976562f0
285-
c3f = 0.285112679004669189453125f0
286-
c2f = 0.400007992982864379882812f0
287-
c1fd = Double(0.66666662693023681640625f0, 3.69183861259614332084311f-9)
288-
289-
global @inline logk_kernel(x::Double{Float64}) = dadd2(c1dd, dmul(x, @horner x.hi c2d c3d c4d c5d c6d c7d c8d c9d c10d))
290-
global @inline logk_kernel(x::Double{Float32}) = dadd2(c1fd, dmul(x, @horner x.hi c2f c3f c4f))
285+
@inline function logk_kernel(x::Double{Float32})
286+
c4f = 0.240320354700088500976562f0
287+
c3f = 0.285112679004669189453125f0
288+
c2f = 0.400007992982864379882812f0
289+
c1fd = Double(0.66666662693023681640625f0, 3.69183861259614332084311f-9)
290+
dadd2(c1fd, dmul(x, @horner x.hi c2f c3f c4f))
291+
end
291292

292293
@inline function logk(d::T) where {T<:Union{Float32,Float64}}
293294
o = d < realmin(T)
@@ -305,6 +306,3 @@ global @inline logk_kernel(x::Double{Float32}) = dadd2(c1fd, dmul(x, @horner x.h
305306

306307
dadd(dmul(MDLN2(T), T(e)), dadd(scale(x, T(2.0)), dmul(dmul(x2, x), t)))
307308
end
308-
309-
310-
end

0 commit comments

Comments
 (0)