Skip to content

Commit 0c4d5e7

Browse files
tgymnichchristiangnrdmaleadt
authored
Add expm1 (#395)
Co-authored-by: Christian Guinard <[email protected]> Co-authored-by: Tim Besard <[email protected]>
1 parent dd1d87f commit 0c4d5e7

File tree

2 files changed

+82
-0
lines changed

2 files changed

+82
-0
lines changed

src/device/intrinsics/math.jl

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -384,3 +384,80 @@ else
384384
ifelse(abs(b.divisor) == 1, a*b.divisor, (signbit(x) + (x >> b.shift)) % Int64)
385385
end
386386
end
387+
388+
# From: https://forums.developer.nvidia.com/t/a-faster-and-more-accurate-implementation-of-expm1f/48085/2
389+
# Original license copied below:
390+
# Copyright (c) 2015-2023 Norbert Juffa
391+
# All rights reserved.
392+
#
393+
# Redistribution and use in source and binary forms, with or without
394+
# modification, are permitted provided that the following conditions
395+
# are met:
396+
#
397+
# 1. Redistributions of source code must retain the above copyright
398+
# notice, this list of conditions and the following disclaimer.
399+
#
400+
# 2. Redistributions in binary form must reproduce the above copyright
401+
# notice, this list of conditions and the following disclaimer in the
402+
# documentation and/or other materials provided with the distribution.
403+
#
404+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
405+
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
406+
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
407+
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
408+
# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
409+
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
410+
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
411+
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
412+
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
413+
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
414+
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
415+
416+
@device_override function Base.expm1(a::Float32)
417+
# exp(a) = 2**i * exp(f); i = rintf (a / log(2))
418+
j = fma(1.442695f0, a, 12582912.0f0)
419+
j = j - 12582912.0f0
420+
i = unsafe_trunc(Int32, j)
421+
f = fma(j, -6.93145752f-1, a) # log_2_hi
422+
f = fma(j, -1.42860677f-6, f) # log_2_lo
423+
424+
# approximate r = exp(f)-1 on interval [-log(2)/2, +log(2)/2]
425+
s = f * f
426+
if a == 0.0f0
427+
s = a # ensure -0 is passed through
428+
end
429+
430+
# err = 0.997458 ulp1 = 11094007
431+
r = fma(1.98423862f-4, f, 1.39347673f-3)
432+
t = fma(8.33342969f-3, f, 4.16667424f-2)
433+
r = fma(r, s, t)
434+
r = fma(r, f, 1.66666701f-1)
435+
r = fma(r, f, 4.99999970f-1)
436+
# if i == 0, expm1(a) = z
437+
# if i == 1, expm1(a) = 2*(r*(f*f)+f+0.5)
438+
# if (i < 0) || (i > 1) expm1(a) = 2*((r*(f*f)+f)*t-0.5+t)
439+
u = (j == 1) ? (f + 0.5f0) : f
440+
v = fma(r, s, u)
441+
s = 0.5f0
442+
t = ldexp(s, i)
443+
y = t - s
444+
x = (t - y) - s # double-float canonicalization of difference
445+
r = fma(v, t, x) + y
446+
r = r + r
447+
448+
if j == 0
449+
r = v
450+
end
451+
452+
if j == 1
453+
r = v + v
454+
end
455+
456+
# handle severe overflow and underflow
457+
if abs(a - 1.0f0) > 88.0f0
458+
r = 2^a
459+
r = fma(r, r, -1.0f0)
460+
end
461+
462+
return r
463+
end

test/device/intrinsics.jl

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -162,6 +162,11 @@ end
162162
bufferF = MtlArray(f)
163163
vecF = Array(SpecialFunctions.erfinv.(bufferF))
164164
@test vecF SpecialFunctions.erfinv.(f)
165+
166+
f = collect(LinRange(nextfloat(-88f0), 88f0, 100))
167+
bufferF = MtlArray(f)
168+
vecF = Array(expm1.(bufferF))
169+
@test vecF expm1.(f)
165170
end
166171

167172
############################################################################################

0 commit comments

Comments
 (0)