Skip to content

Commit 9ed06e3

Browse files
authored
Merge pull request #627 from mrava87/feat-fourierradonnumbaimprov
feat: speedup fourierradon with engine=cuda
2 parents 0f526c1 + 02cfddd commit 9ed06e3

File tree

4 files changed

+43
-12
lines changed

4 files changed

+43
-12
lines changed

pylops/signalprocessing/_fourierradon2d_cuda.py

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,12 @@
1-
from cmath import exp
21
from math import pi
32

3+
import cupy as cp
44
from numba import cuda
55

6+
TWO_PI_MINUS = cp.float32(-2.0 * pi)
7+
TWO_PI_PLUS = cp.float32(2.0 * pi)
8+
IMG = cp.complex64(1j)
9+
610

711
@cuda.jit
812
def _radon_inner_2d_kernel(x, y, f, px, h, flim0, flim1, npx, nh):
@@ -16,7 +20,11 @@ def _radon_inner_2d_kernel(x, y, f, px, h, flim0, flim1, npx, nh):
1620
ih, ifr = cuda.grid(2)
1721
if ih < nh and ifr >= flim0 and ifr <= flim1:
1822
for ipx in range(npx):
19-
y[ih, ifr] += x[ipx, ifr] * exp(-1j * 2 * pi * f[ifr] * px[ipx] * h[ih])
23+
# slow computation of exp(1j * x)
24+
# y[ih, ifr] += x[ipx, ifr] * exp(TWO_PI_MINUS * f[ifr] * px[ipx] * h[ih])
25+
# fast computation of exp(1j * x) - see https://stackoverflow.com/questions/9860711/cucomplex-h-and-exp/9863048#9863048
26+
s, c = cuda.libdevice.sincosf(TWO_PI_MINUS * f[ifr] * px[ipx] * h[ih])
27+
y[ih, ifr] += x[ipx, ifr] * (c + IMG * s)
2028

2129

2230
@cuda.jit
@@ -31,7 +39,11 @@ def _aradon_inner_2d_kernel(x, y, f, px, h, flim0, flim1, npx, nh):
3139
ipx, ifr = cuda.grid(2)
3240
if ipx < npx and ifr >= flim0 and ifr <= flim1:
3341
for ih in range(nh):
34-
x[ipx, ifr] += y[ih, ifr] * exp(1j * 2 * pi * f[ifr] * px[ipx] * h[ih])
42+
# slow computation of exp(1j * x)
43+
# x[ipx, ifr] += y[ih, ifr] * exp(TWO_PI_I_PLUS * f[ifr] * px[ipx] * h[ih])
44+
# fast computation of exp(1j * x) - see https://stackoverflow.com/questions/9860711/cucomplex-h-and-exp/9863048#9863048
45+
s, c = cuda.libdevice.sincosf(TWO_PI_PLUS * f[ifr] * px[ipx] * h[ih])
46+
x[ipx, ifr] += y[ih, ifr] * (c + IMG * s)
3547

3648

3749
def _radon_inner_2d_cuda(

pylops/signalprocessing/_fourierradon3d_cuda.py

Lines changed: 20 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,12 @@
1-
from cmath import exp
21
from math import pi
32

3+
import cupy as cp
44
from numba import cuda
55

6+
TWO_PI_MINUS = cp.float32(-2.0 * pi)
7+
TWO_PI_PLUS = cp.float32(2.0 * pi)
8+
IMG = cp.complex64(1j)
9+
610

711
@cuda.jit
812
def _radon_inner_3d_kernel(x, y, f, py, px, hy, hx, flim0, flim1, npy, npx, nhy, nhx):
@@ -17,9 +21,15 @@ def _radon_inner_3d_kernel(x, y, f, py, px, hy, hx, flim0, flim1, npy, npx, nhy,
1721
if ihy < nhy and ihx < nhx and ifr >= flim0 and ifr <= flim1:
1822
for ipy in range(npy):
1923
for ipx in range(npx):
20-
y[ihy, ihx, ifr] += x[ipy, ipx, ifr] * exp(
21-
-1j * 2 * pi * f[ifr] * (py[ipy] * hy[ihy] + px[ipx] * hx[ihx])
24+
# slow computation of exp(1j * x)
25+
# y[ihy, ihx, ifr] += x[ipy, ipx, ifr] * exp(
26+
# TWO_PI_MINUS * f[ifr] * (py[ipy] * hy[ihy] + px[ipx] * hx[ihx])
27+
# )
28+
# fast computation of exp(1j * x) - see https://stackoverflow.com/questions/9860711/cucomplex-h-and-exp/9863048#9863048
29+
s, c = cuda.libdevice.sincosf(
30+
TWO_PI_MINUS * f[ifr] * (py[ipy] * hy[ihy] + px[ipx] * hx[ihx])
2231
)
32+
y[ihy, ihx, ifr] += x[ipy, ipx, ifr] * (c + IMG * s)
2333

2434

2535
@cuda.jit
@@ -35,9 +45,14 @@ def _aradon_inner_3d_kernel(x, y, f, py, px, hy, hx, flim0, flim1, npy, npx, nhy
3545
if ipy < npy and ipx < npx and ifr >= flim0 and ifr <= flim1:
3646
for ihy in range(nhy):
3747
for ihx in range(nhx):
38-
x[ipy, ipx, ifr] += y[ihy, ihx, ifr] * exp(
39-
1j * 2 * pi * f[ifr] * (py[ipy] * hy[ihy] + px[ipx] * hx[ihx])
48+
# slow computation of exp(1j * x)
49+
# x[ipy, ipx, ifr] += y[ihy, ihx, ifr] * exp(
50+
# TWO_PI_I_PLUS * f[ifr] * (py[ipy] * hy[ihy] + px[ipx] * hx[ihx])
51+
# )
52+
s, c = cuda.libdevice.sincosf(
53+
TWO_PI_PLUS * f[ifr] * (py[ipy] * hy[ihy] + px[ipx] * hx[ihx])
4054
)
55+
x[ipy, ipx, ifr] += y[ihy, ihx, ifr] * (c + IMG * s)
4156

4257

4358
def _radon_inner_3d_cuda(

pylops/signalprocessing/fourierradon2d.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,10 +13,12 @@
1313
from pylops.utils.typing import DTypeLike, NDArray
1414

1515
jit_message = deps.numba_import("the radon2d module")
16+
cupy_message = deps.cupy_import("the radon2d module")
1617

1718
if jit_message is None:
18-
from ._fourierradon2d_cuda import _aradon_inner_2d_cuda, _radon_inner_2d_cuda
1919
from ._fourierradon2d_numba import _aradon_inner_2d, _radon_inner_2d
20+
if jit_message is None and cupy_message is None:
21+
from ._fourierradon2d_cuda import _aradon_inner_2d_cuda, _radon_inner_2d_cuda
2022

2123
logging.basicConfig(format="%(levelname)s: %(message)s", level=logging.WARNING)
2224

@@ -167,7 +169,7 @@ def __init__(
167169
self._register_multiplications(engine)
168170

169171
def _register_multiplications(self, engine: str) -> None:
170-
if engine == "numba" and jit_message is None:
172+
if engine == "numba":
171173
self._matvec = self._matvec_numba
172174
self._rmatvec = self._rmatvec_numba
173175
elif engine == "cuda":

pylops/signalprocessing/fourierradon3d.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,10 +13,12 @@
1313
from pylops.utils.typing import DTypeLike, NDArray
1414

1515
jit_message = deps.numba_import("the radon2d module")
16+
cupy_message = deps.cupy_import("the radon2d module")
1617

1718
if jit_message is None:
18-
from ._fourierradon3d_cuda import _aradon_inner_3d_cuda, _radon_inner_3d_cuda
1919
from ._fourierradon3d_numba import _aradon_inner_3d, _radon_inner_3d
20+
if jit_message is None and cupy_message is None:
21+
from ._fourierradon3d_cuda import _aradon_inner_3d_cuda, _radon_inner_3d_cuda
2022

2123
logging.basicConfig(format="%(levelname)s: %(message)s", level=logging.WARNING)
2224

@@ -192,7 +194,7 @@ def __init__(
192194
self._register_multiplications(engine)
193195

194196
def _register_multiplications(self, engine: str) -> None:
195-
if engine == "numba" and jit_message is None:
197+
if engine == "numba":
196198
self._matvec = self._matvec_numba
197199
self._rmatvec = self._rmatvec_numba
198200
elif engine == "cuda":

0 commit comments

Comments
 (0)