Skip to content

Commit 94abe4c

Browse files
committed
feat: speedup fourierradon with engine=cuda
1 parent 893ab69 commit 94abe4c

File tree

4 files changed

+43
-10
lines changed

4 files changed

+43
-10
lines changed

pylops/signalprocessing/_fourierradon2d_cuda.py

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,13 @@
11
from cmath import exp
22
from math import pi
33

4+
import cupy as cp
45
from numba import cuda
56

7+
TWO_PI_MINUS = cp.float32(-2.0 * pi)
8+
TWO_PI_PLUS = cp.float32(2.0 * pi)
9+
I = cp.complex64(1j)
10+
611

712
@cuda.jit
813
def _radon_inner_2d_kernel(x, y, f, px, h, flim0, flim1, npx, nh):
@@ -16,7 +21,11 @@ def _radon_inner_2d_kernel(x, y, f, px, h, flim0, flim1, npx, nh):
1621
ih, ifr = cuda.grid(2)
1722
if ih < nh and ifr >= flim0 and ifr <= flim1:
1823
for ipx in range(npx):
19-
y[ih, ifr] += x[ipx, ifr] * exp(-1j * 2 * pi * f[ifr] * px[ipx] * h[ih])
24+
# slow computation of exp(1j * x)
25+
# y[ih, ifr] += x[ipx, ifr] * exp(TWO_PI_MINUS * f[ifr] * px[ipx] * h[ih])
26+
# fast computation of exp(1j * x) - see https://stackoverflow.com/questions/9860711/cucomplex-h-and-exp/9863048#9863048
27+
s, c = cuda.libdevice.sincosf(TWO_PI_MINUS * f[ifr] * px[ipx] * h[ih])
28+
y[ih, ifr] += x[ipx, ifr] * (c + I * s)
2029

2130

2231
@cuda.jit
@@ -31,7 +40,11 @@ def _aradon_inner_2d_kernel(x, y, f, px, h, flim0, flim1, npx, nh):
3140
ipx, ifr = cuda.grid(2)
3241
if ipx < npx and ifr >= flim0 and ifr <= flim1:
3342
for ih in range(nh):
34-
x[ipx, ifr] += y[ih, ifr] * exp(1j * 2 * pi * f[ifr] * px[ipx] * h[ih])
43+
# slow computation of exp(1j * x)
44+
# x[ipx, ifr] += y[ih, ifr] * exp(TWO_PI_I_PLUS * f[ifr] * px[ipx] * h[ih])
45+
# fast computation of exp(1j * x) - see https://stackoverflow.com/questions/9860711/cucomplex-h-and-exp/9863048#9863048
46+
s, c = cuda.libdevice.sincosf(TWO_PI_PLUS * f[ifr] * px[ipx] * h[ih])
47+
x[ipx, ifr] += y[ih, ifr] * (c + I * s)
3548

3649

3750
def _radon_inner_2d_cuda(

pylops/signalprocessing/_fourierradon3d_cuda.py

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,13 @@
11
from cmath import exp
22
from math import pi
33

4+
import cupy as cp
45
from numba import cuda
56

7+
TWO_PI_MINUS = cp.float32(-2.0 * pi)
8+
TWO_PI_PLUS = cp.float32(2.0 * pi)
9+
I = cp.complex64(1j)
10+
611

712
@cuda.jit
813
def _radon_inner_3d_kernel(x, y, f, py, px, hy, hx, flim0, flim1, npy, npx, nhy, nhx):
@@ -17,9 +22,15 @@ def _radon_inner_3d_kernel(x, y, f, py, px, hy, hx, flim0, flim1, npy, npx, nhy,
1722
if ihy < nhy and ihx < nhx and ifr >= flim0 and ifr <= flim1:
1823
for ipy in range(npy):
1924
for ipx in range(npx):
20-
y[ihy, ihx, ifr] += x[ipy, ipx, ifr] * exp(
21-
-1j * 2 * pi * f[ifr] * (py[ipy] * hy[ihy] + px[ipx] * hx[ihx])
25+
# slow computation of exp(1j * x)
26+
# y[ihy, ihx, ifr] += x[ipy, ipx, ifr] * exp(
27+
# TWO_PI_MINUS * f[ifr] * (py[ipy] * hy[ihy] + px[ipx] * hx[ihx])
28+
# )
29+
# fast computation of exp(1j * x) - see https://stackoverflow.com/questions/9860711/cucomplex-h-and-exp/9863048#9863048
30+
s, c = cuda.libdevice.sincosf(
31+
TWO_PI_MINUS * f[ifr] * (py[ipy] * hy[ihy] + px[ipx] * hx[ihx])
2232
)
33+
y[ihy, ihx, ifr] += x[ipy, ipx, ifr] * (c + I * s)
2334

2435

2536
@cuda.jit
@@ -35,9 +46,14 @@ def _aradon_inner_3d_kernel(x, y, f, py, px, hy, hx, flim0, flim1, npy, npx, nhy
3546
if ipy < npy and ipx < npx and ifr >= flim0 and ifr <= flim1:
3647
for ihy in range(nhy):
3748
for ihx in range(nhx):
38-
x[ipy, ipx, ifr] += y[ihy, ihx, ifr] * exp(
39-
1j * 2 * pi * f[ifr] * (py[ipy] * hy[ihy] + px[ipx] * hx[ihx])
49+
# slow computation of exp(1j * x)
50+
# x[ipy, ipx, ifr] += y[ihy, ihx, ifr] * exp(
51+
# TWO_PI_I_PLUS * f[ifr] * (py[ipy] * hy[ihy] + px[ipx] * hx[ihx])
52+
# )
53+
s, c = cuda.libdevice.sincosf(
54+
TWO_PI_PLUS * f[ifr] * (py[ipy] * hy[ihy] + px[ipx] * hx[ihx])
4055
)
56+
x[ipy, ipx, ifr] += y[ihy, ihx, ifr] * (c + I * s)
4157

4258

4359
def _radon_inner_3d_cuda(

pylops/signalprocessing/fourierradon2d.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,10 +13,12 @@
1313
from pylops.utils.typing import DTypeLike, NDArray
1414

1515
jit_message = deps.numba_import("the radon2d module")
16+
cupy_message = deps.cupy_import("the radon2d module")
1617

1718
if jit_message is None:
18-
from ._fourierradon2d_cuda import _aradon_inner_2d_cuda, _radon_inner_2d_cuda
1919
from ._fourierradon2d_numba import _aradon_inner_2d, _radon_inner_2d
20+
if jit_message is None and cupy_message is None:
21+
from ._fourierradon2d_cuda import _aradon_inner_2d_cuda, _radon_inner_2d_cuda
2022

2123
logging.basicConfig(format="%(levelname)s: %(message)s", level=logging.WARNING)
2224

@@ -167,7 +169,7 @@ def __init__(
167169
self._register_multiplications(engine)
168170

169171
def _register_multiplications(self, engine: str) -> None:
170-
if engine == "numba" and jit_message is None:
172+
if engine == "numba":
171173
self._matvec = self._matvec_numba
172174
self._rmatvec = self._rmatvec_numba
173175
elif engine == "cuda":

pylops/signalprocessing/fourierradon3d.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,10 +13,12 @@
1313
from pylops.utils.typing import DTypeLike, NDArray
1414

1515
jit_message = deps.numba_import("the radon2d module")
16+
cupy_message = deps.cupy_import("the radon2d module")
1617

1718
if jit_message is None:
18-
from ._fourierradon3d_cuda import _aradon_inner_3d_cuda, _radon_inner_3d_cuda
1919
from ._fourierradon3d_numba import _aradon_inner_3d, _radon_inner_3d
20+
if jit_message is None and cupy_message is None:
21+
from ._fourierradon3d_cuda import _aradon_inner_3d_cuda, _radon_inner_3d_cuda
2022

2123
logging.basicConfig(format="%(levelname)s: %(message)s", level=logging.WARNING)
2224

@@ -192,7 +194,7 @@ def __init__(
192194
self._register_multiplications(engine)
193195

194196
def _register_multiplications(self, engine: str) -> None:
195-
if engine == "numba" and jit_message is None:
197+
if engine == "numba":
196198
self._matvec = self._matvec_numba
197199
self._rmatvec = self._rmatvec_numba
198200
elif engine == "cuda":

0 commit comments

Comments
 (0)