Skip to content

Commit 7535a82

Browse files
authored
Merge pull request #447 from lu1and10/cufinufft-modeord
cufinufft modeord
2 parents 9509b88 + a404a80 commit 7535a82

File tree

11 files changed

+251
-91
lines changed

11 files changed

+251
-91
lines changed

CHANGELOG

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
List of features / changes made / release notes, in reverse chronological order.
22
If not stated, FINUFFT is assumed (cuFINUFFT <=1.3 is listed separately).
33

4+
* cufinufft now supports modeord (types 1 and 2 only): 0 CMCL-style increasing mode
5+
order, 1 FFT-style mode order.
46
* New doc page: migration guide from NFFT3 (2d1 case only).
57
* New foldrescale, removes [-3pi,3pi) restriction on NU points, and slight
68
speedup at large tols. Deprecates both opts.chkbnds and error code

include/cufinufft/cudeconvolve.h

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -5,29 +5,29 @@
55

66
namespace cufinufft {
77
namespace deconvolve {
8-
template <typename T>
8+
template <typename T, int modeord>
99
__global__ void deconvolve_1d(int ms, int nf1, int fw_width, cuda_complex<T> *fw, cuda_complex<T> *fk, T *fwkerhalf1);
10-
template <typename T>
10+
template <typename T, int modeord>
1111
__global__ void amplify_1d(int ms, int nf1, int fw_width, cuda_complex<T> *fw, cuda_complex<T> *fk, T *fwkerhalf2);
12-
template <typename T>
12+
template <typename T, int modeord>
1313
__global__ void deconvolve_2d(int ms, int mt, int nf1, int nf2, int fw_width, cuda_complex<T> *fw, cuda_complex<T> *fk,
1414
T *fwkerhalf1, T *fwkerhalf2);
15-
template <typename T>
15+
template <typename T, int modeord>
1616
__global__ void amplify_2d(int ms, int mt, int nf1, int nf2, int fw_width, cuda_complex<T> *fw, cuda_complex<T> *fk,
1717
T *fwkerhalf1, T *fwkerhalf2);
1818

19-
template <typename T>
19+
template <typename T, int modeord>
2020
__global__ void deconvolve_3d(int ms, int mt, int mu, int nf1, int nf2, int nf3, int fw_width, cuda_complex<T> *fw,
2121
cuda_complex<T> *fk, T *fwkerhalf1, T *fwkerhalf2, T *fwkerhalf3);
22-
template <typename T>
22+
template <typename T, int modeord>
2323
__global__ void amplify_3d(int ms, int mt, int mu, int nf1, int nf2, int nf3, int fw_width, cuda_complex<T> *fw,
2424
cuda_complex<T> *fk, T *fwkerhalf1, T *fwkerhalf2, T *fwkerhalf3);
2525

26-
template <typename T>
26+
template <typename T, int modeord>
2727
int cudeconvolve1d(cufinufft_plan_t<T> *d_mem, int blksize);
28-
template <typename T>
28+
template <typename T, int modeord>
2929
int cudeconvolve2d(cufinufft_plan_t<T> *d_mem, int blksize);
30-
template <typename T>
30+
template <typename T, int modeord>
3131
int cudeconvolve3d(cufinufft_plan_t<T> *d_mem, int blksize);
3232
} // namespace deconvolve
3333
} // namespace cufinufft

include/cufinufft_opts.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,9 @@ typedef struct cufinufft_opts { // see cufinufft_default_opts() for defaults
2626
int gpu_device_id;
2727

2828
void *gpu_stream;
29+
30+
int modeord; // (type 1,2 only): 0 CMCL-style increasing mode order
31+
// 1 FFT-style mode order
2932
} cufinufft_opts;
3033

3134
#endif

python/cufinufft/cufinufft/_cufinufft.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,8 @@ def _get_NufftOpts():
6363
('gpu_spreadinterponly', c_int),
6464
('gpu_maxbatchsize', c_int),
6565
('gpu_device_id', c_int),
66-
('gpu_stream', c_void_p)
66+
('gpu_stream', c_void_p),
67+
('modeord', c_int)
6768
]
6869
return fields
6970

python/cufinufft/cufinufft/_plan.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,9 @@ class Plan:
6565
memory), ``gpu_sort`` (for ``gpu_method == 1``, 0: no
6666
sort, 1: sort), ``gpu_kerevalmeth`` (0: direct
6767
exp(sqrt), 1: Horner evaluation), ``gpu_device_id`` (GPU
68-
ID), and ``gpu_stream`` (CUDA stream pointer).
68+
ID), ``gpu_stream`` (CUDA stream pointer) and
69+
``modeord`` (0: CMCL-compatible mode ordering,
70+
1: FFT-style mode ordering).
6971
"""
7072

7173
def __init__(self, nufft_type, n_modes, n_trans=1, eps=1e-6, isign=None,

python/cufinufft/tests/test_basic.py

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -9,27 +9,29 @@
99
# NOTE: Tests below fail for tolerance 1e-4 (error executing plan).
1010

1111
DTYPES = [np.float32, np.float64]
12-
SHAPES = [(16,), (16, 16), (16, 16, 16)]
12+
SHAPES = [(16,), (16, 16), (16, 16, 16), (19,), (17, 19), (17, 19, 24)]
1313
MS = [256, 1024, 4096]
1414
TOLS = [1e-3, 1e-6]
1515
OUTPUT_ARGS = [False, True]
1616
CONTIGUOUS = [False, True]
17+
MODEORDS = [0, 1]
1718

1819

1920
@pytest.mark.parametrize("dtype", DTYPES)
2021
@pytest.mark.parametrize("shape", SHAPES)
2122
@pytest.mark.parametrize("M", MS)
2223
@pytest.mark.parametrize("tol", TOLS)
2324
@pytest.mark.parametrize("output_arg", OUTPUT_ARGS)
24-
def test_type1(to_gpu, to_cpu, dtype, shape, M, tol, output_arg):
25+
@pytest.mark.parametrize("modeord", MODEORDS)
26+
def test_type1(to_gpu, to_cpu, dtype, shape, M, tol, output_arg, modeord):
2527
complex_dtype = utils._complex_dtype(dtype)
2628

2729
k, c = utils.type1_problem(dtype, shape, M)
2830

2931
k_gpu = to_gpu(k)
3032
c_gpu = to_gpu(c)
3133

32-
plan = Plan(1, shape, eps=tol, dtype=complex_dtype)
34+
plan = Plan(1, shape, eps=tol, dtype=complex_dtype, modeord=modeord)
3335

3436
# Since k_gpu is an array of shape (dim, M), this will expand to
3537
# plan.setpts(k_gpu[0], ..., k_gpu[dim]), allowing us to handle all
@@ -43,6 +45,8 @@ def test_type1(to_gpu, to_cpu, dtype, shape, M, tol, output_arg):
4345
fk_gpu = plan.execute(c_gpu)
4446

4547
fk = to_cpu(fk_gpu)
48+
if modeord == 1:
49+
fk = np.fft.fftshift(fk)
4650

4751
utils.verify_type1(k, c, fk, tol)
4852

@@ -53,12 +57,13 @@ def test_type1(to_gpu, to_cpu, dtype, shape, M, tol, output_arg):
5357
@pytest.mark.parametrize("tol", TOLS)
5458
@pytest.mark.parametrize("output_arg", OUTPUT_ARGS)
5559
@pytest.mark.parametrize("contiguous", CONTIGUOUS)
56-
def test_type2(to_gpu, to_cpu, dtype, shape, M, tol, output_arg, contiguous):
60+
@pytest.mark.parametrize("modeord", MODEORDS)
61+
def test_type2(to_gpu, to_cpu, dtype, shape, M, tol, output_arg, contiguous, modeord):
5762
complex_dtype = utils._complex_dtype(dtype)
5863

5964
k, fk = utils.type2_problem(dtype, shape, M)
6065

61-
plan = Plan(2, shape, eps=tol, dtype=complex_dtype)
66+
plan = Plan(2, shape, eps=tol, dtype=complex_dtype, modeord=modeord)
6267

6368
check_result = True
6469

@@ -81,7 +86,12 @@ def _execute(*args, **kwargs):
8186
return plan.execute(*args, **kwargs)
8287

8388
k_gpu = to_gpu(k)
84-
fk_gpu = to_gpu(fk)
89+
90+
if modeord == 1:
91+
_fk = np.fft.ifftshift(fk)
92+
else:
93+
_fk = fk
94+
fk_gpu = to_gpu(_fk)
8595

8696
plan.setpts(*k_gpu)
8797

src/cuda/1d/cufinufft1d.cu

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -59,8 +59,13 @@ int cufinufft1d1_exec(cuda_complex<T> *d_c, cuda_complex<T> *d_fk, cufinufft_pla
5959
return FINUFFT_ERR_CUDA_FAILURE;
6060

6161
// Step 3: deconvolve and shuffle
62-
if ((ier = cudeconvolve1d<T>(d_plan, blksize)))
63-
return ier;
62+
if (d_plan->opts.modeord == 0) {
63+
if ((ier = cudeconvolve1d<T, 0>(d_plan, blksize)))
64+
return ier;
65+
} else {
66+
if ((ier = cudeconvolve1d<T, 1>(d_plan, blksize)))
67+
return ier;
68+
}
6469
}
6570

6671
return 0;
@@ -95,8 +100,13 @@ int cufinufft1d2_exec(cuda_complex<T> *d_c, cuda_complex<T> *d_fk, cufinufft_pla
95100
d_plan->fk = d_fkstart;
96101

97102
// Step 1: amplify Fourier coeffs fk and copy into upsampled array fw
98-
if ((ier = cudeconvolve1d<T>(d_plan, blksize)))
99-
return ier;
103+
if (d_plan->opts.modeord == 0) {
104+
if ((ier = cudeconvolve1d<T, 0>(d_plan, blksize)))
105+
return ier;
106+
} else {
107+
if ((ier = cudeconvolve1d<T, 1>(d_plan, blksize)))
108+
return ier;
109+
}
100110

101111
// Step 2: FFT
102112
cufftResult cufft_status = cufft_ex(d_plan->fftplan, d_plan->fw, d_plan->fw, d_plan->iflag);

src/cuda/2d/cufinufft2d.cu

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -59,8 +59,13 @@ int cufinufft2d1_exec(cuda_complex<T> *d_c, cuda_complex<T> *d_fk, cufinufft_pla
5959
return FINUFFT_ERR_CUDA_FAILURE;
6060

6161
// Step 3: deconvolve and shuffle
62-
if ((ier = cudeconvolve2d<T>(d_plan, blksize)))
63-
return ier;
62+
if (d_plan->opts.modeord == 0) {
63+
if ((ier = cudeconvolve2d<T, 0>(d_plan, blksize)))
64+
return ier;
65+
} else {
66+
if ((ier = cudeconvolve2d<T, 1>(d_plan, blksize)))
67+
return ier;
68+
}
6469
}
6570

6671
return 0;
@@ -95,8 +100,13 @@ int cufinufft2d2_exec(cuda_complex<T> *d_c, cuda_complex<T> *d_fk, cufinufft_pla
95100
d_plan->fk = d_fkstart;
96101

97102
// Step 1: amplify Fourier coeffs fk and copy into upsampled array fw
98-
if ((ier = cudeconvolve2d<T>(d_plan, blksize)))
99-
return ier;
103+
if (d_plan->opts.modeord == 0) {
104+
if ((ier = cudeconvolve2d<T, 0>(d_plan, blksize)))
105+
return ier;
106+
} else {
107+
if ((ier = cudeconvolve2d<T, 1>(d_plan, blksize)))
108+
return ier;
109+
}
100110

101111
// Step 2: FFT
102112
cufftResult cufft_status = cufft_ex(d_plan->fftplan, d_plan->fw, d_plan->fw, d_plan->iflag);

src/cuda/3d/cufinufft3d.cu

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -57,8 +57,13 @@ int cufinufft3d1_exec(cuda_complex<T> *d_c, cuda_complex<T> *d_fk, cufinufft_pla
5757
return FINUFFT_ERR_CUDA_FAILURE;
5858

5959
// Step 3: deconvolve and shuffle
60-
if ((ier = cudeconvolve3d<T>(d_plan, blksize)))
61-
return ier;
60+
if (d_plan->opts.modeord == 0) {
61+
if ((ier = cudeconvolve3d<T, 0>(d_plan, blksize)))
62+
return ier;
63+
} else {
64+
if ((ier = cudeconvolve3d<T, 1>(d_plan, blksize)))
65+
return ier;
66+
}
6267
}
6368

6469
return 0;
@@ -91,8 +96,13 @@ int cufinufft3d2_exec(cuda_complex<T> *d_c, cuda_complex<T> *d_fk, cufinufft_pla
9196
d_plan->fk = d_fkstart;
9297

9398
// Step 1: amplify Fourier coeffs fk and copy into upsampled array fw
94-
if ((ier = cudeconvolve3d<T>(d_plan, blksize)))
95-
return ier;
99+
if (d_plan->opts.modeord == 0) {
100+
if ((ier = cudeconvolve3d<T, 0>(d_plan, blksize)))
101+
return ier;
102+
} else {
103+
if ((ier = cudeconvolve3d<T, 1>(d_plan, blksize)))
104+
return ier;
105+
}
96106

97107
// Step 2: FFT
98108
RETURN_IF_CUDA_ERROR

src/cuda/cufinufft.cu

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,5 +121,7 @@ void cufinufft_default_opts(cufinufft_opts *opts)
121121

122122
// By default, only use device 0
123123
opts->gpu_device_id = 0;
124+
125+
opts->modeord = 0;
124126
}
125127
}

0 commit comments

Comments
 (0)