Commit 5188ada

cufinufft with modeord: 0 CMCL-style, 1 FFT-style
1 parent cc8629f

8 files changed (+82, -40 lines)

CHANGELOG

Lines changed: 2 additions & 0 deletions

@@ -1,6 +1,8 @@
 List of features / changes made / release notes, in reverse chronological order.
 If not stated, FINUFFT is assumed (cuFINUFFT <=1.3 is listed separately).
 
+* cufinufft now supports modeord(type 1,2 only): 0 CMCL-style increasing mode
+  order, 1 FFT-style mode order.
 * CPU plan stage prevents now caps # threads at omp_get_max_threads (being 1
   for single-thread build); warns if this cap was activated (PR 431)
 * new docs troubleshooting accuracy limitations due to condition number of the

include/cufinufft/cudeconvolve.h

Lines changed: 6 additions & 6 deletions

@@ -6,22 +6,22 @@
 namespace cufinufft {
 namespace deconvolve {
 template <typename T>
-__global__ void deconvolve_1d(int ms, int nf1, int fw_width, cuda_complex<T> *fw, cuda_complex<T> *fk, T *fwkerhalf1);
+__global__ void deconvolve_1d(int ms, int nf1, int fw_width, cuda_complex<T> *fw, cuda_complex<T> *fk, T *fwkerhalf1, int modeord);
 template <typename T>
-__global__ void amplify_1d(int ms, int nf1, int fw_width, cuda_complex<T> *fw, cuda_complex<T> *fk, T *fwkerhalf2);
+__global__ void amplify_1d(int ms, int nf1, int fw_width, cuda_complex<T> *fw, cuda_complex<T> *fk, T *fwkerhalf2, int modeord);
 template <typename T>
 __global__ void deconvolve_2d(int ms, int mt, int nf1, int nf2, int fw_width, cuda_complex<T> *fw, cuda_complex<T> *fk,
-                              T *fwkerhalf1, T *fwkerhalf2);
+                              T *fwkerhalf1, T *fwkerhalf2, int modeord);
 template <typename T>
 __global__ void amplify_2d(int ms, int mt, int nf1, int nf2, int fw_width, cuda_complex<T> *fw, cuda_complex<T> *fk,
-                           T *fwkerhalf1, T *fwkerhalf2);
+                           T *fwkerhalf1, T *fwkerhalf2, int modeord);
 
 template <typename T>
 __global__ void deconvolve_3d(int ms, int mt, int mu, int nf1, int nf2, int nf3, int fw_width, cuda_complex<T> *fw,
-                              cuda_complex<T> *fk, T *fwkerhalf1, T *fwkerhalf2, T *fwkerhalf3);
+                              cuda_complex<T> *fk, T *fwkerhalf1, T *fwkerhalf2, T *fwkerhalf3, int modeord);
 template <typename T>
 __global__ void amplify_3d(int ms, int mt, int mu, int nf1, int nf2, int nf3, int fw_width, cuda_complex<T> *fw,
-                           cuda_complex<T> *fk, T *fwkerhalf1, T *fwkerhalf2, T *fwkerhalf3);
+                           cuda_complex<T> *fk, T *fwkerhalf1, T *fwkerhalf2, T *fwkerhalf3, int modeord);
 
 template <typename T>
 int cudeconvolve1d(cufinufft_plan_t<T> *d_mem, int blksize);

include/cufinufft_opts.h

Lines changed: 3 additions & 0 deletions

@@ -26,6 +26,9 @@ typedef struct cufinufft_opts { // see cufinufft_default_opts() for defaults
     int gpu_device_id;
 
     void *gpu_stream;
+
+    int modeord; // (type 1,2 only): 0 CMCL-style increasing mode order
+                 //                  1 FFT-style mode order
 } cufinufft_opts;
 
 #endif

python/cufinufft/cufinufft/_cufinufft.py

Lines changed: 2 additions & 1 deletion

@@ -63,7 +63,8 @@ def _get_NufftOpts():
         ('gpu_spreadinterponly', c_int),
         ('gpu_maxbatchsize', c_int),
         ('gpu_device_id', c_int),
-        ('gpu_stream', c_void_p)
+        ('gpu_stream', c_void_p),
+        ('modeord', c_int)
     ]
     return fields
 

python/cufinufft/cufinufft/_plan.py

Lines changed: 3 additions & 1 deletion

@@ -65,7 +65,9 @@ class Plan:
             memory), ``gpu_sort`` (for ``gpu_method == 1``, 0: no
             sort, 1: sort), ``gpu_kerevalmeth`` (0: direct
             exp(sqrt), Horner evaluation), ``gpu_device_id`` (GPU
-            ID), and ``gpu_stream`` (CUDA stream pointer).
+            ID), ``gpu_stream`` (CUDA stream pointer) and
+            ``modeord`` (0: CMCL-compatible mode ordering,
+            1: FFT-style mode ordering).
     """
 
     def __init__(self, nufft_type, n_modes, n_trans=1, eps=1e-6, isign=None,
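As a quick user-level illustration of the new option (an editor-added sketch, not part of this commit; it assumes a CuPy-backed GPU array install of the cufinufft package and uses made-up problem sizes), a type-1 plan created with modeord=1 returns FFT-ordered coefficients, and an fftshift of that output should agree with the default CMCL-ordered result:

    import numpy as np
    import cupy as cp                      # assumed GPU array backend
    from cufinufft import Plan

    # Small 1D type-1 problem: M nonuniform points, N requested modes (illustrative values).
    M, N = 1000, 16
    k = cp.random.uniform(-np.pi, np.pi, size=M)        # float64 points in [-pi, pi)
    c = cp.random.standard_normal(M) + 1j * cp.random.standard_normal(M)

    plan_cmcl = Plan(1, (N,), eps=1e-8, dtype=np.complex128)             # default: modeord=0
    plan_fft = Plan(1, (N,), eps=1e-8, dtype=np.complex128, modeord=1)   # FFT-style ordering
    plan_cmcl.setpts(k)
    plan_fft.setpts(k)

    fk_cmcl = plan_cmcl.execute(c)   # modes ordered -N/2 ... N/2-1
    fk_fft = plan_fft.execute(c)     # modes ordered 0 ... N/2-1, then -N/2 ... -1

    # fftshift maps the FFT ordering back onto the centered (CMCL) ordering.
    assert cp.allclose(cp.fft.fftshift(fk_fft), fk_cmcl)

The new test_type1_modeord below performs the equivalent check against the CPU reference by applying np.fft.fftshift to the plan output before verification.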

python/cufinufft/tests/test_basic.py

Lines changed: 33 additions & 1 deletion

@@ -9,7 +9,7 @@
 # NOTE: Tests below fail for tolerance 1e-4 (error executing plan).
 
 DTYPES = [np.float32, np.float64]
-SHAPES = [(16,), (16, 16), (16, 16, 16)]
+SHAPES = [(16,), (16, 16), (16, 16, 16), (19,), (17, 19), (17, 19, 24)]
 MS = [256, 1024, 4096]
 TOLS = [1e-3, 1e-6]
 OUTPUT_ARGS = [False, True]
@@ -47,6 +47,38 @@ def test_type1(to_gpu, to_cpu, dtype, shape, M, tol, output_arg):
     utils.verify_type1(k, c, fk, tol)
 
 
+@pytest.mark.parametrize("dtype", DTYPES)
+@pytest.mark.parametrize("shape", SHAPES)
+@pytest.mark.parametrize("M", MS)
+@pytest.mark.parametrize("tol", TOLS)
+@pytest.mark.parametrize("output_arg", OUTPUT_ARGS)
+def test_type1_modeord(to_gpu, to_cpu, dtype, shape, M, tol, output_arg):
+    complex_dtype = utils._complex_dtype(dtype)
+
+    k, c = utils.type1_problem(dtype, shape, M)
+
+    k_gpu = to_gpu(k)
+    c_gpu = to_gpu(c)
+
+    plan = Plan(1, shape, eps=tol, dtype=complex_dtype, modeord=1)
+
+    # Since k_gpu is an array of shape (dim, M), this will expand to
+    # plan.setpts(k_gpu[0], ..., k_gpu[dim]), allowing us to handle all
+    # dimensions with the same call.
+    plan.setpts(*k_gpu)
+
+    if output_arg:
+        fk_gpu = _compat.array_empty_like(c_gpu, shape, dtype=complex_dtype)
+        plan.execute(c_gpu, out=fk_gpu)
+    else:
+        fk_gpu = plan.execute(c_gpu)
+
+    fk = to_cpu(fk_gpu)
+    fk = np.fft.fftshift(fk)
+
+    utils.verify_type1(k, c, fk, tol)
+
+
 @pytest.mark.parametrize("dtype", DTYPES)
 @pytest.mark.parametrize("shape", SHAPES)
 @pytest.mark.parametrize("M", MS)

src/cuda/cufinufft.cu

Lines changed: 2 additions & 0 deletions

@@ -121,5 +121,7 @@ void cufinufft_default_opts(cufinufft_opts *opts)
 
     // By default, only use device 0
    opts->gpu_device_id = 0;
+
+    opts->modeord = 0;
 }
 }

src/cuda/deconvolve_wrapper.cu

Lines changed: 31 additions & 31 deletions

@@ -11,97 +11,97 @@ namespace cufinufft {
 namespace deconvolve {
 /* Kernel for copying fw to fk with amplication by prefac/ker */
 // Note: assume modeord=0: CMCL-compatible mode ordering in fk (from -N/2 up
-// to N/2-1)
+// to N/2-1), modeord=1: FFT-compatible mode ordering in fk (from 0 to N/2-1, then -N/2 up to -1).
 template <typename T>
-__global__ void deconvolve_1d(int ms, int nf1, cuda_complex<T> *fw, cuda_complex<T> *fk, T *fwkerhalf1) {
+__global__ void deconvolve_1d(int ms, int nf1, cuda_complex<T> *fw, cuda_complex<T> *fk, T *fwkerhalf1, int modeord) {
     for (int i = blockDim.x * blockIdx.x + threadIdx.x; i < ms; i += blockDim.x * gridDim.x) {
-        int w1 = i - ms / 2 >= 0 ? i - ms / 2 : nf1 + i - ms / 2;
+        int w1 = ( modeord == 0 ) ? ( (i - ms / 2 >= 0) ? i - ms / 2 : nf1 + i - ms / 2 ) : ( (i - ms + ms / 2 >= 0) ? nf1 + i - ms : i );
 
-        T kervalue = fwkerhalf1[abs(i - ms / 2)];
+        T kervalue = fwkerhalf1[(modeord==0) ? abs(i - ms / 2) : ((i - ms + ms / 2 >= 0) ? ms - i : i)];
         fk[i].x = fw[w1].x / kervalue;
         fk[i].y = fw[w1].y / kervalue;
     }
 }
 
 template <typename T>
 __global__ void deconvolve_2d(int ms, int mt, int nf1, int nf2, cuda_complex<T> *fw, cuda_complex<T> *fk, T *fwkerhalf1,
-                              T *fwkerhalf2) {
+                              T *fwkerhalf2, int modeord) {
     for (int i = blockDim.x * blockIdx.x + threadIdx.x; i < ms * mt; i += blockDim.x * gridDim.x) {
         int k1 = i % ms;
         int k2 = i / ms;
         int outidx = k1 + k2 * ms;
-        int w1 = k1 - ms / 2 >= 0 ? k1 - ms / 2 : nf1 + k1 - ms / 2;
-        int w2 = k2 - mt / 2 >= 0 ? k2 - mt / 2 : nf2 + k2 - mt / 2;
+        int w1 = ( modeord == 0 ) ? ( (k1 - ms / 2 >= 0) ? k1 - ms / 2 : nf1 + k1 - ms / 2 ) : ( (k1 - ms + ms / 2 >= 0) ? nf1 + k1 - ms : k1 );
+        int w2 = ( modeord == 0 ) ? ( (k2 - mt / 2 >= 0) ? k2 - mt / 2 : nf2 + k2 - mt / 2 ) : ( (k2 - mt + mt / 2 >= 0) ? nf2 + k2 - mt : k2 );
         int inidx = w1 + w2 * nf1;
 
-        T kervalue = fwkerhalf1[abs(k1 - ms / 2)] * fwkerhalf2[abs(k2 - mt / 2)];
+        T kervalue = fwkerhalf1[(modeord==0) ? abs(k1 - ms / 2) : ((k1 - ms + ms / 2 >= 0) ? ms - k1 : k1)] * fwkerhalf2[(modeord==0) ? abs(k2 - mt / 2) : ((k2 - mt + mt / 2 >= 0) ? mt - k2 : k2)];
         fk[outidx].x = fw[inidx].x / kervalue;
         fk[outidx].y = fw[inidx].y / kervalue;
     }
 }
 
 template <typename T>
 __global__ void deconvolve_3d(int ms, int mt, int mu, int nf1, int nf2, int nf3, cuda_complex<T> *fw,
-                              cuda_complex<T> *fk, T *fwkerhalf1, T *fwkerhalf2, T *fwkerhalf3) {
+                              cuda_complex<T> *fk, T *fwkerhalf1, T *fwkerhalf2, T *fwkerhalf3, int modeord) {
     for (int i = blockDim.x * blockIdx.x + threadIdx.x; i < ms * mt * mu; i += blockDim.x * gridDim.x) {
         int k1 = i % ms;
         int k2 = (i / ms) % mt;
         int k3 = (i / ms / mt);
         int outidx = k1 + k2 * ms + k3 * ms * mt;
-        int w1 = k1 - ms / 2 >= 0 ? k1 - ms / 2 : nf1 + k1 - ms / 2;
-        int w2 = k2 - mt / 2 >= 0 ? k2 - mt / 2 : nf2 + k2 - mt / 2;
-        int w3 = k3 - mu / 2 >= 0 ? k3 - mu / 2 : nf3 + k3 - mu / 2;
+        int w1 = ( modeord == 0 ) ? ( (k1 - ms / 2 >= 0) ? k1 - ms / 2 : nf1 + k1 - ms / 2 ) : ( (k1 - ms + ms / 2 >= 0) ? nf1 + k1 - ms : k1 );
+        int w2 = ( modeord == 0 ) ? ( (k2 - mt / 2 >= 0) ? k2 - mt / 2 : nf2 + k2 - mt / 2 ) : ( (k2 - mt + mt / 2 >= 0) ? nf2 + k2 - mt : k2 );
+        int w3 = ( modeord == 0 ) ? ( (k3 - mu / 2 >= 0) ? k3 - mu / 2 : nf3 + k3 - mu / 2 ) : ( (k3 - mu + mu / 2 >= 0) ? nf3 + k3 - mu : k3 );
         int inidx = w1 + w2 * nf1 + w3 * nf1 * nf2;
 
-        T kervalue = fwkerhalf1[abs(k1 - ms / 2)] * fwkerhalf2[abs(k2 - mt / 2)] * fwkerhalf3[abs(k3 - mu / 2)];
+        T kervalue = fwkerhalf1[(modeord==0) ? abs(k1 - ms / 2) : ((k1 - ms + ms / 2 >= 0) ? ms - k1 : k1)] * fwkerhalf2[(modeord==0) ? abs(k2 - mt / 2) : ((k2 - mt + mt / 2 >= 0) ? mt - k2 : k2)] * fwkerhalf3[(modeord==0) ? abs(k3 - mu / 2) : ((k3 - mu + mu / 2 >= 0) ? mu - k3 : k3)];
         fk[outidx].x = fw[inidx].x / kervalue;
         fk[outidx].y = fw[inidx].y / kervalue;
     }
 }
 
 /* Kernel for copying fk to fw with same amplication */
 template <typename T>
-__global__ void amplify_1d(int ms, int nf1, cuda_complex<T> *fw, cuda_complex<T> *fk, T *fwkerhalf1) {
+__global__ void amplify_1d(int ms, int nf1, cuda_complex<T> *fw, cuda_complex<T> *fk, T *fwkerhalf1, int modeord) {
     for (int i = blockDim.x * blockIdx.x + threadIdx.x; i < ms; i += blockDim.x * gridDim.x) {
-        int w1 = i - ms / 2 >= 0 ? i - ms / 2 : nf1 + i - ms / 2;
+        int w1 = ( modeord == 0 ) ? ( (i - ms / 2 >= 0) ? i - ms / 2 : nf1 + i - ms / 2 ) : ( (i - ms + ms / 2 >= 0) ? nf1 + i - ms : i );
 
-        T kervalue = fwkerhalf1[abs(i - ms / 2)];
+        T kervalue = fwkerhalf1[(modeord==0) ? abs(i - ms / 2) : ((i - ms + ms / 2 >= 0) ? ms - i : i)];
         fw[w1].x = fk[i].x / kervalue;
         fw[w1].y = fk[i].y / kervalue;
     }
 }
 
 template <typename T>
 __global__ void amplify_2d(int ms, int mt, int nf1, int nf2, cuda_complex<T> *fw, cuda_complex<T> *fk, T *fwkerhalf1,
-                           T *fwkerhalf2) {
+                           T *fwkerhalf2, int modeord) {
     for (int i = blockDim.x * blockIdx.x + threadIdx.x; i < ms * mt; i += blockDim.x * gridDim.x) {
         int k1 = i % ms;
         int k2 = i / ms;
         int inidx = k1 + k2 * ms;
-        int w1 = k1 - ms / 2 >= 0 ? k1 - ms / 2 : nf1 + k1 - ms / 2;
-        int w2 = k2 - mt / 2 >= 0 ? k2 - mt / 2 : nf2 + k2 - mt / 2;
+        int w1 = ( modeord == 0 ) ? ( (k1 - ms / 2 >= 0) ? k1 - ms / 2 : nf1 + k1 - ms / 2 ) : ( (k1 - ms + ms / 2 >= 0) ? nf1 + k1 - ms : k1 );
+        int w2 = ( modeord == 0 ) ? ( (k2 - mt / 2 >= 0) ? k2 - mt / 2 : nf2 + k2 - mt / 2 ) : ( (k2 - mt + mt / 2 >= 0) ? nf2 + k2 - mt : k2 );
         int outidx = w1 + w2 * nf1;
 
-        T kervalue = fwkerhalf1[abs(k1 - ms / 2)] * fwkerhalf2[abs(k2 - mt / 2)];
+        T kervalue = fwkerhalf1[(modeord==0) ? abs(k1 - ms / 2) : ((k1 - ms + ms / 2 >= 0) ? ms - k1 : k1)] * fwkerhalf2[(modeord==0) ? abs(k2 - mt / 2) : ((k2 - mt + mt / 2 >= 0) ? mt - k2 : k2)];
         fw[outidx].x = fk[inidx].x / kervalue;
         fw[outidx].y = fk[inidx].y / kervalue;
     }
 }
 
 template <typename T>
 __global__ void amplify_3d(int ms, int mt, int mu, int nf1, int nf2, int nf3, cuda_complex<T> *fw, cuda_complex<T> *fk,
-                           T *fwkerhalf1, T *fwkerhalf2, T *fwkerhalf3) {
+                           T *fwkerhalf1, T *fwkerhalf2, T *fwkerhalf3, int modeord) {
     for (int i = blockDim.x * blockIdx.x + threadIdx.x; i < ms * mt * mu; i += blockDim.x * gridDim.x) {
         int k1 = i % ms;
         int k2 = (i / ms) % mt;
         int k3 = (i / ms / mt);
         int inidx = k1 + k2 * ms + k3 * ms * mt;
-        int w1 = k1 - ms / 2 >= 0 ? k1 - ms / 2 : nf1 + k1 - ms / 2;
-        int w2 = k2 - mt / 2 >= 0 ? k2 - mt / 2 : nf2 + k2 - mt / 2;
-        int w3 = k3 - mu / 2 >= 0 ? k3 - mu / 2 : nf3 + k3 - mu / 2;
+        int w1 = ( modeord == 0 ) ? ( (k1 - ms / 2 >= 0) ? k1 - ms / 2 : nf1 + k1 - ms / 2 ) : ( (k1 - ms + ms / 2 >= 0) ? nf1 + k1 - ms : k1 );
+        int w2 = ( modeord == 0 ) ? ( (k2 - mt / 2 >= 0) ? k2 - mt / 2 : nf2 + k2 - mt / 2 ) : ( (k2 - mt + mt / 2 >= 0) ? nf2 + k2 - mt : k2 );
+        int w3 = ( modeord == 0 ) ? ( (k3 - mu / 2 >= 0) ? k3 - mu / 2 : nf3 + k3 - mu / 2 ) : ( (k3 - mu + mu / 2 >= 0) ? nf3 + k3 - mu : k3 );
         int outidx = w1 + w2 * nf1 + w3 * nf1 * nf2;
 
-        T kervalue = fwkerhalf1[abs(k1 - ms / 2)] * fwkerhalf2[abs(k2 - mt / 2)] * fwkerhalf3[abs(k3 - mu / 2)];
+        T kervalue = fwkerhalf1[(modeord==0) ? abs(k1 - ms / 2) : ((k1 - ms + ms / 2 >= 0) ? ms - k1 : k1)] * fwkerhalf2[(modeord==0) ? abs(k2 - mt / 2) : ((k2 - mt + mt / 2 >= 0) ? mt - k2 : k2)] * fwkerhalf3[(modeord==0) ? abs(k3 - mu / 2) : ((k3 - mu + mu / 2 >= 0) ? mu - k3 : k3)];
         fw[outidx].x = fk[inidx].x / kervalue;
         fw[outidx].y = fk[inidx].y / kervalue;
     }
@@ -125,13 +125,13 @@ int cudeconvolve1d(cufinufft_plan_t<T> *d_plan, int blksize)
     if (d_plan->spopts.spread_direction == 1) {
         for (int t = 0; t < blksize; t++) {
             deconvolve_1d<<<(nmodes + 256 - 1) / 256, 256, 0, stream>>>(ms, nf1, d_plan->fw + t * nf1,
-                                                                        d_plan->fk + t * nmodes, d_plan->fwkerhalf1);
+                                                                        d_plan->fk + t * nmodes, d_plan->fwkerhalf1, d_plan->opts.modeord);
         }
     } else {
         checkCudaErrors(cudaMemsetAsync(d_plan->fw, 0, maxbatchsize * nf1 * sizeof(cuda_complex<T>), stream));
         for (int t = 0; t < blksize; t++) {
             amplify_1d<<<(nmodes + 256 - 1) / 256, 256, 0, stream>>>(ms, nf1, d_plan->fw + t * nf1,
-                                                                     d_plan->fk + t * nmodes, d_plan->fwkerhalf1);
+                                                                     d_plan->fk + t * nmodes, d_plan->fwkerhalf1, d_plan->opts.modeord);
         }
     }
     return 0;
@@ -158,14 +158,14 @@ int cudeconvolve2d(cufinufft_plan_t<T> *d_plan, int blksize)
         for (int t = 0; t < blksize; t++) {
             deconvolve_2d<<<(nmodes + 256 - 1) / 256, 256, 0, stream>>>(ms, mt, nf1, nf2, d_plan->fw + t * nf1 * nf2,
                                                                         d_plan->fk + t * nmodes, d_plan->fwkerhalf1,
-                                                                        d_plan->fwkerhalf2);
+                                                                        d_plan->fwkerhalf2, d_plan->opts.modeord);
         }
     } else {
         checkCudaErrors(cudaMemsetAsync(d_plan->fw, 0, maxbatchsize * nf1 * nf2 * sizeof(cuda_complex<T>), stream));
         for (int t = 0; t < blksize; t++) {
             amplify_2d<<<(nmodes + 256 - 1) / 256, 256, 0, stream>>>(ms, mt, nf1, nf2, d_plan->fw + t * nf1 * nf2,
                                                                      d_plan->fk + t * nmodes, d_plan->fwkerhalf1,
-                                                                     d_plan->fwkerhalf2);
+                                                                     d_plan->fwkerhalf2, d_plan->opts.modeord);
         }
     }
     return 0;
@@ -193,15 +193,15 @@ int cudeconvolve3d(cufinufft_plan_t<T> *d_plan, int blksize)
         for (int t = 0; t < blksize; t++) {
             deconvolve_3d<<<(nmodes + 256 - 1) / 256, 256, 0, stream>>>(
                 ms, mt, mu, nf1, nf2, nf3, d_plan->fw + t * nf1 * nf2 * nf3, d_plan->fk + t * nmodes,
-                d_plan->fwkerhalf1, d_plan->fwkerhalf2, d_plan->fwkerhalf3);
+                d_plan->fwkerhalf1, d_plan->fwkerhalf2, d_plan->fwkerhalf3, d_plan->opts.modeord);
         }
     } else {
         checkCudaErrors(
             cudaMemsetAsync(d_plan->fw, 0, maxbatchsize * nf1 * nf2 * nf3 * sizeof(cuda_complex<T>), stream));
         for (int t = 0; t < blksize; t++) {
             amplify_3d<<<(nmodes + 256 - 1) / 256, 256, 0, stream>>>(
                 ms, mt, mu, nf1, nf2, nf3, d_plan->fw + t * nf1 * nf2 * nf3, d_plan->fk + t * nmodes,
-                d_plan->fwkerhalf1, d_plan->fwkerhalf2, d_plan->fwkerhalf3);
+                d_plan->fwkerhalf1, d_plan->fwkerhalf2, d_plan->fwkerhalf3, d_plan->opts.modeord);
         }
     }
     return 0;
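As a cross-check of the index arithmetic in the kernels above, here is a small host-side sketch (editor-added, plain NumPy; grid_index_1d is a hypothetical helper, not part of the library) that mirrors the 1D mapping from output mode index to fine-grid index for both modeord values:

    import numpy as np

    def grid_index_1d(i, ms, nf1, modeord):
        # Mirrors the w1 computation in deconvolve_1d / amplify_1d.
        if modeord == 0:
            # CMCL: output index i holds frequency k = i - ms/2, i.e. -ms/2 ... ms/2-1
            return i - ms // 2 if i - ms // 2 >= 0 else nf1 + i - ms // 2
        else:
            # FFT: output index i holds frequency i for i < ceil(ms/2), else i - ms
            return nf1 + i - ms if i - ms + ms // 2 >= 0 else i

    ms, nf1 = 6, 16   # 6 requested modes, fine grid of length 16
    cmcl = np.array([grid_index_1d(i, ms, nf1, 0) for i in range(ms)])   # [13 14 15  0  1  2]
    fft = np.array([grid_index_1d(i, ms, nf1, 1) for i in range(ms)])    # [ 0  1  2 13 14 15]

    # The two output orderings differ by an fftshift of the mode axis.
    assert np.array_equal(np.fft.fftshift(fft), cmcl)

The fwkerhalf lookups follow the same split: in either ordering the index reduces to the absolute frequency |k|, which is what the half-length kernel arrays store.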
