@@ -74,8 +74,8 @@ static inline T evaluate_kernel(T x, const finufft_spread_opts &opts)
7474template <typename T>
7575int setup_spreader (finufft_spread_opts &opts, T eps, T upsampfac, int kerevalmeth);
7676
77- template <typename T>
78- static __forceinline__ __device__ T evaluate_kernel (T x, T es_c, T es_beta, int ns )
77+ template <typename T, int ns >
78+ static __forceinline__ __device__ T evaluate_kernel (T x, T es_c, T es_beta)
7979/* ES ("exp sqrt") kernel evaluation at single real argument:
8080 phi(x) = exp(beta.sqrt(1 - (2x/n_s)^2)), for |x| < nspread/2
8181 related to an asymptotic approximation to the Kaiser--Bessel, itself an
@@ -88,9 +88,8 @@ static __forceinline__ __device__ T evaluate_kernel(T x, T es_c, T es_beta, int
8888 : 0.0 ;
8989}
9090
91- template <typename T>
92- static __device__ void eval_kernel_vec_horner (T *ker, const T x, const int w,
93- const double upsampfac)
91+ template <typename T, int w>
92+ static __device__ void eval_kernel_vec_horner (T *ker, const T x, const double upsampfac)
9493/* Fill ker[] with Horner piecewise poly approx to [-w/2,w/2] ES kernel eval at
9594 x_j = x + j, for j=0,..,w-1. Thus x in [-w/2,-w/2+1]. w is aka ns.
9695 This is the current evaluation method, since it's faster (except i7 w=16).
@@ -109,11 +108,11 @@ static __device__ void eval_kernel_vec_horner(T *ker, const T x, const int w,
109108 }
110109}
111110
112- template <typename T>
113- static __inline__ __device__ void eval_kernel_vec (T *ker, const T x, const int w ,
114- const T es_c, const T es_beta) {
111+ template <typename T, int w >
112+ static __inline__ __device__ void eval_kernel_vec (T *ker, const T x, const T es_c ,
113+ const T es_beta) {
115114 for (int i = 0 ; i < w; i++) {
116- ker[i] = evaluate_kernel (abs (x + i), es_c, es_beta, w );
115+ ker[i] = evaluate_kernel<T, w> (abs (x + i), es_c, es_beta);
117116 }
118117}
119118
@@ -129,53 +128,53 @@ template<typename T> int cuinterp3d(cufinufft_plan_t<T> *d_plan, int blksize);
129128// Wrappers for methods of spreading
130129template <typename T>
131130int cuspread1d_nuptsdriven_prop (int nf1, int M, cufinufft_plan_t <T> *d_plan);
132- template <typename T>
131+ template <typename T, int ns >
133132int cuspread1d_nuptsdriven (int nf1, int M, cufinufft_plan_t <T> *d_plan, int blksize);
134133template <typename T>
135134int cuspread1d_subprob_prop (int nf1, int M, cufinufft_plan_t <T> *d_plan);
136- template <typename T>
135+ template <typename T, int ns >
137136int cuspread1d_subprob (int nf1, int M, cufinufft_plan_t <T> *d_plan, int blksize);
138137
139138template <typename T>
140139int cuspread2d_nuptsdriven_prop (int nf1, int nf2, int M, cufinufft_plan_t <T> *d_plan);
141- template <typename T>
140+ template <typename T, int ns >
142141int cuspread2d_nuptsdriven (int nf1, int nf2, int M, cufinufft_plan_t <T> *d_plan,
143142 int blksize);
144143template <typename T>
145144int cuspread2d_subprob_prop (int nf1, int nf2, int M, cufinufft_plan_t <T> *d_plan);
146- template <typename T>
145+ template <typename T, int ns >
147146int cuspread2d_subprob (int nf1, int nf2, int m, cufinufft_plan_t <T> *d_plan, int blksize);
148147template <typename T>
149148int cuspread3d_nuptsdriven_prop (int nf1, int nf2, int nf3, int M,
150149 cufinufft_plan_t <T> *d_plan);
151- template <typename T>
150+ template <typename T, int ns >
152151int cuspread3d_nuptsdriven (int nf1, int nf2, int nf3, int M, cufinufft_plan_t <T> *d_plan,
153152 int blksize);
154153template <typename T>
155154int cuspread3d_blockgather_prop (int nf1, int nf2, int nf3, int M,
156155 cufinufft_plan_t <T> *d_plan);
157- template <typename T>
156+ template <typename T, int ns >
158157int cuspread3d_blockgather (int nf1, int nf2, int nf3, int M, cufinufft_plan_t <T> *d_plan,
159158 int blksize);
160159template <typename T>
161160int cuspread3d_subprob_prop (int nf1, int nf2, int nf3, int M,
162161 cufinufft_plan_t <T> *d_plan);
163- template <typename T>
162+ template <typename T, int ns >
164163int cuspread3d_subprob (int nf1, int nf2, int nf3, int M, cufinufft_plan_t <T> *d_plan,
165164 int blksize);
166165
167166// Wrappers for methods of interpolation
168- template <typename T>
167+ template <typename T, int ns >
169168int cuinterp1d_nuptsdriven (int nf1, int M, cufinufft_plan_t <T> *d_plan, int blksize);
170- template <typename T>
169+ template <typename T, int ns >
171170int cuinterp2d_nuptsdriven (int nf1, int nf2, int M, cufinufft_plan_t <T> *d_plan,
172171 int blksize);
173- template <typename T>
172+ template <typename T, int ns >
174173int cuinterp2d_subprob (int nf1, int nf2, int M, cufinufft_plan_t <T> *d_plan, int blksize);
175- template <typename T>
174+ template <typename T, int ns >
176175int cuinterp3d_nuptsdriven (int nf1, int nf2, int nf3, int M, cufinufft_plan_t <T> *d_plan,
177176 int blksize);
178- template <typename T>
177+ template <typename T, int ns >
179178int cuinterp3d_subprob (int nf1, int nf2, int nf3, int M, cufinufft_plan_t <T> *d_plan,
180179 int blksize);
181180
0 commit comments