77#include < finufft/utils_precindep.h>
88
99#include " ker_horner_allw_loop_constexpr.h"
10+
1011#include < xsimd/xsimd.hpp>
1112
12- #include < iostream>
13- #include < math.h>
14- #include < stdio.h>
15- #include < stdlib.h>
13+ #include < cmath>
14+ #include < cstdio>
15+ #include < cstdlib>
1616#include < vector>
1717
1818using namespace std ;
@@ -296,10 +296,11 @@ int indexSort(BIGINT *sort_indices, BIGINT N1, BIGINT N2, BIGINT N3, BIGINT M, F
296296 return did_sort;
297297}
298298
299- int spreadinterpSorted (BIGINT *sort_indices, BIGINT N1, BIGINT N2, BIGINT N3,
300- FLT *data_uniform, BIGINT M, FLT *kx, FLT *ky, FLT *kz,
301- FLT *data_nonuniform, const finufft_spread_opts &opts,
302- int did_sort)
299+ int spreadinterpSorted (const BIGINT *sort_indices, const BIGINT N1, const BIGINT N2,
300+ const BIGINT N3, FLT *data_uniform, const BIGINT M,
301+ FLT *FINUFFT_RESTRICT kx, FLT *FINUFFT_RESTRICT ky,
302+ FLT *FINUFFT_RESTRICT kz, FLT *FINUFFT_RESTRICT data_nonuniform,
303+ const finufft_spread_opts &opts, int did_sort)
303304/* Logic to select the main spreading (dir=1) vs interpolation (dir=2) routine.
304305 See spreadinterp() above for inputs arguments and definitions.
305306 Return value should always be 0 (no error reporting).
@@ -319,7 +320,8 @@ int spreadinterpSorted(BIGINT *sort_indices, BIGINT N1, BIGINT N2, BIGINT N3,
319320
320321// --------------------------------------------------------------------------
321322int spreadSorted (const BIGINT *sort_indices, BIGINT N1, BIGINT N2, BIGINT N3,
322- FLT *data_uniform, BIGINT M, FLT *kx, FLT *ky, FLT *kz,
323+ FLT *FINUFFT_RESTRICT data_uniform, BIGINT M, FLT *FINUFFT_RESTRICT kx,
324+ FLT *FINUFFT_RESTRICT ky, FLT *FINUFFT_RESTRICT kz,
323325 const FLT *data_nonuniform, const finufft_spread_opts &opts,
324326 int did_sort)
325327// Spread NU pts in sorted order to a uniform grid. See spreadinterp() for doc.
@@ -443,10 +445,11 @@ int spreadSorted(const BIGINT *sort_indices, BIGINT N1, BIGINT N2, BIGINT N3,
443445
444446// --------------------------------------------------------------------------
445447template <uint16_t ns, uint16_t kerevalmeth>
446- static int interpSorted_kernel (const BIGINT *sort_indices, const BIGINT N1,
447- const BIGINT N2, const BIGINT N3, const FLT *data_uniform,
448- const BIGINT M, FLT *kx, FLT *ky, FLT *kz,
449- FLT *data_nonuniform, const finufft_spread_opts &opts)
448+ FINUFFT_NEVER_INLINE static int interpSorted_kernel (
449+ const BIGINT *sort_indices, const BIGINT N1, const BIGINT N2, const BIGINT N3,
450+ const FLT *data_uniform, const BIGINT M, FLT *FINUFFT_RESTRICT kx,
451+ FLT *FINUFFT_RESTRICT ky, FLT *FINUFFT_RESTRICT kz,
452+ FLT *FINUFFT_RESTRICT data_nonuniform, const finufft_spread_opts &opts)
450453// Interpolate to NU pts in sorted order from a uniform grid.
451454// See spreadinterp() for doc.
452455{
@@ -476,9 +479,9 @@ static int interpSorted_kernel(const BIGINT *sort_indices, const BIGINT N1,
476479 FLT outbuf[2 * CHUNKSIZE];
477480 // Kernels: static alloc is faster, so we do it for up to 3D...
478481 alignas (alignment) std::array<FLT, 3 * MAX_NSPREAD> kernel_values{0 };
479- FLT *FINUFFT_RESTRICT ker1 = kernel_values.data ();
480- FLT *FINUFFT_RESTRICT ker2 = kernel_values.data () + MAX_NSPREAD;
481- FLT *FINUFFT_RESTRICT ker3 = kernel_values.data () + 2 * MAX_NSPREAD;
482+ auto *FINUFFT_RESTRICT ker1 = kernel_values.data ();
483+ auto *FINUFFT_RESTRICT ker2 = kernel_values.data () + MAX_NSPREAD;
484+ auto *FINUFFT_RESTRICT ker3 = kernel_values.data () + 2 * MAX_NSPREAD;
482485
483486 // Loop over interpolation chunks
484487#pragma omp for schedule(dynamic, 1000) // assign threads to NU targ pts:
@@ -487,7 +490,7 @@ static int interpSorted_kernel(const BIGINT *sort_indices, const BIGINT N1,
487490
488491 {
489492 // Setup buffers for this chunk
490- int bufsize = (i + CHUNKSIZE > M) ? M - i : CHUNKSIZE;
493+ const int bufsize = (i + CHUNKSIZE > M) ? M - i : CHUNKSIZE;
491494 for (int ibuf = 0 ; ibuf < bufsize; ibuf++) {
492495 BIGINT j = sort_indices[i + ibuf];
493496 jlist[ibuf] = j;
@@ -498,20 +501,20 @@ static int interpSorted_kernel(const BIGINT *sort_indices, const BIGINT N1,
498501
499502 // Loop over targets in chunk
500503 for (int ibuf = 0 ; ibuf < bufsize; ibuf++) {
501- FLT xj = xjlist[ibuf];
502- FLT yj = (ndims > 1 ) ? yjlist[ibuf] : 0 ;
503- FLT zj = (ndims > 2 ) ? zjlist[ibuf] : 0 ;
504+ const auto xj = xjlist[ibuf];
505+ const auto yj = (ndims > 1 ) ? yjlist[ibuf] : 0 ;
506+ const auto zj = (ndims > 2 ) ? zjlist[ibuf] : 0 ;
504507
505- FLT * target = outbuf + 2 * ibuf;
508+ auto *FINUFFT_RESTRICT target = outbuf + 2 * ibuf;
506509
507510 // coords (x,y,z), spread block corner index (i1,i2,i3) of current NU targ
508- BIGINT i1 = (BIGINT)std::ceil (xj - ns2); // leftmost grid index
509- BIGINT i2 = (ndims > 1 ) ? (BIGINT)std::ceil (yj - ns2) : 0 ; // min y grid index
510- BIGINT i3 = (ndims > 2 ) ? (BIGINT)std::ceil (zj - ns2) : 0 ; // min z grid index
511+ const auto i1 = (BIGINT)std::ceil (xj - ns2); // leftmost grid index
512+ const auto i2 = (ndims > 1 ) ? (BIGINT)std::ceil (yj - ns2) : 0 ; // min y grid index
513+ const auto i3 = (ndims > 2 ) ? (BIGINT)std::ceil (zj - ns2) : 0 ; // min z grid index
511514
512- FLT x1 = (FLT)i1 - xj; // shift of ker center, in [-w/2,-w/2+1]
513- FLT x2 = (ndims > 1 ) ? (FLT)i2 - yj : 0 ;
514- FLT x3 = (ndims > 2 ) ? (FLT)i3 - zj : 0 ;
515+ const auto x1 = (FLT)i1 - xj; // shift of ker center, in [-w/2,-w/2+1]
516+ const auto x2 = (ndims > 1 ) ? (FLT)i2 - yj : 0 ;
517+ const auto x3 = (ndims > 2 ) ? (FLT)i3 - zj : 0 ;
515518
516519 // eval kernel values patch and use to interpolate from uniform data...
517520 if (!(opts.flags & TF_OMIT_SPREADING)) {
@@ -537,7 +540,7 @@ static int interpSorted_kernel(const BIGINT *sort_indices, const BIGINT N1,
537540
538541 // Copy result buffer to output array
539542 for (int ibuf = 0 ; ibuf < bufsize; ibuf++) {
540- BIGINT j = jlist[ibuf];
543+ const UBIGINT j = jlist[ibuf];
541544 data_nonuniform[2 * j] = outbuf[2 * ibuf];
542545 data_nonuniform[2 * j + 1 ] = outbuf[2 * ibuf + 1 ];
543546 }
@@ -549,9 +552,11 @@ static int interpSorted_kernel(const BIGINT *sort_indices, const BIGINT N1,
549552}
550553
551554template <uint16_t NS>
552- static int interpSorted_dispatch (BIGINT *sort_indices, BIGINT N1, BIGINT N2, BIGINT N3,
553- FLT *data_uniform, BIGINT M, FLT *kx, FLT *ky, FLT *kz,
554- FLT *data_nonuniform, const finufft_spread_opts &opts) {
555+ static int interpSorted_dispatch (
556+ const BIGINT *sort_indices, const BIGINT N1, const BIGINT N2, const BIGINT N3,
557+ FLT *FINUFFT_RESTRICT data_uniform, const BIGINT M, FLT *FINUFFT_RESTRICT kx,
558+ FLT *FINUFFT_RESTRICT ky, FLT *FINUFFT_RESTRICT kz,
559+ FLT *FINUFFT_RESTRICT data_nonuniform, const finufft_spread_opts &opts) {
555560 static_assert (MIN_NSPREAD <= NS <= MAX_NSPREAD,
556561 " NS must be in the range (MIN_NSPREAD, MAX_NSPREAD)" );
557562 if constexpr (NS == MIN_NSPREAD) { // Base case
@@ -578,8 +583,10 @@ static int interpSorted_dispatch(BIGINT *sort_indices, BIGINT N1, BIGINT N2, BIG
578583 }
579584}
580585
581- int interpSorted (BIGINT *sort_indices, BIGINT N1, BIGINT N2, BIGINT N3, FLT *data_uniform,
582- BIGINT M, FLT *kx, FLT *ky, FLT *kz, FLT *data_nonuniform,
586+ int interpSorted (const BIGINT *sort_indices, const BIGINT N1, const BIGINT N2,
587+ const BIGINT N3, FLT *FINUFFT_RESTRICT data_uniform, const BIGINT M,
588+ FLT *FINUFFT_RESTRICT kx, FLT *FINUFFT_RESTRICT ky,
589+ FLT *FINUFFT_RESTRICT kz, FLT *FINUFFT_RESTRICT data_nonuniform,
583590 const finufft_spread_opts &opts) {
584591 return interpSorted_dispatch<MAX_NSPREAD>(sort_indices, N1, N2, N3, data_uniform, M, kx,
585592 ky, kz, data_nonuniform, opts);
@@ -781,7 +788,7 @@ Two upsampfacs implemented. Params must match ref formula. Barnett 4/24/18 */
781788
782789template <uint8_t ns>
783790void interp_line (FLT *FINUFFT_RESTRICT target, const FLT *du, const FLT *ker, BIGINT i1,
784- BIGINT N1)
791+ const BIGINT N1)
785792/* 1D interpolate complex values from size-ns block of the du (uniform grid
786793 data) array to a single complex output value "target", using as weights the
787794 1d kernel evaluation list ker1.
@@ -839,7 +846,8 @@ void interp_line(FLT *FINUFFT_RESTRICT target, const FLT *du, const FLT *ker, BI
839846
840847template <uint8_t ns>
841848void interp_square (FLT *FINUFFT_RESTRICT target, const FLT *du, const FLT *ker1,
842- const FLT *ker2, BIGINT i1, BIGINT i2, BIGINT N1, BIGINT N2)
849+ const FLT *ker2, const BIGINT i1, const BIGINT i2, const BIGINT N1,
850+ const BIGINT N2)
843851/* 2D interpolate complex values from a ns*ns block of the du (uniform grid
844852 data) array to a single complex output value "target", using as weights the
845853 ns*ns outer product of the 1d kernel lists ker1 and ker2.
@@ -919,8 +927,8 @@ void interp_square(FLT *FINUFFT_RESTRICT target, const FLT *du, const FLT *ker1,
919927
920928template <uint8_t ns>
921929void interp_cube (FLT *FINUFFT_RESTRICT target, const FLT *du, const FLT *ker1,
922- const FLT *ker2, const FLT *ker3, BIGINT i1, BIGINT i2, BIGINT i3 ,
923- BIGINT N1, BIGINT N2, BIGINT N3)
930+ const FLT *ker2, const FLT *ker3, const BIGINT i1, const BIGINT i2 ,
931+ const BIGINT i3, const BIGINT N1, const BIGINT N2, const BIGINT N3)
924932/* 3D interpolate complex values from a ns*ns*ns block of the du (uniform grid
925933 data) array to a single complex output value "target", using as weights the
926934 ns*ns*ns outer product of the 1d kernel lists ker1, ker2, and ker3.
0 commit comments