Skip to content

Commit aabdb57

Browse files
committed
using const and restrict where possible
1 parent 132d766 commit aabdb57

File tree

2 files changed

+53
-42
lines changed

2 files changed

+53
-42
lines changed

include/finufft/spreadinterp.h

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -39,16 +39,19 @@ FINUFFT_EXPORT int FINUFFT_CDECL indexSort(BIGINT *sort_indices, BIGINT N1, BIGI
3939
BIGINT N3, BIGINT M, FLT *kx, FLT *ky, FLT *kz,
4040
const finufft_spread_opts &opts);
4141
FINUFFT_EXPORT int FINUFFT_CDECL interpSorted(
42-
BIGINT *sort_indices, BIGINT N1, BIGINT N2, BIGINT N3, FLT *data_uniform, BIGINT M,
43-
FLT *kx, FLT *ky, FLT *kz, FLT *data_nonuniform, const finufft_spread_opts &opts);
42+
const BIGINT *sort_indices, BIGINT N1, BIGINT N2, BIGINT N3,
43+
FLT *FINUFFT_RESTRICT data_uniform, BIGINT M, FLT *FINUFFT_RESTRICT kx,
44+
FLT *FINUFFT_RESTRICT ky, FLT *FINUFFT_RESTRICT kz,
45+
FLT *FINUFFT_RESTRICT data_nonuniform, const finufft_spread_opts &opts);
4446
FINUFFT_EXPORT int FINUFFT_CDECL spreadSorted(
4547
const BIGINT *sort_indices, BIGINT N1, BIGINT N2, BIGINT N3, FLT *data_uniform,
4648
BIGINT M, FLT *kx, FLT *ky, FLT *kz, const FLT *data_nonuniform,
4749
const finufft_spread_opts &opts, int did_sort);
4850
FINUFFT_EXPORT int FINUFFT_CDECL spreadinterpSorted(
49-
BIGINT *sort_indices, BIGINT N1, BIGINT N2, BIGINT N3, FLT *data_uniform, BIGINT M,
50-
FLT *kx, FLT *ky, FLT *kz, FLT *data_nonuniform, const finufft_spread_opts &opts,
51-
int did_sort);
51+
const BIGINT *sort_indices, BIGINT N1, BIGINT N2, BIGINT N3,
52+
FLT *FINUFFT_RESTRICT data_uniform, BIGINT M, FLT *FINUFFT_RESTRICT kx,
53+
FLT *FINUFFT_RESTRICT ky, FLT *FINUFFT_RESTRICT kz,
54+
FLT *FINUFFT_RESTRICT data_nonuniform, const finufft_spread_opts &opts, int did_sort);
5255
FINUFFT_EXPORT FLT FINUFFT_CDECL evaluate_kernel(FLT x, const finufft_spread_opts &opts);
5356
FINUFFT_EXPORT int FINUFFT_CDECL setup_spreader(finufft_spread_opts &opts, FLT eps,
5457
double upsampfac, int kerevalmeth,

src/spreadinterp.cpp

Lines changed: 45 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -7,12 +7,12 @@
77
#include <finufft/utils_precindep.h>
88

99
#include "ker_horner_allw_loop_constexpr.h"
10+
1011
#include <xsimd/xsimd.hpp>
1112

12-
#include <iostream>
13-
#include <math.h>
14-
#include <stdio.h>
15-
#include <stdlib.h>
13+
#include <cmath>
14+
#include <cstdio>
15+
#include <cstdlib>
1616
#include <vector>
1717

1818
using namespace std;
@@ -296,10 +296,11 @@ int indexSort(BIGINT *sort_indices, BIGINT N1, BIGINT N2, BIGINT N3, BIGINT M, F
296296
return did_sort;
297297
}
298298

299-
int spreadinterpSorted(BIGINT *sort_indices, BIGINT N1, BIGINT N2, BIGINT N3,
300-
FLT *data_uniform, BIGINT M, FLT *kx, FLT *ky, FLT *kz,
301-
FLT *data_nonuniform, const finufft_spread_opts &opts,
302-
int did_sort)
299+
int spreadinterpSorted(const BIGINT *sort_indices, const BIGINT N1, const BIGINT N2,
300+
const BIGINT N3, FLT *data_uniform, const BIGINT M,
301+
FLT *FINUFFT_RESTRICT kx, FLT *FINUFFT_RESTRICT ky,
302+
FLT *FINUFFT_RESTRICT kz, FLT *FINUFFT_RESTRICT data_nonuniform,
303+
const finufft_spread_opts &opts, int did_sort)
303304
/* Logic to select the main spreading (dir=1) vs interpolation (dir=2) routine.
304305
See spreadinterp() above for inputs arguments and definitions.
305306
Return value should always be 0 (no error reporting).
@@ -319,7 +320,8 @@ int spreadinterpSorted(BIGINT *sort_indices, BIGINT N1, BIGINT N2, BIGINT N3,
319320

320321
// --------------------------------------------------------------------------
321322
int spreadSorted(const BIGINT *sort_indices, BIGINT N1, BIGINT N2, BIGINT N3,
322-
FLT *data_uniform, BIGINT M, FLT *kx, FLT *ky, FLT *kz,
323+
FLT *FINUFFT_RESTRICT data_uniform, BIGINT M, FLT *FINUFFT_RESTRICT kx,
324+
FLT *FINUFFT_RESTRICT ky, FLT *FINUFFT_RESTRICT kz,
323325
const FLT *data_nonuniform, const finufft_spread_opts &opts,
324326
int did_sort)
325327
// Spread NU pts in sorted order to a uniform grid. See spreadinterp() for doc.
@@ -443,10 +445,11 @@ int spreadSorted(const BIGINT *sort_indices, BIGINT N1, BIGINT N2, BIGINT N3,
443445

444446
// --------------------------------------------------------------------------
445447
template<uint16_t ns, uint16_t kerevalmeth>
446-
static int interpSorted_kernel(const BIGINT *sort_indices, const BIGINT N1,
447-
const BIGINT N2, const BIGINT N3, const FLT *data_uniform,
448-
const BIGINT M, FLT *kx, FLT *ky, FLT *kz,
449-
FLT *data_nonuniform, const finufft_spread_opts &opts)
448+
FINUFFT_NEVER_INLINE static int interpSorted_kernel(
449+
const BIGINT *sort_indices, const BIGINT N1, const BIGINT N2, const BIGINT N3,
450+
const FLT *data_uniform, const BIGINT M, FLT *FINUFFT_RESTRICT kx,
451+
FLT *FINUFFT_RESTRICT ky, FLT *FINUFFT_RESTRICT kz,
452+
FLT *FINUFFT_RESTRICT data_nonuniform, const finufft_spread_opts &opts)
450453
// Interpolate to NU pts in sorted order from a uniform grid.
451454
// See spreadinterp() for doc.
452455
{
@@ -476,9 +479,9 @@ static int interpSorted_kernel(const BIGINT *sort_indices, const BIGINT N1,
476479
FLT outbuf[2 * CHUNKSIZE];
477480
// Kernels: static alloc is faster, so we do it for up to 3D...
478481
alignas(alignment) std::array<FLT, 3 * MAX_NSPREAD> kernel_values{0};
479-
FLT *FINUFFT_RESTRICT ker1 = kernel_values.data();
480-
FLT *FINUFFT_RESTRICT ker2 = kernel_values.data() + MAX_NSPREAD;
481-
FLT *FINUFFT_RESTRICT ker3 = kernel_values.data() + 2 * MAX_NSPREAD;
482+
auto *FINUFFT_RESTRICT ker1 = kernel_values.data();
483+
auto *FINUFFT_RESTRICT ker2 = kernel_values.data() + MAX_NSPREAD;
484+
auto *FINUFFT_RESTRICT ker3 = kernel_values.data() + 2 * MAX_NSPREAD;
482485

483486
// Loop over interpolation chunks
484487
#pragma omp for schedule(dynamic, 1000) // assign threads to NU targ pts:
@@ -487,7 +490,7 @@ static int interpSorted_kernel(const BIGINT *sort_indices, const BIGINT N1,
487490

488491
{
489492
// Setup buffers for this chunk
490-
int bufsize = (i + CHUNKSIZE > M) ? M - i : CHUNKSIZE;
493+
const int bufsize = (i + CHUNKSIZE > M) ? M - i : CHUNKSIZE;
491494
for (int ibuf = 0; ibuf < bufsize; ibuf++) {
492495
BIGINT j = sort_indices[i + ibuf];
493496
jlist[ibuf] = j;
@@ -498,20 +501,20 @@ static int interpSorted_kernel(const BIGINT *sort_indices, const BIGINT N1,
498501

499502
// Loop over targets in chunk
500503
for (int ibuf = 0; ibuf < bufsize; ibuf++) {
501-
FLT xj = xjlist[ibuf];
502-
FLT yj = (ndims > 1) ? yjlist[ibuf] : 0;
503-
FLT zj = (ndims > 2) ? zjlist[ibuf] : 0;
504+
const auto xj = xjlist[ibuf];
505+
const auto yj = (ndims > 1) ? yjlist[ibuf] : 0;
506+
const auto zj = (ndims > 2) ? zjlist[ibuf] : 0;
504507

505-
FLT *target = outbuf + 2 * ibuf;
508+
auto *FINUFFT_RESTRICT target = outbuf + 2 * ibuf;
506509

507510
// coords (x,y,z), spread block corner index (i1,i2,i3) of current NU targ
508-
BIGINT i1 = (BIGINT)std::ceil(xj - ns2); // leftmost grid index
509-
BIGINT i2 = (ndims > 1) ? (BIGINT)std::ceil(yj - ns2) : 0; // min y grid index
510-
BIGINT i3 = (ndims > 2) ? (BIGINT)std::ceil(zj - ns2) : 0; // min z grid index
511+
const auto i1 = (BIGINT)std::ceil(xj - ns2); // leftmost grid index
512+
const auto i2 = (ndims > 1) ? (BIGINT)std::ceil(yj - ns2) : 0; // min y grid index
513+
const auto i3 = (ndims > 2) ? (BIGINT)std::ceil(zj - ns2) : 0; // min z grid index
511514

512-
FLT x1 = (FLT)i1 - xj; // shift of ker center, in [-w/2,-w/2+1]
513-
FLT x2 = (ndims > 1) ? (FLT)i2 - yj : 0;
514-
FLT x3 = (ndims > 2) ? (FLT)i3 - zj : 0;
515+
const auto x1 = (FLT)i1 - xj; // shift of ker center, in [-w/2,-w/2+1]
516+
const auto x2 = (ndims > 1) ? (FLT)i2 - yj : 0;
517+
const auto x3 = (ndims > 2) ? (FLT)i3 - zj : 0;
515518

516519
// eval kernel values patch and use to interpolate from uniform data...
517520
if (!(opts.flags & TF_OMIT_SPREADING)) {
@@ -537,7 +540,7 @@ static int interpSorted_kernel(const BIGINT *sort_indices, const BIGINT N1,
537540

538541
// Copy result buffer to output array
539542
for (int ibuf = 0; ibuf < bufsize; ibuf++) {
540-
BIGINT j = jlist[ibuf];
543+
const UBIGINT j = jlist[ibuf];
541544
data_nonuniform[2 * j] = outbuf[2 * ibuf];
542545
data_nonuniform[2 * j + 1] = outbuf[2 * ibuf + 1];
543546
}
@@ -549,9 +552,11 @@ static int interpSorted_kernel(const BIGINT *sort_indices, const BIGINT N1,
549552
}
550553

551554
template<uint16_t NS>
552-
static int interpSorted_dispatch(BIGINT *sort_indices, BIGINT N1, BIGINT N2, BIGINT N3,
553-
FLT *data_uniform, BIGINT M, FLT *kx, FLT *ky, FLT *kz,
554-
FLT *data_nonuniform, const finufft_spread_opts &opts) {
555+
static int interpSorted_dispatch(
556+
const BIGINT *sort_indices, const BIGINT N1, const BIGINT N2, const BIGINT N3,
557+
FLT *FINUFFT_RESTRICT data_uniform, const BIGINT M, FLT *FINUFFT_RESTRICT kx,
558+
FLT *FINUFFT_RESTRICT ky, FLT *FINUFFT_RESTRICT kz,
559+
FLT *FINUFFT_RESTRICT data_nonuniform, const finufft_spread_opts &opts) {
555560
static_assert(MIN_NSPREAD <= NS <= MAX_NSPREAD,
556561
"NS must be in the range (MIN_NSPREAD, MAX_NSPREAD)");
557562
if constexpr (NS == MIN_NSPREAD) { // Base case
@@ -578,8 +583,10 @@ static int interpSorted_dispatch(BIGINT *sort_indices, BIGINT N1, BIGINT N2, BIG
578583
}
579584
}
580585

581-
int interpSorted(BIGINT *sort_indices, BIGINT N1, BIGINT N2, BIGINT N3, FLT *data_uniform,
582-
BIGINT M, FLT *kx, FLT *ky, FLT *kz, FLT *data_nonuniform,
586+
int interpSorted(const BIGINT *sort_indices, const BIGINT N1, const BIGINT N2,
587+
const BIGINT N3, FLT *FINUFFT_RESTRICT data_uniform, const BIGINT M,
588+
FLT *FINUFFT_RESTRICT kx, FLT *FINUFFT_RESTRICT ky,
589+
FLT *FINUFFT_RESTRICT kz, FLT *FINUFFT_RESTRICT data_nonuniform,
583590
const finufft_spread_opts &opts) {
584591
return interpSorted_dispatch<MAX_NSPREAD>(sort_indices, N1, N2, N3, data_uniform, M, kx,
585592
ky, kz, data_nonuniform, opts);
@@ -781,7 +788,7 @@ Two upsampfacs implemented. Params must match ref formula. Barnett 4/24/18 */
781788

782789
template<uint8_t ns>
783790
void interp_line(FLT *FINUFFT_RESTRICT target, const FLT *du, const FLT *ker, BIGINT i1,
784-
BIGINT N1)
791+
const BIGINT N1)
785792
/* 1D interpolate complex values from size-ns block of the du (uniform grid
786793
data) array to a single complex output value "target", using as weights the
787794
1d kernel evaluation list ker1.
@@ -839,7 +846,8 @@ void interp_line(FLT *FINUFFT_RESTRICT target, const FLT *du, const FLT *ker, BI
839846

840847
template<uint8_t ns>
841848
void interp_square(FLT *FINUFFT_RESTRICT target, const FLT *du, const FLT *ker1,
842-
const FLT *ker2, BIGINT i1, BIGINT i2, BIGINT N1, BIGINT N2)
849+
const FLT *ker2, const BIGINT i1, const BIGINT i2, const BIGINT N1,
850+
const BIGINT N2)
843851
/* 2D interpolate complex values from a ns*ns block of the du (uniform grid
844852
data) array to a single complex output value "target", using as weights the
845853
ns*ns outer product of the 1d kernel lists ker1 and ker2.
@@ -919,8 +927,8 @@ void interp_square(FLT *FINUFFT_RESTRICT target, const FLT *du, const FLT *ker1,
919927

920928
template<uint8_t ns>
921929
void interp_cube(FLT *FINUFFT_RESTRICT target, const FLT *du, const FLT *ker1,
922-
const FLT *ker2, const FLT *ker3, BIGINT i1, BIGINT i2, BIGINT i3,
923-
BIGINT N1, BIGINT N2, BIGINT N3)
930+
const FLT *ker2, const FLT *ker3, const BIGINT i1, const BIGINT i2,
931+
const BIGINT i3, const BIGINT N1, const BIGINT N2, const BIGINT N3)
924932
/* 3D interpolate complex values from a ns*ns*ns block of the du (uniform grid
925933
data) array to a single complex output value "target", using as weights the
926934
ns*ns*ns outer product of the 1d kernel lists ker1, ker2, and ker3.

0 commit comments

Comments
 (0)