Skip to content

Commit 86ab8aa

Browse files
committed
minor changes
1 parent 405fbc1 commit 86ab8aa

File tree

1 file changed

+7
-4
lines changed

1 file changed

+7
-4
lines changed

src/spreadinterp.cpp

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1063,12 +1063,15 @@ void interp_square(FLT *FINUFFT_RESTRICT target, const FLT *du, const FLT *ker1,
10631063
// new array du_pts to store the du values for the current y line
10641064
std::array<simd_type, line_vectors> line{}, du_pts{};
10651065
// block for first y line, to avoid explicitly initializing line with zeros
1066-
const auto l_ptr_base = du + 2 * (N1 * i2 + i1); // ptr to horiz line start in du
1066+
const auto l_ptr_base = du + 2 * UBIGINT(N1 * i2 + i1); // ptr to horiz line start
1067+
// in du
1068+
for (uint8_t l{0}; l < line_vectors; ++l) {
1069+
du_pts[l] = simd_type::load_unaligned(l * simd_size + l_ptr_base);
1070+
}
10671071
for (uint8_t l{0}; l < line_vectors; ++l) {
10681072
// l is like dx but for ns interleaved
10691073
// no fancy trick needed to multiply real,imag by ker2
1070-
const auto du_pt = simd_type::load_unaligned(l * simd_size + l_ptr_base);
1071-
line[l] = du_pt * simd_type{ker2[0]};
1074+
line[l] = du_pts[l] * simd_type{ker2[0]};
10721075
}
10731076
// add remaining const-y lines to the line (expensive inner loop)
10741077
for (uint8_t dy{1}; dy < ns; dy++) {
@@ -1260,7 +1263,7 @@ void interp_cube(FLT *FINUFFT_RESTRICT target, const FLT *du, const FLT *ker1,
12601263
const auto base_oz = N1 * N2 * UBIGINT(i3); // Move invariant part outside the loop
12611264
for (uint8_t dz{0}; dz < ns; ++dz) {
12621265
const auto oz = base_oz + N1 * N2 * dz; // Only the dz part is inside the loop
1263-
const auto base_du_ptr = du + 2 * (oz + N1 * i2 + UBIGINT(i1));
1266+
const auto base_du_ptr = du + 2 * UBIGINT(oz + N1 * i2 + i1);
12641267
{
12651268
alignas(alignment) std::array<FLT, ker23_size> ker23_scalar{};
12661269
const simd_type ker3_v{ker3[dz]};

0 commit comments

Comments
 (0)