Skip to content

Commit 2a7753d

Browse files
committed
different interleaving method
1 parent 28022fe commit 2a7753d

File tree

1 file changed

+17
-3
lines changed

1 file changed

+17
-3
lines changed

src/spreadinterp.cpp

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1036,7 +1036,11 @@ FINUFFT_NEVER_INLINE void spread_subproblem_1d_kernel(
10361036
// +-----------------------+
10371037
// |re|im|re|im|re|im|re|im|
10381038
// +-----------------------+
1039-
const auto dd_pt = initialize_complex_batch<batch_t>(dd[i * 2], dd[i * 2 + 1]);
1039+
// const auto dd_pt = initialize_complex_batch<batch_t>(dd[i * 2], dd[i * 2 + 1]);
1040+
const auto dd_pt = [dd, i]() constexpr noexcept {
1041+
const batch_t ddi{dd[i * 2]}, ddj{dd[i * 2 + 1]};
1042+
return xsimd::zip_lo(ddi, ddj);
1043+
}();
10401044
// ceil offset, hence rounding, must match that in get_subgrid...
10411045
const auto i1 = BIGINT(std::ceil(kx[i] - ns2)); // fine grid start index
10421046
// FLT(i1) has different semantics and results in an extra cast
@@ -1205,7 +1209,12 @@ FINUFFT_NEVER_INLINE static void spread_subproblem_2d_kernel(
12051209
static constexpr auto ns2 = ns * FLT(0.5); // half spread width
12061210
std::fill(du, du + 2 * size1 * size2, 0); // initialized to 0 due to the padding
12071211
for (uint64_t pt = 0; pt < M; pt++) { // loop over NU pts
1208-
const auto dd_pt = initialize_complex_batch<batch_t>(dd[pt * 2], dd[pt * 2 + 1]);
1212+
// const auto dd_pt = initialize_complex_batch<batch_t>(dd[pt * 2], dd[pt * 2 +
1213+
// 1]);
1214+
const auto dd_pt = [dd, pt]() constexpr noexcept {
1215+
const batch_t ddi{dd[pt * 2]}, ddj{dd[pt * 2 + 1]};
1216+
return xsimd::zip_lo(ddi, ddj);
1217+
}();
12091218
// ceil offset, hence rounding, must match that in get_subgrid...
12101219
const auto i1 = (BIGINT)std::ceil(kx[pt] - ns2); // fine grid start indices
12111220
const auto i2 = (BIGINT)std::ceil(ky[pt] - ns2);
@@ -1309,7 +1318,12 @@ FINUFFT_NEVER_INLINE void spread_subproblem_3d_kernel(
13091318
static constexpr auto ns2 = ns * FLT(0.5); // half spread width
13101319
std::fill(du, du + 2 * size1 * size2 * size3, 0);
13111320
for (uint64_t pt = 0; pt < M; pt++) { // loop over NU pts
1312-
const auto dd_pt = initialize_complex_batch<batch_t>(dd[pt * 2], dd[pt * 2 + 1]);
1321+
// const auto dd_pt = initialize_complex_batch<batch_t>(dd[pt * 2], dd[pt * 2 +
1322+
// 1]);
1323+
const auto dd_pt = [dd, pt]() constexpr noexcept {
1324+
const batch_t ddi{dd[pt * 2]}, ddj{dd[pt * 2 + 1]};
1325+
return xsimd::zip_lo(ddi, ddj);
1326+
}();
13131327
// ceil offset, hence rounding, must match that in get_subgrid...
13141328
const auto i1 = (BIGINT)std::ceil(kx[pt] - ns2); // fine grid start indices
13151329
const auto i2 = (BIGINT)std::ceil(ky[pt] - ns2);

0 commit comments

Comments
 (0)