Skip to content

Commit 5d56fa1

Browse files
committed
Use FMA (fused multiply-add) where possible
1 parent 0ce3cb9 commit 5d56fa1

File tree

1 file changed

+14
-18
lines changed

1 file changed

+14
-18
lines changed

src/spreadinterp.cpp

Lines changed: 14 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -818,32 +818,28 @@ void interp_line(FLT *FINUFFT_RESTRICT target, const FLT *du, const FLT *ker,
818818
*/
819819
{
820820

821-
FLT out[] = {0.0, 0.0};
822-
BIGINT j = i1;
821+
std::array<FLT, 2> out{0};
822+
BIGINT j = i1;
823823
if (FINUFFT_UNLIKELY(i1 < 0)) { // wraps at left
824824
j += N1;
825-
for (UBIGINT dx = 0; dx < -i1; ++dx) {
826-
out[0] += du[2 * j] * ker[dx];
827-
out[1] += du[2 * j + 1] * ker[dx];
828-
++j;
825+
for (UBIGINT dx = 0; dx < -i1; ++dx, ++j) {
826+
out[0] = xsimd::fma(du[2 * j], ker[dx], out[0]);
827+
out[1] = xsimd::fma(du[2 * j + 1], ker[dx], out[1]);
829828
}
830829
j -= N1;
831-
for (UBIGINT dx = -i1; dx < ns; ++dx) {
832-
out[0] += du[2 * j] * ker[dx];
833-
out[1] += du[2 * j + 1] * ker[dx];
834-
++j;
830+
for (UBIGINT dx = -i1; dx < ns; ++dx, ++j) {
831+
out[0] = xsimd::fma(du[2 * j], ker[dx], out[0]);
832+
out[1] = xsimd::fma(du[2 * j + 1], ker[dx], out[1]);
835833
}
836834
} else if (FINUFFT_UNLIKELY(i1 + ns >= N1)) { // wraps at right
837-
for (int dx = 0; dx < N1 - i1; ++dx) {
838-
out[0] += du[2 * j] * ker[dx];
839-
out[1] += du[2 * j + 1] * ker[dx];
840-
++j;
835+
for (int dx = 0; dx < N1 - i1; ++dx, ++j) {
836+
out[0] = xsimd::fma(du[2 * j], ker[dx], out[0]);
837+
out[1] = xsimd::fma(du[2 * j + 1], ker[dx], out[1]);
841838
}
842839
j -= N1;
843-
for (UBIGINT dx = N1 - i1; dx < ns; ++dx) {
844-
out[0] += du[2 * j] * ker[dx];
845-
out[1] += du[2 * j + 1] * ker[dx];
846-
++j;
840+
for (UBIGINT dx = N1 - i1; dx < ns; ++dx, ++j) {
841+
out[0] = xsimd::fma(du[2 * j], ker[dx], out[0]);
842+
out[1] = xsimd::fma(du[2 * j + 1], ker[dx], out[1]);
847843
}
848844
} else { // doesn't wrap
849845
using arch_t = typename simd_type::arch_type;

0 commit comments

Comments
 (0)