@@ -818,32 +818,28 @@ void interp_line(FLT *FINUFFT_RESTRICT target, const FLT *du, const FLT *ker,
818818*/
819819{
820820
821- FLT out[] = { 0.0 , 0. 0 };
822- BIGINT j = i1;
821+ std::array< FLT, 2 > out{ 0 };
822+ BIGINT j = i1;
823823 if (FINUFFT_UNLIKELY (i1 < 0 )) { // wraps at left
824824 j += N1;
825- for (UBIGINT dx = 0 ; dx < -i1; ++dx) {
826- out[0 ] += du[2 * j] * ker[dx];
827- out[1 ] += du[2 * j + 1 ] * ker[dx];
828- ++j;
825+ for (UBIGINT dx = 0 ; dx < -i1; ++dx, ++j) {
826+ out[0 ] = xsimd::fma (du[2 * j], ker[dx], out[0 ]);
827+ out[1 ] = xsimd::fma (du[2 * j + 1 ], ker[dx], out[1 ]);
829828 }
830829 j -= N1;
831- for (UBIGINT dx = -i1; dx < ns; ++dx) {
832- out[0 ] += du[2 * j] * ker[dx];
833- out[1 ] += du[2 * j + 1 ] * ker[dx];
834- ++j;
830+ for (UBIGINT dx = -i1; dx < ns; ++dx, ++j) {
831+ out[0 ] = xsimd::fma (du[2 * j], ker[dx], out[0 ]);
832+ out[1 ] = xsimd::fma (du[2 * j + 1 ], ker[dx], out[1 ]);
835833 }
836834 } else if (FINUFFT_UNLIKELY (i1 + ns >= N1)) { // wraps at right
837- for (int dx = 0 ; dx < N1 - i1; ++dx) {
838- out[0 ] += du[2 * j] * ker[dx];
839- out[1 ] += du[2 * j + 1 ] * ker[dx];
840- ++j;
835+ for (int dx = 0 ; dx < N1 - i1; ++dx, ++j) {
836+ out[0 ] = xsimd::fma (du[2 * j], ker[dx], out[0 ]);
837+ out[1 ] = xsimd::fma (du[2 * j + 1 ], ker[dx], out[1 ]);
841838 }
842839 j -= N1;
843- for (UBIGINT dx = N1 - i1; dx < ns; ++dx) {
844- out[0 ] += du[2 * j] * ker[dx];
845- out[1 ] += du[2 * j + 1 ] * ker[dx];
846- ++j;
840+ for (UBIGINT dx = N1 - i1; dx < ns; ++dx, ++j) {
841+ out[0 ] = xsimd::fma (du[2 * j], ker[dx], out[0 ]);
842+ out[1 ] = xsimd::fma (du[2 * j + 1 ], ker[dx], out[1 ]);
847843 }
848844 } else { // doesn't wrap
849845 using arch_t = typename simd_type::arch_type;
0 commit comments