@@ -798,7 +798,7 @@ Two upsampfacs implemented. Params must match ref formula. Barnett 4/24/18 */
798798 const simd_type zv (z);
799799 const simd_type z2v = zv * zv;
800800
801- // no xsimd::select needed if tail is zero
801+ // no xsimd::shuffle needed if tail is zero
802802 if constexpr (tail) {
803803 // some xsimd constant for shuffle
804804 static constexpr auto shuffle_batch =
@@ -819,8 +819,11 @@ Two upsampfacs implemented. Params must match ref formula. Barnett 4/24/18 */
819819 simd_type::load_aligned (padded_coeffs[j + 1 ].data () + i);
820820 k_even = xsimd::fma (k_even, z2v, cji_even);
821821 }
822+ // left part
822823 xsimd::fma (k_odd, zv, k_even).store_aligned (ker + i);
824+ // right part symmetric to the left part
823825 if (offset >= (w + 1 ) / 2 ) {
826+ // to use aligned store, we need to shuffle the previous k_sym and current k_sym
824827 k_prev = k_sym;
825828 k_sym = xsimd::fma (k_odd, -zv, k_even);
826829 xsimd::shuffle (k_sym, k_prev, shuffle_batch).store_aligned (ker + offset);
@@ -846,8 +849,11 @@ Two upsampfacs implemented. Params must match ref formula. Barnett 4/24/18 */
846849 simd_type::load_aligned (padded_coeffs[j + 1 ].data () + i);
847850 k_even = xsimd::fma (k_even, z2v, cji_even);
848851 }
852+ // left part
849853 xsimd::fma (k_odd, zv, k_even).store_aligned (ker + i);
854+ // right part symmetric to the left part
850855 if (offset >= w / 2 ) {
856+ // reverse the order for symmetric part
851857 xsimd::swizzle (xsimd::fma (k_odd, -zv, k_even), reverse_batch)
852858 .store_aligned (ker + offset);
853859 }
0 commit comments