@@ -796,8 +796,8 @@ Two upsampfacs implemented. Params must match ref formula. Barnett 4/24/18 */
796796 static constexpr uint8_t if_odd_degree = ((nc + 1 ) % 2 );
797797 static constexpr uint8_t offset_start = tail ? w - tail : w - simd_size;
798798 static constexpr uint8_t end_idx = (w + (tail > 0 )) / 2 ;
799- const simd_type zv (z) ;
800- const simd_type z2v = zv * zv;
799+ const simd_type zv{z} ;
800+ const simd_type z2v{ zv * zv} ;
801801
802802 // some xsimd constant for shuffle or inverse
803803 static constexpr auto shuffle_batch = []() constexpr noexcept {
@@ -811,21 +811,24 @@ Two upsampfacs implemented. Params must match ref formula. Barnett 4/24/18 */
811811 }();
812812
813813 // process simd vecs
814- simd_type k_odd, k_even, k_prev, k_sym{0 };
815- for (uint8_t i = 0 , offset = offset_start; i < end_idx;
814+ struct EmptySimd {};
815+ // these exist only if tail > 0
816+ typename std::conditional<(tail > 0 ), simd_type, EmptySimd>::type k_prev, k_sym;
817+ if constexpr (tail) k_sym = {0 };
818+ for (uint8_t i{0 }, offset = offset_start; i < end_idx;
816819 i += simd_size, offset -= simd_size) {
817- k_odd = [i]() constexpr noexcept {
820+ auto k_odd = [i]() constexpr noexcept {
818821 if constexpr (if_odd_degree) {
819822 return simd_type::load_aligned (padded_coeffs[0 ].data () + i);
820823 } else {
821824 return simd_type{0 };
822825 }
823826 }();
824- k_even = simd_type::load_aligned (padded_coeffs[if_odd_degree].data () + i);
825- for (uint8_t j = 1 + if_odd_degree; j < nc; j += 2 ) {
827+ auto k_even = simd_type::load_aligned (padded_coeffs[if_odd_degree].data () + i);
828+ for (uint8_t j{ 1 + if_odd_degree} ; j < nc; j += 2 ) {
826829 const auto cji_odd = simd_type::load_aligned (padded_coeffs[j].data () + i);
827- k_odd = xsimd::fma (k_odd, z2v, cji_odd);
828830 const auto cji_even = simd_type::load_aligned (padded_coeffs[j + 1 ].data () + i);
831+ k_odd = xsimd::fma (k_odd, z2v, cji_odd);
829832 k_even = xsimd::fma (k_even, z2v, cji_even);
830833 }
831834 // left part
@@ -845,7 +848,6 @@ Two upsampfacs implemented. Params must match ref formula. Barnett 4/24/18 */
845848 }
846849 } else {
847850 const simd_type zv (z);
848-
849851 for (uint8_t i = 0 ; i < w; i += simd_size) {
850852 auto k = simd_type::load_aligned (padded_coeffs[0 ].data () + i);
851853 for (uint8_t j = 1 ; j < nc; ++j) {
@@ -855,7 +857,6 @@ Two upsampfacs implemented. Params must match ref formula. Barnett 4/24/18 */
855857 k.store_aligned (ker + i);
856858 }
857859 }
858-
859860 return ;
860861 }
861862 // insert the auto-generated code which expects z, w args, writes to ker...
0 commit comments