Skip to content

Commit e30a3fa

Browse files
authored
Revert "Horner's rule for polynomial evaluation with symmetry idea discussed …"
This reverts commit 4ea0096.
1 parent 4ea0096 commit e30a3fa

File tree

1 file changed

+8
-77
lines changed

1 file changed

+8
-77
lines changed

src/spreadinterp.cpp

Lines changed: 8 additions & 77 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,6 @@ namespace { // anonymous namespace for internal structs equivalent to declaring
2424
// static
2525
struct zip_low;
2626
struct zip_hi;
27-
template<unsigned cap> struct reverse_index;
28-
template<unsigned cap> struct shuffle_index;
2927
struct select_even;
3028
struct select_odd;
3129
// forward declaration to clean up the code and be able to use this everywhere in the file
@@ -779,80 +777,23 @@ Two upsampfacs implemented. Params must match ref formula. Barnett 4/24/18 */
779777
const FLT z = std::fma(FLT(2.0), x, FLT(w - 1)); // scale so local grid offset z in
780778
// [-1,1]
781779
if (opts.upsampfac == 2.0) { // floating point equality is fine here
782-
using arch_t = typename simd_type::arch_type;
783-
static constexpr auto alignment = arch_t::alignment();
780+
static constexpr auto alignment = simd_type::arch_type::alignment();
784781
static constexpr auto simd_size = simd_type::size;
785782
static constexpr auto padded_ns = (w + simd_size - 1) & ~(simd_size - 1);
786783
static constexpr auto nc = nc200<w>();
787784
static constexpr auto horner_coeffs = get_horner_coeffs_200<FLT, w>();
788-
static constexpr auto use_ker_sym = (simd_size < w);
789785

790786
alignas(alignment) static constexpr auto padded_coeffs =
791787
pad_2D_array_with_zeros<FLT, nc, w, padded_ns>(horner_coeffs);
792788

793-
// use kernel symmetry trick if w > simd_size
794-
if constexpr (use_ker_sym) {
795-
static constexpr uint8_t tail = w % simd_size;
796-
static constexpr uint8_t if_odd_degree = ((nc + 1) % 2);
797-
static constexpr uint8_t offset_start = tail ? w - tail : w - simd_size;
798-
static constexpr uint8_t end_idx = (w + (tail > 0)) / 2;
799-
const simd_type zv{z};
800-
const auto z2v = zv * zv;
801-
802-
// some xsimd constant for shuffle or inverse
803-
static constexpr auto shuffle_batch = []() constexpr noexcept {
804-
if constexpr (tail) {
805-
return xsimd::make_batch_constant<xsimd::as_unsigned_integer_t<FLT>, arch_t,
806-
shuffle_index<tail>>();
807-
} else {
808-
return xsimd::make_batch_constant<xsimd::as_unsigned_integer_t<FLT>, arch_t,
809-
reverse_index<simd_size>>();
810-
}
811-
}();
812-
813-
// process simd vecs
814-
simd_type k_prev, k_sym{0};
815-
for (uint8_t i{0}, offset = offset_start; i < end_idx;
816-
i += simd_size, offset -= simd_size) {
817-
auto k_odd = [i]() constexpr noexcept {
818-
if constexpr (if_odd_degree) {
819-
return simd_type::load_aligned(padded_coeffs[0].data() + i);
820-
} else {
821-
return simd_type{0};
822-
}
823-
}();
824-
auto k_even = simd_type::load_aligned(padded_coeffs[if_odd_degree].data() + i);
825-
for (uint8_t j{1 + if_odd_degree}; j < nc; j += 2) {
826-
const auto cji_odd = simd_type::load_aligned(padded_coeffs[j].data() + i);
827-
const auto cji_even = simd_type::load_aligned(padded_coeffs[j + 1].data() + i);
828-
k_odd = xsimd::fma(k_odd, z2v, cji_odd);
829-
k_even = xsimd::fma(k_even, z2v, cji_even);
830-
}
831-
// left part
832-
xsimd::fma(k_odd, zv, k_even).store_aligned(ker + i);
833-
// right part symmetric to the left part
834-
if (offset >= end_idx) {
835-
if constexpr (tail) {
836-
// to use aligned store, we need shuffle the previous k_sym and current k_sym
837-
k_prev = k_sym;
838-
k_sym = xsimd::fnma(k_odd, zv, k_even);
839-
xsimd::shuffle(k_sym, k_prev, shuffle_batch).store_aligned(ker + offset);
840-
} else {
841-
xsimd::swizzle(xsimd::fnma(k_odd, zv, k_even), shuffle_batch)
842-
.store_aligned(ker + offset);
843-
}
844-
}
845-
}
846-
} else {
847-
const simd_type zv(z);
848-
for (uint8_t i = 0; i < w; i += simd_size) {
849-
auto k = simd_type::load_aligned(padded_coeffs[0].data() + i);
850-
for (uint8_t j = 1; j < nc; ++j) {
851-
const auto cji = simd_type::load_aligned(padded_coeffs[j].data() + i);
852-
k = xsimd::fma(k, zv, cji);
853-
}
854-
k.store_aligned(ker + i);
789+
const simd_type zv(z);
790+
for (uint8_t i = 0; i < w; i += simd_size) {
791+
auto k = simd_type::load_aligned(padded_coeffs[0].data() + i);
792+
for (uint8_t j = 1; j < nc; ++j) {
793+
const auto cji = simd_type::load_aligned(padded_coeffs[j].data() + i);
794+
k = xsimd::fma(k, zv, cji);
855795
}
796+
k.store_aligned(ker + i);
856797
}
857798
return;
858799
}
@@ -2227,16 +2168,6 @@ struct zip_hi {
22272168
return (size + index) / 2;
22282169
}
22292170
};
2230-
template<unsigned cap> struct reverse_index {
2231-
static constexpr unsigned get(unsigned index, const unsigned size) {
2232-
return index < cap ? (cap - 1 - index) : index;
2233-
}
2234-
};
2235-
template<unsigned cap> struct shuffle_index {
2236-
static constexpr unsigned get(unsigned index, const unsigned size) {
2237-
return index < cap ? (cap - 1 - index) : size + size + cap - 1 - index;
2238-
}
2239-
};
22402171

22412172
struct select_even {
22422173
static constexpr unsigned get(unsigned index, unsigned /*size*/) { return index * 2; }

0 commit comments

Comments
 (0)