|
7 | 7 | /* Date: 25. Mar. 16 */ |
8 | 8 | /**************************************************************************/ |
9 | 9 |
|
| 10 | +#include <array> |
| 11 | +#include <tuple> |
10 | 12 | #include "ngcore_api.hpp" |
11 | | - |
12 | 13 | #include "simd_generic.hpp" |
13 | 14 |
|
| 15 | + |
14 | 16 | #ifndef __CUDA_ARCH__ |
15 | 17 |
|
16 | 18 | #ifdef NETGEN_ARCH_AMD64 |
@@ -87,6 +89,69 @@ namespace ngcore |
87 | 89 | // return SIMD<double,4>(HSum(s1), HSum(s2), HSum(s3), HSum(s4)); |
88 | 90 | return SIMD<double,4>(HSum(s1, s2), HSum(s3,s4)); |
89 | 91 | } |
| 92 | + |
| 93 | + |
| 94 | + |
// Forward declaration: MakeSimd below refers to MakeSimdCl before any of
// its definitions have been seen.
template <typename T, size_t S> class MakeSimdCl;

// Converts an array of S values of type T by handing it to MakeSimdCl<T,S>
// and returning whatever that class's Get() produces.
template <typename T, size_t S>
auto MakeSimd (std::array<T,S> aa)
{
  MakeSimdCl<T,S> maker(aa);
  return maker.Get();
}
| 99 | + |
| 100 | + |
| 101 | + template <typename T, size_t S> |
| 102 | + class MakeSimdCl |
| 103 | + { |
| 104 | + std::array<T,S> a; |
| 105 | + public: |
| 106 | + MakeSimdCl (std::array<T,S> aa) : a(aa) { ; } |
| 107 | + auto Get() const |
| 108 | + { |
| 109 | + SIMD<T,S> sa( [this] (auto i) { return (this->a)[i]; }); |
| 110 | + return sa; |
| 111 | + } |
| 112 | + }; |
| 113 | + |
| 114 | + |
| 115 | + |
| 116 | + |
| 117 | + template <typename Tfirst, size_t S, typename ...Trest> |
| 118 | + class MakeSimdCl<std::tuple<Tfirst,Trest...>,S> |
| 119 | + { |
| 120 | + std::array<std::tuple<Tfirst,Trest...>,S> a; |
| 121 | + public: |
| 122 | + MakeSimdCl (std::array<std::tuple<Tfirst,Trest...>,S> aa) : a(aa) { ; } |
| 123 | + auto Get() const |
| 124 | + { |
| 125 | + std::array<Tfirst,S> a0; |
| 126 | + for (int i = 0; i < S; i++) |
| 127 | + a0[i] = std::get<0> (a[i]); |
| 128 | + |
| 129 | + if constexpr (std::tuple_size<std::tuple<Tfirst,Trest...>>::value == 1) |
| 130 | + { |
| 131 | + return std::tuple(MakeSimd(a0)); |
| 132 | + } |
| 133 | + else |
| 134 | + { |
| 135 | + std::array<std::tuple<Trest...>,S> arest; |
| 136 | + for (int i = 0; i < S; i++) |
| 137 | + arest[i] = skip_first(a[i]); |
| 138 | + |
| 139 | + return std::tuple_cat ( std::tuple (MakeSimd(a0)), MakeSimd(arest) ); |
| 140 | + } |
| 141 | + } |
| 142 | + |
| 143 | + template <typename... Ts> |
| 144 | + static auto skip_first(const std::tuple<Ts...>& t) { |
| 145 | + return std::apply([](auto first, auto... rest) { |
| 146 | + return std::make_tuple(rest...); |
| 147 | + }, t); |
| 148 | + } |
| 149 | + }; |
| 150 | + |
| 151 | + |
| 152 | + |
| 153 | + |
| 154 | + |
90 | 155 | } |
91 | 156 |
|
92 | 157 |
|
|
0 commit comments