12
12
// for use in optimized ExecuTorch ops. Template specializations of BFloat16
13
13
// are excluded.
14
14
15
- #include < executorch/kernels/optimized /vec/vec.h>
15
+ #include < ATen/cpu /vec/vec.h>
16
16
17
17
#include < executorch/kernels/optimized/utils/math_utils.h>
18
18
#include < executorch/runtime/platform/compiler.h>
@@ -47,12 +47,12 @@ void AddMoments(
47
47
template <typename T>
48
48
ET_INLINE void AddMomentsVec (
49
49
int64_t m0_add,
50
- const executorch ::vec::Vectorized<T>& m1_add,
51
- const executorch ::vec::Vectorized<T>& m2_add,
50
+ const at ::vec::Vectorized<T>& m1_add,
51
+ const at ::vec::Vectorized<T>& m2_add,
52
52
int64_t & m0,
53
- executorch ::vec::Vectorized<T>& m1,
54
- executorch ::vec::Vectorized<T>& m2) {
55
- using Vec = executorch ::vec::Vectorized<T>;
53
+ at ::vec::Vectorized<T>& m1,
54
+ at ::vec::Vectorized<T>& m2) {
55
+ using Vec = at ::vec::Vectorized<T>;
56
56
const int64_t n = m0 + m0_add;
57
57
const T c =
58
58
n == 0 ? static_cast <T>(0 ) : static_cast <T>(m0_add) / static_cast <T>(n);
@@ -67,11 +67,11 @@ template <typename T>
67
67
inline void UpdateMomentsVec (
68
68
int64_t m0,
69
69
const T* X_ptr,
70
- const std::array<executorch ::vec::Vectorized<acc_t <T>>, kChunkSize >& c_vecs,
70
+ const std::array<at ::vec::Vectorized<acc_t <T>>, kChunkSize >& c_vecs,
71
71
int64_t & m0_stk0,
72
- executorch ::vec::Vectorized<acc_t <T>>& m1_stk0,
73
- executorch ::vec::Vectorized<acc_t <T>>& m2_stk0) {
74
- using Vec = executorch ::vec::Vectorized<acc_t <T>>;
72
+ at ::vec::Vectorized<acc_t <T>>& m1_stk0,
73
+ at ::vec::Vectorized<acc_t <T>>& m2_stk0) {
74
+ using Vec = at ::vec::Vectorized<acc_t <T>>;
75
75
Vec m1_vec (0 );
76
76
Vec m2_vec (0 );
77
77
for (int64_t j = 0 ; j < m0; ++j) {
@@ -92,13 +92,13 @@ std::pair<acc_t<T>, acc_t<T>>
92
92
RowwiseMomentsImpl (const T* X, int64_t N, int64_t ddof = 0 ) {
93
93
using T_ACC = acc_t <T>;
94
94
95
- constexpr int64_t kVecSize = executorch ::vec::Vectorized<T>::size ();
96
- constexpr int64_t kAccVecSize = executorch ::vec::Vectorized<T_ACC>::size ();
95
+ constexpr int64_t kVecSize = at ::vec::Vectorized<T>::size ();
96
+ constexpr int64_t kAccVecSize = at ::vec::Vectorized<T_ACC>::size ();
97
97
const int64_t n = N / kVecSize ;
98
98
const int64_t m = executorch::utils::divup (n, kChunkSize );
99
99
const int64_t depth = executorch::utils::CeilLog2 (m);
100
100
101
- using Vec = executorch ::vec::Vectorized<T_ACC>;
101
+ using Vec = at ::vec::Vectorized<T_ACC>;
102
102
const Vec kZeroVec (T_ACC (0 ));
103
103
std::array<int64_t , kMaxDepth > m0_stk;
104
104
std::array<Vec, kMaxDepth > m1_stk;
@@ -168,7 +168,7 @@ RowwiseMomentsImpl(const T* X, int64_t N, int64_t ddof = 0) {
168
168
template <typename T>
169
169
std::pair<acc_t <T>, acc_t <T>>
170
170
RowwiseMoments (const T* X, int64_t N, int64_t ddof = 0 ) {
171
- using Vec = executorch ::vec::Vectorized<T>;
171
+ using Vec = at ::vec::Vectorized<T>;
172
172
constexpr int64_t kVecSize = Vec::size ();
173
173
const int64_t n = N / kVecSize ;
174
174
const int64_t m = executorch::utils::divup (n, kChunkSize );
0 commit comments