1212// for use in optimized ExecuTorch ops. Template specializations of BFloat16
1313// are excluded.
1414
15- #include < executorch/kernels/optimized /vec/vec.h>
15+ #include < ATen/cpu /vec/vec.h>
1616
1717#include < executorch/kernels/optimized/utils/math_utils.h>
1818#include < executorch/runtime/platform/compiler.h>
@@ -47,12 +47,12 @@ void AddMoments(
4747template <typename T>
4848ET_INLINE void AddMomentsVec (
4949 int64_t m0_add,
50- const executorch ::vec::Vectorized<T>& m1_add,
51- const executorch ::vec::Vectorized<T>& m2_add,
50+ const at ::vec::Vectorized<T>& m1_add,
51+ const at ::vec::Vectorized<T>& m2_add,
5252 int64_t & m0,
53- executorch ::vec::Vectorized<T>& m1,
54- executorch ::vec::Vectorized<T>& m2) {
55- using Vec = executorch ::vec::Vectorized<T>;
53+ at ::vec::Vectorized<T>& m1,
54+ at ::vec::Vectorized<T>& m2) {
55+ using Vec = at ::vec::Vectorized<T>;
5656 const int64_t n = m0 + m0_add;
5757 const T c =
5858 n == 0 ? static_cast <T>(0 ) : static_cast <T>(m0_add) / static_cast <T>(n);
@@ -67,11 +67,11 @@ template <typename T>
6767inline void UpdateMomentsVec (
6868 int64_t m0,
6969 const T* X_ptr,
70- const std::array<executorch ::vec::Vectorized<acc_t <T>>, kChunkSize >& c_vecs,
70+ const std::array<at ::vec::Vectorized<acc_t <T>>, kChunkSize >& c_vecs,
7171 int64_t & m0_stk0,
72- executorch ::vec::Vectorized<acc_t <T>>& m1_stk0,
73- executorch ::vec::Vectorized<acc_t <T>>& m2_stk0) {
74- using Vec = executorch ::vec::Vectorized<acc_t <T>>;
72+ at ::vec::Vectorized<acc_t <T>>& m1_stk0,
73+ at ::vec::Vectorized<acc_t <T>>& m2_stk0) {
74+ using Vec = at ::vec::Vectorized<acc_t <T>>;
7575 Vec m1_vec (0 );
7676 Vec m2_vec (0 );
7777 for (int64_t j = 0 ; j < m0; ++j) {
@@ -92,13 +92,13 @@ std::pair<acc_t<T>, acc_t<T>>
9292RowwiseMomentsImpl (const T* X, int64_t N, int64_t ddof = 0 ) {
9393 using T_ACC = acc_t <T>;
9494
95- constexpr int64_t kVecSize = executorch ::vec::Vectorized<T>::size ();
96- constexpr int64_t kAccVecSize = executorch ::vec::Vectorized<T_ACC>::size ();
95+ constexpr int64_t kVecSize = at ::vec::Vectorized<T>::size ();
96+ constexpr int64_t kAccVecSize = at ::vec::Vectorized<T_ACC>::size ();
9797 const int64_t n = N / kVecSize ;
9898 const int64_t m = executorch::utils::divup (n, kChunkSize );
9999 const int64_t depth = executorch::utils::CeilLog2 (m);
100100
101- using Vec = executorch ::vec::Vectorized<T_ACC>;
101+ using Vec = at ::vec::Vectorized<T_ACC>;
102102 const Vec kZeroVec (T_ACC (0 ));
103103 std::array<int64_t , kMaxDepth > m0_stk;
104104 std::array<Vec, kMaxDepth > m1_stk;
@@ -168,7 +168,7 @@ RowwiseMomentsImpl(const T* X, int64_t N, int64_t ddof = 0) {
168168template <typename T>
169169std::pair<acc_t <T>, acc_t <T>>
170170RowwiseMoments (const T* X, int64_t N, int64_t ddof = 0 ) {
171- using Vec = executorch ::vec::Vectorized<T>;
171+ using Vec = at ::vec::Vectorized<T>;
172172 constexpr int64_t kVecSize = Vec::size ();
173173 const int64_t n = N / kVecSize ;
174174 const int64_t m = executorch::utils::divup (n, kChunkSize );
0 commit comments