@@ -62,47 +62,53 @@ op_dot::direct_dot_generic(const uword n_elem, const eT* const A, const eT* cons
6262
6363
6464
65- // ! generic version for non-complex values with forced optimisation under GCC
66- template <typename eT>
67- #if defined(ARMA_REAL_GCC) && !defined(ARMA_DONT_FORCE_OPTIMISE_DOT)
68- __attribute__ ((optimize(" O3" , " fast-math" )))
69- #endif
70- inline
71- typename arma_not_cx<eT>::result
72- op_dot::direct_dot_generic_force_optimise (const uword n_elem, const eT* const A, const eT* const B)
73- {
74- arma_debug_sigprint ();
75-
76- #if defined(__FAST_MATH__)
77- {
78- eT val = eT (0 );
79-
80- for (uword i=0 ; i < n_elem; ++i) { val += (A[i] * B[i]); }
81-
82- return val;
83- }
84- #else
85- {
86- eT val1 = eT (0 );
87- eT val2 = eT (0 );
88-
89- uword i, j;
90-
91- for (i=0 , j=1 ; j < n_elem; i+=2 , j+=2 )
92- {
93- val1 += (A[i] * B[i]);
94- val2 += (A[j] * B[j]);
95- }
96-
97- if (i < n_elem)
98- {
99- val1 += (A[i] * B[i]);
100- }
101-
102- return (val1 + val2);
103- }
104- #endif
105- }
65+ // //! generic version for non-complex values with forced SIMD optimisation under OpenMP
66+ // template<typename eT>
67+ // inline
68+ // typename arma_not_cx<eT>::result
69+ // op_dot::direct_dot_generic_force_optimise(const uword n_elem, const eT* const A, const eT* const B)
70+ // {
71+ // arma_debug_sigprint();
72+ //
73+ // #if defined(ARMA_USE_OPENMP)
74+ // {
75+ // eT val = eT(0);
76+ //
77+ // #pragma omp simd
78+ // for(uword i=0; i < n_elem; ++i) { val += (A[i] * B[i]); }
79+ //
80+ // return val;
81+ // }
82+ // #elif defined(__FAST_MATH__)
83+ // {
84+ // eT val = eT(0);
85+ //
86+ // for(uword i=0; i < n_elem; ++i) { val += (A[i] * B[i]); }
87+ //
88+ // return val;
89+ // }
90+ // #else
91+ // {
92+ // eT val1 = eT(0);
93+ // eT val2 = eT(0);
94+ //
95+ // uword i, j;
96+ //
97+ // for(i=0, j=1; j < n_elem; i+=2, j+=2)
98+ // {
99+ // val1 += (A[i] * B[i]);
100+ // val2 += (A[j] * B[j]);
101+ // }
102+ //
103+ // if(i < n_elem)
104+ // {
105+ // val1 += (A[i] * B[i]);
106+ // }
107+ //
108+ // return (val1 + val2);
109+ // }
110+ // #endif
111+ // }
106112
107113
108114
@@ -209,7 +215,9 @@ op_dot::direct_dot(const uword n_elem, const eT* const A, const eT* const B)
209215 {
210216 arma_debug_sigprint ();
211217
212- return (n_elem <= 32u ) ? op_dot::direct_dot_generic (n_elem, A, B) : op_dot::direct_dot_generic_force_optimise (n_elem, A, B);
218+ // return (n_elem <= 32u) ? op_dot::direct_dot_generic(n_elem, A, B) : op_dot::direct_dot_generic_force_optimise(n_elem, A, B);
219+
220+ return op_dot::direct_dot_generic (n_elem, A, B);
213221 }
214222
215223
0 commit comments