@@ -70,6 +70,29 @@ LR::KernelXC::KernelXC(const ModulePW::PW_Basis& rho_basis,
7070#endif
7171}
7272
/// Shared element-wise kernel: writes dst[i] = src1[i] + src2[i] for every i in [0, count).
///
/// The element count is a std::size_t so that container sizes can be forwarded
/// without passing through a narrower signed type.
/// Each iteration reads and writes index i only, so dst may be the same buffer
/// as src1 or src2 (this is how add_assign_op uses it).
/// When _OPENMP is defined the loop is distributed with a static schedule in
/// chunks of 4096 iterations.
///
/// @param src1  first input buffer, at least count elements
/// @param src2  second input buffer, at least count elements
/// @param dst   output buffer, at least count elements
/// @param count number of elements to process
template <typename T>
inline void add_op_n(const T* const src1, const T* const src2, T* const dst, const std::size_t count)
{
#ifdef _OPENMP
#pragma omp parallel for schedule(static, 4096)
#endif
    for (std::size_t i = 0; i < count; ++i)
    {
        dst[i] = src1[i] + src2[i];
    }
}

/// Element-wise sum of two raw buffers: dst[i] = src1[i] + src2[i] for i in [0, size).
///
/// @param src1 first input buffer, at least size elements
/// @param src2 second input buffer, at least size elements
/// @param dst  output buffer, at least size elements
/// @param size number of elements; a zero or negative value leaves dst untouched
template <typename T>
inline void add_op(const T* const src1, const T* const src2, T* const dst, const int size)
{
    // Keep the historical contract of the int-counted loop: nothing happens for size <= 0.
    if (size <= 0)
    {
        return;
    }
    add_op_n(src1, src2, dst, static_cast<std::size_t>(size));
}

/// Element-wise sum of two vectors over the first src1.size() elements.
///
/// Elements of dst beyond src1.size() are left unchanged.
/// The length is forwarded as std::size_t; going through the int-counted
/// overload would truncate (or turn negative) for very large vectors.
///
/// @param src1 first input; its size determines how many elements are processed
/// @param src2 second input, must hold at least src1.size() elements
/// @param dst  output, must hold at least src1.size() elements
template <typename T>
inline void add_op(const std::vector<T>& src1, const std::vector<T>& src2, std::vector<T>& dst)
{
    assert(dst.size() >= src1.size() && src2.size() >= src1.size());
    add_op_n(src1.data(), src2.data(), dst.data(), src1.size());
}

/// In-place accumulation: dst[i] += src[i] for the first src.size() elements.
///
/// @param src values to add
/// @param dst accumulator, must hold at least src.size() elements
template <typename T>
inline void add_assign_op(const std::vector<T>& src, std::vector<T>& dst)
{
    add_op(src, dst, dst);
}
95+
7396#ifdef USE_LIBXC
7497void LR::KernelXC::f_xc_libxc (const int & nspin, const double & omega, const double & tpiba, const double * const * const rho_gs, const double * const rho_core)
7598{
@@ -192,20 +215,12 @@ void LR::KernelXC::f_xc_libxc(const int& nspin, const double& omega, const doubl
192215 break ;
193216 }
194217 // add onto the total components
195- auto omp_add_vector = [](const std::vector<double >& src, std::vector<double >& dst)
196- {
197- assert (src.size () == dst.size ());
198- #ifdef _OPENMP
199- #pragma omp parallel for schedule(static, 4096)
200- #endif
201- for (size_t i = 0 ; i < src.size (); ++i) { dst[i] += src[i]; }
202- };
203218 // auto start = std::chrono::high_resolution_clock::now();
204- omp_add_vector (vrho_tmp, this ->vrho_ );
205- omp_add_vector (v2rho2_tmp, this ->v2rho2_ );
206- omp_add_vector (vsigma_tmp, this ->vsigma_ );
207- omp_add_vector (v2rhosigma_tmp, this ->v2rhosigma_ );
208- omp_add_vector (v2sigma2_tmp, this ->v2sigma2_ );
219+ add_assign_op (vrho_tmp, this ->vrho_ );
220+ add_assign_op (v2rho2_tmp, this ->v2rho2_ );
221+ add_assign_op (vsigma_tmp, this ->vsigma_ );
222+ add_assign_op (v2rhosigma_tmp, this ->v2rhosigma_ );
223+ add_assign_op (v2sigma2_tmp, this ->v2sigma2_ );
209224 // auto end = std::chrono::high_resolution_clock::now();
210225 // auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(end - start);
211226 // std::cout << "Time elapsed adding XC components: " << duration.count() << " ms\n";
@@ -229,14 +244,14 @@ void LR::KernelXC::f_xc_libxc(const int& nspin, const double& omega, const doubl
229244 // 1. $2f^{\rho\sigma}*\nabla\rho$
230245 this ->v2rhosigma_2drho_ .resize (nrxx);
231246#ifdef _OPENMP
232- #pragma omp parallel for
247+ #pragma omp parallel for schedule(static, 4096)
233248#endif
234249 for (size_t i = 0 ; i < nrxx; ++i) { this ->v2rhosigma_2drho_ [i] = gradrho[0 ][i] * v2rs[i] * 2 .; }
235250
236251 // 2. $4f^{\sigma\sigma}*\nabla\rho$
237252 this ->v2sigma2_4drho_ .resize (nrxx);
238253#ifdef _OPENMP
239- #pragma omp parallel for
254+ #pragma omp parallel for schedule(static, 4096)
240255#endif
241256 for (size_t i = 0 ; i < nrxx; ++i) { this ->v2sigma2_4drho_ [i] = gradrho[0 ][i] * v2s2[i] * 4 .; }
242257 }
0 commit comments