diff --git a/source/module_lr/potentials/xc_kernel.cpp b/source/module_lr/potentials/xc_kernel.cpp
index f5fa4f8fc8..7f7275ac54 100644
--- a/source/module_lr/potentials/xc_kernel.cpp
+++ b/source/module_lr/potentials/xc_kernel.cpp
@@ -5,11 +5,15 @@
 #include "module_lr/utils/lr_util.h"
 #include "module_lr/utils/lr_util_xc.hpp"
 #include
+#include
 #include "module_io/cube_io.h"
 #ifdef USE_LIBXC
 #include <xc.h>
 #include "module_hamilt_general/module_xc/xc_functional_libxc.h"
 #endif
+#ifdef _OPENMP
+#include <omp.h>
+#endif
 
 LR::KernelXC::KernelXC(const ModulePW::PW_Basis& rho_basis,
     const UnitCell& ucell,
@@ -66,6 +70,29 @@ LR::KernelXC::KernelXC(const ModulePW::PW_Basis& rho_basis,
 #endif
 }
 
+template <typename T>
+inline void add_op(const T* const src1, const T* const src2, T* const dst, const int size)
+{
+#ifdef _OPENMP
+#pragma omp parallel for schedule(static, 4096)
+#endif
+    for (int i = 0;i < size;++i)
+    {
+        dst[i] = src1[i] + src2[i];
+    }
+}
+template <typename T>
+inline void add_op(const std::vector<T>& src1, const std::vector<T>& src2, std::vector<T>& dst)
+{
+    assert(dst.size() >= src1.size() && src2.size() >= src1.size());
+    add_op(src1.data(), src2.data(), dst.data(), src1.size());
+}
+template <typename T>
+inline void add_assign_op(const std::vector<T>& src, std::vector<T>& dst)
+{
+    add_op(src, dst, dst);
+}
+
 #ifdef USE_LIBXC
 void LR::KernelXC::f_xc_libxc(const int& nspin, const double& omega, const double& tpiba, const double* const* const rho_gs, const double* const rho_core)
 {
@@ -115,7 +142,8 @@ void LR::KernelXC::f_xc_libxc(const int& nspin, const double& omega, const doubl
 #ifdef _OPENMP
 #pragma omp parallel for schedule(static, 1024)
 #endif
-            for (int ir = 0; ir < nrxx; ++ir) rhor[ir] = rho[ir * nspin + is];
+            for (int ir = 0; ir < nrxx; ++ir) { rhor[ir] = rho[ir * nspin + is];
+}
             gradrho[is].resize(nrxx);
             LR_Util::grad(rhor.data(), gradrho[is].data(), rho_basis_, tpiba);
         }
@@ -126,8 +154,9 @@ void LR::KernelXC::f_xc_libxc(const int& nspin, const double& omega, const doubl
 #ifdef _OPENMP
 #pragma omp parallel for schedule(static, 1024)
 #endif
-            for (int ir = 0; ir < nrxx; ++ir)
+            for (int ir = 0; ir < nrxx; ++ir) {
                 sigma[ir] = gradrho[0][ir] * gradrho[0][ir];
+}
         }
         else
         {
@@ -144,13 +173,13 @@ void LR::KernelXC::f_xc_libxc(const int& nspin, const double& omega, const doubl
     }
     // -----------------------------------------------------------------------------------
     //==================== XC Kernels (f_xc)=============================
-    this->vrho_.resize(nspin * nrxx);
-    this->v2rho2_.resize(((1 == nspin) ? 1 : 3) * nrxx);//(nrxx* ((1 == nspin) ? 1 : 3)): 00, 01, 11
+    this->vrho_.resize(nspin * nrxx, 0.);
+    this->v2rho2_.resize(((1 == nspin) ? 1 : 3) * nrxx, 0.);//(nrxx* ((1 == nspin) ? 1 : 3)): 00, 01, 11
     if (is_gga)
     {
-        this->vsigma_.resize(((1 == nspin) ? 1 : 3) * nrxx);//(nrxx*): 2 for rho * 3 for sigma: 00, 01, 02, 10, 11, 12
-        this->v2rhosigma_.resize(((1 == nspin) ? 1 : 6) * nrxx); //(nrxx*): 2 for rho * 3 for sigma: 00, 01, 02, 10, 11, 12
-        this->v2sigma2_.resize(((1 == nspin) ? 1 : 6) * nrxx); //(nrxx* ((1 == nspin) ? 1 : 6)): 00, 01, 02, 11, 12, 22
+        this->vsigma_.resize(((1 == nspin) ? 1 : 3) * nrxx, 0.);//(nrxx*): 2 for rho * 3 for sigma: 00, 01, 02, 10, 11, 12
+        this->v2rhosigma_.resize(((1 == nspin) ? 1 : 6) * nrxx, 0.); //(nrxx*): 2 for rho * 3 for sigma: 00, 01, 02, 10, 11, 12
+        this->v2sigma2_.resize(((1 == nspin) ? 1 : 6) * nrxx, 0.); //(nrxx* ((1 == nspin) ? 1 : 6)): 00, 01, 02, 11, 12, 22
     }
     //MetaGGA ...
 
@@ -163,105 +192,85 @@ void LR::KernelXC::f_xc_libxc(const int& nspin, const double& omega, const doubl
         //cut off grho if not LDA (future subfunc)
 
+        // Libxc interfaces overwrite (instead of add onto) the output arrays, so we need temporary copies
+        std::vector<double> vrho_tmp(this->vrho_.size());
+        std::vector<double> v2rho2_tmp(this->v2rho2_.size());
+        std::vector<double> vsigma_tmp(this->vsigma_.size());
+        std::vector<double> v2rhosigma_tmp(this->v2rhosigma_.size());
+        std::vector<double> v2sigma2_tmp(this->v2sigma2_.size());
         switch (func.info->family)
         {
         case XC_FAMILY_LDA:
-            xc_lda_vxc(&func, nrxx, rho.data(), vrho_.data());
-            xc_lda_fxc(&func, nrxx, rho.data(), v2rho2_.data());
+            xc_lda_vxc(&func, nrxx, rho.data(), vrho_tmp.data());
+            xc_lda_fxc(&func, nrxx, rho.data(), v2rho2_tmp.data());
             break;
         case XC_FAMILY_GGA:
         case XC_FAMILY_HYB_GGA:
-            xc_gga_vxc(&func, nrxx, rho.data(), sigma.data(), vrho_.data(), vsigma_.data());
-            xc_gga_fxc(&func, nrxx, rho.data(), sigma.data(), v2rho2_.data(), v2rhosigma_.data(), v2sigma2_.data());
+            xc_gga_vxc(&func, nrxx, rho.data(), sigma.data(), vrho_tmp.data(), vsigma_tmp.data());
+            xc_gga_fxc(&func, nrxx, rho.data(), sigma.data(), v2rho2_tmp.data(), v2rhosigma_tmp.data(), v2sigma2_tmp.data());
             break;
         default:
             throw std::domain_error("func.info->family =" + std::to_string(func.info->family)
                 + " unfinished in " + std::string(__FILE__) + " line " + std::to_string(__LINE__));
             break;
         }
 
-        // some formulas for GGA
-        if (func.info->family == XC_FAMILY_GGA || func.info->family == XC_FAMILY_HYB_GGA)
-        {
-            const std::vector<double>& v2r2 = this->v2rho2_;
-            const std::vector<double>& v2rs = this->v2rhosigma_;
-            const std::vector<double>& v2s2 = this->v2sigma2_;
-            const std::vector<double>& vs = this->vsigma_;
-            const double tpiba2 = tpiba * tpiba;
+        // add onto the total components
+        // auto start = std::chrono::high_resolution_clock::now();
+        add_assign_op(vrho_tmp, this->vrho_);
+        add_assign_op(v2rho2_tmp, this->v2rho2_);
+        add_assign_op(vsigma_tmp, this->vsigma_);
+        add_assign_op(v2rhosigma_tmp, this->v2rhosigma_);
+        add_assign_op(v2sigma2_tmp, this->v2sigma2_);
+        // auto end = std::chrono::high_resolution_clock::now();
+        // auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(end - start);
+        // std::cout << "Time elapsed adding XC components: " << duration.count() << " ms\n";
+    } // end for( xc_func_type &func : funcs )
+
+    XC_Functional_Libxc::finish_func(funcs);
 
-            if (1 == nspin)
+    // some postprocess if there're GGA funcs in the list
+    if (is_gga)
+    {
+        const std::vector<double>& v2r2 = this->v2rho2_;
+        const std::vector<double>& v2rs = this->v2rhosigma_;
+        const std::vector<double>& v2s2 = this->v2sigma2_;
+        const std::vector<double>& vs = this->vsigma_;
+        const double tpiba2 = tpiba * tpiba;
+
+        if (nspin == 1)
+        {
+            // 0. drho
+            this->drho_gs_ = gradrho[0];
+            // 1. $2f^{\rho\sigma}*\nabla\rho$
+            this->v2rhosigma_2drho_.resize(nrxx);
+#ifdef _OPENMP
+#pragma omp parallel for schedule(static, 4096)
+#endif
+            for (size_t i = 0; i < nrxx; ++i)
             {
-                using V3 = ModuleBase::Vector3<double>;
-                // 0. drho
-                this->drho_gs_ = gradrho[0];
-                // 1. $2f^{\rho\sigma}*\nabla\rho$
-                this->v2rhosigma_2drho_.resize(nrxx);
-                std::transform(gradrho[0].begin(), gradrho[0].end(), v2rs.begin(), this->v2rhosigma_2drho_.begin(),
-                    [](const V3& a, const V3& b) {return a * b * 2.; });
-                // 2. $4f^{\sigma\sigma}*\nabla\rho$
-                this->v2sigma2_4drho_.resize(nrxx);
-                std::transform(sigma.begin(), sigma.end(), v2s2.begin(), v2sigma2_4drho_.begin(),
-                    [](const V3& a, const V3& b) {return a * b * 4.; });
+                this->v2rhosigma_2drho_[i] = gradrho[0][i] * v2rs[i] * 2.;
             }
-            // else if (2 == nspin) // wrong, to be fixed
-            // {
-            //     // 1. $\nabla\cdot(f^{\rho\sigma}*\nabla\rho)$
-            //     std::vector<double> div_v2rhosigma_gdrho_r(3 * nrxx);
-            //     std::vector<ModuleBase::Vector3<double>> v2rhosigma_gdrho_r(3 * nrxx);
-            //     for (int ir = 0; ir < nrxx; ++ir)
-            //     {
-            //         v2rhosigma_gdrho_r[ir] = gradrho[0][ir] * v2rs.at(ir * 6) * 4.0
-            //             + gradrho[1][ir] * v2rs.at(ir * 6 + 1) * 2.0; //up-up
-            //         v2rhosigma_gdrho_r[nrxx + ir] = gradrho[0][ir] * (v2rs.at(ir * 6 + 3) * 2.0 + v2rs.at(ir * 6 + 1))
-            //             + gradrho[1][ir] * (v2rs.at(ir * 6 + 2) * 2.0 + v2rs.at(ir * 6 + 4)); //up-down
-            //         v2rhosigma_gdrho_r[2 * nrxx + ir] = gradrho[1][ir] * v2rs.at(ir * 6 + 5) * 4.0
-            //             + gradrho[0][ir] * v2rs.at(ir * 6 + 4) * 2.0; //down-down
-            //     }
-            //     for (int isig = 0;isig < 3;++isig)
-            //         XC_Functional::grad_dot(v2rhosigma_gdrho_r.data() + isig * nrxx, div_v2rhosigma_gdrho_r.data() + isig * nrxx, chg_gs.rhopw, tpiba);
-            //     // 2. $\nabla^2(f^{\sigma\sigma}*\sigma)$
-            //     std::vector<double> v2sigma2_sigma_r(3 * nrxx);
-            //     for (int ir = 0; ir < nrxx; ++ir)
-            //     {
-            //         v2sigma2_sigma_r[ir] = v2s2.at(ir * 6) * sigma[ir * 3] * 4.0
-            //             + v2s2.at(ir * 6 + 1) * sigma[ir * 3 + 1] * 4.0
-            //             + v2s2.at(ir * 6 + 3) * sigma[ir * 3 + 2]; //up-up
-            //         v2sigma2_sigma_r[nrxx + ir] = v2s2.at(ir * 6 + 1) * sigma[ir * 3] * 2.0
-            //             + v2s2.at(ir * 6 + 4) * sigma[ir * 3 + 2] * 2.0
-            //             + (v2s2.at(ir * 6 + 2) * 4.0 + v2s2.at(ir * 6 + 3)) * sigma[ir * 3 + 1]; //up-down
-            //         v2sigma2_sigma_r[2 * nrxx + ir] = v2s2.at(ir * 6 + 5) * sigma[ir * 3 + 2] * 4.0
-            //             + v2s2.at(ir * 6 + 4) * sigma[ir * 3 + 1] * 4.0
-            //             + v2s2.at(ir * 6 + 3) * sigma[ir * 3]; //down-down
-            //     }
-            //     for (int isig = 0;isig < 3;++isig)
-            //         LR_Util::lapl(v2sigma2_sigma_r.data() + isig * nrxx, v2sigma2_sigma_r.data() + isig * nrxx, *(chg_gs.rhopw), tpiba2);
-            //     // 3. $\nabla^2(v^\sigma)$
-            //     std::vector<double> lap_vsigma(3 * nrxx);
-            //     for (int ir = 0;ir < nrxx;++ir)
-            //     {
-            //         lap_vsigma[ir] = vs.at(ir * 3) * 2.0;
-            //         lap_vsigma[nrxx + ir] = vs.at(ir * 3 + 1);
-            //         lap_vsigma[2 * nrxx + ir] = vs.at(ir * 3 + 2) * 2.0;
-            //     }
-            //     for (int isig = 0;isig < 3;++isig)
-            //         LR_Util::lapl(lap_vsigma.data() + isig * nrxx, lap_vsigma.data() + isig * nrxx, *(chg_gs.rhopw), tpiba2);
-            //     // add to v2rho2_
-            //     BlasConnector::axpy(3 * nrxx, 1.0, v2r2.data(), 1, to_mul_rho_.data(), 1);
-            //     BlasConnector::axpy(3 * nrxx, -1.0, div_v2rhosigma_gdrho_r.data(), 1, to_mul_rho_.data(), 1);
-            //     BlasConnector::axpy(3 * nrxx, 1.0, v2sigma2_sigma_r.data(), 1, to_mul_rho_.data(), 1);
-            //     BlasConnector::axpy(3 * nrxx, 1.0, lap_vsigma.data(), 1, to_mul_rho_.data(), 1);
-            // }
-            else
+
+            // 2. $4f^{\sigma\sigma}*\nabla\rho$
+            this->v2sigma2_4drho_.resize(nrxx);
+#ifdef _OPENMP
+#pragma omp parallel for schedule(static, 4096)
+#endif
+            for (size_t i = 0; i < nrxx; ++i)
             {
-                throw std::domain_error("nspin =" + std::to_string(nspin)
-                    + " unfinished in " + std::string(__FILE__) + " line " + std::to_string(__LINE__));
+                this->v2sigma2_4drho_[i] = gradrho[0][i] * v2s2[i] * 4.;
             }
         }
-    } // end for( xc_func_type &func : funcs )
-    XC_Functional_Libxc::finish_func(funcs);
-
-    if (1 == PARAM.inp.nspin || 2 == PARAM.inp.nspin) return;
+        else
+        {
+            throw std::domain_error("nspin =" + std::to_string(nspin)
+                + " unfinished in " + std::string(__FILE__) + " line " + std::to_string(__LINE__));
+        }
+    }
+    if (PARAM.inp.nspin == 1 || PARAM.inp.nspin == 2) {
+        return;
     // else if (4 == PARAM.inp.nspin)
-    else//NSPIN != 1,2,4 is not supported
+    } else//NSPIN != 1,2,4 is not supported
     {
         throw std::domain_error("PARAM.inp.nspin =" + std::to_string(PARAM.inp.nspin)
             + " unfinished in " + std::string(__FILE__) + " line " + std::to_string(__LINE__));
diff --git a/tests/integrate/291_NO_KP_LR/result.ref b/tests/integrate/291_NO_KP_LR/result.ref
index 722b3fff61..b85ad819a5 100644
--- a/tests/integrate/291_NO_KP_LR/result.ref
+++ b/tests/integrate/291_NO_KP_LR/result.ref
@@ -1 +1 @@
-totexcitationenergyref 0.784274
\ No newline at end of file
+totexcitationenergyref 0.755653
\ No newline at end of file
diff --git a/tests/integrate/391_NO_GO_LR/result.ref b/tests/integrate/391_NO_GO_LR/result.ref
index 850f003f46..5c65fd6538 100644
--- a/tests/integrate/391_NO_GO_LR/result.ref
+++ b/tests/integrate/391_NO_GO_LR/result.ref
@@ -1 +1 @@
-totexcitationenergyref 2.641190
\ No newline at end of file
+totexcitationenergyref 2.510666
\ No newline at end of file
diff --git a/tests/integrate/393_NO_GO_ULR_HF/INPUT b/tests/integrate/393_NO_GO_ULR_HF/INPUT
index e9acbcda6d..c62c45b94e 100644
--- a/tests/integrate/393_NO_GO_ULR_HF/INPUT
+++ b/tests/integrate/393_NO_GO_ULR_HF/INPUT
@@ -27,12 +27,12 @@ mixing_type pulay
 mixing_beta 0.4
 mixing_gg0 0
 
-dft_functional lda
+dft_functional hf
 exx_real_number 1
 exx_ccp_rmesh_times 1
 
 lr_nstates 3
-xc_kernel lda
+xc_kernel hf
 lr_solver lapack
 lr_thr 1e-2
 lr_unrestricted 1
diff --git a/tests/integrate/393_NO_GO_ULR_HF/result.ref b/tests/integrate/393_NO_GO_ULR_HF/result.ref
index e395c0fb9d..947ae2baba 100644
--- a/tests/integrate/393_NO_GO_ULR_HF/result.ref
+++ b/tests/integrate/393_NO_GO_ULR_HF/result.ref
@@ -1 +1 @@
-totexcitationenergyref 1.940703
\ No newline at end of file
+totexcitationenergyref 2.187535
\ No newline at end of file
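Note on the core pattern in f_xc_libxc: as the patch's own comment states, libxc routines such as xc_lda_fxc and xc_gga_fxc overwrite their output arrays rather than adding to them, so each functional in the loop now writes into the *_tmp vectors and add_assign_op accumulates those temporaries onto the zero-initialized totals (vrho_, v2rho2_, vsigma_, v2rhosigma_, v2sigma2_). Below is a minimal, self-contained C++ sketch of that accumulate-via-temporary idea; the eval lambdas are hypothetical stand-ins for the libxc calls, and add_assign is a simplified counterpart of the patch's add_assign_op, not ABACUS or libxc API.

#include <cassert>
#include <cstdio>
#include <functional>
#include <vector>

// Element-wise dst += src, mirroring the add_assign_op helper added in the patch.
template <typename T>
void add_assign(const std::vector<T>& src, std::vector<T>& dst)
{
    assert(dst.size() >= src.size());
#ifdef _OPENMP
#pragma omp parallel for schedule(static, 4096)
#endif
    for (int i = 0; i < static_cast<int>(src.size()); ++i) { dst[i] += src[i]; }
}

int main()
{
    const int nrxx = 8;
    std::vector<double> v2rho2_total(nrxx, 0.0); // zero-initialized, like resize(n, 0.)

    // Hypothetical stand-ins for per-functional libxc calls: each one OVERWRITES its output.
    const std::vector<std::function<void(std::vector<double>&)>> funcs = {
        [](std::vector<double>& out) { for (double& x : out) { x = 1.0; } }, // "functional 1"
        [](std::vector<double>& out) { for (double& x : out) { x = 0.5; } }, // "functional 2"
    };

    for (const auto& eval_fxc : funcs)
    {
        std::vector<double> v2rho2_tmp(nrxx); // fresh temporary per functional
        eval_fxc(v2rho2_tmp);                 // overwrite semantics, like xc_lda_fxc
        add_assign(v2rho2_tmp, v2rho2_total); // accumulate onto the running total
    }

    std::printf("v2rho2_total[0] = %.2f\n", v2rho2_total[0]); // 1.50, the sum of both components
    return 0;
}

Zero-initializing the totals before the loop is what makes the first accumulation well defined, which is why the resize calls in the patch gained an explicit 0. initial value.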