Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
183 changes: 96 additions & 87 deletions source/module_lr/potentials/xc_kernel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,15 @@
#include "module_lr/utils/lr_util.h"
#include "module_lr/utils/lr_util_xc.hpp"
#include <set>
#include <chrono>
#include "module_io/cube_io.h"
#ifdef USE_LIBXC
#include <xc.h>
#include "module_hamilt_general/module_xc/xc_functional_libxc.h"
#endif
#ifdef _OPENMP
#include <omp.h>
#endif

LR::KernelXC::KernelXC(const ModulePW::PW_Basis& rho_basis,
const UnitCell& ucell,
Expand Down Expand Up @@ -66,6 +70,29 @@ LR::KernelXC::KernelXC(const ModulePW::PW_Basis& rho_basis,
#endif
}

template <typename T>
inline void add_op(const T* const src1, const T* const src2, T* const dst, const int size)
{
#ifdef _OPENMP
#pragma omp parallel for schedule(static, 4096)
#endif
for (int i = 0;i < size;++i)
{
dst[i] = src1[i] + src2[i];
}
}
template <typename T>
inline void add_op(const std::vector<T>& src1, const std::vector<T>& src2, std::vector<T>& dst)
{
assert(dst.size() >= src1.size() && src2.size() >= src1.size());
add_op(src1.data(), src2.data(), dst.data(), src1.size());
}
template <typename T>
inline void add_assign_op(const std::vector<T>& src, std::vector<T>& dst)
{
add_op(src, dst, dst);
}

#ifdef USE_LIBXC
void LR::KernelXC::f_xc_libxc(const int& nspin, const double& omega, const double& tpiba, const double* const* const rho_gs, const double* const rho_core)
{
Expand Down Expand Up @@ -115,7 +142,8 @@ void LR::KernelXC::f_xc_libxc(const int& nspin, const double& omega, const doubl
#ifdef _OPENMP
#pragma omp parallel for schedule(static, 1024)
#endif
for (int ir = 0; ir < nrxx; ++ir) rhor[ir] = rho[ir * nspin + is];
for (int ir = 0; ir < nrxx; ++ir) { rhor[ir] = rho[ir * nspin + is];
}
gradrho[is].resize(nrxx);
LR_Util::grad(rhor.data(), gradrho[is].data(), rho_basis_, tpiba);
}
Expand All @@ -126,8 +154,9 @@ void LR::KernelXC::f_xc_libxc(const int& nspin, const double& omega, const doubl
#ifdef _OPENMP
#pragma omp parallel for schedule(static, 1024)
#endif
for (int ir = 0; ir < nrxx; ++ir)
for (int ir = 0; ir < nrxx; ++ir) {
sigma[ir] = gradrho[0][ir] * gradrho[0][ir];
}
}
else
{
Expand All @@ -144,13 +173,13 @@ void LR::KernelXC::f_xc_libxc(const int& nspin, const double& omega, const doubl
}
// -----------------------------------------------------------------------------------
//==================== XC Kernels (f_xc)=============================
this->vrho_.resize(nspin * nrxx);
this->v2rho2_.resize(((1 == nspin) ? 1 : 3) * nrxx);//(nrxx* ((1 == nspin) ? 1 : 3)): 00, 01, 11
this->vrho_.resize(nspin * nrxx, 0.);
this->v2rho2_.resize(((1 == nspin) ? 1 : 3) * nrxx, 0.);//(nrxx* ((1 == nspin) ? 1 : 3)): 00, 01, 11
if (is_gga)
{
this->vsigma_.resize(((1 == nspin) ? 1 : 3) * nrxx);//(nrxx*): 2 for rho * 3 for sigma: 00, 01, 02, 10, 11, 12
this->v2rhosigma_.resize(((1 == nspin) ? 1 : 6) * nrxx); //(nrxx*): 2 for rho * 3 for sigma: 00, 01, 02, 10, 11, 12
this->v2sigma2_.resize(((1 == nspin) ? 1 : 6) * nrxx); //(nrxx* ((1 == nspin) ? 1 : 6)): 00, 01, 02, 11, 12, 22
this->vsigma_.resize(((1 == nspin) ? 1 : 3) * nrxx, 0.);//(nrxx*): 2 for rho * 3 for sigma: 00, 01, 02, 10, 11, 12
this->v2rhosigma_.resize(((1 == nspin) ? 1 : 6) * nrxx, 0.); //(nrxx*): 2 for rho * 3 for sigma: 00, 01, 02, 10, 11, 12
this->v2sigma2_.resize(((1 == nspin) ? 1 : 6) * nrxx, 0.); //(nrxx* ((1 == nspin) ? 1 : 6)): 00, 01, 02, 11, 12, 22
}
//MetaGGA ...

Expand All @@ -163,105 +192,85 @@ void LR::KernelXC::f_xc_libxc(const int& nspin, const double& omega, const doubl

//cut off grho if not LDA (future subfunc)

// Libxc interfaces overwrite (instead of add onto) the output arrays, so we need temporary copies
std::vector<double> vrho_tmp(this->vrho_.size());
std::vector<double> v2rho2_tmp(this->v2rho2_.size());
std::vector<double> vsigma_tmp(this->vsigma_.size());
std::vector<double> v2rhosigma_tmp(this->v2rhosigma_.size());
std::vector<double> v2sigma2_tmp(this->v2sigma2_.size());
switch (func.info->family)
{
case XC_FAMILY_LDA:
xc_lda_vxc(&func, nrxx, rho.data(), vrho_.data());
xc_lda_fxc(&func, nrxx, rho.data(), v2rho2_.data());
xc_lda_vxc(&func, nrxx, rho.data(), vrho_tmp.data());
xc_lda_fxc(&func, nrxx, rho.data(), v2rho2_tmp.data());
break;
case XC_FAMILY_GGA:
case XC_FAMILY_HYB_GGA:
xc_gga_vxc(&func, nrxx, rho.data(), sigma.data(), vrho_.data(), vsigma_.data());
xc_gga_fxc(&func, nrxx, rho.data(), sigma.data(), v2rho2_.data(), v2rhosigma_.data(), v2sigma2_.data());
xc_gga_vxc(&func, nrxx, rho.data(), sigma.data(), vrho_tmp.data(), vsigma_tmp.data());
xc_gga_fxc(&func, nrxx, rho.data(), sigma.data(), v2rho2_tmp.data(), v2rhosigma_tmp.data(), v2sigma2_tmp.data());
break;
default:
throw std::domain_error("func.info->family =" + std::to_string(func.info->family)
+ " unfinished in " + std::string(__FILE__) + " line " + std::to_string(__LINE__));
break;
}
// some formulas for GGA
if (func.info->family == XC_FAMILY_GGA || func.info->family == XC_FAMILY_HYB_GGA)
{
const std::vector<double>& v2r2 = this->v2rho2_;
const std::vector<double>& v2rs = this->v2rhosigma_;
const std::vector<double>& v2s2 = this->v2sigma2_;
const std::vector<double>& vs = this->vsigma_;
const double tpiba2 = tpiba * tpiba;
// add onto the total components
// auto start = std::chrono::high_resolution_clock::now();
add_assign_op(vrho_tmp, this->vrho_);
add_assign_op(v2rho2_tmp, this->v2rho2_);
add_assign_op(vsigma_tmp, this->vsigma_);
add_assign_op(v2rhosigma_tmp, this->v2rhosigma_);
add_assign_op(v2sigma2_tmp, this->v2sigma2_);
// auto end = std::chrono::high_resolution_clock::now();
// auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(end - start);
// std::cout << "Time elapsed adding XC components: " << duration.count() << " ms\n";
} // end for( xc_func_type &func : funcs )

XC_Functional_Libxc::finish_func(funcs);

if (1 == nspin)
// some postprocess if there're GGA funcs in the list
if (is_gga)
{
const std::vector<double>& v2r2 = this->v2rho2_;
const std::vector<double>& v2rs = this->v2rhosigma_;
const std::vector<double>& v2s2 = this->v2sigma2_;
const std::vector<double>& vs = this->vsigma_;
const double tpiba2 = tpiba * tpiba;

if (nspin == 1)
{
// 0. drho
this->drho_gs_ = gradrho[0];
// 1. $2f^{\rho\sigma}*\nabla\rho$
this->v2rhosigma_2drho_.resize(nrxx);
#ifdef _OPENMP
#pragma omp parallel for schedule(static, 4096)
#endif
for (size_t i = 0; i < nrxx; ++i)
{
using V3 = ModuleBase::Vector3<double>;
// 0. drho
this->drho_gs_ = gradrho[0];
// 1. $2f^{\rho\sigma}*\nabla\rho$
this->v2rhosigma_2drho_.resize(nrxx);
std::transform(gradrho[0].begin(), gradrho[0].end(), v2rs.begin(), this->v2rhosigma_2drho_.begin(),
[](const V3& a, const V3& b) {return a * b * 2.; });
// 2. $4f^{\sigma\sigma}*\nabla\rho$
this->v2sigma2_4drho_.resize(nrxx);
std::transform(sigma.begin(), sigma.end(), v2s2.begin(), v2sigma2_4drho_.begin(),
[](const V3& a, const V3& b) {return a * b * 4.; });
this->v2rhosigma_2drho_[i] = gradrho[0][i] * v2rs[i] * 2.;
}
// else if (2 == nspin) // wrong, to be fixed
// {
// // 1. $\nabla\cdot(f^{\rho\sigma}*\nabla\rho)$
// std::vector<double> div_v2rhosigma_gdrho_r(3 * nrxx);
// std::vector<ModuleBase::Vector3<double>> v2rhosigma_gdrho_r(3 * nrxx);
// for (int ir = 0; ir < nrxx; ++ir)
// {
// v2rhosigma_gdrho_r[ir] = gradrho[0][ir] * v2rs.at(ir * 6) * 4.0
// + gradrho[1][ir] * v2rs.at(ir * 6 + 1) * 2.0; //up-up
// v2rhosigma_gdrho_r[nrxx + ir] = gradrho[0][ir] * (v2rs.at(ir * 6 + 3) * 2.0 + v2rs.at(ir * 6 + 1))
// + gradrho[1][ir] * (v2rs.at(ir * 6 + 2) * 2.0 + v2rs.at(ir * 6 + 4)); //up-down
// v2rhosigma_gdrho_r[2 * nrxx + ir] = gradrho[1][ir] * v2rs.at(ir * 6 + 5) * 4.0
// + gradrho[0][ir] * v2rs.at(ir * 6 + 4) * 2.0; //down-down
// }
// for (int isig = 0;isig < 3;++isig)
// XC_Functional::grad_dot(v2rhosigma_gdrho_r.data() + isig * nrxx, div_v2rhosigma_gdrho_r.data() + isig * nrxx, chg_gs.rhopw, tpiba);
// // 2. $\nabla^2(f^{\sigma\sigma}*\sigma)$
// std::vector<double> v2sigma2_sigma_r(3 * nrxx);
// for (int ir = 0; ir < nrxx; ++ir)
// {
// v2sigma2_sigma_r[ir] = v2s2.at(ir * 6) * sigma[ir * 3] * 4.0
// + v2s2.at(ir * 6 + 1) * sigma[ir * 3 + 1] * 4.0
// + v2s2.at(ir * 6 + 3) * sigma[ir * 3 + 2]; //up-up
// v2sigma2_sigma_r[nrxx + ir] = v2s2.at(ir * 6 + 1) * sigma[ir * 3] * 2.0
// + v2s2.at(ir * 6 + 4) * sigma[ir * 3 + 2] * 2.0
// + (v2s2.at(ir * 6 + 2) * 4.0 + v2s2.at(ir * 6 + 3)) * sigma[ir * 3 + 1]; //up-down
// v2sigma2_sigma_r[2 * nrxx + ir] = v2s2.at(ir * 6 + 5) * sigma[ir * 3 + 2] * 4.0
// + v2s2.at(ir * 6 + 4) * sigma[ir * 3 + 1] * 4.0
// + v2s2.at(ir * 6 + 3) * sigma[ir * 3]; //down-down
// }
// for (int isig = 0;isig < 3;++isig)
// LR_Util::lapl(v2sigma2_sigma_r.data() + isig * nrxx, v2sigma2_sigma_r.data() + isig * nrxx, *(chg_gs.rhopw), tpiba2);
// // 3. $\nabla^2(v^\sigma)$
// std::vector<double> lap_vsigma(3 * nrxx);
// for (int ir = 0;ir < nrxx;++ir)
// {
// lap_vsigma[ir] = vs.at(ir * 3) * 2.0;
// lap_vsigma[nrxx + ir] = vs.at(ir * 3 + 1);
// lap_vsigma[2 * nrxx + ir] = vs.at(ir * 3 + 2) * 2.0;
// }
// for (int isig = 0;isig < 3;++isig)
// LR_Util::lapl(lap_vsigma.data() + isig * nrxx, lap_vsigma.data() + isig * nrxx, *(chg_gs.rhopw), tpiba2);
// // add to v2rho2_
// BlasConnector::axpy(3 * nrxx, 1.0, v2r2.data(), 1, to_mul_rho_.data(), 1);
// BlasConnector::axpy(3 * nrxx, -1.0, div_v2rhosigma_gdrho_r.data(), 1, to_mul_rho_.data(), 1);
// BlasConnector::axpy(3 * nrxx, 1.0, v2sigma2_sigma_r.data(), 1, to_mul_rho_.data(), 1);
// BlasConnector::axpy(3 * nrxx, 1.0, lap_vsigma.data(), 1, to_mul_rho_.data(), 1);
// }
else

// 2. $4f^{\sigma\sigma}*\nabla\rho$
this->v2sigma2_4drho_.resize(nrxx);
#ifdef _OPENMP
#pragma omp parallel for schedule(static, 4096)
#endif
for (size_t i = 0; i < nrxx; ++i)
{
throw std::domain_error("nspin =" + std::to_string(nspin)
+ " unfinished in " + std::string(__FILE__) + " line " + std::to_string(__LINE__));
this->v2sigma2_4drho_[i] = gradrho[0][i] * v2s2[i] * 4.;
}
}
} // end for( xc_func_type &func : funcs )
XC_Functional_Libxc::finish_func(funcs);

if (1 == PARAM.inp.nspin || 2 == PARAM.inp.nspin) return;
else
{
throw std::domain_error("nspin =" + std::to_string(nspin)
+ " unfinished in " + std::string(__FILE__) + " line " + std::to_string(__LINE__));
}
}
if (PARAM.inp.nspin == 1 || PARAM.inp.nspin == 2) {
return;
// else if (4 == PARAM.inp.nspin)
else//NSPIN != 1,2,4 is not supported
} else//NSPIN != 1,2,4 is not supported
{
throw std::domain_error("PARAM.inp.nspin =" + std::to_string(PARAM.inp.nspin)
+ " unfinished in " + std::string(__FILE__) + " line " + std::to_string(__LINE__));
Expand Down
2 changes: 1 addition & 1 deletion tests/integrate/291_NO_KP_LR/result.ref
Original file line number Diff line number Diff line change
@@ -1 +1 @@
totexcitationenergyref 0.784274
totexcitationenergyref 0.755653
2 changes: 1 addition & 1 deletion tests/integrate/391_NO_GO_LR/result.ref
Original file line number Diff line number Diff line change
@@ -1 +1 @@
totexcitationenergyref 2.641190
totexcitationenergyref 2.510666
4 changes: 2 additions & 2 deletions tests/integrate/393_NO_GO_ULR_HF/INPUT
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,12 @@ mixing_type pulay
mixing_beta 0.4
mixing_gg0 0

dft_functional lda
dft_functional hf
exx_real_number 1
exx_ccp_rmesh_times 1

lr_nstates 3
xc_kernel lda
xc_kernel hf
lr_solver lapack
lr_thr 1e-2
lr_unrestricted 1
Expand Down
2 changes: 1 addition & 1 deletion tests/integrate/393_NO_GO_ULR_HF/result.ref
Original file line number Diff line number Diff line change
@@ -1 +1 @@
totexcitationenergyref 1.940703
totexcitationenergyref 2.187535
Loading