Skip to content

Commit df7e8a5

Browse files
Fix the overwriting between xc kernel components (#5654)
* fix the overwriting between xc kernel components * [pre-commit.ci lite] apply automatic fixes * use for-loop with omp * add duration test * replace lambda by individual function * expand the for-loop * small change --------- Co-authored-by: pre-commit-ci-lite[bot] <117423508+pre-commit-ci-lite[bot]@users.noreply.github.com>
1 parent ce1fecf commit df7e8a5

File tree

5 files changed

+101
-92
lines changed

5 files changed

+101
-92
lines changed

source/module_lr/potentials/xc_kernel.cpp

Lines changed: 96 additions & 87 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,15 @@
55
#include "module_lr/utils/lr_util.h"
66
#include "module_lr/utils/lr_util_xc.hpp"
77
#include <set>
8+
#include <chrono>
89
#include "module_io/cube_io.h"
910
#ifdef USE_LIBXC
1011
#include <xc.h>
1112
#include "module_hamilt_general/module_xc/xc_functional_libxc.h"
1213
#endif
14+
#ifdef _OPENMP
15+
#include <omp.h>
16+
#endif
1317

1418
LR::KernelXC::KernelXC(const ModulePW::PW_Basis& rho_basis,
1519
const UnitCell& ucell,
@@ -66,6 +70,29 @@ LR::KernelXC::KernelXC(const ModulePW::PW_Basis& rho_basis,
6670
#endif
6771
}
6872

73+
template <typename T>
74+
inline void add_op(const T* const src1, const T* const src2, T* const dst, const int size)
75+
{
76+
#ifdef _OPENMP
77+
#pragma omp parallel for schedule(static, 4096)
78+
#endif
79+
for (int i = 0;i < size;++i)
80+
{
81+
dst[i] = src1[i] + src2[i];
82+
}
83+
}
84+
template <typename T>
85+
inline void add_op(const std::vector<T>& src1, const std::vector<T>& src2, std::vector<T>& dst)
86+
{
87+
assert(dst.size() >= src1.size() && src2.size() >= src1.size());
88+
add_op(src1.data(), src2.data(), dst.data(), src1.size());
89+
}
90+
template <typename T>
91+
inline void add_assign_op(const std::vector<T>& src, std::vector<T>& dst)
92+
{
93+
add_op(src, dst, dst);
94+
}
95+
6996
#ifdef USE_LIBXC
7097
void LR::KernelXC::f_xc_libxc(const int& nspin, const double& omega, const double& tpiba, const double* const* const rho_gs, const double* const rho_core)
7198
{
@@ -115,7 +142,8 @@ void LR::KernelXC::f_xc_libxc(const int& nspin, const double& omega, const doubl
115142
#ifdef _OPENMP
116143
#pragma omp parallel for schedule(static, 1024)
117144
#endif
118-
for (int ir = 0; ir < nrxx; ++ir) rhor[ir] = rho[ir * nspin + is];
145+
for (int ir = 0; ir < nrxx; ++ir) { rhor[ir] = rho[ir * nspin + is];
146+
}
119147
gradrho[is].resize(nrxx);
120148
LR_Util::grad(rhor.data(), gradrho[is].data(), rho_basis_, tpiba);
121149
}
@@ -126,8 +154,9 @@ void LR::KernelXC::f_xc_libxc(const int& nspin, const double& omega, const doubl
126154
#ifdef _OPENMP
127155
#pragma omp parallel for schedule(static, 1024)
128156
#endif
129-
for (int ir = 0; ir < nrxx; ++ir)
157+
for (int ir = 0; ir < nrxx; ++ir) {
130158
sigma[ir] = gradrho[0][ir] * gradrho[0][ir];
159+
}
131160
}
132161
else
133162
{
@@ -144,13 +173,13 @@ void LR::KernelXC::f_xc_libxc(const int& nspin, const double& omega, const doubl
144173
}
145174
// -----------------------------------------------------------------------------------
146175
//==================== XC Kernels (f_xc)=============================
147-
this->vrho_.resize(nspin * nrxx);
148-
this->v2rho2_.resize(((1 == nspin) ? 1 : 3) * nrxx);//(nrxx* ((1 == nspin) ? 1 : 3)): 00, 01, 11
176+
this->vrho_.resize(nspin * nrxx, 0.);
177+
this->v2rho2_.resize(((1 == nspin) ? 1 : 3) * nrxx, 0.);//(nrxx* ((1 == nspin) ? 1 : 3)): 00, 01, 11
149178
if (is_gga)
150179
{
151-
this->vsigma_.resize(((1 == nspin) ? 1 : 3) * nrxx);//(nrxx*): 2 for rho * 3 for sigma: 00, 01, 02, 10, 11, 12
152-
this->v2rhosigma_.resize(((1 == nspin) ? 1 : 6) * nrxx); //(nrxx*): 2 for rho * 3 for sigma: 00, 01, 02, 10, 11, 12
153-
this->v2sigma2_.resize(((1 == nspin) ? 1 : 6) * nrxx); //(nrxx* ((1 == nspin) ? 1 : 6)): 00, 01, 02, 11, 12, 22
180+
this->vsigma_.resize(((1 == nspin) ? 1 : 3) * nrxx, 0.);//(nrxx*): 2 for rho * 3 for sigma: 00, 01, 02, 10, 11, 12
181+
this->v2rhosigma_.resize(((1 == nspin) ? 1 : 6) * nrxx, 0.); //(nrxx*): 2 for rho * 3 for sigma: 00, 01, 02, 10, 11, 12
182+
this->v2sigma2_.resize(((1 == nspin) ? 1 : 6) * nrxx, 0.); //(nrxx* ((1 == nspin) ? 1 : 6)): 00, 01, 02, 11, 12, 22
154183
}
155184
//MetaGGA ...
156185

@@ -163,105 +192,85 @@ void LR::KernelXC::f_xc_libxc(const int& nspin, const double& omega, const doubl
163192

164193
//cut off grho if not LDA (future subfunc)
165194

195+
// Libxc interfaces overwrite (instead of add onto) the output arrays, so we need temporary copies
196+
std::vector<double> vrho_tmp(this->vrho_.size());
197+
std::vector<double> v2rho2_tmp(this->v2rho2_.size());
198+
std::vector<double> vsigma_tmp(this->vsigma_.size());
199+
std::vector<double> v2rhosigma_tmp(this->v2rhosigma_.size());
200+
std::vector<double> v2sigma2_tmp(this->v2sigma2_.size());
166201
switch (func.info->family)
167202
{
168203
case XC_FAMILY_LDA:
169-
xc_lda_vxc(&func, nrxx, rho.data(), vrho_.data());
170-
xc_lda_fxc(&func, nrxx, rho.data(), v2rho2_.data());
204+
xc_lda_vxc(&func, nrxx, rho.data(), vrho_tmp.data());
205+
xc_lda_fxc(&func, nrxx, rho.data(), v2rho2_tmp.data());
171206
break;
172207
case XC_FAMILY_GGA:
173208
case XC_FAMILY_HYB_GGA:
174-
xc_gga_vxc(&func, nrxx, rho.data(), sigma.data(), vrho_.data(), vsigma_.data());
175-
xc_gga_fxc(&func, nrxx, rho.data(), sigma.data(), v2rho2_.data(), v2rhosigma_.data(), v2sigma2_.data());
209+
xc_gga_vxc(&func, nrxx, rho.data(), sigma.data(), vrho_tmp.data(), vsigma_tmp.data());
210+
xc_gga_fxc(&func, nrxx, rho.data(), sigma.data(), v2rho2_tmp.data(), v2rhosigma_tmp.data(), v2sigma2_tmp.data());
176211
break;
177212
default:
178213
throw std::domain_error("func.info->family =" + std::to_string(func.info->family)
179214
+ " unfinished in " + std::string(__FILE__) + " line " + std::to_string(__LINE__));
180215
break;
181216
}
182-
// some formulas for GGA
183-
if (func.info->family == XC_FAMILY_GGA || func.info->family == XC_FAMILY_HYB_GGA)
184-
{
185-
const std::vector<double>& v2r2 = this->v2rho2_;
186-
const std::vector<double>& v2rs = this->v2rhosigma_;
187-
const std::vector<double>& v2s2 = this->v2sigma2_;
188-
const std::vector<double>& vs = this->vsigma_;
189-
const double tpiba2 = tpiba * tpiba;
217+
// add onto the total components
218+
// auto start = std::chrono::high_resolution_clock::now();
219+
add_assign_op(vrho_tmp, this->vrho_);
220+
add_assign_op(v2rho2_tmp, this->v2rho2_);
221+
add_assign_op(vsigma_tmp, this->vsigma_);
222+
add_assign_op(v2rhosigma_tmp, this->v2rhosigma_);
223+
add_assign_op(v2sigma2_tmp, this->v2sigma2_);
224+
// auto end = std::chrono::high_resolution_clock::now();
225+
// auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(end - start);
226+
// std::cout << "Time elapsed adding XC components: " << duration.count() << " ms\n";
227+
} // end for( xc_func_type &func : funcs )
228+
229+
XC_Functional_Libxc::finish_func(funcs);
190230

191-
if (1 == nspin)
231+
// some postprocess if there're GGA funcs in the list
232+
if (is_gga)
233+
{
234+
const std::vector<double>& v2r2 = this->v2rho2_;
235+
const std::vector<double>& v2rs = this->v2rhosigma_;
236+
const std::vector<double>& v2s2 = this->v2sigma2_;
237+
const std::vector<double>& vs = this->vsigma_;
238+
const double tpiba2 = tpiba * tpiba;
239+
240+
if (nspin == 1)
241+
{
242+
// 0. drho
243+
this->drho_gs_ = gradrho[0];
244+
// 1. $2f^{\rho\sigma}*\nabla\rho$
245+
this->v2rhosigma_2drho_.resize(nrxx);
246+
#ifdef _OPENMP
247+
#pragma omp parallel for schedule(static, 4096)
248+
#endif
249+
for (size_t i = 0; i < nrxx; ++i)
192250
{
193-
using V3 = ModuleBase::Vector3<double>;
194-
// 0. drho
195-
this->drho_gs_ = gradrho[0];
196-
// 1. $2f^{\rho\sigma}*\nabla\rho$
197-
this->v2rhosigma_2drho_.resize(nrxx);
198-
std::transform(gradrho[0].begin(), gradrho[0].end(), v2rs.begin(), this->v2rhosigma_2drho_.begin(),
199-
[](const V3& a, const V3& b) {return a * b * 2.; });
200-
// 2. $4f^{\sigma\sigma}*\nabla\rho$
201-
this->v2sigma2_4drho_.resize(nrxx);
202-
std::transform(sigma.begin(), sigma.end(), v2s2.begin(), v2sigma2_4drho_.begin(),
203-
[](const V3& a, const V3& b) {return a * b * 4.; });
251+
this->v2rhosigma_2drho_[i] = gradrho[0][i] * v2rs[i] * 2.;
204252
}
205-
// else if (2 == nspin) // wrong, to be fixed
206-
// {
207-
// // 1. $\nabla\cdot(f^{\rho\sigma}*\nabla\rho)$
208-
// std::vector<double> div_v2rhosigma_gdrho_r(3 * nrxx);
209-
// std::vector<ModuleBase::Vector3<double>> v2rhosigma_gdrho_r(3 * nrxx);
210-
// for (int ir = 0; ir < nrxx; ++ir)
211-
// {
212-
// v2rhosigma_gdrho_r[ir] = gradrho[0][ir] * v2rs.at(ir * 6) * 4.0
213-
// + gradrho[1][ir] * v2rs.at(ir * 6 + 1) * 2.0; //up-up
214-
// v2rhosigma_gdrho_r[nrxx + ir] = gradrho[0][ir] * (v2rs.at(ir * 6 + 3) * 2.0 + v2rs.at(ir * 6 + 1))
215-
// + gradrho[1][ir] * (v2rs.at(ir * 6 + 2) * 2.0 + v2rs.at(ir * 6 + 4)); //up-down
216-
// v2rhosigma_gdrho_r[2 * nrxx + ir] = gradrho[1][ir] * v2rs.at(ir * 6 + 5) * 4.0
217-
// + gradrho[0][ir] * v2rs.at(ir * 6 + 4) * 2.0; //down-down
218-
// }
219-
// for (int isig = 0;isig < 3;++isig)
220-
// XC_Functional::grad_dot(v2rhosigma_gdrho_r.data() + isig * nrxx, div_v2rhosigma_gdrho_r.data() + isig * nrxx, chg_gs.rhopw, tpiba);
221-
// // 2. $\nabla^2(f^{\sigma\sigma}*\sigma)$
222-
// std::vector<double> v2sigma2_sigma_r(3 * nrxx);
223-
// for (int ir = 0; ir < nrxx; ++ir)
224-
// {
225-
// v2sigma2_sigma_r[ir] = v2s2.at(ir * 6) * sigma[ir * 3] * 4.0
226-
// + v2s2.at(ir * 6 + 1) * sigma[ir * 3 + 1] * 4.0
227-
// + v2s2.at(ir * 6 + 3) * sigma[ir * 3 + 2]; //up-up
228-
// v2sigma2_sigma_r[nrxx + ir] = v2s2.at(ir * 6 + 1) * sigma[ir * 3] * 2.0
229-
// + v2s2.at(ir * 6 + 4) * sigma[ir * 3 + 2] * 2.0
230-
// + (v2s2.at(ir * 6 + 2) * 4.0 + v2s2.at(ir * 6 + 3)) * sigma[ir * 3 + 1]; //up-down
231-
// v2sigma2_sigma_r[2 * nrxx + ir] = v2s2.at(ir * 6 + 5) * sigma[ir * 3 + 2] * 4.0
232-
// + v2s2.at(ir * 6 + 4) * sigma[ir * 3 + 1] * 4.0
233-
// + v2s2.at(ir * 6 + 3) * sigma[ir * 3]; //down-down
234-
// }
235-
// for (int isig = 0;isig < 3;++isig)
236-
// LR_Util::lapl(v2sigma2_sigma_r.data() + isig * nrxx, v2sigma2_sigma_r.data() + isig * nrxx, *(chg_gs.rhopw), tpiba2);
237-
// // 3. $\nabla^2(v^\sigma)$
238-
// std::vector<double> lap_vsigma(3 * nrxx);
239-
// for (int ir = 0;ir < nrxx;++ir)
240-
// {
241-
// lap_vsigma[ir] = vs.at(ir * 3) * 2.0;
242-
// lap_vsigma[nrxx + ir] = vs.at(ir * 3 + 1);
243-
// lap_vsigma[2 * nrxx + ir] = vs.at(ir * 3 + 2) * 2.0;
244-
// }
245-
// for (int isig = 0;isig < 3;++isig)
246-
// LR_Util::lapl(lap_vsigma.data() + isig * nrxx, lap_vsigma.data() + isig * nrxx, *(chg_gs.rhopw), tpiba2);
247-
// // add to v2rho2_
248-
// BlasConnector::axpy(3 * nrxx, 1.0, v2r2.data(), 1, to_mul_rho_.data(), 1);
249-
// BlasConnector::axpy(3 * nrxx, -1.0, div_v2rhosigma_gdrho_r.data(), 1, to_mul_rho_.data(), 1);
250-
// BlasConnector::axpy(3 * nrxx, 1.0, v2sigma2_sigma_r.data(), 1, to_mul_rho_.data(), 1);
251-
// BlasConnector::axpy(3 * nrxx, 1.0, lap_vsigma.data(), 1, to_mul_rho_.data(), 1);
252-
// }
253-
else
253+
254+
// 2. $4f^{\sigma\sigma}*\nabla\rho$
255+
this->v2sigma2_4drho_.resize(nrxx);
256+
#ifdef _OPENMP
257+
#pragma omp parallel for schedule(static, 4096)
258+
#endif
259+
for (size_t i = 0; i < nrxx; ++i)
254260
{
255-
throw std::domain_error("nspin =" + std::to_string(nspin)
256-
+ " unfinished in " + std::string(__FILE__) + " line " + std::to_string(__LINE__));
261+
this->v2sigma2_4drho_[i] = gradrho[0][i] * v2s2[i] * 4.;
257262
}
258263
}
259-
} // end for( xc_func_type &func : funcs )
260-
XC_Functional_Libxc::finish_func(funcs);
261-
262-
if (1 == PARAM.inp.nspin || 2 == PARAM.inp.nspin) return;
264+
else
265+
{
266+
throw std::domain_error("nspin =" + std::to_string(nspin)
267+
+ " unfinished in " + std::string(__FILE__) + " line " + std::to_string(__LINE__));
268+
}
269+
}
270+
if (PARAM.inp.nspin == 1 || PARAM.inp.nspin == 2) {
271+
return;
263272
// else if (4 == PARAM.inp.nspin)
264-
else//NSPIN != 1,2,4 is not supported
273+
} else//NSPIN != 1,2,4 is not supported
265274
{
266275
throw std::domain_error("PARAM.inp.nspin =" + std::to_string(PARAM.inp.nspin)
267276
+ " unfinished in " + std::string(__FILE__) + " line " + std::to_string(__LINE__));
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
totexcitationenergyref 0.784274
1+
totexcitationenergyref 0.755653
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
totexcitationenergyref 2.641190
1+
totexcitationenergyref 2.510666

tests/integrate/393_NO_GO_ULR_HF/INPUT

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,12 +27,12 @@ mixing_type pulay
2727
mixing_beta 0.4
2828
mixing_gg0 0
2929

30-
dft_functional lda
30+
dft_functional hf
3131
exx_real_number 1
3232
exx_ccp_rmesh_times 1
3333

3434
lr_nstates 3
35-
xc_kernel lda
35+
xc_kernel hf
3636
lr_solver lapack
3737
lr_thr 1e-2
3838
lr_unrestricted 1
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
totexcitationenergyref 1.940703
1+
totexcitationenergyref 2.187535

0 commit comments

Comments
 (0)