Skip to content

Commit 7f433e0

Browse files
Format
1 parent 5446405 commit 7f433e0

File tree

2 files changed

+77
-56
lines changed

2 files changed

+77
-56
lines changed

source/module_hamilt_pw/hamilt_pwdft/operator_pw/op_exx_pw.cpp

Lines changed: 39 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -273,7 +273,7 @@ void OperatorEXXPW<T, Device>::act_op(const int nbands,
273273
setmem_complex_op()(psi_mq_real, 0, wfcpw->nrxx);
274274

275275
} // end of iq
276-
auto h_psi_nk = tmhpsi + n_iband * nbasis;
276+
T* h_psi_nk = tmhpsi + n_iband * nbasis;
277277
Real hybrid_alpha = GlobalC::exx_info.info_global.hybrid_alpha;
278278
wfcpw->real_to_recip(ctx, h_psi_real, h_psi_nk, this->ik, true, hybrid_alpha);
279279
setmem_complex_op()(h_psi_real, 0, rhopw->nrxx);
@@ -297,7 +297,7 @@ void OperatorEXXPW<T, Device>::act_op_ace(const int nbands,
297297

298298
// std::cout << "act_op_ace" << std::endl;
299299
// hpsi += -Xi^\dagger * Xi * psi
300-
auto Xi_ace = Xi_ace_k[this->ik];
300+
T* Xi_ace = Xi_ace_k[this->ik];
301301
int nbands_tot = psi.get_nbands();
302302
int nbasis_max = psi.get_nbasis();
303303
// T* hpsi = nullptr;
@@ -545,7 +545,8 @@ void OperatorEXXPW<T, Device>::multiply_potential(T *density_recip, int ik, int
545545
#endif
546546
for (int ig = 0; ig < npw; ig++)
547547
{
548-
density_recip[ig] *= pot[ik * nks * npw + iq * npw + ig];
548+
int ig_kq = ik * nks * npw + iq * npw + ig;
549+
density_recip[ig] *= pot[ig_kq];
549550

550551
}
551552

@@ -557,7 +558,7 @@ const T *OperatorEXXPW<T, Device>::get_pw(const int m, const int iq) const
557558
{
558559
// return pws[iq].get() + m * wfcpw->npwk[iq];
559560
psi.fix_kb(iq, m);
560-
auto psi_mq = psi.get_pointer();
561+
T* psi_mq = psi.get_pointer();
561562
return psi_mq;
562563
}
563564

@@ -586,7 +587,9 @@ OperatorEXXPW<T, Device>::OperatorEXXPW(const OperatorEXXPW<T_in, Device_in> *op
586587
template <typename T, typename Device>
587588
void OperatorEXXPW<T, Device>::get_potential() const
588589
{
589-
Real nqs_half1 = 0.5 * kv->nmp[0], nqs_half2 = 0.5 * kv->nmp[1], nqs_half3 = 0.5 * kv->nmp[2];
590+
Real nqs_half1 = 0.5 * kv->nmp[0];
591+
Real nqs_half2 = 0.5 * kv->nmp[1];
592+
Real nqs_half3 = 0.5 * kv->nmp[2];
590593

591594
int nks = wfcpw->nks, npw = rhopw->npw;
592595
double tpiba2 = tpiba * tpiba;
@@ -595,24 +598,26 @@ void OperatorEXXPW<T, Device>::get_potential() const
595598
{
596599
for (int iq = 0; iq < nks; iq++)
597600
{
598-
auto k_c = wfcpw->kvec_c[ik];
599-
auto k_d = wfcpw->kvec_d[ik];
600-
auto q_c = wfcpw->kvec_c[iq];
601-
auto q_d = wfcpw->kvec_d[iq];
601+
const ModuleBase::Vector3<double> k_c = wfcpw->kvec_c[ik];
602+
const ModuleBase::Vector3<double> k_d = wfcpw->kvec_d[ik];
603+
const ModuleBase::Vector3<double> q_c = wfcpw->kvec_c[iq];
604+
const ModuleBase::Vector3<double> q_d = wfcpw->kvec_d[iq];
602605

603606
#ifdef _OPENMP
604607
#pragma omp parallel for schedule(static)
605608
#endif
606609
for (int ig = 0; ig < rhopw->npw; ig++)
607610
{
608-
auto g_d = rhopw->gdirect[ig];
609-
auto kqg_d = k_d - q_d + g_d;
611+
const ModuleBase::Vector3<double> g_d = rhopw->gdirect[ig];
612+
const ModuleBase::Vector3<double> kqg_d = k_d - q_d + g_d;
610613
Real grid_factor = 1;
611614
if (gamma_extrapolation)
612615
{
613616
// if isint(kqg_d[0] * nqs_half1) && isint(kqg_d[1] * nqs_half2) && isint(kqg_d[2] * nqs_half3)
614617
auto isint = [](double x) { return std::abs(x - std::round(x)) < 1e-6; };
615-
if (isint(kqg_d[0] * nqs_half1) && isint(kqg_d[1] * nqs_half2) && isint(kqg_d[2] * nqs_half3))
618+
if (isint(kqg_d[0] * nqs_half1) &&
619+
isint(kqg_d[1] * nqs_half2) &&
620+
isint(kqg_d[2] * nqs_half3))
616621
{
617622
grid_factor = 0;
618623
}
@@ -622,6 +627,8 @@ void OperatorEXXPW<T, Device>::get_potential() const
622627
}
623628
}
624629

630+
const int ig_kq = ik * nks * npw + iq * npw + ig;
631+
625632
Real gg = (k_c - q_c + rhopw->gcar[ig]).norm2() * tpiba2;
626633
Real hse_omega2 = GlobalC::exx_info.info_global.hse_omega * GlobalC::exx_info.info_global.hse_omega;
627634
// if (kqgcar2 > 1e-12) // vasp uses 1/40 of the smallest (k spacing)**2
@@ -631,28 +638,29 @@ void OperatorEXXPW<T, Device>::get_potential() const
631638
// if (PARAM.inp.dft_functional == "hse")
632639
if (GlobalC::exx_info.info_global.ccp_type == Conv_Coulomb_Pot_K::Ccp_Type::Erfc)
633640
{
634-
pot[ik * nks * npw + iq * npw + ig] = fac * (1.0 - std::exp(-gg / 4.0 / hse_omega2)) * grid_factor;
641+
pot[ig_kq] = fac * (1.0 - std::exp(-gg / 4.0 / hse_omega2)) * grid_factor;
635642
}
636643
else if (GlobalC::exx_info.info_global.ccp_type == Conv_Coulomb_Pot_K::Ccp_Type::Erf)
637644
{
638-
pot[ik * nks * npw + iq * npw + ig] = fac * (std::exp(-gg / 4.0 / hse_omega2)) * grid_factor;
645+
pot[ig_kq] = fac * (std::exp(-gg / 4.0 / hse_omega2)) * grid_factor;
639646
}
640647
else
641648
{
642-
pot[ik * nks * npw + iq * npw + ig] = fac * grid_factor;
649+
pot[ig_kq] = fac * grid_factor;
643650
}
644651
}
645652
// }
646653
else
647654
{
648655
// if (PARAM.inp.dft_functional == "hse")
649-
if (GlobalC::exx_info.info_global.ccp_type == Conv_Coulomb_Pot_K::Ccp_Type::Erfc && !gamma_extrapolation)
656+
if (GlobalC::exx_info.info_global.ccp_type == Conv_Coulomb_Pot_K::Ccp_Type::Erfc &&
657+
!gamma_extrapolation)
650658
{
651-
pot[ik * nks * npw + iq * npw + ig] = exx_div - ModuleBase::PI * ModuleBase::e2 / hse_omega2;
659+
pot[ig_kq] = exx_div - ModuleBase::PI * ModuleBase::e2 / hse_omega2;
652660
}
653661
else
654662
{
655-
pot[ik * nks * npw + iq * npw + ig] = exx_div;
663+
pot[ig_kq] = exx_div;
656664
}
657665
}
658666
// assert(is_finite(density_recip[ig]));
@@ -669,7 +677,9 @@ void OperatorEXXPW<T, Device>::exx_divergence()
669677
return;
670678
}
671679

672-
Real nqs_half1 = 0.5 * kv->nmp[0], nqs_half2 = 0.5 * kv->nmp[1], nqs_half3 = 0.5 * kv->nmp[2];
680+
Real nqs_half1 = 0.5 * kv->nmp[0];
681+
Real nqs_half2 = 0.5 * kv->nmp[1];
682+
Real nqs_half3 = 0.5 * kv->nmp[2];
673683

674684
// here we follow the exx_divergence subroutine in q-e (PW/src/exx_base.f90)
675685
double alpha = 10.0 / wfcpw->gk_ecut;
@@ -680,21 +690,23 @@ void OperatorEXXPW<T, Device>::exx_divergence()
680690
// temporarily for all k points, should be replaced to q points later
681691
for (int ik = 0; ik < wfcpw->nks; ik++)
682692
{
683-
auto k_c = wfcpw->kvec_c[ik];
684-
auto k_d = wfcpw->kvec_d[ik];
693+
const ModuleBase::Vector3<double> k_c = wfcpw->kvec_c[ik];
694+
const ModuleBase::Vector3<double> k_d = wfcpw->kvec_d[ik];
685695
#ifdef _OPENMP
686696
#pragma omp parallel for reduction(+:div)
687697
#endif
688698
for (int ig = 0; ig < rhopw->npw; ig++)
689699
{
690-
auto q_c = k_c + rhopw->gcar[ig];
691-
auto q_d = k_d + rhopw->gdirect[ig];
700+
const ModuleBase::Vector3<double> q_c = k_c + rhopw->gcar[ig];
701+
const ModuleBase::Vector3<double> q_d = k_d + rhopw->gdirect[ig];
692702
double qq = q_c.norm2();
693703
Real grid_factor = 1;
694704
if (gamma_extrapolation)
695705
{
696706
auto isint = [](double x) { return std::abs(x - std::round(x)) < 1e-6; };
697-
if (isint(q_d[0] * nqs_half1) && isint(q_d[1] * nqs_half2) && isint(q_d[2] * nqs_half3))
707+
if (isint(q_d[0] * nqs_half1) &&
708+
isint(q_d[1] * nqs_half2) &&
709+
isint(q_d[2] * nqs_half3))
698710
{
699711
grid_factor = 0;
700712
}
@@ -797,14 +809,14 @@ double OperatorEXXPW<T, Device>::cal_exx_energy_ace(psi::Psi<T, Device> *ppsi_)
797809
setmem_complex_op()(h_psi_ace, 0, psi_.get_nbands() * psi_.get_nbasis());
798810
*ik_ = i;
799811
psi_.fix_kb(i, 0);
800-
auto psi_i = psi_.get_pointer();
812+
T* psi_i = psi_.get_pointer();
801813
act_op_ace(psi_.get_nbands(), psi_.get_nbasis(), 1, psi_i, h_psi_ace, 0, true);
802814

803815
for (int nband = 0; nband < psi_.get_nbands(); nband++)
804816
{
805817
psi_.fix_kb(i, nband);
806-
auto psi_i_n = psi_.get_pointer();
807-
auto hpsi_i_n = h_psi_ace + nband * psi_.get_nbasis();
818+
T* psi_i_n = psi_.get_pointer();
819+
T* hpsi_i_n = h_psi_ace + nband * psi_.get_nbasis();
808820
double wg_i_n = (*wg)(i, nband);
809821
// Eexx += dot(psi_i_n, h_psi_i_n)
810822
Eexx += dot_op()(psi_.get_nbasis(), psi_i_n, hpsi_i_n, false) * wg_i_n * 2;

source/module_hamilt_pw/hamilt_pwdft/stress_func_exx.cpp

Lines changed: 38 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,9 @@ void Stress_PW<FPTYPE, Device>::stress_exx(ModuleBase::matrix& sigma,
99
const K_Vectors *p_kv,
1010
const psi::Psi<complex<FPTYPE>, Device>* d_psi_in, const UnitCell& ucell)
1111
{
12-
double nqs_half1 = 0.5 * p_kv->nmp[0], nqs_half2 = 0.5 * p_kv->nmp[1], nqs_half3 = 0.5 * p_kv->nmp[2];
12+
double nqs_half1 = 0.5 * p_kv->nmp[0];
13+
double nqs_half2 = 0.5 * p_kv->nmp[1];
14+
double nqs_half3 = 0.5 * p_kv->nmp[2];
1315
bool gamma_extrapolation = PARAM.inp.exx_gamma_extrapolation;
1416
if (!p_kv->get_is_mp())
1517
{
@@ -68,20 +70,22 @@ void Stress_PW<FPTYPE, Device>::stress_exx(ModuleBase::matrix& sigma,
6870
// temporarily for all k points, should be replaced to q points later
6971
for (int ik = 0; ik < wfcpw->nks; ik++)
7072
{
71-
auto k_c = wfcpw->kvec_c[ik];
72-
auto k_d = wfcpw->kvec_d[ik];
73+
const ModuleBase::Vector3<double> k_c = wfcpw->kvec_c[ik];
74+
const ModuleBase::Vector3<double> k_d = wfcpw->kvec_d[ik];
7375
#ifdef _OPENMP
7476
#pragma omp parallel for reduction(+:div)
7577
#endif
7678
for (int ig = 0; ig < rhopw->npw; ig++)
7779
{
78-
auto q_c = k_c + rhopw->gcar[ig];
79-
auto q_d = k_d + rhopw->gdirect[ig];
80+
const ModuleBase::Vector3<double> q_c = k_c + rhopw->gcar[ig];
81+
const ModuleBase::Vector3<double> q_d = k_d + rhopw->gdirect[ig];
8082
double qq = q_c.norm2();
8183
double grid_factor = 1;
8284
if (gamma_extrapolation)
8385
{
84-
if (isint(q_d[0] * nqs_half1) && isint(q_d[1] * nqs_half2) && isint(q_d[2] * nqs_half3))
86+
if (isint(q_d[0] * nqs_half1) &&
87+
isint(q_d[1] * nqs_half2) &&
88+
isint(q_d[2] * nqs_half3))
8589
{
8690
grid_factor = 0;
8791
}
@@ -136,9 +140,9 @@ void Stress_PW<FPTYPE, Device>::stress_exx(ModuleBase::matrix& sigma,
136140
{
137141
double hse_omega = GlobalC::exx_info.info_global.hse_omega;
138142
double omega2 = hse_omega * hse_omega;
139-
#ifdef _OPENMP
140-
#pragma omp parallel for reduction(+:aa)
141-
#endif
143+
#ifdef _OPENMP
144+
#pragma omp parallel for reduction(+:aa)
145+
#endif
142146
for (int i = 0; i < nqq; i++)
143147
{
144148
double q = dq * (i+0.5);
@@ -158,22 +162,24 @@ void Stress_PW<FPTYPE, Device>::stress_exx(ModuleBase::matrix& sigma,
158162
{
159163
for (int iq = 0; iq < nks; iq++)
160164
{
161-
auto k_c = wfcpw->kvec_c[ik];
162-
auto k_d = wfcpw->kvec_d[ik];
163-
auto q_c = wfcpw->kvec_c[iq];
164-
auto q_d = wfcpw->kvec_d[iq];
165+
const ModuleBase::Vector3<double> k_c = wfcpw->kvec_c[ik];
166+
const ModuleBase::Vector3<double> k_d = wfcpw->kvec_d[ik];
167+
const ModuleBase::Vector3<double> q_c = wfcpw->kvec_c[iq];
168+
const ModuleBase::Vector3<double> q_d = wfcpw->kvec_d[iq];
165169

166170
#ifdef _OPENMP
167171
#pragma omp parallel for schedule(static)
168172
#endif
169173
for (int ig = 0; ig < rhopw->npw; ig++)
170174
{
171-
auto g_d = rhopw->gdirect[ig];
172-
auto kqg_d = k_d - q_d + g_d;
175+
const ModuleBase::Vector3<double> g_d = rhopw->gdirect[ig];
176+
const ModuleBase::Vector3<double> kqg_d = k_d - q_d + g_d;
173177
Real grid_factor = 1;
174178
if (gamma_extrapolation)
175179
{
176-
if (isint(kqg_d[0] * nqs_half1) && isint(kqg_d[1] * nqs_half2) && isint(kqg_d[2] * nqs_half3))
180+
if (isint(kqg_d[0] * nqs_half1) &&
181+
isint(kqg_d[1] * nqs_half2) &&
182+
isint(kqg_d[2] * nqs_half3))
177183
{
178184
grid_factor = 0;
179185
}
@@ -183,6 +189,8 @@ void Stress_PW<FPTYPE, Device>::stress_exx(ModuleBase::matrix& sigma,
183189
}
184190
}
185191

192+
const int ig_kq = ik * nks * rhopw->npw + iq * rhopw->npw + ig;
193+
186194
Real gg = (k_c - q_c + rhopw->gcar[ig]).norm2() * tpiba2;
187195
Real hse_omega2 = GlobalC::exx_info.info_global.hse_omega * GlobalC::exx_info.info_global.hse_omega;
188196
// if (kqgcar2 > 1e-12) // vasp uses 1/40 of the smallest (k spacing)**2
@@ -192,19 +200,19 @@ void Stress_PW<FPTYPE, Device>::stress_exx(ModuleBase::matrix& sigma,
192200
// if (PARAM.inp.dft_functional == "hse")
193201
if (GlobalC::exx_info.info_global.ccp_type == Conv_Coulomb_Pot_K::Ccp_Type::Erfc)
194202
{
195-
pot[ik * nks * rhopw->npw + iq * rhopw->npw + ig] = fac * (1.0 - std::exp(-gg / 4.0 / hse_omega2)) * grid_factor;
196-
pot_stress[ik * nks * rhopw->npw + iq * rhopw->npw + ig] = (1.0 - (1.0 + gg / 4.0 / hse_omega2) * std::exp(-gg / 4.0 / hse_omega2)) / (1.0 - std::exp(-gg / 4.0 / hse_omega2)) / gg;
203+
pot[ig_kq] = fac * (1.0 - std::exp(-gg / 4.0 / hse_omega2)) * grid_factor;
204+
pot_stress[ig_kq] = (1.0 - (1.0 + gg / 4.0 / hse_omega2) * std::exp(-gg / 4.0 / hse_omega2)) / (1.0 - std::exp(-gg / 4.0 / hse_omega2)) / gg;
197205
}
198206
else if (GlobalC::exx_info.info_global.ccp_type == Conv_Coulomb_Pot_K::Ccp_Type::Erf)
199207
{
200208
ModuleBase::WARNING("Stress_PW", "Stress for Erf is not implemented yet");
201-
pot[ik * nks * rhopw->npw + iq * rhopw->npw + ig] = fac * grid_factor;
202-
pot_stress[ik * nks * rhopw->npw + iq * rhopw->npw + ig] = 1.0 / gg;
209+
pot[ig_kq] = fac * grid_factor;
210+
pot_stress[ig_kq] = 1.0 / gg;
203211
}
204212
else if (GlobalC::exx_info.info_global.ccp_type == Conv_Coulomb_Pot_K::Ccp_Type::Hf)
205213
{
206-
pot[ik * nks * rhopw->npw + iq * rhopw->npw + ig] = fac * grid_factor;
207-
pot_stress[ik * nks * rhopw->npw + iq * rhopw->npw + ig] = 1.0 / gg;
214+
pot[ig_kq] = fac * grid_factor;
215+
pot_stress[ig_kq] = 1.0 / gg;
208216
}
209217
}
210218
// }
@@ -213,13 +221,13 @@ void Stress_PW<FPTYPE, Device>::stress_exx(ModuleBase::matrix& sigma,
213221
// if (PARAM.inp.dft_functional == "hse")
214222
if (GlobalC::exx_info.info_global.ccp_type == Conv_Coulomb_Pot_K::Ccp_Type::Erfc && !gamma_extrapolation)
215223
{
216-
pot[ik * nks * rhopw->npw + iq * rhopw->npw + ig] = - ModuleBase::PI * ModuleBase::e2 / hse_omega2; // maybe we should add a exx_div here, but q-e does not do that
217-
pot_stress[ik * nks * rhopw->npw + iq * rhopw->npw + ig] = 1 / 4.0 / hse_omega2;
224+
pot[ig_kq] = - ModuleBase::PI * ModuleBase::e2 / hse_omega2; // maybe we should add a exx_div here, but q-e does not do that
225+
pot_stress[ig_kq] = 1 / 4.0 / hse_omega2;
218226
}
219227
else
220228
{
221-
pot[ik * nks * rhopw->npw + iq * rhopw->npw + ig] = exx_div;
222-
pot_stress[ik * nks * rhopw->npw + iq * rhopw->npw + ig] = 0;
229+
pot[ig_kq] = exx_div;
230+
pot_stress[ig_kq] = 0;
223231
}
224232
}
225233
// assert(is_finite(density_recip[ig]));
@@ -273,13 +281,14 @@ void Stress_PW<FPTYPE, Device>::stress_exx(ModuleBase::matrix& sigma,
273281
#endif
274282
for (int ig = 0; ig < rhopw->npw; ig++)
275283
{
276-
auto kqg = wfcpw->kvec_c[ik] - wfcpw->kvec_c[iq] + rhopw->gcar[ig];
284+
const ModuleBase::Vector3<double> kqg = wfcpw->kvec_c[ik] - wfcpw->kvec_c[iq] + rhopw->gcar[ig];
277285
double kqg_alpha = kqg[alpha] * tpiba;
278286
double kqg_beta = kqg[beta] * tpiba;
279287
// equation 10 of 10.1103/PhysRevB.73.125120
280288
double density_recip2 = std::real(density_recip[ig] * std::conj(density_recip[ig]));
281-
double pot_local = pot[ig + iq * rhopw->npw + ik * rhopw->npw * nqs];
282-
double pot_stress_local = pot_stress[ig + iq * rhopw->npw + ik * rhopw->npw * nqs];
289+
const int idx = ig + iq * rhopw->npw + ik * rhopw->npw * nqs;
290+
double pot_local = pot[idx];
291+
double pot_stress_local = pot_stress[idx];
283292
// if (GlobalC::exx_info.info_global.ccp_type == Conv_Coulomb_Pot_K::Ccp_Type::Hf)
284293
// {
285294
sigma_ab_loc += density_recip2 * pot_local * (kqg_alpha * kqg_beta * pot_stress_local - delta_ab) ;

0 commit comments

Comments
 (0)