Format

Flying-dragon-boxing · Flying-dragon-boxing · commit 7f433e04c1df · 2025-04-08T15:09:15.000+08:00
diff --git a/source/module_hamilt_pw/hamilt_pwdft/operator_pw/op_exx_pw.cpp b/source/module_hamilt_pw/hamilt_pwdft/operator_pw/op_exx_pw.cpp
@@ -273,7 +273,7 @@ void OperatorEXXPW<T, Device>::act_op(const int nbands,
             setmem_complex_op()(psi_mq_real, 0, wfcpw->nrxx);
 
         } // end of iq
-        auto h_psi_nk = tmhpsi + n_iband * nbasis;
+        T* h_psi_nk = tmhpsi + n_iband * nbasis;
         Real hybrid_alpha = GlobalC::exx_info.info_global.hybrid_alpha;
         wfcpw->real_to_recip(ctx, h_psi_real, h_psi_nk, this->ik, true, hybrid_alpha);
         setmem_complex_op()(h_psi_real, 0, rhopw->nrxx);
@@ -297,7 +297,7 @@ void OperatorEXXPW<T, Device>::act_op_ace(const int nbands,
 
 //    std::cout << "act_op_ace" << std::endl;
     // hpsi += -Xi^\dagger * Xi * psi
-    auto Xi_ace = Xi_ace_k[this->ik];
+    T* Xi_ace = Xi_ace_k[this->ik];
     int nbands_tot = psi.get_nbands();
     int nbasis_max = psi.get_nbasis();
 //    T* hpsi = nullptr;
@@ -545,7 +545,8 @@ void OperatorEXXPW<T, Device>::multiply_potential(T *density_recip, int ik, int
     #endif
     for (int ig = 0; ig < npw; ig++)
     {
-        density_recip[ig] *= pot[ik * nks * npw + iq * npw + ig];
+        int ig_kq = ik * nks * npw + iq * npw + ig;
+        density_recip[ig] *= pot[ig_kq];
 
     }
 
@@ -557,7 +558,7 @@ const T *OperatorEXXPW<T, Device>::get_pw(const int m, const int iq) const
 {
     // return pws[iq].get() + m * wfcpw->npwk[iq];
     psi.fix_kb(iq, m);
-    auto psi_mq = psi.get_pointer();
+    T* psi_mq = psi.get_pointer();
     return psi_mq;
 }
 
@@ -586,7 +587,9 @@ OperatorEXXPW<T, Device>::OperatorEXXPW(const OperatorEXXPW<T_in, Device_in> *op
 template <typename T, typename Device>
 void OperatorEXXPW<T, Device>::get_potential() const
 {
-    Real nqs_half1 = 0.5 * kv->nmp[0], nqs_half2 = 0.5 * kv->nmp[1], nqs_half3 = 0.5 * kv->nmp[2];
+    Real nqs_half1 = 0.5 * kv->nmp[0];
+    Real nqs_half2 = 0.5 * kv->nmp[1];
+    Real nqs_half3 = 0.5 * kv->nmp[2];
 
     int nks = wfcpw->nks, npw = rhopw->npw;
     double tpiba2 = tpiba * tpiba;
@@ -595,24 +598,26 @@ void OperatorEXXPW<T, Device>::get_potential() const
     {
         for (int iq = 0; iq < nks; iq++)
         {
-            auto k_c = wfcpw->kvec_c[ik];
-            auto k_d = wfcpw->kvec_d[ik];
-            auto q_c = wfcpw->kvec_c[iq];
-            auto q_d = wfcpw->kvec_d[iq];
+            const ModuleBase::Vector3<double> k_c = wfcpw->kvec_c[ik];
+            const ModuleBase::Vector3<double> k_d = wfcpw->kvec_d[ik];
+            const ModuleBase::Vector3<double> q_c = wfcpw->kvec_c[iq];
+            const ModuleBase::Vector3<double> q_d = wfcpw->kvec_d[iq];
 
             #ifdef _OPENMP
             #pragma omp parallel for schedule(static)
             #endif
             for (int ig = 0; ig < rhopw->npw; ig++)
             {
-                auto g_d = rhopw->gdirect[ig];
-                auto kqg_d = k_d - q_d + g_d;
+                const ModuleBase::Vector3<double> g_d = rhopw->gdirect[ig];
+                const ModuleBase::Vector3<double> kqg_d = k_d - q_d + g_d;
                 Real grid_factor = 1;
                 if (gamma_extrapolation)
                 {
                     // if isint(kqg_d[0] * nqs_half1) && isint(kqg_d[1] * nqs_half2) && isint(kqg_d[2] * nqs_half3)
                     auto isint = [](double x) { return std::abs(x - std::round(x)) < 1e-6; };
-                    if (isint(kqg_d[0] * nqs_half1) && isint(kqg_d[1] * nqs_half2) && isint(kqg_d[2] * nqs_half3))
+                    if (isint(kqg_d[0] * nqs_half1) &&
+                        isint(kqg_d[1] * nqs_half2) &&
+                        isint(kqg_d[2] * nqs_half3))
                     {
                         grid_factor = 0;
                     }
@@ -622,6 +627,8 @@ void OperatorEXXPW<T, Device>::get_potential() const
                     }
                 }
 
+                const int ig_kq = ik * nks * npw + iq * npw + ig;
+
                 Real gg = (k_c - q_c + rhopw->gcar[ig]).norm2() * tpiba2;
                 Real hse_omega2 = GlobalC::exx_info.info_global.hse_omega * GlobalC::exx_info.info_global.hse_omega;
                 // if (kqgcar2 > 1e-12) // vasp uses 1/40 of the smallest (k spacing)**2
@@ -631,28 +638,29 @@ void OperatorEXXPW<T, Device>::get_potential() const
                     // if (PARAM.inp.dft_functional == "hse")
                     if (GlobalC::exx_info.info_global.ccp_type == Conv_Coulomb_Pot_K::Ccp_Type::Erfc)
                     {
-                        pot[ik * nks * npw + iq * npw + ig] = fac * (1.0 - std::exp(-gg / 4.0 / hse_omega2)) * grid_factor;
+                        pot[ig_kq] = fac * (1.0 - std::exp(-gg / 4.0 / hse_omega2)) * grid_factor;
                     }
                     else if (GlobalC::exx_info.info_global.ccp_type == Conv_Coulomb_Pot_K::Ccp_Type::Erf)
                     {
-                        pot[ik * nks * npw + iq * npw + ig] = fac * (std::exp(-gg / 4.0 / hse_omega2)) * grid_factor;
+                        pot[ig_kq] = fac * (std::exp(-gg / 4.0 / hse_omega2)) * grid_factor;
                     }
                     else
                     {
-                        pot[ik * nks * npw + iq * npw + ig] = fac * grid_factor;
+                        pot[ig_kq] = fac * grid_factor;
                     }
                 }
                 // }
                 else
                 {
                     // if (PARAM.inp.dft_functional == "hse")
-                    if (GlobalC::exx_info.info_global.ccp_type == Conv_Coulomb_Pot_K::Ccp_Type::Erfc && !gamma_extrapolation)
+                    if (GlobalC::exx_info.info_global.ccp_type == Conv_Coulomb_Pot_K::Ccp_Type::Erfc &&
+                        !gamma_extrapolation)
                     {
-                        pot[ik * nks * npw + iq * npw + ig] = exx_div - ModuleBase::PI * ModuleBase::e2 / hse_omega2;
+                        pot[ig_kq] = exx_div - ModuleBase::PI * ModuleBase::e2 / hse_omega2;
                     }
                     else
                     {
-                        pot[ik * nks * npw + iq * npw + ig] = exx_div;
+                        pot[ig_kq] = exx_div;
                     }
                 }
                 // assert(is_finite(density_recip[ig]));
@@ -669,7 +677,9 @@ void OperatorEXXPW<T, Device>::exx_divergence()
         return;
     }
 
-    Real nqs_half1 = 0.5 * kv->nmp[0], nqs_half2 = 0.5 * kv->nmp[1], nqs_half3 = 0.5 * kv->nmp[2];
+    Real nqs_half1 = 0.5 * kv->nmp[0];
+    Real nqs_half2 = 0.5 * kv->nmp[1];
+    Real nqs_half3 = 0.5 * kv->nmp[2];
 
     // here we follow the exx_divergence subroutine in q-e (PW/src/exx_base.f90)
     double alpha = 10.0 / wfcpw->gk_ecut;
@@ -680,21 +690,23 @@ void OperatorEXXPW<T, Device>::exx_divergence()
     // temporarily for all k points, should be replaced to q points later
     for (int ik = 0; ik < wfcpw->nks; ik++)
     {
-        auto k_c = wfcpw->kvec_c[ik];
-        auto k_d = wfcpw->kvec_d[ik];
+        const ModuleBase::Vector3<double> k_c = wfcpw->kvec_c[ik];
+        const ModuleBase::Vector3<double> k_d = wfcpw->kvec_d[ik];
 #ifdef _OPENMP
 #pragma omp parallel for reduction(+:div)
 #endif
         for (int ig = 0; ig < rhopw->npw; ig++)
         {
-            auto q_c = k_c + rhopw->gcar[ig];
-            auto q_d = k_d + rhopw->gdirect[ig];
+            const ModuleBase::Vector3<double> q_c = k_c + rhopw->gcar[ig];
+            const ModuleBase::Vector3<double> q_d = k_d + rhopw->gdirect[ig];
             double qq = q_c.norm2();
             Real grid_factor = 1;
             if (gamma_extrapolation)
             {
                 auto isint = [](double x) { return std::abs(x - std::round(x)) < 1e-6; };
-                if (isint(q_d[0] * nqs_half1) && isint(q_d[1] * nqs_half2) && isint(q_d[2] * nqs_half3))
+                if (isint(q_d[0] * nqs_half1) &&
+                    isint(q_d[1] * nqs_half2) &&
+                    isint(q_d[2] * nqs_half3))
                 {
                     grid_factor = 0;
                 }
@@ -797,14 +809,14 @@ double OperatorEXXPW<T, Device>::cal_exx_energy_ace(psi::Psi<T, Device> *ppsi_)
         setmem_complex_op()(h_psi_ace, 0, psi_.get_nbands() * psi_.get_nbasis());
         *ik_ = i;
         psi_.fix_kb(i, 0);
-        auto psi_i = psi_.get_pointer();
+        T* psi_i = psi_.get_pointer();
         act_op_ace(psi_.get_nbands(), psi_.get_nbasis(), 1, psi_i, h_psi_ace, 0, true);
 
         for (int nband = 0; nband < psi_.get_nbands(); nband++)
         {
             psi_.fix_kb(i, nband);
-            auto psi_i_n = psi_.get_pointer();
-            auto hpsi_i_n = h_psi_ace + nband * psi_.get_nbasis();
+            T* psi_i_n = psi_.get_pointer();
+            T* hpsi_i_n = h_psi_ace + nband * psi_.get_nbasis();
             double wg_i_n = (*wg)(i, nband);
             // Eexx += dot(psi_i_n, h_psi_i_n)
             Eexx += dot_op()(psi_.get_nbasis(), psi_i_n, hpsi_i_n, false) * wg_i_n * 2;
diff --git a/source/module_hamilt_pw/hamilt_pwdft/stress_func_exx.cpp b/source/module_hamilt_pw/hamilt_pwdft/stress_func_exx.cpp
@@ -9,7 +9,9 @@ void Stress_PW<FPTYPE, Device>::stress_exx(ModuleBase::matrix& sigma,
                                            const K_Vectors *p_kv,
                                            const psi::Psi<complex<FPTYPE>, Device>* d_psi_in, const UnitCell& ucell)
 {
-    double nqs_half1 = 0.5 * p_kv->nmp[0], nqs_half2 = 0.5 * p_kv->nmp[1], nqs_half3 = 0.5 * p_kv->nmp[2];
+    double nqs_half1 = 0.5 * p_kv->nmp[0];
+    double nqs_half2 = 0.5 * p_kv->nmp[1];
+    double nqs_half3 = 0.5 * p_kv->nmp[2];
     bool gamma_extrapolation = PARAM.inp.exx_gamma_extrapolation;
     if (!p_kv->get_is_mp())
     {
@@ -68,20 +70,22 @@ void Stress_PW<FPTYPE, Device>::stress_exx(ModuleBase::matrix& sigma,
         // temporarily for all k points, should be replaced to q points later
         for (int ik = 0; ik < wfcpw->nks; ik++)
         {
-            auto k_c = wfcpw->kvec_c[ik];
-            auto k_d = wfcpw->kvec_d[ik];
+            const ModuleBase::Vector3<double> k_c = wfcpw->kvec_c[ik];
+            const ModuleBase::Vector3<double> k_d = wfcpw->kvec_d[ik];
 #ifdef _OPENMP
 #pragma omp parallel for reduction(+:div)
 #endif
             for (int ig = 0; ig < rhopw->npw; ig++)
             {
-                auto q_c = k_c + rhopw->gcar[ig];
-                auto q_d = k_d + rhopw->gdirect[ig];
+                const ModuleBase::Vector3<double> q_c = k_c + rhopw->gcar[ig];
+                const ModuleBase::Vector3<double> q_d = k_d + rhopw->gdirect[ig];
                 double qq = q_c.norm2();
                 double grid_factor = 1;
                 if (gamma_extrapolation)
                 {
-                    if (isint(q_d[0] * nqs_half1) && isint(q_d[1] * nqs_half2) && isint(q_d[2] * nqs_half3))
+                    if (isint(q_d[0] * nqs_half1) &&
+                        isint(q_d[1] * nqs_half2) &&
+                        isint(q_d[2] * nqs_half3))
                     {
                         grid_factor = 0;
                     }
@@ -136,9 +140,9 @@ void Stress_PW<FPTYPE, Device>::stress_exx(ModuleBase::matrix& sigma,
         {
             double hse_omega = GlobalC::exx_info.info_global.hse_omega;
             double omega2 = hse_omega * hse_omega;
-#ifdef _OPENMP
-#pragma omp parallel for reduction(+:aa)
-#endif
+            #ifdef _OPENMP
+            #pragma omp parallel for reduction(+:aa)
+            #endif
             for (int i = 0; i < nqq; i++)
             {
                 double q = dq * (i+0.5);
@@ -158,22 +162,24 @@ void Stress_PW<FPTYPE, Device>::stress_exx(ModuleBase::matrix& sigma,
     {
         for (int iq = 0; iq < nks; iq++)
         {
-            auto k_c = wfcpw->kvec_c[ik];
-            auto k_d = wfcpw->kvec_d[ik];
-            auto q_c = wfcpw->kvec_c[iq];
-            auto q_d = wfcpw->kvec_d[iq];
+            const ModuleBase::Vector3<double> k_c = wfcpw->kvec_c[ik];
+            const ModuleBase::Vector3<double> k_d = wfcpw->kvec_d[ik];
+            const ModuleBase::Vector3<double> q_c = wfcpw->kvec_c[iq];
+            const ModuleBase::Vector3<double> q_d = wfcpw->kvec_d[iq];
 
             #ifdef _OPENMP
             #pragma omp parallel for schedule(static)
             #endif
             for (int ig = 0; ig < rhopw->npw; ig++)
             {
-                auto g_d = rhopw->gdirect[ig];
-                auto kqg_d = k_d - q_d + g_d;
+                const ModuleBase::Vector3<double> g_d = rhopw->gdirect[ig];
+                const ModuleBase::Vector3<double> kqg_d = k_d - q_d + g_d;
                 Real grid_factor = 1;
                 if (gamma_extrapolation)
                 {
-                    if (isint(kqg_d[0] * nqs_half1) && isint(kqg_d[1] * nqs_half2) && isint(kqg_d[2] * nqs_half3))
+                    if (isint(kqg_d[0] * nqs_half1) &&
+                        isint(kqg_d[1] * nqs_half2) &&
+                        isint(kqg_d[2] * nqs_half3))
                     {
                         grid_factor = 0;
                     }
@@ -183,6 +189,8 @@ void Stress_PW<FPTYPE, Device>::stress_exx(ModuleBase::matrix& sigma,
                     }
                 }
 
+                const int ig_kq = ik * nks * rhopw->npw + iq * rhopw->npw + ig;
+
                 Real gg = (k_c - q_c + rhopw->gcar[ig]).norm2() * tpiba2;
                 Real hse_omega2 = GlobalC::exx_info.info_global.hse_omega * GlobalC::exx_info.info_global.hse_omega;
                 // if (kqgcar2 > 1e-12) // vasp uses 1/40 of the smallest (k spacing)**2
@@ -192,19 +200,19 @@ void Stress_PW<FPTYPE, Device>::stress_exx(ModuleBase::matrix& sigma,
                     // if (PARAM.inp.dft_functional == "hse")
                     if (GlobalC::exx_info.info_global.ccp_type == Conv_Coulomb_Pot_K::Ccp_Type::Erfc)
                     {
-                        pot[ik * nks * rhopw->npw + iq * rhopw->npw + ig] = fac * (1.0 - std::exp(-gg / 4.0 / hse_omega2)) * grid_factor;
-                        pot_stress[ik * nks * rhopw->npw + iq * rhopw->npw + ig] = (1.0 - (1.0 + gg / 4.0 / hse_omega2) * std::exp(-gg / 4.0 / hse_omega2)) / (1.0 - std::exp(-gg / 4.0 / hse_omega2)) / gg;
+                        pot[ig_kq] = fac * (1.0 - std::exp(-gg / 4.0 / hse_omega2)) * grid_factor;
+                        pot_stress[ig_kq] = (1.0 - (1.0 + gg / 4.0 / hse_omega2) * std::exp(-gg / 4.0 / hse_omega2)) / (1.0 - std::exp(-gg / 4.0 / hse_omega2)) / gg;
                     }
                     else if (GlobalC::exx_info.info_global.ccp_type == Conv_Coulomb_Pot_K::Ccp_Type::Erf)
                     {
                         ModuleBase::WARNING("Stress_PW", "Stress for Erf is not implemented yet");
-                        pot[ik * nks * rhopw->npw + iq * rhopw->npw + ig] = fac * grid_factor;
-                        pot_stress[ik * nks * rhopw->npw + iq * rhopw->npw + ig] = 1.0 / gg;
+                        pot[ig_kq] = fac * grid_factor;
+                        pot_stress[ig_kq] = 1.0 / gg;
                     }
                     else if (GlobalC::exx_info.info_global.ccp_type == Conv_Coulomb_Pot_K::Ccp_Type::Hf)
                     {
-                        pot[ik * nks * rhopw->npw + iq * rhopw->npw + ig] = fac * grid_factor;
-                        pot_stress[ik * nks * rhopw->npw + iq * rhopw->npw + ig] = 1.0 / gg;
+                        pot[ig_kq] = fac * grid_factor;
+                        pot_stress[ig_kq] = 1.0 / gg;
                     }
                 }
                 // }
@@ -213,13 +221,13 @@ void Stress_PW<FPTYPE, Device>::stress_exx(ModuleBase::matrix& sigma,
                     // if (PARAM.inp.dft_functional == "hse")
                     if (GlobalC::exx_info.info_global.ccp_type == Conv_Coulomb_Pot_K::Ccp_Type::Erfc && !gamma_extrapolation)
                     {
-                        pot[ik * nks * rhopw->npw + iq * rhopw->npw + ig] = - ModuleBase::PI * ModuleBase::e2 / hse_omega2; // maybe we should add a exx_div here, but q-e does not do that
-                        pot_stress[ik * nks * rhopw->npw + iq * rhopw->npw + ig] = 1 / 4.0 / hse_omega2;
+                        pot[ig_kq] = - ModuleBase::PI * ModuleBase::e2 / hse_omega2; // maybe we should add a exx_div here, but q-e does not do that
+                        pot_stress[ig_kq] = 1 / 4.0 / hse_omega2;
                     }
                     else
                     {
-                        pot[ik * nks * rhopw->npw + iq * rhopw->npw + ig] = exx_div;
-                        pot_stress[ik * nks * rhopw->npw + iq * rhopw->npw + ig] = 0;
+                        pot[ig_kq] = exx_div;
+                        pot_stress[ig_kq] = 0;
                     }
                 }
                 // assert(is_finite(density_recip[ig]));
@@ -273,13 +281,14 @@ void Stress_PW<FPTYPE, Device>::stress_exx(ModuleBase::matrix& sigma,
                             #endif
                             for (int ig = 0; ig < rhopw->npw; ig++)
                             {
-                                auto kqg = wfcpw->kvec_c[ik] - wfcpw->kvec_c[iq] + rhopw->gcar[ig];
+                                const ModuleBase::Vector3<double> kqg = wfcpw->kvec_c[ik] - wfcpw->kvec_c[iq] + rhopw->gcar[ig];
                                 double kqg_alpha = kqg[alpha] * tpiba;
                                 double kqg_beta = kqg[beta] * tpiba;
                                 // equation 10 of 10.1103/PhysRevB.73.125120
                                 double density_recip2 = std::real(density_recip[ig] * std::conj(density_recip[ig]));
-                                double pot_local = pot[ig + iq * rhopw->npw + ik * rhopw->npw * nqs];
-                                double pot_stress_local = pot_stress[ig + iq * rhopw->npw + ik * rhopw->npw * nqs];
+                                const int idx = ig + iq * rhopw->npw + ik * rhopw->npw * nqs;
+                                double pot_local = pot[idx];
+                                double pot_stress_local = pot_stress[idx];
                                 // if (GlobalC::exx_info.info_global.ccp_type == Conv_Coulomb_Pot_K::Ccp_Type::Hf)
                                 // {
                                     sigma_ab_loc += density_recip2 * pot_local * (kqg_alpha * kqg_beta * pot_stress_local - delta_ab) ;

Original file line number	Diff line number	Diff line change
`@@ -273,7 +273,7 @@ void OperatorEXXPW<T, Device>::act_op(const int nbands,`
`273`	`273`	`setmem_complex_op()(psi_mq_real, 0, wfcpw->nrxx);`
`274`	`274`
`275`	`275`	`} // end of iq`
`276`		`- auto h_psi_nk = tmhpsi + n_iband * nbasis;`
	`276`	`+ T* h_psi_nk = tmhpsi + n_iband * nbasis;`
`277`	`277`	`Real hybrid_alpha = GlobalC::exx_info.info_global.hybrid_alpha;`
`278`	`278`	`wfcpw->real_to_recip(ctx, h_psi_real, h_psi_nk, this->ik, true, hybrid_alpha);`
`279`	`279`	`setmem_complex_op()(h_psi_real, 0, rhopw->nrxx);`
`@@ -297,7 +297,7 @@ void OperatorEXXPW<T, Device>::act_op_ace(const int nbands,`
`297`	`297`
`298`	`298`	`// std::cout << "act_op_ace" << std::endl;`
`299`	`299`	`// hpsi += -Xi^\dagger * Xi * psi`
`300`		`- auto Xi_ace = Xi_ace_k[this->ik];`
	`300`	`+ T* Xi_ace = Xi_ace_k[this->ik];`
`301`	`301`	`int nbands_tot = psi.get_nbands();`
`302`	`302`	`int nbasis_max = psi.get_nbasis();`
`303`	`303`	`// T* hpsi = nullptr;`
`@@ -545,7 +545,8 @@ void OperatorEXXPW<T, Device>::multiply_potential(T *density_recip, int ik, int`
`545`	`545`	`#endif`
`546`	`546`	`for (int ig = 0; ig < npw; ig++)`
`547`	`547`	`{`
`548`		`- density_recip[ig] *= pot[ik * nks * npw + iq * npw + ig];`
	`548`	`+ int ig_kq = ik * nks * npw + iq * npw + ig;`
	`549`	`+ density_recip[ig] *= pot[ig_kq];`
`549`	`550`
`550`	`551`	`}`
`551`	`552`
`@@ -557,7 +558,7 @@ const T *OperatorEXXPW<T, Device>::get_pw(const int m, const int iq) const`
`557`	`558`	`{`
`558`	`559`	`// return pws[iq].get() + m * wfcpw->npwk[iq];`
`559`	`560`	`psi.fix_kb(iq, m);`
`560`		`- auto psi_mq = psi.get_pointer();`
	`561`	`+ T* psi_mq = psi.get_pointer();`
`561`	`562`	`return psi_mq;`
`562`	`563`	`}`
`563`	`564`
`@@ -586,7 +587,9 @@ OperatorEXXPW<T, Device>::OperatorEXXPW(const OperatorEXXPW<T_in, Device_in> *op`
`586`	`587`	`template <typename T, typename Device>`
`587`	`588`	`void OperatorEXXPW<T, Device>::get_potential() const`
`588`	`589`	`{`
`589`		`- Real nqs_half1 = 0.5 * kv->nmp[0], nqs_half2 = 0.5 * kv->nmp[1], nqs_half3 = 0.5 * kv->nmp[2];`
	`590`	`+ Real nqs_half1 = 0.5 * kv->nmp[0];`
	`591`	`+ Real nqs_half2 = 0.5 * kv->nmp[1];`
	`592`	`+ Real nqs_half3 = 0.5 * kv->nmp[2];`
`590`	`593`
`591`	`594`	`int nks = wfcpw->nks, npw = rhopw->npw;`
`592`	`595`	`double tpiba2 = tpiba * tpiba;`
`@@ -595,24 +598,26 @@ void OperatorEXXPW<T, Device>::get_potential() const`
`595`	`598`	`{`
`596`	`599`	`for (int iq = 0; iq < nks; iq++)`
`597`	`600`	`{`
`598`		`- auto k_c = wfcpw->kvec_c[ik];`
`599`		`- auto k_d = wfcpw->kvec_d[ik];`
`600`		`- auto q_c = wfcpw->kvec_c[iq];`
`601`		`- auto q_d = wfcpw->kvec_d[iq];`
	`601`	`+ const ModuleBase::Vector3<double> k_c = wfcpw->kvec_c[ik];`
	`602`	`+ const ModuleBase::Vector3<double> k_d = wfcpw->kvec_d[ik];`
	`603`	`+ const ModuleBase::Vector3<double> q_c = wfcpw->kvec_c[iq];`
	`604`	`+ const ModuleBase::Vector3<double> q_d = wfcpw->kvec_d[iq];`
`602`	`605`
`603`	`606`	`#ifdef _OPENMP`
`604`	`607`	`#pragma omp parallel for schedule(static)`
`605`	`608`	`#endif`
`606`	`609`	`for (int ig = 0; ig < rhopw->npw; ig++)`
`607`	`610`	`{`
`608`		`- auto g_d = rhopw->gdirect[ig];`
`609`		`- auto kqg_d = k_d - q_d + g_d;`
	`611`	`+ const ModuleBase::Vector3<double> g_d = rhopw->gdirect[ig];`
	`612`	`+ const ModuleBase::Vector3<double> kqg_d = k_d - q_d + g_d;`
`610`	`613`	`Real grid_factor = 1;`
`611`	`614`	`if (gamma_extrapolation)`
`612`	`615`	`{`
`613`	`616`	`// if isint(kqg_d[0] * nqs_half1) && isint(kqg_d[1] * nqs_half2) && isint(kqg_d[2] * nqs_half3)`
`614`	`617`	`auto isint = [](double x) { return std::abs(x - std::round(x)) < 1e-6; };`
`615`		`- if (isint(kqg_d[0] * nqs_half1) && isint(kqg_d[1] * nqs_half2) && isint(kqg_d[2] * nqs_half3))`
	`618`	`+ if (isint(kqg_d[0] * nqs_half1) &&`
	`619`	`+ isint(kqg_d[1] * nqs_half2) &&`
	`620`	`+ isint(kqg_d[2] * nqs_half3))`
`616`	`621`	`{`
`617`	`622`	`grid_factor = 0;`
`618`	`623`	`}`
`@@ -622,6 +627,8 @@ void OperatorEXXPW<T, Device>::get_potential() const`
`622`	`627`	`}`
`623`	`628`	`}`
`624`	`629`
	`630`	`+ const int ig_kq = ik * nks * npw + iq * npw + ig;`
	`631`	`+`
`625`	`632`	`Real gg = (k_c - q_c + rhopw->gcar[ig]).norm2() * tpiba2;`
`626`	`633`	`Real hse_omega2 = GlobalC::exx_info.info_global.hse_omega * GlobalC::exx_info.info_global.hse_omega;`
`627`	`634`	`// if (kqgcar2 > 1e-12) // vasp uses 1/40 of the smallest (k spacing)**2`
`@@ -631,28 +638,29 @@ void OperatorEXXPW<T, Device>::get_potential() const`
`631`	`638`	`// if (PARAM.inp.dft_functional == "hse")`
`632`	`639`	`if (GlobalC::exx_info.info_global.ccp_type == Conv_Coulomb_Pot_K::Ccp_Type::Erfc)`
`633`	`640`	`{`
`634`		`- pot[ik * nks * npw + iq * npw + ig] = fac * (1.0 - std::exp(-gg / 4.0 / hse_omega2)) * grid_factor;`
	`641`	`+ pot[ig_kq] = fac * (1.0 - std::exp(-gg / 4.0 / hse_omega2)) * grid_factor;`
`635`	`642`	`}`
`636`	`643`	`else if (GlobalC::exx_info.info_global.ccp_type == Conv_Coulomb_Pot_K::Ccp_Type::Erf)`
`637`	`644`	`{`
`638`		`- pot[ik * nks * npw + iq * npw + ig] = fac * (std::exp(-gg / 4.0 / hse_omega2)) * grid_factor;`
	`645`	`+ pot[ig_kq] = fac * (std::exp(-gg / 4.0 / hse_omega2)) * grid_factor;`
`639`	`646`	`}`
`640`	`647`	`else`
`641`	`648`	`{`
`642`		`- pot[ik * nks * npw + iq * npw + ig] = fac * grid_factor;`
	`649`	`+ pot[ig_kq] = fac * grid_factor;`
`643`	`650`	`}`
`644`	`651`	`}`
`645`	`652`	`// }`
`646`	`653`	`else`
`647`	`654`	`{`
`648`	`655`	`// if (PARAM.inp.dft_functional == "hse")`
`649`		`- if (GlobalC::exx_info.info_global.ccp_type == Conv_Coulomb_Pot_K::Ccp_Type::Erfc && !gamma_extrapolation)`
	`656`	`+ if (GlobalC::exx_info.info_global.ccp_type == Conv_Coulomb_Pot_K::Ccp_Type::Erfc &&`
	`657`	`+ !gamma_extrapolation)`
`650`	`658`	`{`
`651`		`- pot[ik * nks * npw + iq * npw + ig] = exx_div - ModuleBase::PI * ModuleBase::e2 / hse_omega2;`
	`659`	`+ pot[ig_kq] = exx_div - ModuleBase::PI * ModuleBase::e2 / hse_omega2;`
`652`	`660`	`}`
`653`	`661`	`else`
`654`	`662`	`{`
`655`		`- pot[ik * nks * npw + iq * npw + ig] = exx_div;`
	`663`	`+ pot[ig_kq] = exx_div;`
`656`	`664`	`}`
`657`	`665`	`}`
`658`	`666`	`// assert(is_finite(density_recip[ig]));`
`@@ -669,7 +677,9 @@ void OperatorEXXPW<T, Device>::exx_divergence()`
`669`	`677`	`return;`
`670`	`678`	`}`
`671`	`679`
`672`		`- Real nqs_half1 = 0.5 * kv->nmp[0], nqs_half2 = 0.5 * kv->nmp[1], nqs_half3 = 0.5 * kv->nmp[2];`
	`680`	`+ Real nqs_half1 = 0.5 * kv->nmp[0];`
	`681`	`+ Real nqs_half2 = 0.5 * kv->nmp[1];`
	`682`	`+ Real nqs_half3 = 0.5 * kv->nmp[2];`
`673`	`683`
`674`	`684`	`// here we follow the exx_divergence subroutine in q-e (PW/src/exx_base.f90)`
`675`	`685`	`double alpha = 10.0 / wfcpw->gk_ecut;`
`@@ -680,21 +690,23 @@ void OperatorEXXPW<T, Device>::exx_divergence()`
`680`	`690`	`// temporarily for all k points, should be replaced to q points later`
`681`	`691`	`for (int ik = 0; ik < wfcpw->nks; ik++)`
`682`	`692`	`{`
`683`		`- auto k_c = wfcpw->kvec_c[ik];`
`684`		`- auto k_d = wfcpw->kvec_d[ik];`
	`693`	`+ const ModuleBase::Vector3<double> k_c = wfcpw->kvec_c[ik];`
	`694`	`+ const ModuleBase::Vector3<double> k_d = wfcpw->kvec_d[ik];`
`685`	`695`	`#ifdef _OPENMP`
`686`	`696`	`#pragma omp parallel for reduction(+:div)`
`687`	`697`	`#endif`
`688`	`698`	`for (int ig = 0; ig < rhopw->npw; ig++)`
`689`	`699`	`{`
`690`		`- auto q_c = k_c + rhopw->gcar[ig];`
`691`		`- auto q_d = k_d + rhopw->gdirect[ig];`
	`700`	`+ const ModuleBase::Vector3<double> q_c = k_c + rhopw->gcar[ig];`
	`701`	`+ const ModuleBase::Vector3<double> q_d = k_d + rhopw->gdirect[ig];`
`692`	`702`	`double qq = q_c.norm2();`
`693`	`703`	`Real grid_factor = 1;`
`694`	`704`	`if (gamma_extrapolation)`
`695`	`705`	`{`
`696`	`706`	`auto isint = [](double x) { return std::abs(x - std::round(x)) < 1e-6; };`
`697`		`- if (isint(q_d[0] * nqs_half1) && isint(q_d[1] * nqs_half2) && isint(q_d[2] * nqs_half3))`
	`707`	`+ if (isint(q_d[0] * nqs_half1) &&`
	`708`	`+ isint(q_d[1] * nqs_half2) &&`
	`709`	`+ isint(q_d[2] * nqs_half3))`
`698`	`710`	`{`
`699`	`711`	`grid_factor = 0;`
`700`	`712`	`}`
`@@ -797,14 +809,14 @@ double OperatorEXXPW<T, Device>::cal_exx_energy_ace(psi::Psi<T, Device> *ppsi_)`
`797`	`809`	`setmem_complex_op()(h_psi_ace, 0, psi_.get_nbands() * psi_.get_nbasis());`
`798`	`810`	`*ik_ = i;`
`799`	`811`	`psi_.fix_kb(i, 0);`
`800`		`- auto psi_i = psi_.get_pointer();`
	`812`	`+ T* psi_i = psi_.get_pointer();`
`801`	`813`	`act_op_ace(psi_.get_nbands(), psi_.get_nbasis(), 1, psi_i, h_psi_ace, 0, true);`
`802`	`814`
`803`	`815`	`for (int nband = 0; nband < psi_.get_nbands(); nband++)`
`804`	`816`	`{`
`805`	`817`	`psi_.fix_kb(i, nband);`
`806`		`- auto psi_i_n = psi_.get_pointer();`
`807`		`- auto hpsi_i_n = h_psi_ace + nband * psi_.get_nbasis();`
	`818`	`+ T* psi_i_n = psi_.get_pointer();`
	`819`	`+ T* hpsi_i_n = h_psi_ace + nband * psi_.get_nbasis();`
`808`	`820`	`double wg_i_n = (*wg)(i, nband);`
`809`	`821`	`// Eexx += dot(psi_i_n, h_psi_i_n)`
`810`	`822`	`Eexx += dot_op()(psi_.get_nbasis(), psi_i_n, hpsi_i_n, false) * wg_i_n * 2;`
Original file line number	Diff line number	Diff line change
`@@ -9,7 +9,9 @@ void Stress_PW<FPTYPE, Device>::stress_exx(ModuleBase::matrix& sigma,`
`9`	`9`	`const K_Vectors *p_kv,`
`10`	`10`	`const psi::Psi<complex<FPTYPE>, Device>* d_psi_in, const UnitCell& ucell)`
`11`	`11`	`{`
`12`		`- double nqs_half1 = 0.5 * p_kv->nmp[0], nqs_half2 = 0.5 * p_kv->nmp[1], nqs_half3 = 0.5 * p_kv->nmp[2];`
	`12`	`+ double nqs_half1 = 0.5 * p_kv->nmp[0];`
	`13`	`+ double nqs_half2 = 0.5 * p_kv->nmp[1];`
	`14`	`+ double nqs_half3 = 0.5 * p_kv->nmp[2];`
`13`	`15`	`bool gamma_extrapolation = PARAM.inp.exx_gamma_extrapolation;`
`14`	`16`	`if (!p_kv->get_is_mp())`
`15`	`17`	`{`
`@@ -68,20 +70,22 @@ void Stress_PW<FPTYPE, Device>::stress_exx(ModuleBase::matrix& sigma,`
`68`	`70`	`// temporarily for all k points, should be replaced to q points later`
`69`	`71`	`for (int ik = 0; ik < wfcpw->nks; ik++)`
`70`	`72`	`{`
`71`		`- auto k_c = wfcpw->kvec_c[ik];`
`72`		`- auto k_d = wfcpw->kvec_d[ik];`
	`73`	`+ const ModuleBase::Vector3<double> k_c = wfcpw->kvec_c[ik];`
	`74`	`+ const ModuleBase::Vector3<double> k_d = wfcpw->kvec_d[ik];`
`73`	`75`	`#ifdef _OPENMP`
`74`	`76`	`#pragma omp parallel for reduction(+:div)`
`75`	`77`	`#endif`
`76`	`78`	`for (int ig = 0; ig < rhopw->npw; ig++)`
`77`	`79`	`{`
`78`		`- auto q_c = k_c + rhopw->gcar[ig];`
`79`		`- auto q_d = k_d + rhopw->gdirect[ig];`
	`80`	`+ const ModuleBase::Vector3<double> q_c = k_c + rhopw->gcar[ig];`
	`81`	`+ const ModuleBase::Vector3<double> q_d = k_d + rhopw->gdirect[ig];`
`80`	`82`	`double qq = q_c.norm2();`
`81`	`83`	`double grid_factor = 1;`
`82`	`84`	`if (gamma_extrapolation)`
`83`	`85`	`{`
`84`		`- if (isint(q_d[0] * nqs_half1) && isint(q_d[1] * nqs_half2) && isint(q_d[2] * nqs_half3))`
	`86`	`+ if (isint(q_d[0] * nqs_half1) &&`
	`87`	`+ isint(q_d[1] * nqs_half2) &&`
	`88`	`+ isint(q_d[2] * nqs_half3))`
`85`	`89`	`{`
`86`	`90`	`grid_factor = 0;`
`87`	`91`	`}`
`@@ -136,9 +140,9 @@ void Stress_PW<FPTYPE, Device>::stress_exx(ModuleBase::matrix& sigma,`
`136`	`140`	`{`
`137`	`141`	`double hse_omega = GlobalC::exx_info.info_global.hse_omega;`
`138`	`142`	`double omega2 = hse_omega * hse_omega;`
`139`		`-#ifdef _OPENMP`
`140`		`-#pragma omp parallel for reduction(+:aa)`
`141`		`-#endif`
	`143`	`+ #ifdef _OPENMP`
	`144`	`+ #pragma omp parallel for reduction(+:aa)`
	`145`	`+ #endif`
`142`	`146`	`for (int i = 0; i < nqq; i++)`
`143`	`147`	`{`
`144`	`148`	`double q = dq * (i+0.5);`
`@@ -158,22 +162,24 @@ void Stress_PW<FPTYPE, Device>::stress_exx(ModuleBase::matrix& sigma,`
`158`	`162`	`{`
`159`	`163`	`for (int iq = 0; iq < nks; iq++)`
`160`	`164`	`{`
`161`		`- auto k_c = wfcpw->kvec_c[ik];`
`162`		`- auto k_d = wfcpw->kvec_d[ik];`
`163`		`- auto q_c = wfcpw->kvec_c[iq];`
`164`		`- auto q_d = wfcpw->kvec_d[iq];`
	`165`	`+ const ModuleBase::Vector3<double> k_c = wfcpw->kvec_c[ik];`
	`166`	`+ const ModuleBase::Vector3<double> k_d = wfcpw->kvec_d[ik];`
	`167`	`+ const ModuleBase::Vector3<double> q_c = wfcpw->kvec_c[iq];`
	`168`	`+ const ModuleBase::Vector3<double> q_d = wfcpw->kvec_d[iq];`
`165`	`169`
`166`	`170`	`#ifdef _OPENMP`
`167`	`171`	`#pragma omp parallel for schedule(static)`
`168`	`172`	`#endif`
`169`	`173`	`for (int ig = 0; ig < rhopw->npw; ig++)`
`170`	`174`	`{`
`171`		`- auto g_d = rhopw->gdirect[ig];`
`172`		`- auto kqg_d = k_d - q_d + g_d;`
	`175`	`+ const ModuleBase::Vector3<double> g_d = rhopw->gdirect[ig];`
	`176`	`+ const ModuleBase::Vector3<double> kqg_d = k_d - q_d + g_d;`
`173`	`177`	`Real grid_factor = 1;`
`174`	`178`	`if (gamma_extrapolation)`
`175`	`179`	`{`
`176`		`- if (isint(kqg_d[0] * nqs_half1) && isint(kqg_d[1] * nqs_half2) && isint(kqg_d[2] * nqs_half3))`
	`180`	`+ if (isint(kqg_d[0] * nqs_half1) &&`
	`181`	`+ isint(kqg_d[1] * nqs_half2) &&`
	`182`	`+ isint(kqg_d[2] * nqs_half3))`
`177`	`183`	`{`
`178`	`184`	`grid_factor = 0;`
`179`	`185`	`}`
`@@ -183,6 +189,8 @@ void Stress_PW<FPTYPE, Device>::stress_exx(ModuleBase::matrix& sigma,`
`183`	`189`	`}`
`184`	`190`	`}`
`185`	`191`
	`192`	`+ const int ig_kq = ik * nks * rhopw->npw + iq * rhopw->npw + ig;`
	`193`	`+`
`186`	`194`	`Real gg = (k_c - q_c + rhopw->gcar[ig]).norm2() * tpiba2;`
`187`	`195`	`Real hse_omega2 = GlobalC::exx_info.info_global.hse_omega * GlobalC::exx_info.info_global.hse_omega;`
`188`	`196`	`// if (kqgcar2 > 1e-12) // vasp uses 1/40 of the smallest (k spacing)**2`
`@@ -192,19 +200,19 @@ void Stress_PW<FPTYPE, Device>::stress_exx(ModuleBase::matrix& sigma,`
`192`	`200`	`// if (PARAM.inp.dft_functional == "hse")`
`193`	`201`	`if (GlobalC::exx_info.info_global.ccp_type == Conv_Coulomb_Pot_K::Ccp_Type::Erfc)`
`194`	`202`	`{`
`195`		`- pot[ik * nks * rhopw->npw + iq * rhopw->npw + ig] = fac * (1.0 - std::exp(-gg / 4.0 / hse_omega2)) * grid_factor;`
`196`		`- pot_stress[ik * nks * rhopw->npw + iq * rhopw->npw + ig] = (1.0 - (1.0 + gg / 4.0 / hse_omega2) * std::exp(-gg / 4.0 / hse_omega2)) / (1.0 - std::exp(-gg / 4.0 / hse_omega2)) / gg;`
	`203`	`+ pot[ig_kq] = fac * (1.0 - std::exp(-gg / 4.0 / hse_omega2)) * grid_factor;`
	`204`	`+ pot_stress[ig_kq] = (1.0 - (1.0 + gg / 4.0 / hse_omega2) * std::exp(-gg / 4.0 / hse_omega2)) / (1.0 - std::exp(-gg / 4.0 / hse_omega2)) / gg;`
`197`	`205`	`}`
`198`	`206`	`else if (GlobalC::exx_info.info_global.ccp_type == Conv_Coulomb_Pot_K::Ccp_Type::Erf)`
`199`	`207`	`{`
`200`	`208`	`ModuleBase::WARNING("Stress_PW", "Stress for Erf is not implemented yet");`
`201`		`- pot[ik * nks * rhopw->npw + iq * rhopw->npw + ig] = fac * grid_factor;`
`202`		`- pot_stress[ik * nks * rhopw->npw + iq * rhopw->npw + ig] = 1.0 / gg;`
	`209`	`+ pot[ig_kq] = fac * grid_factor;`
	`210`	`+ pot_stress[ig_kq] = 1.0 / gg;`
`203`	`211`	`}`
`204`	`212`	`else if (GlobalC::exx_info.info_global.ccp_type == Conv_Coulomb_Pot_K::Ccp_Type::Hf)`
`205`	`213`	`{`
`206`		`- pot[ik * nks * rhopw->npw + iq * rhopw->npw + ig] = fac * grid_factor;`
`207`		`- pot_stress[ik * nks * rhopw->npw + iq * rhopw->npw + ig] = 1.0 / gg;`
	`214`	`+ pot[ig_kq] = fac * grid_factor;`
	`215`	`+ pot_stress[ig_kq] = 1.0 / gg;`
`208`	`216`	`}`
`209`	`217`	`}`
`210`	`218`	`// }`
`@@ -213,13 +221,13 @@ void Stress_PW<FPTYPE, Device>::stress_exx(ModuleBase::matrix& sigma,`
`213`	`221`	`// if (PARAM.inp.dft_functional == "hse")`
`214`	`222`	`if (GlobalC::exx_info.info_global.ccp_type == Conv_Coulomb_Pot_K::Ccp_Type::Erfc && !gamma_extrapolation)`
`215`	`223`	`{`
`216`		`- pot[ik * nks * rhopw->npw + iq * rhopw->npw + ig] = - ModuleBase::PI * ModuleBase::e2 / hse_omega2; // maybe we should add a exx_div here, but q-e does not do that`
`217`		`- pot_stress[ik * nks * rhopw->npw + iq * rhopw->npw + ig] = 1 / 4.0 / hse_omega2;`
	`224`	`+ pot[ig_kq] = - ModuleBase::PI * ModuleBase::e2 / hse_omega2; // maybe we should add a exx_div here, but q-e does not do that`
	`225`	`+ pot_stress[ig_kq] = 1 / 4.0 / hse_omega2;`
`218`	`226`	`}`
`219`	`227`	`else`
`220`	`228`	`{`
`221`		`- pot[ik * nks * rhopw->npw + iq * rhopw->npw + ig] = exx_div;`
`222`		`- pot_stress[ik * nks * rhopw->npw + iq * rhopw->npw + ig] = 0;`
	`229`	`+ pot[ig_kq] = exx_div;`
	`230`	`+ pot_stress[ig_kq] = 0;`
`223`	`231`	`}`
`224`	`232`	`}`
`225`	`233`	`// assert(is_finite(density_recip[ig]));`
`@@ -273,13 +281,14 @@ void Stress_PW<FPTYPE, Device>::stress_exx(ModuleBase::matrix& sigma,`
`273`	`281`	`#endif`
`274`	`282`	`for (int ig = 0; ig < rhopw->npw; ig++)`
`275`	`283`	`{`
`276`		`- auto kqg = wfcpw->kvec_c[ik] - wfcpw->kvec_c[iq] + rhopw->gcar[ig];`
	`284`	`+ const ModuleBase::Vector3<double> kqg = wfcpw->kvec_c[ik] - wfcpw->kvec_c[iq] + rhopw->gcar[ig];`
`277`	`285`	`double kqg_alpha = kqg[alpha] * tpiba;`
`278`	`286`	`double kqg_beta = kqg[beta] * tpiba;`
`279`	`287`	`// equation 10 of 10.1103/PhysRevB.73.125120`
`280`	`288`	`double density_recip2 = std::real(density_recip[ig] * std::conj(density_recip[ig]));`
`281`		`- double pot_local = pot[ig + iq * rhopw->npw + ik * rhopw->npw * nqs];`
`282`		`- double pot_stress_local = pot_stress[ig + iq * rhopw->npw + ik * rhopw->npw * nqs];`
	`289`	`+ const int idx = ig + iq * rhopw->npw + ik * rhopw->npw * nqs;`
	`290`	`+ double pot_local = pot[idx];`
	`291`	`+ double pot_stress_local = pot_stress[idx];`
`283`	`292`	`// if (GlobalC::exx_info.info_global.ccp_type == Conv_Coulomb_Pot_K::Ccp_Type::Hf)`
`284`	`293`	`// {`
`285`	`294`	`sigma_ab_loc += density_recip2 * pot_local * (kqg_alpha * kqg_beta * pot_stress_local - delta_ab) ;`