From e2b3bbdf13b048c002e48da3412de7ceec40633f Mon Sep 17 00:00:00 2001 From: ErjieWu Date: Tue, 26 Aug 2025 17:30:34 +0800 Subject: [PATCH] Accelerate cal_overlap. --- source/source_base/vector3.h | 6 +- .../module_ao/ORB_gaunt_table.cpp | 7 -- .../source_basis/module_ao/ORB_gaunt_table.h | 5 +- source/source_lcao/center2_orb-orb11.cpp | 69 +++++++++++-------- source/source_lcao/center2_orb-orb21.cpp | 22 ++++-- 5 files changed, 64 insertions(+), 45 deletions(-) diff --git a/source/source_base/vector3.h b/source/source_base/vector3.h index 3540018ae0..39cdbeb014 100644 --- a/source/source_base/vector3.h +++ b/source/source_base/vector3.h @@ -170,11 +170,11 @@ template class Vector3 } /** - * @brief Get the square of nomr of a Vector3 + * @brief Get the square of norm of a Vector3 * * @return T */ - T norm2(void) const + inline T norm2(void) const { return x * x + y * y + z * z; } @@ -184,7 +184,7 @@ template class Vector3 * * @return T */ - T norm(void) const + inline T norm(void) const { return sqrt(norm2()); } diff --git a/source/source_basis/module_ao/ORB_gaunt_table.cpp b/source/source_basis/module_ao/ORB_gaunt_table.cpp index e314bc87d6..b930c6bcba 100644 --- a/source/source_basis/module_ao/ORB_gaunt_table.cpp +++ b/source/source_basis/module_ao/ORB_gaunt_table.cpp @@ -154,13 +154,6 @@ void ORB_gaunt_table::init_Ylm_Gaunt } */ -int ORB_gaunt_table::get_lm_index( - const int l, - const int m) -{ - return l*l+m; -} - ///effective pointers int ORB_gaunt_table::EP_EL(const int& L) diff --git a/source/source_basis/module_ao/ORB_gaunt_table.h b/source/source_basis/module_ao/ORB_gaunt_table.h index 6e4eed550c..1e295b9125 100644 --- a/source/source_basis/module_ao/ORB_gaunt_table.h +++ b/source/source_basis/module_ao/ORB_gaunt_table.h @@ -86,7 +86,10 @@ class ORB_gaunt_table /// ------------------------------------ void init_Gaunt(const int &lmax); - static int get_lm_index(const int l, const int m); + static inline int get_lm_index(const int l, const int m) + { + return l*l+m; + } static int Index_M(const int& m); diff --git a/source/source_lcao/center2_orb-orb11.cpp b/source/source_lcao/center2_orb-orb11.cpp index 394384a3b4..9c4612d6fa 100644 --- a/source/source_lcao/center2_orb-orb11.cpp +++ b/source/source_lcao/center2_orb-orb11.cpp @@ -58,17 +58,20 @@ void Center2_Orb::Orb11::init_radial_table(const std::set& radials) const size_t rmesh = Center2_Orb::get_rmesh(this->nA.getRcut(), this->nB.getRcut(), dr_); std::set radials_used; - for (const size_t& ir: radials) { - if (ir < rmesh) { + for (const size_t& ir: radials) + { + if (ir < rmesh) + { radials_used.insert(ir); -} -} + } + } for (int LAB = std::abs(LA - LB); LAB <= LA + LB; ++LAB) { - if ((LAB - std::abs(LA - LB)) % 2 == 1) { // if LA+LB-LAB == odd, then Gaunt_Coefficients = 0 + if ((LAB - std::abs(LA - LB)) % 2 == 1) // if LA+LB-LAB == odd, then Gaunt_Coefficients = 0 + { continue; -} + } this->Table_r[LAB].resize(rmesh, 0); this->Table_dr[LAB].resize(rmesh, 0); @@ -97,9 +100,10 @@ double Center2_Orb::Orb11::cal_overlap(const ModuleBase::Vector3& RA, const double distance = (distance_true >= tiny1) ? distance_true : distance_true + tiny1; const double RcutA = this->nA.getRcut(); const double RcutB = this->nB.getRcut(); - if (distance > (RcutA + RcutB)) { + if (distance > (RcutA + RcutB)) + { return 0.0; -} + } const int LA = this->nA.getL(); const int LB = this->nB.getL(); @@ -112,6 +116,9 @@ double Center2_Orb::Orb11::cal_overlap(const ModuleBase::Vector3& RA, rly); double overlap = 0.0; + const int idx1 = this->MGT.get_lm_index(LA, mA); + const int idx2 = this->MGT.get_lm_index(LB, mB); + const double* Gaunt_Coefficients_ptr = &(this->MGT.Gaunt_Coefficients(idx1, idx2, 0)); for (const auto& tb_r: this->Table_r) { @@ -120,17 +127,18 @@ double Center2_Orb::Orb11::cal_overlap(const ModuleBase::Vector3& RA, for (int mAB = 0; mAB != 2 * LAB + 1; ++mAB) // const int mAB = mA + mB; { - const double Gaunt_real_A_B_AB = this->MGT.Gaunt_Coefficients(this->MGT.get_lm_index(LA, mA), - this->MGT.get_lm_index(LB, mB), - this->MGT.get_lm_index(LAB, mAB)); - if (0 == Gaunt_real_A_B_AB) { + const int idx3 = this->MGT.get_lm_index(LAB, mAB); + const double Gaunt_real_A_B_AB = *(Gaunt_Coefficients_ptr + idx3); + if (0 == Gaunt_real_A_B_AB) + { continue; -} + } - const double ylm_solid = rly[this->MGT.get_lm_index(LAB, mAB)]; - if (0 == ylm_solid) { + const double ylm_solid = rly[idx3]; + if (0 == ylm_solid) + { continue; -} + } const double ylm_real = (distance > tiny2) ? ylm_solid / pow(distance, LAB) : ylm_solid; const double i_exp = std::pow(-1.0, (LA - LB - LAB) / 2); @@ -166,18 +174,23 @@ ModuleBase::Vector3 Center2_Orb::Orb11::cal_grad_overlap( // caoyu add 2 const double distance = (distance_true >= tiny1) ? distance_true : distance_true + tiny1; const double RcutA = this->nA.getRcut(); const double RcutB = this->nB.getRcut(); - if (distance > (RcutA + RcutB)) { + if (distance > (RcutA + RcutB)) + { return ModuleBase::Vector3(0.0, 0.0, 0.0); -} + } const int LA = this->nA.getL(); const int LB = this->nB.getL(); + const int idx1 = this->MGT.get_lm_index(LA, mA); + const int idx2 = this->MGT.get_lm_index(LB, mB); + const double* Gaunt_Coefficients_ptr = &(this->MGT.Gaunt_Coefficients(idx1, idx2, 0)); - std::vector rly((LA + LB + 1) * (LA + LB + 1)); + const int LAB2 = (LA + LB + 1) * (LA + LB + 1); + std::vector rly(LAB2); std::vector> grly; - ModuleBase::Array_Pool tmp_grly((LA + LB + 1) * (LA + LB + 1), 3); + ModuleBase::Array_Pool tmp_grly(LAB2, 3); ModuleBase::Ylm::grad_rl_sph_harm(LA + LB, delta_R.x, delta_R.y, delta_R.z, rly.data(), tmp_grly.get_ptr_2D()); - for (int i=0; i<(LA + LB + 1) * (LA + LB + 1); ++i) + for (int i=0; i ele(tmp_grly[i][0], tmp_grly[i][1], tmp_grly[i][2]); grly.push_back(ele); @@ -191,17 +204,17 @@ ModuleBase::Vector3 Center2_Orb::Orb11::cal_grad_overlap( // caoyu add 2 for (int mAB = 0; mAB != 2 * LAB + 1; ++mAB) // const int mAB = mA + mB; { - const double Gaunt_real_A_B_AB = this->MGT.Gaunt_Coefficients(this->MGT.get_lm_index(LA, mA), - this->MGT.get_lm_index(LB, mB), - this->MGT.get_lm_index(LAB, mAB)); - if (0 == Gaunt_real_A_B_AB) { + const int idx3 = this->MGT.get_lm_index(LAB, mAB); + const double Gaunt_real_A_B_AB = *(Gaunt_Coefficients_ptr + idx3); + if (0 == Gaunt_real_A_B_AB) + { continue; -} + } - const double ylm_solid = rly[this->MGT.get_lm_index(LAB, mAB)]; + const double ylm_solid = rly[idx3]; const double ylm_real = (distance > tiny2) ? ylm_solid / pow(distance, LAB) : ylm_solid; - const ModuleBase::Vector3 gylm_solid = grly[this->MGT.get_lm_index(LAB, mAB)]; + const ModuleBase::Vector3 gylm_solid = grly[idx3]; const ModuleBase::Vector3 gylm_real = (distance > tiny2) ? gylm_solid / pow(distance, LAB) : gylm_solid; diff --git a/source/source_lcao/center2_orb-orb21.cpp b/source/source_lcao/center2_orb-orb21.cpp index a81db0db26..fa70914f5a 100644 --- a/source/source_lcao/center2_orb-orb21.cpp +++ b/source/source_lcao/center2_orb-orb21.cpp @@ -35,7 +35,9 @@ void Center2_Orb::Orb21::init_radial_table() for (int LA = std::abs(LA1 - LA2); LA <= LA1 + LA2; ++LA) { if ((LA - std::abs(LA1 - LA2)) % 2 == 1) // if LA+LB-LAB == odd, then Gaunt_Coefficients = 0 + { continue; + } this->nA[LA].set_orbital_info(nA_short.getLabel(), nA_short.getType(), @@ -74,7 +76,9 @@ void Center2_Orb::Orb21::init_radial_table(const std::set& radials) for (int LA = std::abs(LA1 - LA2); LA <= LA1 + LA2; ++LA) { if ((LA - std::abs(LA1 - LA2)) % 2 == 1) // if LA+LB-LAB == odd, then Gaunt_Coefficients = 0 + { continue; + } this->nA[LA].set_orbital_info(nA_short.getLabel(), nA_short.getType(), @@ -106,6 +110,9 @@ double Center2_Orb::Orb21::cal_overlap(const ModuleBase::Vector3& RA, { const int LA1 = this->nA1.getL(); const int LA2 = this->nA2.getL(); + const int idx1 = this->MGT.get_lm_index(LA1, mA1); + const int idx2 = this->MGT.get_lm_index(LA2, mA2); + const double* Gaunt_Coefficients_ptr = &(this->MGT.Gaunt_Coefficients(idx1, idx2, 0)); double overlap = 0.0; @@ -116,11 +123,11 @@ double Center2_Orb::Orb21::cal_overlap(const ModuleBase::Vector3& RA, for (int mA = 0; mA != 2 * LA + 1; ++mA) // const int mA=mA1+mA2; { - const double Gaunt_real_A1_A2_A12 = this->MGT.Gaunt_Coefficients(this->MGT.get_lm_index(LA1, mA1), - this->MGT.get_lm_index(LA2, mA2), - this->MGT.get_lm_index(LA, mA)); + const double Gaunt_real_A1_A2_A12 = *(Gaunt_Coefficients_ptr + this->MGT.get_lm_index(LA, mA)); if (0 == Gaunt_real_A1_A2_A12) + { continue; + } overlap += Gaunt_real_A1_A2_A12 * orb11.second.cal_overlap(RA, RB, mA, mB); } @@ -137,6 +144,9 @@ ModuleBase::Vector3 Center2_Orb::Orb21::cal_grad_overlap(const ModuleBas { const int LA1 = this->nA1.getL(); const int LA2 = this->nA2.getL(); + const int idx1 = this->MGT.get_lm_index(LA1, mA1); + const int idx2 = this->MGT.get_lm_index(LA2, mA2); + const double* Gaunt_Coefficients_ptr = &(this->MGT.Gaunt_Coefficients(idx1, idx2, 0)); ModuleBase::Vector3 grad_overlap(0.0, 0.0, 0.0); @@ -147,11 +157,11 @@ ModuleBase::Vector3 Center2_Orb::Orb21::cal_grad_overlap(const ModuleBas for (int mA = 0; mA != 2 * LA + 1; ++mA) // const int mA=mA1+mA2; { - const double Gaunt_real_A1_A2_A12 = this->MGT.Gaunt_Coefficients(this->MGT.get_lm_index(LA1, mA1), - this->MGT.get_lm_index(LA2, mA2), - this->MGT.get_lm_index(LA, mA)); + const double Gaunt_real_A1_A2_A12 = *(Gaunt_Coefficients_ptr + this->MGT.get_lm_index(LA, mA)); if (0 == Gaunt_real_A1_A2_A12) + { continue; + } grad_overlap += Gaunt_real_A1_A2_A12 * orb11.second.cal_grad_overlap(RA, RB, mA, mB); }