Fix RT-TDDFT current calculation crash when running with more than 20 MPI processes

AsTonyshment · AsTonyshment · commit 6b415e3d8882 · 2025-03-05T16:01:20.000+08:00
diff --git a/source/module_hamilt_lcao/module_tddft/td_current.cpp b/source/module_hamilt_lcao/module_tddft/td_current.cpp
@@ -4,8 +4,8 @@
 #include "module_base/tool_title.h"
 #include "module_hamilt_lcao/module_tddft/snap_psibeta_half_tddft.h"
 #ifdef _OPENMP
-#include <unordered_set>
 #include <omp.h>
+#include <unordered_set>
 #endif
 
 TD_current::TD_current(const UnitCell* ucell_in,
@@ -14,28 +14,28 @@ TD_current::TD_current(const UnitCell* ucell_in,
                        const LCAO_Orbitals& orb,
                        const TwoCenterIntegrator* intor)
     : ucell(ucell_in), paraV(paraV), orb_(orb), Grid(GridD_in), intor_(intor)
-{   
+{
     // for length gague, the A(t) = 0 for all the time.
-    this->cart_At = ModuleBase::Vector3<double>(0,0,0);
+    this->cart_At = ModuleBase::Vector3<double>(0, 0, 0);
     this->initialize_vcomm_r(GridD_in, paraV);
     this->initialize_grad_term(GridD_in, paraV);
 }
 TD_current::~TD_current()
 {
-    for (int dir=0;dir<3;dir++)
+    for (int dir = 0; dir < 3; dir++)
     {
         delete this->current_term[dir];
     }
 }
-//allocate space for current_term
+// allocate space for current_term
 void TD_current::initialize_vcomm_r(const Grid_Driver* GridD, const Parallel_Orbitals* paraV)
 {
     ModuleBase::TITLE("TD_current", "initialize_vcomm_r");
     ModuleBase::timer::tick("TD_current", "initialize_vcomm_r");
-    for (int dir=0;dir<3;dir++)
+    for (int dir = 0; dir < 3; dir++)
     {
         if (this->current_term[dir] == nullptr)
-        this->current_term[dir] = new hamilt::HContainer<std::complex<double>>(paraV);
+            this->current_term[dir] = new hamilt::HContainer<std::complex<double>>(paraV);
     }
 
     this->adjs_vcommr.clear();
@@ -56,8 +56,8 @@ void TD_current::initialize_vcomm_r(const Grid_Driver* GridD, const Parallel_Orb
             const ModuleBase::Vector3<double>& tau1 = adjs.adjacent_tau[ad1];
             const ModuleBase::Vector3<int>& R_index1 = adjs.box[ad1];
             // choose the real adjacent atoms
-            // Note: the distance of atoms should less than the cutoff radius, 
-            // When equal, the theoretical value of matrix element is zero, 
+            // Note: the distance of atoms should less than the cutoff radius,
+            // When equal, the theoretical value of matrix element is zero,
             // but the calculated value is not zero due to the numerical error, which would lead to result changes.
             if (this->ucell->cal_dtau(iat0, iat1, R_index1).norm() * this->ucell->lat0
                 < orb_.Phi[T1].getRcut() + this->ucell->infoNL.Beta[T0].get_rcut_max())
@@ -84,20 +84,20 @@ void TD_current::initialize_vcomm_r(const Grid_Driver* GridD, const Parallel_Orb
                     continue;
                 }
                 hamilt::AtomPair<std::complex<double>> tmp(iat1,
-                                         iat2,
-                                         R_index2.x - R_index1.x,
-                                         R_index2.y - R_index1.y,
-                                         R_index2.z - R_index1.z,
-                                         paraV);
-                for (int dir=0;dir<3;dir++)
+                                                           iat2,
+                                                           R_index2.x - R_index1.x,
+                                                           R_index2.y - R_index1.y,
+                                                           R_index2.z - R_index1.z,
+                                                           paraV);
+                for (int dir = 0; dir < 3; dir++)
                 {
                     this->current_term[dir]->insert_pair(tmp);
                 }
             }
         }
     }
     // allocate the memory of BaseMatrix in cal_vcomm_r_IJR, and set the new values to zero
-    for (int dir=0;dir<3;dir++)
+    for (int dir = 0; dir < 3; dir++)
     {
         this->current_term[dir]->allocate(nullptr, true);
     }
@@ -108,10 +108,10 @@ void TD_current::initialize_grad_term(const Grid_Driver* GridD, const Parallel_O
     ModuleBase::TITLE("TD_current", "initialize_grad_term");
     ModuleBase::timer::tick("TD_current", "initialize_grad_term");
 
-    for (int dir=0;dir<3;dir++)
+    for (int dir = 0; dir < 3; dir++)
     {
         if (this->current_term[dir] == nullptr)
-        this->current_term[dir] = new hamilt::HContainer<std::complex<double>>(paraV);
+            this->current_term[dir] = new hamilt::HContainer<std::complex<double>>(paraV);
     }
     for (int iat1 = 0; iat1 < ucell->nat; iat1++)
     {
@@ -132,8 +132,8 @@ void TD_current::initialize_grad_term(const Grid_Driver* GridD, const Parallel_O
             }
             const ModuleBase::Vector3<int>& R_index2 = adjs.box[ad1];
             // choose the real adjacent atoms
-            // Note: the distance of atoms should less than the cutoff radius, 
-            // When equal, the theoretical value of matrix element is zero, 
+            // Note: the distance of atoms should less than the cutoff radius,
+            // When equal, the theoretical value of matrix element is zero,
             // but the calculated value is not zero due to the numerical error, which would lead to result changes.
             if (this->ucell->cal_dtau(iat1, iat2, R_index2).norm() * this->ucell->lat0
                 < orb_.Phi[T1].getRcut() + orb_.Phi[T2].getRcut())
@@ -150,14 +150,14 @@ void TD_current::initialize_grad_term(const Grid_Driver* GridD, const Parallel_O
             int iat2 = ucell->itia2iat(T2, I2);
             ModuleBase::Vector3<int>& R_index = adjs.box[ad];
             hamilt::AtomPair<std::complex<double>> tmp(iat1, iat2, R_index.x, R_index.y, R_index.z, paraV);
-            for (int dir=0;dir<3;dir++)
+            for (int dir = 0; dir < 3; dir++)
             {
                 this->current_term[dir]->insert_pair(tmp);
             }
         }
     }
     // allocate the memory of BaseMatrix in HR, and set the new values to zero
-    for (int dir=0;dir<3;dir++)
+    for (int dir = 0; dir < 3; dir++)
     {
         this->current_term[dir]->allocate(nullptr, true);
     }
@@ -188,9 +188,9 @@ void TD_current::calculate_vcomm_r()
             nlm_tot[i].resize(4);
         }
 
-        #pragma omp parallel
+#pragma omp parallel
         {
-            #pragma omp for schedule(dynamic)
+#pragma omp for schedule(dynamic)
             for (int ad = 0; ad < adjs.adj_num + 1; ++ad)
             {
                 const int T1 = adjs.ntype[ad];
@@ -214,27 +214,27 @@ void TD_current::calculate_vcomm_r()
 
                     // snap_psibeta_half_tddft() are used to calculate <psi|exp(-iAr)|beta>
                     // and <psi|rexp(-iAr)|beta> as well if current are needed
-                    
+
                     module_tddft::snap_psibeta_half_tddft(orb_,
-                                                            this->ucell->infoNL,
-                                                            nlm,
-                                                            tau1 * this->ucell->lat0,
-                                                            T1,
-                                                            atom1->iw2l[iw1],
-                                                            atom1->iw2m[iw1],
-                                                            atom1->iw2n[iw1],
-                                                            tau0 * this->ucell->lat0,
-                                                            T0,
-                                                            this->cart_At,
-                                                            true);
+                                                          this->ucell->infoNL,
+                                                          nlm,
+                                                          tau1 * this->ucell->lat0,
+                                                          T1,
+                                                          atom1->iw2l[iw1],
+                                                          atom1->iw2m[iw1],
+                                                          atom1->iw2n[iw1],
+                                                          tau0 * this->ucell->lat0,
+                                                          T0,
+                                                          this->cart_At,
+                                                          true);
                     for (int dir = 0; dir < 4; dir++)
                     {
                         nlm_tot[ad][dir].insert({all_indexes[iw1l], nlm[dir]});
                     }
                 }
             }
 
-    #ifdef _OPENMP
+#ifdef _OPENMP
             // record the iat number of the adjacent atoms
             std::set<int> ad_atom_set;
             for (int ad = 0; ad < adjs.adj_num + 1; ++ad)
@@ -250,28 +250,28 @@ void TD_current::calculate_vcomm_r()
             const int thread_id = omp_get_thread_num();
             std::set<int> ad_atom_set_thread;
             int i = 0;
-            for(const auto iat1 : ad_atom_set)
+            for (const auto iat1: ad_atom_set)
             {
                 if (i % num_threads == thread_id)
                 {
                     ad_atom_set_thread.insert(iat1);
                 }
                 i++;
             }
-    #endif
+#endif
 
-    // 2. calculate <psi_I|beta>D<beta|psi_{J,R}> for each pair of <IJR> atoms
+            // 2. calculate <psi_I|beta>D<beta|psi_{J,R}> for each pair of <IJR> atoms
             for (int ad1 = 0; ad1 < adjs.adj_num + 1; ++ad1)
             {
                 const int T1 = adjs.ntype[ad1];
                 const int I1 = adjs.natom[ad1];
                 const int iat1 = ucell->itia2iat(T1, I1);
-    #ifdef _OPENMP
+#ifdef _OPENMP
                 if (ad_atom_set_thread.find(iat1) == ad_atom_set_thread.end())
-                    {
-                        continue;
-                    }
-    #endif
+                {
+                    continue;
+                }
+#endif
                 ModuleBase::Vector3<int>& R_index1 = adjs.box[ad1];
                 for (int ad2 = 0; ad2 < adjs.adj_num + 1; ++ad2)
                 {
@@ -280,23 +280,22 @@ void TD_current::calculate_vcomm_r()
                     const int iat2 = ucell->itia2iat(T2, I2);
                     ModuleBase::Vector3<int>& R_index2 = adjs.box[ad2];
                     ModuleBase::Vector3<int> R_vector(R_index2[0] - R_index1[0],
-                                                    R_index2[1] - R_index1[1],
-                                                    R_index2[2] - R_index1[2]);
+                                                      R_index2[1] - R_index1[1],
+                                                      R_index2[2] - R_index1[2]);
                     std::complex<double>* tmp_c[3] = {nullptr, nullptr, nullptr};
                     for (int i = 0; i < 3; i++)
                     {
-                        tmp_c[i] = this->current_term[i]->find_matrix(iat1, iat2, R_vector[0], R_vector[1], R_vector[2])->get_pointer();
+                        hamilt::BaseMatrix<std::complex<double>>* matrix_ptr
+                            = this->current_term[i]->find_matrix(iat1, iat2, R_vector[0], R_vector[1], R_vector[2]);
+                        if (matrix_ptr != nullptr)
+                        {
+                            tmp_c[i] = matrix_ptr->get_pointer();
+                        }
                     }
                     // if not found , skip this pair of atoms
                     if (tmp_c[0] != nullptr)
                     {
-                        this->cal_vcomm_r_IJR(iat1,
-                                            iat2,
-                                            T0,
-                                            paraV,
-                                            nlm_tot[ad1],
-                                            nlm_tot[ad2],
-                                            tmp_c);
+                        this->cal_vcomm_r_IJR(iat1, iat2, T0, paraV, nlm_tot[ad1], nlm_tot[ad2], tmp_c);
                     }
                 }
             }
@@ -368,8 +367,8 @@ void TD_current::cal_vcomm_r_IJR(
                         //<psi|rexp(-iAr)|beta><beta|exp(iAr)|psi>-<psi|exp(-iAr)|beta><beta|rexp(iAr)|psi>
                         // multiply d in the end
                         nlm_r_tmp += (nlm1[dir + 1]->at(p1) * std::conj(nlm2[0]->at(p2))
-                                        - nlm1[0]->at(p1) * std::conj(nlm2[dir + 1]->at(p2)))
-                                        * (*tmp_d);
+                                      - nlm1[0]->at(p1) * std::conj(nlm2[dir + 1]->at(p2)))
+                                     * (*tmp_d);
                     }
                     // -i[r,Vnl], 2.0 due to the unit transformation
                     current_mat_p[dir][step_trace[is]] -= imag_unit * nlm_r_tmp / 2.0;
@@ -390,7 +389,7 @@ void TD_current::cal_vcomm_r_IJR(
 void TD_current::calculate_grad_term()
 {
     ModuleBase::TITLE("TD_current", "calculate_grad_term");
-    if(this->current_term[0]==nullptr || this->current_term[0]->size_atom_pairs()<=0)
+    if (this->current_term[0] == nullptr || this->current_term[0]->size_atom_pairs() <= 0)
     {
         ModuleBase::WARNING_QUIT("TD_current::calculate_grad_term", "grad_term is nullptr or empty");
     }
@@ -417,7 +416,12 @@ void TD_current::calculate_grad_term()
             std::complex<double>* tmp_c[3] = {nullptr, nullptr, nullptr};
             for (int i = 0; i < 3; i++)
             {
-                tmp_c[i] = this->current_term[i]->find_matrix(iat1, iat2, R_index2)->get_pointer();
+                hamilt::BaseMatrix<std::complex<double>>* matrix_ptr
+                    = this->current_term[i]->find_matrix(iat1, iat2, R_index2);
+                if (matrix_ptr != nullptr)
+                {
+                    tmp_c[i] = matrix_ptr->get_pointer();
+                }
             }
             if (tmp_c[0] != nullptr)
             {
@@ -473,7 +477,7 @@ void TD_current::cal_grad_IJR(const int& iat1,
     auto row_indexes = paraV->get_indexes_row(iat1);
     auto col_indexes = paraV->get_indexes_col(iat2);
     const int step_trace = col_indexes.size() + 1;
-    for(int iw1l = 0; iw1l < row_indexes.size(); iw1l += npol)
+    for (int iw1l = 0; iw1l < row_indexes.size(); iw1l += npol)
     {
         const int iw1 = row_indexes[iw1l] / npol;
         const int L1 = iw2l1[iw1];
@@ -509,7 +513,7 @@ void TD_current::cal_grad_IJR(const int& iat1,
         for (int dir = 0; dir < 3; dir++)
         {
             current_mat_p[dir] += (npol - 1) * col_indexes.size();
-        }   
+        }
     }
 }
 

Original file line number	Diff line number	Diff line change
`@@ -4,8 +4,8 @@`
`4`	`4`	`#include "module_base/tool_title.h"`
`5`	`5`	`#include "module_hamilt_lcao/module_tddft/snap_psibeta_half_tddft.h"`
`6`	`6`	`#ifdef _OPENMP`
`7`		`-#include <unordered_set>`
`8`	`7`	`#include <omp.h>`
	`8`	`+#include <unordered_set>`
`9`	`9`	`#endif`
`10`	`10`
`11`	`11`	`TD_current::TD_current(const UnitCell* ucell_in,`
`@@ -14,28 +14,28 @@ TD_current::TD_current(const UnitCell* ucell_in,`
`14`	`14`	`const LCAO_Orbitals& orb,`
`15`	`15`	`const TwoCenterIntegrator* intor)`
`16`	`16`	`: ucell(ucell_in), paraV(paraV), orb_(orb), Grid(GridD_in), intor_(intor)`
`17`		`-{`
	`17`	`+{`
`18`	`18`	`// for length gague, the A(t) = 0 for all the time.`
`19`		`- this->cart_At = ModuleBase::Vector3<double>(0,0,0);`
	`19`	`+ this->cart_At = ModuleBase::Vector3<double>(0, 0, 0);`
`20`	`20`	`this->initialize_vcomm_r(GridD_in, paraV);`
`21`	`21`	`this->initialize_grad_term(GridD_in, paraV);`
`22`	`22`	`}`
`23`	`23`	`TD_current::~TD_current()`
`24`	`24`	`{`
`25`		`- for (int dir=0;dir<3;dir++)`
	`25`	`+ for (int dir = 0; dir < 3; dir++)`
`26`	`26`	`{`
`27`	`27`	`delete this->current_term[dir];`
`28`	`28`	`}`
`29`	`29`	`}`
`30`		`-//allocate space for current_term`
	`30`	`+// allocate space for current_term`
`31`	`31`	`void TD_current::initialize_vcomm_r(const Grid_Driver* GridD, const Parallel_Orbitals* paraV)`
`32`	`32`	`{`
`33`	`33`	`ModuleBase::TITLE("TD_current", "initialize_vcomm_r");`
`34`	`34`	`ModuleBase::timer::tick("TD_current", "initialize_vcomm_r");`
`35`		`- for (int dir=0;dir<3;dir++)`
	`35`	`+ for (int dir = 0; dir < 3; dir++)`
`36`	`36`	`{`
`37`	`37`	`if (this->current_term[dir] == nullptr)`
`38`		`- this->current_term[dir] = new hamilt::HContainer<std::complex<double>>(paraV);`
	`38`	`+ this->current_term[dir] = new hamilt::HContainer<std::complex<double>>(paraV);`
`39`	`39`	`}`
`40`	`40`
`41`	`41`	`this->adjs_vcommr.clear();`
`@@ -56,8 +56,8 @@ void TD_current::initialize_vcomm_r(const Grid_Driver* GridD, const Parallel_Orb`
`56`	`56`	`const ModuleBase::Vector3<double>& tau1 = adjs.adjacent_tau[ad1];`
`57`	`57`	`const ModuleBase::Vector3<int>& R_index1 = adjs.box[ad1];`
`58`	`58`	`// choose the real adjacent atoms`
`59`		`- // Note: the distance of atoms should less than the cutoff radius,`
`60`		`- // When equal, the theoretical value of matrix element is zero,`
	`59`	`+ // Note: the distance of atoms should less than the cutoff radius,`
	`60`	`+ // When equal, the theoretical value of matrix element is zero,`
`61`	`61`	`// but the calculated value is not zero due to the numerical error, which would lead to result changes.`
`62`	`62`	`if (this->ucell->cal_dtau(iat0, iat1, R_index1).norm() * this->ucell->lat0`
`63`	`63`	`< orb_.Phi[T1].getRcut() + this->ucell->infoNL.Beta[T0].get_rcut_max())`
`@@ -84,20 +84,20 @@ void TD_current::initialize_vcomm_r(const Grid_Driver* GridD, const Parallel_Orb`
`84`	`84`	`continue;`
`85`	`85`	`}`
`86`	`86`	`hamilt::AtomPair<std::complex<double>> tmp(iat1,`
`87`		`- iat2,`
`88`		`- R_index2.x - R_index1.x,`
`89`		`- R_index2.y - R_index1.y,`
`90`		`- R_index2.z - R_index1.z,`
`91`		`- paraV);`
`92`		`- for (int dir=0;dir<3;dir++)`
	`87`	`+ iat2,`
	`88`	`+ R_index2.x - R_index1.x,`
	`89`	`+ R_index2.y - R_index1.y,`
	`90`	`+ R_index2.z - R_index1.z,`
	`91`	`+ paraV);`
	`92`	`+ for (int dir = 0; dir < 3; dir++)`
`93`	`93`	`{`
`94`	`94`	`this->current_term[dir]->insert_pair(tmp);`
`95`	`95`	`}`
`96`	`96`	`}`
`97`	`97`	`}`
`98`	`98`	`}`
`99`	`99`	`// allocate the memory of BaseMatrix in cal_vcomm_r_IJR, and set the new values to zero`
`100`		`- for (int dir=0;dir<3;dir++)`
	`100`	`+ for (int dir = 0; dir < 3; dir++)`
`101`	`101`	`{`
`102`	`102`	`this->current_term[dir]->allocate(nullptr, true);`
`103`	`103`	`}`
`@@ -108,10 +108,10 @@ void TD_current::initialize_grad_term(const Grid_Driver* GridD, const Parallel_O`
`108`	`108`	`ModuleBase::TITLE("TD_current", "initialize_grad_term");`
`109`	`109`	`ModuleBase::timer::tick("TD_current", "initialize_grad_term");`
`110`	`110`
`111`		`- for (int dir=0;dir<3;dir++)`
	`111`	`+ for (int dir = 0; dir < 3; dir++)`
`112`	`112`	`{`
`113`	`113`	`if (this->current_term[dir] == nullptr)`
`114`		`- this->current_term[dir] = new hamilt::HContainer<std::complex<double>>(paraV);`
	`114`	`+ this->current_term[dir] = new hamilt::HContainer<std::complex<double>>(paraV);`
`115`	`115`	`}`
`116`	`116`	`for (int iat1 = 0; iat1 < ucell->nat; iat1++)`
`117`	`117`	`{`
`@@ -132,8 +132,8 @@ void TD_current::initialize_grad_term(const Grid_Driver* GridD, const Parallel_O`
`132`	`132`	`}`
`133`	`133`	`const ModuleBase::Vector3<int>& R_index2 = adjs.box[ad1];`
`134`	`134`	`// choose the real adjacent atoms`
`135`		`- // Note: the distance of atoms should less than the cutoff radius,`
`136`		`- // When equal, the theoretical value of matrix element is zero,`
	`135`	`+ // Note: the distance of atoms should less than the cutoff radius,`
	`136`	`+ // When equal, the theoretical value of matrix element is zero,`
`137`	`137`	`// but the calculated value is not zero due to the numerical error, which would lead to result changes.`
`138`	`138`	`if (this->ucell->cal_dtau(iat1, iat2, R_index2).norm() * this->ucell->lat0`
`139`	`139`	`< orb_.Phi[T1].getRcut() + orb_.Phi[T2].getRcut())`
`@@ -150,14 +150,14 @@ void TD_current::initialize_grad_term(const Grid_Driver* GridD, const Parallel_O`
`150`	`150`	`int iat2 = ucell->itia2iat(T2, I2);`
`151`	`151`	`ModuleBase::Vector3<int>& R_index = adjs.box[ad];`
`152`	`152`	`hamilt::AtomPair<std::complex<double>> tmp(iat1, iat2, R_index.x, R_index.y, R_index.z, paraV);`
`153`		`- for (int dir=0;dir<3;dir++)`
	`153`	`+ for (int dir = 0; dir < 3; dir++)`
`154`	`154`	`{`
`155`	`155`	`this->current_term[dir]->insert_pair(tmp);`
`156`	`156`	`}`
`157`	`157`	`}`
`158`	`158`	`}`
`159`	`159`	`// allocate the memory of BaseMatrix in HR, and set the new values to zero`
`160`		`- for (int dir=0;dir<3;dir++)`
	`160`	`+ for (int dir = 0; dir < 3; dir++)`
`161`	`161`	`{`
`162`	`162`	`this->current_term[dir]->allocate(nullptr, true);`
`163`	`163`	`}`
`@@ -188,9 +188,9 @@ void TD_current::calculate_vcomm_r()`
`188`	`188`	`nlm_tot[i].resize(4);`
`189`	`189`	`}`
`190`	`190`
`191`		`- #pragma omp parallel`
	`191`	`+#pragma omp parallel`
`192`	`192`	`{`
`193`		`- #pragma omp for schedule(dynamic)`
	`193`	`+#pragma omp for schedule(dynamic)`
`194`	`194`	`for (int ad = 0; ad < adjs.adj_num + 1; ++ad)`
`195`	`195`	`{`
`196`	`196`	`const int T1 = adjs.ntype[ad];`
`@@ -214,27 +214,27 @@ void TD_current::calculate_vcomm_r()`
`214`	`214`
`215`	`215`	`// snap_psibeta_half_tddft() are used to calculate <psi\|exp(-iAr)\|beta>`
`216`	`216`	`// and <psi\|rexp(-iAr)\|beta> as well if current are needed`
`217`		`-`
	`217`	`+`
`218`	`218`	`module_tddft::snap_psibeta_half_tddft(orb_,`
`219`		`- this->ucell->infoNL,`
`220`		`- nlm,`
`221`		`- tau1 * this->ucell->lat0,`
`222`		`- T1,`
`223`		`- atom1->iw2l[iw1],`
`224`		`- atom1->iw2m[iw1],`
`225`		`- atom1->iw2n[iw1],`
`226`		`- tau0 * this->ucell->lat0,`
`227`		`- T0,`
`228`		`- this->cart_At,`
`229`		`- true);`
	`219`	`+ this->ucell->infoNL,`
	`220`	`+ nlm,`
	`221`	`+ tau1 * this->ucell->lat0,`
	`222`	`+ T1,`
	`223`	`+ atom1->iw2l[iw1],`
	`224`	`+ atom1->iw2m[iw1],`
	`225`	`+ atom1->iw2n[iw1],`
	`226`	`+ tau0 * this->ucell->lat0,`
	`227`	`+ T0,`
	`228`	`+ this->cart_At,`
	`229`	`+ true);`
`230`	`230`	`for (int dir = 0; dir < 4; dir++)`
`231`	`231`	`{`
`232`	`232`	`nlm_tot[ad][dir].insert({all_indexes[iw1l], nlm[dir]});`
`233`	`233`	`}`
`234`	`234`	`}`
`235`	`235`	`}`
`236`	`236`
`237`		`- #ifdef _OPENMP`
	`237`	`+#ifdef _OPENMP`
`238`	`238`	`// record the iat number of the adjacent atoms`
`239`	`239`	`std::set<int> ad_atom_set;`
`240`	`240`	`for (int ad = 0; ad < adjs.adj_num + 1; ++ad)`
`@@ -250,28 +250,28 @@ void TD_current::calculate_vcomm_r()`
`250`	`250`	`const int thread_id = omp_get_thread_num();`
`251`	`251`	`std::set<int> ad_atom_set_thread;`
`252`	`252`	`int i = 0;`
`253`		`- for(const auto iat1 : ad_atom_set)`
	`253`	`+ for (const auto iat1: ad_atom_set)`
`254`	`254`	`{`
`255`	`255`	`if (i % num_threads == thread_id)`
`256`	`256`	`{`
`257`	`257`	`ad_atom_set_thread.insert(iat1);`
`258`	`258`	`}`
`259`	`259`	`i++;`
`260`	`260`	`}`
`261`		`- #endif`
	`261`	`+#endif`
`262`	`262`
`263`		`- // 2. calculate <psi_I\|beta>D<beta\|psi_{J,R}> for each pair of <IJR> atoms`
	`263`	`+ // 2. calculate <psi_I\|beta>D<beta\|psi_{J,R}> for each pair of <IJR> atoms`
`264`	`264`	`for (int ad1 = 0; ad1 < adjs.adj_num + 1; ++ad1)`
`265`	`265`	`{`
`266`	`266`	`const int T1 = adjs.ntype[ad1];`
`267`	`267`	`const int I1 = adjs.natom[ad1];`
`268`	`268`	`const int iat1 = ucell->itia2iat(T1, I1);`
`269`		`- #ifdef _OPENMP`
	`269`	`+#ifdef _OPENMP`
`270`	`270`	`if (ad_atom_set_thread.find(iat1) == ad_atom_set_thread.end())`
`271`		`- {`
`272`		`- continue;`
`273`		`- }`
`274`		`- #endif`
	`271`	`+ {`
	`272`	`+ continue;`
	`273`	`+ }`
	`274`	`+#endif`
`275`	`275`	`ModuleBase::Vector3<int>& R_index1 = adjs.box[ad1];`
`276`	`276`	`for (int ad2 = 0; ad2 < adjs.adj_num + 1; ++ad2)`
`277`	`277`	`{`
`@@ -280,23 +280,22 @@ void TD_current::calculate_vcomm_r()`
`280`	`280`	`const int iat2 = ucell->itia2iat(T2, I2);`
`281`	`281`	`ModuleBase::Vector3<int>& R_index2 = adjs.box[ad2];`
`282`	`282`	`ModuleBase::Vector3<int> R_vector(R_index2[0] - R_index1[0],`
`283`		`- R_index2[1] - R_index1[1],`
`284`		`- R_index2[2] - R_index1[2]);`
	`283`	`+ R_index2[1] - R_index1[1],`
	`284`	`+ R_index2[2] - R_index1[2]);`
`285`	`285`	`std::complex<double>* tmp_c[3] = {nullptr, nullptr, nullptr};`
`286`	`286`	`for (int i = 0; i < 3; i++)`
`287`	`287`	`{`
`288`		`- tmp_c[i] = this->current_term[i]->find_matrix(iat1, iat2, R_vector[0], R_vector[1], R_vector[2])->get_pointer();`
	`288`	`+ hamilt::BaseMatrix<std::complex<double>>* matrix_ptr`
	`289`	`+ = this->current_term[i]->find_matrix(iat1, iat2, R_vector[0], R_vector[1], R_vector[2]);`
	`290`	`+ if (matrix_ptr != nullptr)`
	`291`	`+ {`
	`292`	`+ tmp_c[i] = matrix_ptr->get_pointer();`
	`293`	`+ }`
`289`	`294`	`}`
`290`	`295`	`// if not found , skip this pair of atoms`
`291`	`296`	`if (tmp_c[0] != nullptr)`
`292`	`297`	`{`
`293`		`- this->cal_vcomm_r_IJR(iat1,`
`294`		`- iat2,`
`295`		`- T0,`
`296`		`- paraV,`
`297`		`- nlm_tot[ad1],`
`298`		`- nlm_tot[ad2],`
`299`		`- tmp_c);`
	`298`	`+ this->cal_vcomm_r_IJR(iat1, iat2, T0, paraV, nlm_tot[ad1], nlm_tot[ad2], tmp_c);`
`300`	`299`	`}`
`301`	`300`	`}`
`302`	`301`	`}`
`@@ -368,8 +367,8 @@ void TD_current::cal_vcomm_r_IJR(`
`368`	`367`	`//<psi\|rexp(-iAr)\|beta><beta\|exp(iAr)\|psi>-<psi\|exp(-iAr)\|beta><beta\|rexp(iAr)\|psi>`
`369`	`368`	`// multiply d in the end`
`370`	`369`	`nlm_r_tmp += (nlm1[dir + 1]->at(p1) * std::conj(nlm2[0]->at(p2))`
`371`		`- - nlm1[0]->at(p1) * std::conj(nlm2[dir + 1]->at(p2)))`
`372`		`- * (*tmp_d);`
	`370`	`+ - nlm1[0]->at(p1) * std::conj(nlm2[dir + 1]->at(p2)))`
	`371`	`+ * (*tmp_d);`
`373`	`372`	`}`
`374`	`373`	`// -i[r,Vnl], 2.0 due to the unit transformation`
`375`	`374`	`current_mat_p[dir][step_trace[is]] -= imag_unit * nlm_r_tmp / 2.0;`
`@@ -390,7 +389,7 @@ void TD_current::cal_vcomm_r_IJR(`
`390`	`389`	`void TD_current::calculate_grad_term()`
`391`	`390`	`{`
`392`	`391`	`ModuleBase::TITLE("TD_current", "calculate_grad_term");`
`393`		`- if(this->current_term[0]==nullptr \|\| this->current_term[0]->size_atom_pairs()<=0)`
	`392`	`+ if (this->current_term[0] == nullptr \|\| this->current_term[0]->size_atom_pairs() <= 0)`
`394`	`393`	`{`
`395`	`394`	`ModuleBase::WARNING_QUIT("TD_current::calculate_grad_term", "grad_term is nullptr or empty");`
`396`	`395`	`}`
`@@ -417,7 +416,12 @@ void TD_current::calculate_grad_term()`
`417`	`416`	`std::complex<double>* tmp_c[3] = {nullptr, nullptr, nullptr};`
`418`	`417`	`for (int i = 0; i < 3; i++)`
`419`	`418`	`{`
`420`		`- tmp_c[i] = this->current_term[i]->find_matrix(iat1, iat2, R_index2)->get_pointer();`
	`419`	`+ hamilt::BaseMatrix<std::complex<double>>* matrix_ptr`
	`420`	`+ = this->current_term[i]->find_matrix(iat1, iat2, R_index2);`
	`421`	`+ if (matrix_ptr != nullptr)`
	`422`	`+ {`
	`423`	`+ tmp_c[i] = matrix_ptr->get_pointer();`
	`424`	`+ }`
`421`	`425`	`}`
`422`	`426`	`if (tmp_c[0] != nullptr)`
`423`	`427`	`{`
`@@ -473,7 +477,7 @@ void TD_current::cal_grad_IJR(const int& iat1,`
`473`	`477`	`auto row_indexes = paraV->get_indexes_row(iat1);`
`474`	`478`	`auto col_indexes = paraV->get_indexes_col(iat2);`
`475`	`479`	`const int step_trace = col_indexes.size() + 1;`
`476`		`- for(int iw1l = 0; iw1l < row_indexes.size(); iw1l += npol)`
	`480`	`+ for (int iw1l = 0; iw1l < row_indexes.size(); iw1l += npol)`
`477`	`481`	`{`
`478`	`482`	`const int iw1 = row_indexes[iw1l] / npol;`
`479`	`483`	`const int L1 = iw2l1[iw1];`
`@@ -509,7 +513,7 @@ void TD_current::cal_grad_IJR(const int& iat1,`
`509`	`513`	`for (int dir = 0; dir < 3; dir++)`
`510`	`514`	`{`
`511`	`515`	`current_mat_p[dir] += (npol - 1) * col_indexes.size();`
`512`		`- }`
	`516`	`+ }`
`513`	`517`	`}`
`514`	`518`	`}`
`515`	`519`