Fix parallel bug and simplify the computational code

zgn-26714 · zgn-26714 · commit a9cc5c6b6d0e · 2025-08-31T16:09:53.000+08:00
diff --git a/source/source_lcao/module_gint/temp_gint/gint_common.cpp b/source/source_lcao/module_gint/temp_gint/gint_common.cpp
@@ -70,17 +70,18 @@ void transfer_hr_gint_to_hR(const HContainer<T>& hr_gint, HContainer<T>& hR)
 }
 
 //hRgint_tmp to hR
-void transfer_hr_gint_to_hR_nspin4(std::vector<HContainer<double>>& hRGint_tmp, 
+void merge_hR_n4(std::vector<HContainer<double>>& hRGint_tmp, 
                             HContainer<std::complex<double>>& hR,
                             const GintInfo& gint_info)
 {
-    ModuleBase::TITLE("Gint", "transfer_hr_gint_to_hR_nspin4");
-    ModuleBase::timer::tick("Gint", "transfer_hr_gint_to_hR_nspin4");
+    ModuleBase::TITLE("Gint", "merge_hR_n4");
+    ModuleBase::timer::tick("Gint", "merge_hR_n4");
 #ifdef __MPI
     int mg = hR.get_paraV()->get_global_row_size()/2;
     int ng = hR.get_paraV()->get_global_col_size()/2;
     int nb = hR.get_paraV()->get_block_size()/2;
     int blacs_ctxt = hR.get_paraV()->blacs_ctxt;
+    
     const UnitCell* ucell = gint_info.get_ucell();
     int *iat2iwt = new int[ucell->nat];
     for (int iat = 0; iat < ucell->nat; iat++) {
@@ -91,91 +92,49 @@ void transfer_hr_gint_to_hR_nspin4(std::vector<HContainer<double>>& hRGint_tmp,
     pv->set_atomic_trace(iat2iwt, ucell->nat, mg);
     auto ijr_info = hR.get_ijr_info();
 
-    hamilt::HContainer<double>* hR_tmp = new hamilt::HContainer<double>(pv, nullptr, &ijr_info);
+    auto* hR_tmp = new hamilt::HContainer<std::complex<double>>(pv, nullptr, &ijr_info);
+
+    std::vector<int> first = {0, 1, 1, 0};
+    std::vector<int> second= {3, 2, 2, 3};
+    std::vector<int> row_set = {0, 0, 1, 1};
+    std::vector<int> col_set = {0, 1, 0, 1};
+    std::vector<int> clx_i = {1, 0, 0, -1};
+    std::vector<int> clx_j = {0, 1, -1, 0};
     for (int is = 0; is < 4; is++){
-        hR_tmp->set_zero();
-        //std::cout<<"is: "<<is<<std::endl;
-        hamilt::transferSerials2Parallels( hRGint_tmp[is], hR_tmp);
-        for (int iap = 0; iap < hR.size_atom_pairs(); iap++)
+        hamilt::HContainer<std::complex<double>>* hRGint_tmpCd = new hamilt::HContainer<std::complex<double>>(ucell->nat);
+        ijr_info = hRGint_tmp[0].get_ijr_info();
+        hRGint_tmpCd->insert_ijrs(&ijr_info, *(ucell));
+        hRGint_tmpCd->allocate(nullptr, true);
+        hRGint_tmpCd->set_zero();
+        for (int iap = 0; iap < hRGint_tmpCd->size_atom_pairs(); iap++)
         {
             //std::cout<<"iap: "<<iap<<std::endl;
-            auto* ap = &hR.get_atom_pair(iap);
+            auto* ap = &hRGint_tmpCd->get_atom_pair(iap);
             const int iat1 = ap->get_atom_i();
             const int iat2 = ap->get_atom_j();
-            const hamilt::AtomPair<double>* ap_nspin = nullptr;
             if (iat1 <= iat2)
             {
                 hamilt::AtomPair<std::complex<double>>* upper_ap = ap;
-                hamilt::AtomPair<std::complex<double>>* lower_ap = hR.find_pair(iat2, iat1);
-                switch (is)
-                {
-                case 0:
-                    ap_nspin = hR_tmp->find_pair(iat1, iat2);
-                    break;
-                case 3:
-                    ap_nspin = hR_tmp->find_pair(iat1, iat2);
-                    break;
-                }
-                if(ap_nspin == nullptr) break;
+                hamilt::AtomPair<std::complex<double>>* lower_ap = hRGint_tmpCd->find_pair(iat2, iat1);
+                const hamilt::AtomPair<double>* ap_nspin1 = hRGint_tmp[first[is]].find_pair(iat1, iat2);
+                const hamilt::AtomPair<double>* ap_nspin2 = hRGint_tmp[second[is]].find_pair(iat1, iat2);
                 for (int ir = 0; ir < upper_ap->get_R_size(); ir++)
                 {   
                     const auto R_index = upper_ap->get_R_index(ir);
                     auto upper_mat = upper_ap->find_matrix(R_index);
-                    auto mat_nspin = ap_nspin->find_matrix(R_index);
-
+                    auto mat_nspin1 = ap_nspin1->find_matrix(R_index);
+                    auto mat_nspin2 = ap_nspin2->find_matrix(R_index);
                     // The row size and the col size of upper_matrix is double that of matrix_nspin_0
-                    for (int irow = 0; irow < mat_nspin->get_row_size(); ++irow)
+                    for (int irow = 0; irow < mat_nspin1->get_row_size(); ++irow)
                     {
-                        for (int icol = 0; icol < mat_nspin->get_col_size(); ++icol)
+                        for (int icol = 0; icol < mat_nspin1->get_col_size(); ++icol)
                         {
-                            switch (is)
-                            {
-                            case 0:
-                                upper_mat->get_value(2*irow, 2*icol) = mat_nspin->get_value(irow, icol);
-                                upper_mat->get_value(2*irow+1, 2*icol+1) = mat_nspin->get_value(irow, icol);
-                                break;
-                            case 3:
-                                upper_mat->get_value(2*irow, 2*icol) += mat_nspin->get_value(irow, icol);
-                                upper_mat->get_value(2*irow+1, 2*icol+1) -= mat_nspin->get_value(irow, icol);
-                                break;
-                            }
-                        }
-                    }
-
-                    if (PARAM.globalv.domag)
-                    {
-                        const hamilt::AtomPair<double>* ap_nspin = nullptr;
-                        switch (is)
-                        {
-                        case 1:
-                            ap_nspin = hR_tmp->find_pair(iat1, iat2);
-                            break;
-                        case 2:
-                            ap_nspin = hR_tmp->find_pair(iat1, iat2);
-                            break;
-                        }
-                        const auto mat_nspin = ap_nspin->find_matrix(R_index);
-                        for (int irow = 0; irow < mat_nspin->get_row_size(); ++irow)
-                        {
-                            for (int icol = 0; icol < mat_nspin->get_col_size(); ++icol)
-                            {
-                                switch(is)
-                                {
-                                    case 1:
-                                        upper_mat->get_value(2*irow, 2*icol+1) = mat_nspin->get_value(irow, icol);
-                                        upper_mat->get_value(2*irow+1, 2*icol) = mat_nspin->get_value(irow, icol);
-                                        break;
-                                    case 2:
-                                        upper_mat->get_value(2*irow, 2*icol+1) += std::complex<double>(0.0, 1.0) * mat_nspin->get_value(irow, icol);
-                                        upper_mat->get_value(2*irow+1, 2*icol) -= std::complex<double>(0.0, 1.0) * mat_nspin->get_value(irow, icol);
-                                        break;
-                                }
-                             }
+                            upper_mat->get_value(irow, icol) = mat_nspin1->get_value(irow, icol) 
+                            + std::complex<double>(clx_i[is], clx_j[is]) * mat_nspin2->get_value(irow, icol);
                         }
                     }
-                    
-                    // fill the lower triangle matrix
-                    if(is == 3){
+                    //fill the lower triangle matrix
+                    if (PARAM.globalv.domag){
                         if (iat1 < iat2)
                         {
                             auto lower_mat = lower_ap->find_matrix(-R_index);
@@ -191,15 +150,41 @@ void transfer_hr_gint_to_hR_nspin4(std::vector<HContainer<double>>& hRGint_tmp,
                 }
             }
         }
-        
+
+        hR_tmp->set_zero();
+        hamilt::transferSerials2Parallels( *hRGint_tmpCd, hR_tmp);
+        for (int iap = 0; iap < hR.size_atom_pairs(); iap++)
+        {
+            auto* ap = &hR.get_atom_pair(iap);
+            const int iat1 = ap->get_atom_i();
+            const int iat2 = ap->get_atom_j();
+            auto* ap_nspin = hR_tmp ->find_pair(iat1, iat2);
+            for (int ir = 0; ir < ap->get_R_size(); ir++)
+            {   
+                const auto R_index = ap->get_R_index(ir);
+                auto upper_mat = ap->find_matrix(R_index);
+                auto mat_nspin = ap_nspin->find_matrix(R_index);
+
+                // The row size and the col size of upper_matrix is double that of matrix_nspin_0
+                for (int irow = 0; irow < mat_nspin->get_row_size(); ++irow)
+                {
+                    for (int icol = 0; icol < mat_nspin->get_col_size(); ++icol)
+                    {
+                        upper_mat->get_value(2*irow+row_set[is], 2*icol+col_set[is]) = 
+                        mat_nspin->get_value(irow, icol);
+                    }
+                }
+            }
+        }
+        delete hRGint_tmpCd;
     }
     delete[] iat2iwt;
-    delete pv;
-    delete hR_tmp;
 #else
 
 #endif
-    ModuleBase::timer::tick("Gint", "transfer_hr_gint_to_hR_nspin4");
+
+    
+    ModuleBase::timer::tick("Gint", "merge_hR_n4");
     return;
 }
 
@@ -231,6 +216,9 @@ void transfer_dm_2d_to_gint(
     } else  // NSPIN=4 case
     {
 #ifdef __MPI
+        // is=0:↑↑, 1:↑↓, 2:↓↑, 3:↓↓
+        const int row_set[4] = {0, 0, 1, 1};
+        const int col_set[4] = {0, 1, 0, 1};
         int mg = dm[0]->get_paraV()->get_global_row_size()/2;
         int ng = dm[0]->get_paraV()->get_global_col_size()/2;
         int nb = dm[0]->get_paraV()->get_block_size()/2;
@@ -246,43 +234,20 @@ void transfer_dm_2d_to_gint(
         auto ijr_info = dm[0]->get_ijr_info();
         HContainer<T>* DM2D_tmp = new hamilt::HContainer<T>(pv, nullptr, &ijr_info);
         //ModuleBase::Memory::record("Gint::DM2D_tmp", this->DM2D_tmp->get_memory_size());
-         for (int is = 0; is < 4; is++){
+        for (int is = 0; is < 4; is++){
             for (int iap = 0; iap < dm[0]->size_atom_pairs(); ++iap) {
                 auto& ap = dm[0]->get_atom_pair(iap);
                 int iat1 = ap.get_atom_i();
                 int iat2 = ap.get_atom_j();
                 for (int ir = 0; ir < ap.get_R_size(); ++ir) {
                     const ModuleBase::Vector3<int> r_index = ap.get_R_index(ir);
-                    T* tmp_pointer = DM2D_tmp -> find_matrix(iat1, iat2, r_index)->get_pointer();
-                    T* data_full = ap.get_pointer(ir);
-                    for (int irow = 0; irow < ap.get_row_size(); irow += 2) {
-                        switch (is) {//todo: It can be written more compactly
-                            case 0:
-                                for (int icol = 0; icol < ap.get_col_size(); icol += 2) {
-                                    *(tmp_pointer)++ = data_full[icol];
-                                }
-                                data_full += ap.get_col_size() * 2;
-                                break;
-                            case 1:
-                                for (int icol = 0; icol < ap.get_col_size(); icol += 2) {
-                                    *(tmp_pointer)++ = data_full[icol + 1];
-                                }
-                                data_full += ap.get_col_size() * 2;
-                                break;
-                            case 2:
-                                data_full += ap.get_col_size();
-                                for (int icol = 0; icol < ap.get_col_size(); icol += 2) {
-                                    *(tmp_pointer)++ = data_full[icol];
-                                }
-                                data_full += ap.get_col_size();
-                                break;
-                            case 3:
-                                data_full += ap.get_col_size();
-                                for (int icol = 0; icol < ap.get_col_size(); icol += 2) {
-                                    *(tmp_pointer)++ = data_full[icol + 1];
-                                }
-                                data_full += ap.get_col_size();
-                                break;           
+                    T* matrix_out = DM2D_tmp -> find_matrix(iat1, iat2, r_index)->get_pointer();
+                    T* matrix_in = ap.get_pointer(ir);
+                    for (int irow = 0; irow < ap.get_row_size()/2; irow ++) {
+                        for (int icol = 0; icol < ap.get_col_size()/2; icol ++) {
+                            int index_i = irow* ap.get_col_size()/2 + icol;
+                            int index_j = (irow*2+row_set[is]) * ap.get_col_size() + icol*2+col_set[is];
+                            matrix_out[index_i] = matrix_in[index_j];
                         }
                     }
                 }
diff --git a/source/source_lcao/module_gint/temp_gint/gint_common.h b/source/source_lcao/module_gint/temp_gint/gint_common.h
@@ -11,7 +11,7 @@ namespace ModuleGint
     template <typename T>
     void transfer_hr_gint_to_hR(const HContainer<T>& hr_gint, HContainer<T>& hR);
     // for nspin=4 case
-    void transfer_hr_gint_to_hR_nspin4(std::vector<HContainer<double>>& hRGint_tmp, 
+    void merge_hR_n4(std::vector<HContainer<double>>& hRGint_tmp, 
                             HContainer<std::complex<double>>& hR,
                             const GintInfo& gint_info);
 
diff --git a/source/source_lcao/module_gint/temp_gint/gint_vl_metagga_nspin4.cpp b/source/source_lcao/module_gint/temp_gint/gint_vl_metagga_nspin4.cpp
@@ -14,7 +14,7 @@ void Gint_vl_metagga_nspin4::cal_gint()
     ModuleBase::timer::tick("Gint", "cal_gint_vl");
     init_hr_gint_();
     cal_hr_gint_();
-    transfer_hr_gint_to_hR_nspin4(hr_gint_part_, *hR_, *gint_info_);
+    merge_hR_n4(hr_gint_part_, *hR_, *gint_info_);
     ModuleBase::timer::tick("Gint", "cal_gint_vl");
 }
 
diff --git a/source/source_lcao/module_gint/temp_gint/gint_vl_metagga_nspin4_gpu.cpp b/source/source_lcao/module_gint/temp_gint/gint_vl_metagga_nspin4_gpu.cpp
@@ -13,7 +13,7 @@ void Gint_vl_metagga_nspin4_gpu::cal_gint()
     ModuleBase::timer::tick("Gint", "cal_gint_vl");
     init_hr_gint_();
     cal_hr_gint_();
-    transfer_hr_gint_to_hR_nspin4(hr_gint_part_, *hR_, *gint_info_);
+    merge_hR_n4(hr_gint_part_, *hR_, *gint_info_);
     ModuleBase::timer::tick("Gint", "cal_gint_vl");
 }
 
diff --git a/source/source_lcao/module_gint/temp_gint/gint_vl_nspin4.cpp b/source/source_lcao/module_gint/temp_gint/gint_vl_nspin4.cpp
@@ -13,7 +13,7 @@ void Gint_vl_nspin4::cal_gint()
     ModuleBase::timer::tick("Gint", "cal_gint_vl");
     init_hr_gint_();
     cal_hr_gint_();
-    transfer_hr_gint_to_hR_nspin4(hr_gint_part_, *hR_, *gint_info_);
+    merge_hR_n4(hr_gint_part_, *hR_, *gint_info_);
     ModuleBase::timer::tick("Gint", "cal_gint_vl");
 }
 
diff --git a/source/source_lcao/module_gint/temp_gint/gint_vl_nspin4_gpu.cpp b/source/source_lcao/module_gint/temp_gint/gint_vl_nspin4_gpu.cpp
@@ -13,7 +13,7 @@ void Gint_vl_nspin4_gpu::cal_gint()
     ModuleBase::timer::tick("Gint", "cal_gint_vl");
     init_hr_gint_();
     cal_hr_gint_();
-    transfer_hr_gint_to_hR_nspin4(hr_gint_part_, *hR_, *gint_info_);
+    merge_hR_n4(hr_gint_part_, *hR_, *gint_info_);
     ModuleBase::timer::tick("Gint", "cal_gint_vl");
 }
 

Original file line number	Diff line number	Diff line change
`@@ -14,7 +14,7 @@ void Gint_vl_metagga_nspin4::cal_gint()`
`14`	`14`	`ModuleBase::timer::tick("Gint", "cal_gint_vl");`
`15`	`15`	`init_hr_gint_();`
`16`	`16`	`cal_hr_gint_();`
`17`		`- transfer_hr_gint_to_hR_nspin4(hr_gint_part_, hR_, gint_info_);`
	`17`	`+ merge_hR_n4(hr_gint_part_, hR_, gint_info_);`
`18`	`18`	`ModuleBase::timer::tick("Gint", "cal_gint_vl");`
`19`	`19`	`}`
`20`	`20`
Original file line number	Diff line number	Diff line change
`@@ -13,7 +13,7 @@ void Gint_vl_metagga_nspin4_gpu::cal_gint()`
`13`	`13`	`ModuleBase::timer::tick("Gint", "cal_gint_vl");`
`14`	`14`	`init_hr_gint_();`
`15`	`15`	`cal_hr_gint_();`
`16`		`- transfer_hr_gint_to_hR_nspin4(hr_gint_part_, hR_, gint_info_);`
	`16`	`+ merge_hR_n4(hr_gint_part_, hR_, gint_info_);`
`17`	`17`	`ModuleBase::timer::tick("Gint", "cal_gint_vl");`
`18`	`18`	`}`
`19`	`19`
Original file line number	Diff line number	Diff line change
`@@ -13,7 +13,7 @@ void Gint_vl_nspin4::cal_gint()`
`13`	`13`	`ModuleBase::timer::tick("Gint", "cal_gint_vl");`
`14`	`14`	`init_hr_gint_();`
`15`	`15`	`cal_hr_gint_();`
`16`		`- transfer_hr_gint_to_hR_nspin4(hr_gint_part_, hR_, gint_info_);`
	`16`	`+ merge_hR_n4(hr_gint_part_, hR_, gint_info_);`
`17`	`17`	`ModuleBase::timer::tick("Gint", "cal_gint_vl");`
`18`	`18`	`}`
`19`	`19`
Original file line number	Diff line number	Diff line change
`@@ -13,7 +13,7 @@ void Gint_vl_nspin4_gpu::cal_gint()`
`13`	`13`	`ModuleBase::timer::tick("Gint", "cal_gint_vl");`
`14`	`14`	`init_hr_gint_();`
`15`	`15`	`cal_hr_gint_();`
`16`		`- transfer_hr_gint_to_hR_nspin4(hr_gint_part_, hR_, gint_info_);`
	`16`	`+ merge_hR_n4(hr_gint_part_, hR_, gint_info_);`
`17`	`17`	`ModuleBase::timer::tick("Gint", "cal_gint_vl");`
`18`	`18`	`}`
`19`	`19`