From 7d4fe5aa9f48b57af742e06c74132593719457ec Mon Sep 17 00:00:00 2001 From: hn <3022939753@qq.com> Date: Thu, 21 Aug 2025 21:13:49 +0800 Subject: [PATCH 1/3] delete tem Hcontainer to reduce memory usage --- source/source_lcao/module_gint/gint.h | 2 +- source/source_lcao/module_gint/gint_old.cpp | 100 +++++++++++------- .../module_gint/temp_gint/gint_common.cpp | 91 ++++++++++------ .../module_gint/temp_gint/gint_info.h | 1 + .../source_lcao/module_lr/utils/gint_move.hpp | 4 +- 5 files changed, 121 insertions(+), 77 deletions(-) diff --git a/source/source_lcao/module_gint/gint.h b/source/source_lcao/module_gint/gint.h index 6ca6f53eab..979d5051e2 100644 --- a/source/source_lcao/module_gint/gint.h +++ b/source/source_lcao/module_gint/gint.h @@ -265,7 +265,7 @@ class Gint { std::vector*> DMRGint; //! tmp tools used in transfer_DM2DtoGrid - hamilt::HContainer* DMRGint_full = nullptr; + hamilt::HContainer* DM2D_tmp = nullptr; std::vector> pvdpRx_reduced; std::vector> pvdpRy_reduced; diff --git a/source/source_lcao/module_gint/gint_old.cpp b/source/source_lcao/module_gint/gint_old.cpp index caaf2f92c7..07f24a25ea 100644 --- a/source/source_lcao/module_gint/gint_old.cpp +++ b/source/source_lcao/module_gint/gint_old.cpp @@ -33,7 +33,7 @@ Gint::~Gint() { delete this->hRGint_tmp[is]; } #ifdef __MPI - delete this->DMRGint_full; + delete this->DM2D_tmp; #endif } @@ -171,10 +171,9 @@ void Gint::initialize_pvpR(const UnitCell& ucell_in, const Grid_Driver* gd, cons this->hRGint_tmp[is] = new hamilt::HContainer(ucell_in.nat); } #ifdef __MPI - if (this->DMRGint_full != nullptr) { - delete this->DMRGint_full; + if (this->DM2D_tmp != nullptr) { + delete this->DM2D_tmp; } - this->DMRGint_full = new hamilt::HContainer(ucell_in.nat); #endif } @@ -210,12 +209,6 @@ void Gint::initialize_pvpR(const UnitCell& ucell_in, const Grid_Driver* gd, cons ModuleBase::Memory::record("Gint::DMRGint", this->DMRGint[0]->get_memory_size() * this->DMRGint.size()*nspin); -#ifdef __MPI - this->DMRGint_full->insert_ijrs(this->gridt->get_ijr_info(), ucell_in, npol); - this->DMRGint_full->allocate(nullptr, true); - ModuleBase::Memory::record("Gint::DMRGint_full", - this->DMRGint_full->get_memory_size()); -#endif } } @@ -231,9 +224,7 @@ void Gint::reset_DMRGint(const int& nspin) { for (auto& d : this->DMRGint) { d->allocate(nullptr, false); } #ifdef __MPI - delete this->DMRGint_full; - this->DMRGint_full = new hamilt::HContainer(*this->hRGint); - this->DMRGint_full->allocate(nullptr, false); + delete this->DM2D_tmp; #endif } } @@ -262,37 +253,66 @@ void Gint::transfer_DM2DtoGrid(std::vector*> DM2D) { } else // NSPIN=4 case { #ifdef __MPI - hamilt::transferParallels2Serials(*DM2D[0], this->DMRGint_full); -#else - this->DMRGint_full = DM2D[0]; -#endif - std::vector tmp_pointer(4, nullptr); - for (int iap = 0; iap < this->DMRGint_full->size_atom_pairs(); ++iap) { - auto& ap = this->DMRGint_full->get_atom_pair(iap); - int iat1 = ap.get_atom_i(); - int iat2 = ap.get_atom_j(); - for (int ir = 0; ir < ap.get_R_size(); ++ir) { - const ModuleBase::Vector3 r_index = ap.get_R_index(ir); - for (int is = 0; is < 4; is++) { - tmp_pointer[is] = this->DMRGint[is] - ->find_matrix(iat1, iat2, r_index) - ->get_pointer(); - } - double* data_full = ap.get_pointer(ir); - for (int irow = 0; irow < ap.get_row_size(); irow += 2) { - for (int icol = 0; icol < ap.get_col_size(); icol += 2) { - *(tmp_pointer[0])++ = data_full[icol]; - *(tmp_pointer[1])++ = data_full[icol + 1]; - } - data_full += ap.get_col_size(); - for (int icol = 0; icol < ap.get_col_size(); icol += 2) { - *(tmp_pointer[2])++ = data_full[icol]; - *(tmp_pointer[3])++ = data_full[icol + 1]; + int mg = DM2D[0]->get_paraV()->get_global_row_size()/2; + int ng = DM2D[0]->get_paraV()->get_global_col_size()/2; + int nb = DM2D[0]->get_paraV()->get_block_size()/2; + int blacs_ctxt = DM2D[0]->get_paraV()->blacs_ctxt; + int *iat2iwt = new int[ucell->nat]; + for (int iat = 0; iat < ucell->nat; iat++) { + iat2iwt[iat] = ucell->get_iat2iwt()[iat]/2; + } + Parallel_Orbitals *pv = new Parallel_Orbitals(); + pv->set(mg, ng, nb, blacs_ctxt); + pv->set_atomic_trace(iat2iwt, ucell->nat, mg); + auto ijr_info = DM2D[0]->get_ijr_info(); + this-> DM2D_tmp = new hamilt::HContainer(pv, nullptr, &ijr_info); + ModuleBase::Memory::record("Gint::DM2D_tmp", this->DM2D_tmp->get_memory_size()); + for (int is = 0; is < 4; is++){ + for (int iap = 0; iap < DM2D[0]->size_atom_pairs(); ++iap) { + auto& ap = DM2D[0]->get_atom_pair(iap); + int iat1 = ap.get_atom_i(); + int iat2 = ap.get_atom_j(); + for (int ir = 0; ir < ap.get_R_size(); ++ir) { + const ModuleBase::Vector3 r_index = ap.get_R_index(ir); + double* tmp_pointer = this -> DM2D_tmp -> find_matrix(iat1, iat2, r_index)->get_pointer(); + double* data_full = ap.get_pointer(ir); + for (int irow = 0; irow < ap.get_row_size(); irow += 2) { + switch (is) {//todo: It can be written more compactly + case 0: + for (int icol = 0; icol < ap.get_col_size(); icol += 2) { + *(tmp_pointer)++ = data_full[icol]; + } + data_full += ap.get_col_size() * 2; + break; + case 1: + for (int icol = 0; icol < ap.get_col_size(); icol += 2) { + *(tmp_pointer)++ = data_full[icol + 1]; + } + data_full += ap.get_col_size() * 2; + break; + case 2: + data_full += ap.get_col_size(); + for (int icol = 0; icol < ap.get_col_size(); icol += 2) { + *(tmp_pointer)++ = data_full[icol]; + } + data_full += ap.get_col_size(); + break; + case 3: + data_full += ap.get_col_size(); + for (int icol = 0; icol < ap.get_col_size(); icol += 2) { + *(tmp_pointer)++ = data_full[icol + 1]; + } + data_full += ap.get_col_size(); + break; + } } - data_full += ap.get_col_size(); } } + hamilt::transferParallels2Serials( *(this->DM2D_tmp), this->DMRGint[is]); } +#else + //this->DMRGint_full = DM2D[0]; +#endif } ModuleBase::timer::tick("Gint", "transfer_DMR"); } \ No newline at end of file diff --git a/source/source_lcao/module_gint/temp_gint/gint_common.cpp b/source/source_lcao/module_gint/temp_gint/gint_common.cpp index fc3248ad93..4e09479c26 100644 --- a/source/source_lcao/module_gint/temp_gint/gint_common.cpp +++ b/source/source_lcao/module_gint/temp_gint/gint_common.cpp @@ -163,44 +163,67 @@ void transfer_dm_2d_to_gint( } else // NSPIN=4 case { #ifdef __MPI - const int npol = 2; - HContainer dm_full = gint_info.get_hr(npol); - hamilt::transferParallels2Serials(*dm[0], &dm_full); -#else - HContainer& dm_full = *(dm[0]); -#endif - std::vector tmp_pointer(4, nullptr); - for (int iap = 0; iap < dm_full.size_atom_pairs(); iap++) - { - auto& ap = dm_full.get_atom_pair(iap); - const int iat1 = ap.get_atom_i(); - const int iat2 = ap.get_atom_j(); - for (int ir = 0; ir < ap.get_R_size(); ir++) - { - const ModuleBase::Vector3 r_index = ap.get_R_index(ir); - for (int is = 0; is < 4; is++) - { - tmp_pointer[is] = - dm_gint[is].find_matrix(iat1, iat2, r_index)->get_pointer(); - } - T* data_full = ap.get_pointer(ir); - for (int irow = 0; irow < ap.get_row_size(); irow += 2) - { - for (int icol = 0; icol < ap.get_col_size(); icol += 2) - { - *(tmp_pointer[0])++ = data_full[icol]; - *(tmp_pointer[1])++ = data_full[icol + 1]; - } - data_full += ap.get_col_size(); - for (int icol = 0; icol < ap.get_col_size(); icol += 2) - { - *(tmp_pointer[2])++ = data_full[icol]; - *(tmp_pointer[3])++ = data_full[icol + 1]; + int mg = dm[0]->get_paraV()->get_global_row_size()/2; + int ng = dm[0]->get_paraV()->get_global_col_size()/2; + int nb = dm[0]->get_paraV()->get_block_size()/2; + int blacs_ctxt = dm[0]->get_paraV()->blacs_ctxt; + const UnitCell* ucell = gint_info.get_ucell(); + int *iat2iwt = new int[ucell->nat]; + for (int iat = 0; iat < ucell->nat; iat++) { + iat2iwt[iat] = ucell->get_iat2iwt()[iat]/2; + } + Parallel_Orbitals *pv = new Parallel_Orbitals(); + pv->set(mg, ng, nb, blacs_ctxt); + pv->set_atomic_trace(iat2iwt, ucell->nat, mg); + auto ijr_info = dm[0]->get_ijr_info(); + HContainer* DM2D_tmp = new hamilt::HContainer(pv, nullptr, &ijr_info); + //ModuleBase::Memory::record("Gint::DM2D_tmp", this->DM2D_tmp->get_memory_size()); + for (int is = 0; is < 4; is++){ + for (int iap = 0; iap < dm[0]->size_atom_pairs(); ++iap) { + auto& ap = dm[0]->get_atom_pair(iap); + int iat1 = ap.get_atom_i(); + int iat2 = ap.get_atom_j(); + for (int ir = 0; ir < ap.get_R_size(); ++ir) { + const ModuleBase::Vector3 r_index = ap.get_R_index(ir); + T* tmp_pointer = DM2D_tmp -> find_matrix(iat1, iat2, r_index)->get_pointer(); + T* data_full = ap.get_pointer(ir); + for (int irow = 0; irow < ap.get_row_size(); irow += 2) { + switch (is) {//todo: It can be written more compactly + case 0: + for (int icol = 0; icol < ap.get_col_size(); icol += 2) { + *(tmp_pointer)++ = data_full[icol]; + } + data_full += ap.get_col_size() * 2; + break; + case 1: + for (int icol = 0; icol < ap.get_col_size(); icol += 2) { + *(tmp_pointer)++ = data_full[icol + 1]; + } + data_full += ap.get_col_size() * 2; + break; + case 2: + data_full += ap.get_col_size(); + for (int icol = 0; icol < ap.get_col_size(); icol += 2) { + *(tmp_pointer)++ = data_full[icol]; + } + data_full += ap.get_col_size(); + break; + case 3: + data_full += ap.get_col_size(); + for (int icol = 0; icol < ap.get_col_size(); icol += 2) { + *(tmp_pointer)++ = data_full[icol + 1]; + } + data_full += ap.get_col_size(); + break; + } } - data_full += ap.get_col_size(); } } + hamilt::transferParallels2Serials( *DM2D_tmp, &dm_gint[is]); } +#else + //HContainer& dm_full = *(dm[0]); +#endif } ModuleBase::timer::tick("Gint", "transfer_dm_2d_to_gint"); } diff --git a/source/source_lcao/module_gint/temp_gint/gint_info.h b/source/source_lcao/module_gint/temp_gint/gint_info.h index 7cfe476d25..356a62127e 100644 --- a/source/source_lcao/module_gint/temp_gint/gint_info.h +++ b/source/source_lcao/module_gint/temp_gint/gint_info.h @@ -38,6 +38,7 @@ class GintInfo const std::vector& get_trace_lo() const{ return trace_lo_; } int get_lgd() const { return lgd_; } int get_nat() const { return ucell_->nat; } // return the number of atoms in the unitcell + const UnitCell* get_ucell() const { return ucell_; } int get_local_mgrid_num() const { return localcell_info_->get_mgrids_num(); } double get_mgrid_volume() const { return meshgrid_info_->get_volume(); } diff --git a/source/source_lcao/module_lr/utils/gint_move.hpp b/source/source_lcao/module_lr/utils/gint_move.hpp index 0faa68f39d..cddfbd3fe2 100644 --- a/source/source_lcao/module_lr/utils/gint_move.hpp +++ b/source/source_lcao/module_lr/utils/gint_move.hpp @@ -60,8 +60,8 @@ Gint& Gint::operator=(Gint&& rhs) this->pvdpRz_reduced = std::move(rhs.pvdpRz_reduced); this->DMRGint = std::move(rhs.DMRGint); this->hRGint_tmp = std::move(rhs.hRGint_tmp); - this->DMRGint_full = rhs.DMRGint_full; - rhs.DMRGint_full = nullptr; + this->DM2D_tmp = rhs.DM2D_tmp; + rhs.DM2D_tmp = nullptr; return *this; } From b5a0f8f8b6ecadc9addc73e7dea183701f007394 Mon Sep 17 00:00:00 2001 From: hn <3022939753@qq.com> Date: Tue, 26 Aug 2025 16:31:44 +0800 Subject: [PATCH 2/3] delete hr_gint_full_ to reduce memory usage --- .../module_gint/temp_gint/gint_common.cpp | 198 ++++++++++++------ .../module_gint/temp_gint/gint_common.h | 8 +- .../temp_gint/gint_vl_metagga_nspin4.cpp | 5 +- .../temp_gint/gint_vl_metagga_nspin4.h | 1 - .../temp_gint/gint_vl_metagga_nspin4_gpu.cpp | 5 +- .../temp_gint/gint_vl_metagga_nspin4_gpu.h | 2 +- .../module_gint/temp_gint/gint_vl_nspin4.cpp | 5 +- .../module_gint/temp_gint/gint_vl_nspin4.h | 1 - .../temp_gint/gint_vl_nspin4_gpu.cpp | 5 +- .../temp_gint/gint_vl_nspin4_gpu.h | 1 - 10 files changed, 143 insertions(+), 88 deletions(-) diff --git a/source/source_lcao/module_gint/temp_gint/gint_common.cpp b/source/source_lcao/module_gint/temp_gint/gint_common.cpp index 4e09479c26..dc579e5696 100644 --- a/source/source_lcao/module_gint/temp_gint/gint_common.cpp +++ b/source/source_lcao/module_gint/temp_gint/gint_common.cpp @@ -47,95 +47,163 @@ void compose_hr_gint(HContainer& hr_gint) ModuleBase::timer::tick("Gint", "compose_hr_gint"); } -void compose_hr_gint(const std::vector>& hr_gint_part, - HContainer>& hr_gint_full) +template +void transfer_hr_gint_to_hR(const HContainer& hr_gint, HContainer& hR) { - ModuleBase::TITLE("Gint", "compose_hr_gint"); - ModuleBase::timer::tick("Gint", "compose_hr_gint"); - for (int iap = 0; iap < hr_gint_full.size_atom_pairs(); iap++) + ModuleBase::TITLE("Gint", "transfer_hr_gint_to_hR"); + ModuleBase::timer::tick("Gint", "transfer_hr_gint_to_hR"); +#ifdef __MPI + int size = 0; + MPI_Comm_size(MPI_COMM_WORLD, &size); + if (size == 1) + { + hR.add(hr_gint); + } + else { - auto* ap = &(hr_gint_full.get_atom_pair(iap)); - const int iat1 = ap->get_atom_i(); - const int iat2 = ap->get_atom_j(); - if (iat1 <= iat2) + hamilt::transferSerials2Parallels(hr_gint, &hR); + } +#else + hR.add(hr_gint); +#endif + ModuleBase::timer::tick("Gint", "transfer_hr_gint_to_hR"); +} + +//hRgint_tmp to hR +void transfer_hr_gint_to_hR_nspin4(std::vector>& hRGint_tmp, + HContainer>& hR, + const GintInfo& gint_info) +{ + ModuleBase::TITLE("Gint", "transfer_hr_gint_to_hR_nspin4"); + ModuleBase::timer::tick("Gint", "transfer_hr_gint_to_hR_nspin4"); +#ifdef __MPI + int mg = hR.get_paraV()->get_global_row_size()/2; + int ng = hR.get_paraV()->get_global_col_size()/2; + int nb = hR.get_paraV()->get_block_size()/2; + int blacs_ctxt = hR.get_paraV()->blacs_ctxt; + const UnitCell* ucell = gint_info.get_ucell(); + int *iat2iwt = new int[ucell->nat]; + for (int iat = 0; iat < ucell->nat; iat++) { + iat2iwt[iat] = ucell->get_iat2iwt()[iat]/2; + } + Parallel_Orbitals *pv = new Parallel_Orbitals(); + pv->set(mg, ng, nb, blacs_ctxt); + pv->set_atomic_trace(iat2iwt, ucell->nat, mg); + auto ijr_info = hR.get_ijr_info(); + + hamilt::HContainer* hR_tmp = new hamilt::HContainer(pv, nullptr, &ijr_info); + for (int is = 0; is < 4; is++){ + hR_tmp->set_zero(); + //std::cout<<"is: "<>* upper_ap = ap; - hamilt::AtomPair>* lower_ap = hr_gint_full.find_pair(iat2, iat1); - const hamilt::AtomPair* ap_nspin_0 = hr_gint_part[0].find_pair(iat1, iat2); - const hamilt::AtomPair* ap_nspin_3 = hr_gint_part[3].find_pair(iat1, iat2); - for (int ir = 0; ir < upper_ap->get_R_size(); ir++) + //std::cout<<"iap: "<get_atom_i(); + const int iat2 = ap->get_atom_j(); + const hamilt::AtomPair* ap_nspin = nullptr; + if (iat1 <= iat2) { - const auto R_index = upper_ap->get_R_index(ir); - auto upper_mat = upper_ap->find_matrix(R_index); - auto mat_nspin_0 = ap_nspin_0->find_matrix(R_index); - auto mat_nspin_3 = ap_nspin_3->find_matrix(R_index); - - // The row size and the col size of upper_matrix is double that of matrix_nspin_0 - for (int irow = 0; irow < mat_nspin_0->get_row_size(); ++irow) + hamilt::AtomPair>* upper_ap = ap; + hamilt::AtomPair>* lower_ap = hR.find_pair(iat2, iat1); + switch (is) { - for (int icol = 0; icol < mat_nspin_0->get_col_size(); ++icol) - { - upper_mat->get_value(2*irow, 2*icol) = mat_nspin_0->get_value(irow, icol) + mat_nspin_3->get_value(irow, icol); - upper_mat->get_value(2*irow+1, 2*icol+1) = mat_nspin_0->get_value(irow, icol) - mat_nspin_3->get_value(irow, icol); - } + case 0: + ap_nspin = hR_tmp->find_pair(iat1, iat2); + break; + case 3: + ap_nspin = hR_tmp->find_pair(iat1, iat2); + break; } + if(ap_nspin == nullptr) break; + for (int ir = 0; ir < upper_ap->get_R_size(); ir++) + { + const auto R_index = upper_ap->get_R_index(ir); + auto upper_mat = upper_ap->find_matrix(R_index); + auto mat_nspin = ap_nspin->find_matrix(R_index); - if (PARAM.globalv.domag) - { - const hamilt::AtomPair* ap_nspin_1 = hr_gint_part[1].find_pair(iat1, iat2); - const hamilt::AtomPair* ap_nspin_2 = hr_gint_part[2].find_pair(iat1, iat2); - const auto mat_nspin_1 = ap_nspin_1->find_matrix(R_index); - const auto mat_nspin_2 = ap_nspin_2->find_matrix(R_index); - for (int irow = 0; irow < mat_nspin_1->get_row_size(); ++irow) + // The row size and the col size of upper_matrix is double that of matrix_nspin_0 + for (int irow = 0; irow < mat_nspin->get_row_size(); ++irow) { - for (int icol = 0; icol < mat_nspin_1->get_col_size(); ++icol) + for (int icol = 0; icol < mat_nspin->get_col_size(); ++icol) { - upper_mat->get_value(2*irow, 2*icol+1) = mat_nspin_1->get_value(irow, icol) + std::complex(0.0, 1.0) * mat_nspin_2->get_value(irow, icol); - upper_mat->get_value(2*irow+1, 2*icol) = mat_nspin_1->get_value(irow, icol) - std::complex(0.0, 1.0) * mat_nspin_2->get_value(irow, icol); + switch (is) + { + case 0: + upper_mat->get_value(2*irow, 2*icol) = mat_nspin->get_value(irow, icol); + upper_mat->get_value(2*irow+1, 2*icol+1) = mat_nspin->get_value(irow, icol); + break; + case 3: + upper_mat->get_value(2*irow, 2*icol) += mat_nspin->get_value(irow, icol); + upper_mat->get_value(2*irow+1, 2*icol+1) -= mat_nspin->get_value(irow, icol); + break; + } } } - } - // fill the lower triangle matrix - if (iat1 < iat2) - { - auto lower_mat = lower_ap->find_matrix(-R_index); - for (int irow = 0; irow < upper_mat->get_row_size(); ++irow) + if (PARAM.globalv.domag) { - for (int icol = 0; icol < upper_mat->get_col_size(); ++icol) + const hamilt::AtomPair* ap_nspin = nullptr; + switch (is) { - lower_mat->get_value(icol, irow) = conj(upper_mat->get_value(irow, icol)); + case 1: + ap_nspin = hR_tmp->find_pair(iat1, iat2); + break; + case 2: + ap_nspin = hR_tmp->find_pair(iat1, iat2); + break; + } + const auto mat_nspin = ap_nspin->find_matrix(R_index); + for (int irow = 0; irow < mat_nspin->get_row_size(); ++irow) + { + for (int icol = 0; icol < mat_nspin->get_col_size(); ++icol) + { + switch(is) + { + case 1: + upper_mat->get_value(2*irow, 2*icol+1) = mat_nspin->get_value(irow, icol); + upper_mat->get_value(2*irow+1, 2*icol) = mat_nspin->get_value(irow, icol); + break; + case 2: + upper_mat->get_value(2*irow, 2*icol+1) += std::complex(0.0, 1.0) * mat_nspin->get_value(irow, icol); + upper_mat->get_value(2*irow+1, 2*icol) -= std::complex(0.0, 1.0) * mat_nspin->get_value(irow, icol); + break; + } + } + } + } + + // fill the lower triangle matrix + if(is == 3){ + if (iat1 < iat2) + { + auto lower_mat = lower_ap->find_matrix(-R_index); + for (int irow = 0; irow < upper_mat->get_row_size(); ++irow) + { + for (int icol = 0; icol < upper_mat->get_col_size(); ++icol) + { + lower_mat->get_value(icol, irow) = conj(upper_mat->get_value(irow, icol)); + } + } } } } } } + } - ModuleBase::timer::tick("Gint", "compose_hr_gint"); -} - -template -void transfer_hr_gint_to_hR(const HContainer& hr_gint, HContainer& hR) -{ - ModuleBase::TITLE("Gint", "transfer_hr_gint_to_hR"); - ModuleBase::timer::tick("Gint", "transfer_hr_gint_to_hR"); -#ifdef __MPI - int size = 0; - MPI_Comm_size(MPI_COMM_WORLD, &size); - if (size == 1) - { - hR.add(hr_gint); - } - else - { - hamilt::transferSerials2Parallels(hr_gint, &hR); - } + delete[] iat2iwt; + delete pv; + delete hR_tmp; #else - hR.add(hr_gint); + #endif - ModuleBase::timer::tick("Gint", "transfer_hr_gint_to_hR"); + ModuleBase::timer::tick("Gint", "transfer_hr_gint_to_hR_nspin4"); + return; } + // gint_info should not have been a parameter, but it was added to initialize dm_gint_full // In the future, we might try to remove the gint_info parameter template diff --git a/source/source_lcao/module_gint/temp_gint/gint_common.h b/source/source_lcao/module_gint/temp_gint/gint_common.h index 17a324df2b..c6872ccc9c 100644 --- a/source/source_lcao/module_gint/temp_gint/gint_common.h +++ b/source/source_lcao/module_gint/temp_gint/gint_common.h @@ -6,12 +6,14 @@ namespace ModuleGint { // fill the lower triangle matrix with the upper triangle matrix void compose_hr_gint(HContainer& hr_gint); - // for nspin=4 case - void compose_hr_gint(const std::vector>& hr_gint_part, - HContainer>& hr_gint_full); + template void transfer_hr_gint_to_hR(const HContainer& hr_gint, HContainer& hR); + // for nspin=4 case + void transfer_hr_gint_to_hR_nspin4(std::vector>& hRGint_tmp, + HContainer>& hR, + const GintInfo& gint_info); template void transfer_dm_2d_to_gint( diff --git a/source/source_lcao/module_gint/temp_gint/gint_vl_metagga_nspin4.cpp b/source/source_lcao/module_gint/temp_gint/gint_vl_metagga_nspin4.cpp index d5880600b0..a0c78a2c39 100644 --- a/source/source_lcao/module_gint/temp_gint/gint_vl_metagga_nspin4.cpp +++ b/source/source_lcao/module_gint/temp_gint/gint_vl_metagga_nspin4.cpp @@ -14,8 +14,7 @@ void Gint_vl_metagga_nspin4::cal_gint() ModuleBase::timer::tick("Gint", "cal_gint_vl"); init_hr_gint_(); cal_hr_gint_(); - compose_hr_gint(hr_gint_part_, hr_gint_full_); - transfer_hr_gint_to_hR(hr_gint_full_, *hR_); + transfer_hr_gint_to_hR_nspin4(hr_gint_part_, *hR_, *gint_info_); ModuleBase::timer::tick("Gint", "cal_gint_vl"); } @@ -26,8 +25,6 @@ void Gint_vl_metagga_nspin4::init_hr_gint_() { hr_gint_part_[i] = gint_info_->get_hr(); } - const int npol = 2; - hr_gint_full_ = gint_info_->get_hr>(npol); } void Gint_vl_metagga_nspin4::cal_hr_gint_() diff --git a/source/source_lcao/module_gint/temp_gint/gint_vl_metagga_nspin4.h b/source/source_lcao/module_gint/temp_gint/gint_vl_metagga_nspin4.h index 12722fef21..138cb1a277 100644 --- a/source/source_lcao/module_gint/temp_gint/gint_vl_metagga_nspin4.h +++ b/source/source_lcao/module_gint/temp_gint/gint_vl_metagga_nspin4.h @@ -37,7 +37,6 @@ class Gint_vl_metagga_nspin4 : public Gint const int nspin_ = 4; std::vector> hr_gint_part_; - HContainer> hr_gint_full_; }; } \ No newline at end of file diff --git a/source/source_lcao/module_gint/temp_gint/gint_vl_metagga_nspin4_gpu.cpp b/source/source_lcao/module_gint/temp_gint/gint_vl_metagga_nspin4_gpu.cpp index 9adc4cb137..570b872292 100644 --- a/source/source_lcao/module_gint/temp_gint/gint_vl_metagga_nspin4_gpu.cpp +++ b/source/source_lcao/module_gint/temp_gint/gint_vl_metagga_nspin4_gpu.cpp @@ -13,8 +13,7 @@ void Gint_vl_metagga_nspin4_gpu::cal_gint() ModuleBase::timer::tick("Gint", "cal_gint_vl"); init_hr_gint_(); cal_hr_gint_(); - compose_hr_gint(hr_gint_part_, hr_gint_full_); - transfer_hr_gint_to_hR(hr_gint_full_, *hR_); + transfer_hr_gint_to_hR_nspin4(hr_gint_part_, *hR_, *gint_info_); ModuleBase::timer::tick("Gint", "cal_gint_vl"); } @@ -25,8 +24,6 @@ void Gint_vl_metagga_nspin4_gpu::init_hr_gint_() { hr_gint_part_[i] = gint_info_->get_hr(); } - const int npol = 2; - hr_gint_full_ = gint_info_->get_hr>(npol); } void Gint_vl_metagga_nspin4_gpu::transfer_cpu_to_gpu_() diff --git a/source/source_lcao/module_gint/temp_gint/gint_vl_metagga_nspin4_gpu.h b/source/source_lcao/module_gint/temp_gint/gint_vl_metagga_nspin4_gpu.h index fd967a63ec..92e75c7d3c 100644 --- a/source/source_lcao/module_gint/temp_gint/gint_vl_metagga_nspin4_gpu.h +++ b/source/source_lcao/module_gint/temp_gint/gint_vl_metagga_nspin4_gpu.h @@ -42,7 +42,7 @@ class Gint_vl_metagga_nspin4_gpu : public Gint const int nspin_ = 4; std::vector> hr_gint_part_; - HContainer> hr_gint_full_; + //HContainer> hr_gint_full_; std::vector> vr_eff_d_; std::vector> vofk_d_; diff --git a/source/source_lcao/module_gint/temp_gint/gint_vl_nspin4.cpp b/source/source_lcao/module_gint/temp_gint/gint_vl_nspin4.cpp index 56aca5edeb..cda7bc8afc 100644 --- a/source/source_lcao/module_gint/temp_gint/gint_vl_nspin4.cpp +++ b/source/source_lcao/module_gint/temp_gint/gint_vl_nspin4.cpp @@ -13,8 +13,7 @@ void Gint_vl_nspin4::cal_gint() ModuleBase::timer::tick("Gint", "cal_gint_vl"); init_hr_gint_(); cal_hr_gint_(); - compose_hr_gint(hr_gint_part_, hr_gint_full_); - transfer_hr_gint_to_hR(hr_gint_full_, *hR_); + transfer_hr_gint_to_hR_nspin4(hr_gint_part_, *hR_, *gint_info_); ModuleBase::timer::tick("Gint", "cal_gint_vl"); } @@ -25,8 +24,6 @@ void Gint_vl_nspin4::init_hr_gint_() { hr_gint_part_[i] = gint_info_->get_hr(); } - const int npol = 2; - hr_gint_full_ = gint_info_->get_hr>(npol); } void Gint_vl_nspin4::cal_hr_gint_() diff --git a/source/source_lcao/module_gint/temp_gint/gint_vl_nspin4.h b/source/source_lcao/module_gint/temp_gint/gint_vl_nspin4.h index eed9827242..97aa47ca58 100644 --- a/source/source_lcao/module_gint/temp_gint/gint_vl_nspin4.h +++ b/source/source_lcao/module_gint/temp_gint/gint_vl_nspin4.h @@ -39,7 +39,6 @@ class Gint_vl_nspin4 : public Gint const int nspin_ = 4; std::vector> hr_gint_part_; - HContainer> hr_gint_full_; }; } // namespace ModuleGint \ No newline at end of file diff --git a/source/source_lcao/module_gint/temp_gint/gint_vl_nspin4_gpu.cpp b/source/source_lcao/module_gint/temp_gint/gint_vl_nspin4_gpu.cpp index c070258db5..37a58a645f 100644 --- a/source/source_lcao/module_gint/temp_gint/gint_vl_nspin4_gpu.cpp +++ b/source/source_lcao/module_gint/temp_gint/gint_vl_nspin4_gpu.cpp @@ -13,8 +13,7 @@ void Gint_vl_nspin4_gpu::cal_gint() ModuleBase::timer::tick("Gint", "cal_gint_vl"); init_hr_gint_(); cal_hr_gint_(); - compose_hr_gint(hr_gint_part_, hr_gint_full_); - transfer_hr_gint_to_hR(hr_gint_full_, *hR_); + transfer_hr_gint_to_hR_nspin4(hr_gint_part_, *hR_, *gint_info_); ModuleBase::timer::tick("Gint", "cal_gint_vl"); } @@ -25,8 +24,6 @@ void Gint_vl_nspin4_gpu::init_hr_gint_() { hr_gint_part_[i] = gint_info_->get_hr(); } - const int npol = 2; - hr_gint_full_ = gint_info_->get_hr>(npol); } void Gint_vl_nspin4_gpu::transfer_cpu_to_gpu_() diff --git a/source/source_lcao/module_gint/temp_gint/gint_vl_nspin4_gpu.h b/source/source_lcao/module_gint/temp_gint/gint_vl_nspin4_gpu.h index 81215b33ac..6d17a9a1bb 100644 --- a/source/source_lcao/module_gint/temp_gint/gint_vl_nspin4_gpu.h +++ b/source/source_lcao/module_gint/temp_gint/gint_vl_nspin4_gpu.h @@ -44,7 +44,6 @@ class Gint_vl_nspin4_gpu : public Gint const int nspin_ = 4; std::vector> hr_gint_part_; - HContainer> hr_gint_full_; std::vector> vr_eff_d_; std::vector> hr_gint_part_d_; From a9cc5c6b6d0e8006408d1a99f6ade03ad590825f Mon Sep 17 00:00:00 2001 From: hn <3022939753@qq.com> Date: Sun, 31 Aug 2025 16:09:53 +0800 Subject: [PATCH 3/3] fix parallel bug and Simplify the computational code --- .../module_gint/temp_gint/gint_common.cpp | 177 +++++++----------- .../module_gint/temp_gint/gint_common.h | 2 +- .../temp_gint/gint_vl_metagga_nspin4.cpp | 2 +- .../temp_gint/gint_vl_metagga_nspin4_gpu.cpp | 2 +- .../module_gint/temp_gint/gint_vl_nspin4.cpp | 2 +- .../temp_gint/gint_vl_nspin4_gpu.cpp | 2 +- 6 files changed, 76 insertions(+), 111 deletions(-) diff --git a/source/source_lcao/module_gint/temp_gint/gint_common.cpp b/source/source_lcao/module_gint/temp_gint/gint_common.cpp index dc579e5696..225e9a37c5 100644 --- a/source/source_lcao/module_gint/temp_gint/gint_common.cpp +++ b/source/source_lcao/module_gint/temp_gint/gint_common.cpp @@ -70,17 +70,18 @@ void transfer_hr_gint_to_hR(const HContainer& hr_gint, HContainer& hR) } //hRgint_tmp to hR -void transfer_hr_gint_to_hR_nspin4(std::vector>& hRGint_tmp, +void merge_hR_n4(std::vector>& hRGint_tmp, HContainer>& hR, const GintInfo& gint_info) { - ModuleBase::TITLE("Gint", "transfer_hr_gint_to_hR_nspin4"); - ModuleBase::timer::tick("Gint", "transfer_hr_gint_to_hR_nspin4"); + ModuleBase::TITLE("Gint", "merge_hR_n4"); + ModuleBase::timer::tick("Gint", "merge_hR_n4"); #ifdef __MPI int mg = hR.get_paraV()->get_global_row_size()/2; int ng = hR.get_paraV()->get_global_col_size()/2; int nb = hR.get_paraV()->get_block_size()/2; int blacs_ctxt = hR.get_paraV()->blacs_ctxt; + const UnitCell* ucell = gint_info.get_ucell(); int *iat2iwt = new int[ucell->nat]; for (int iat = 0; iat < ucell->nat; iat++) { @@ -91,91 +92,49 @@ void transfer_hr_gint_to_hR_nspin4(std::vector>& hRGint_tmp, pv->set_atomic_trace(iat2iwt, ucell->nat, mg); auto ijr_info = hR.get_ijr_info(); - hamilt::HContainer* hR_tmp = new hamilt::HContainer(pv, nullptr, &ijr_info); + auto* hR_tmp = new hamilt::HContainer>(pv, nullptr, &ijr_info); + + std::vector first = {0, 1, 1, 0}; + std::vector second= {3, 2, 2, 3}; + std::vector row_set = {0, 0, 1, 1}; + std::vector col_set = {0, 1, 0, 1}; + std::vector clx_i = {1, 0, 0, -1}; + std::vector clx_j = {0, 1, -1, 0}; for (int is = 0; is < 4; is++){ - hR_tmp->set_zero(); - //std::cout<<"is: "<>* hRGint_tmpCd = new hamilt::HContainer>(ucell->nat); + ijr_info = hRGint_tmp[0].get_ijr_info(); + hRGint_tmpCd->insert_ijrs(&ijr_info, *(ucell)); + hRGint_tmpCd->allocate(nullptr, true); + hRGint_tmpCd->set_zero(); + for (int iap = 0; iap < hRGint_tmpCd->size_atom_pairs(); iap++) { //std::cout<<"iap: "<get_atom_pair(iap); const int iat1 = ap->get_atom_i(); const int iat2 = ap->get_atom_j(); - const hamilt::AtomPair* ap_nspin = nullptr; if (iat1 <= iat2) { hamilt::AtomPair>* upper_ap = ap; - hamilt::AtomPair>* lower_ap = hR.find_pair(iat2, iat1); - switch (is) - { - case 0: - ap_nspin = hR_tmp->find_pair(iat1, iat2); - break; - case 3: - ap_nspin = hR_tmp->find_pair(iat1, iat2); - break; - } - if(ap_nspin == nullptr) break; + hamilt::AtomPair>* lower_ap = hRGint_tmpCd->find_pair(iat2, iat1); + const hamilt::AtomPair* ap_nspin1 = hRGint_tmp[first[is]].find_pair(iat1, iat2); + const hamilt::AtomPair* ap_nspin2 = hRGint_tmp[second[is]].find_pair(iat1, iat2); for (int ir = 0; ir < upper_ap->get_R_size(); ir++) { const auto R_index = upper_ap->get_R_index(ir); auto upper_mat = upper_ap->find_matrix(R_index); - auto mat_nspin = ap_nspin->find_matrix(R_index); - + auto mat_nspin1 = ap_nspin1->find_matrix(R_index); + auto mat_nspin2 = ap_nspin2->find_matrix(R_index); // The row size and the col size of upper_matrix is double that of matrix_nspin_0 - for (int irow = 0; irow < mat_nspin->get_row_size(); ++irow) + for (int irow = 0; irow < mat_nspin1->get_row_size(); ++irow) { - for (int icol = 0; icol < mat_nspin->get_col_size(); ++icol) + for (int icol = 0; icol < mat_nspin1->get_col_size(); ++icol) { - switch (is) - { - case 0: - upper_mat->get_value(2*irow, 2*icol) = mat_nspin->get_value(irow, icol); - upper_mat->get_value(2*irow+1, 2*icol+1) = mat_nspin->get_value(irow, icol); - break; - case 3: - upper_mat->get_value(2*irow, 2*icol) += mat_nspin->get_value(irow, icol); - upper_mat->get_value(2*irow+1, 2*icol+1) -= mat_nspin->get_value(irow, icol); - break; - } - } - } - - if (PARAM.globalv.domag) - { - const hamilt::AtomPair* ap_nspin = nullptr; - switch (is) - { - case 1: - ap_nspin = hR_tmp->find_pair(iat1, iat2); - break; - case 2: - ap_nspin = hR_tmp->find_pair(iat1, iat2); - break; - } - const auto mat_nspin = ap_nspin->find_matrix(R_index); - for (int irow = 0; irow < mat_nspin->get_row_size(); ++irow) - { - for (int icol = 0; icol < mat_nspin->get_col_size(); ++icol) - { - switch(is) - { - case 1: - upper_mat->get_value(2*irow, 2*icol+1) = mat_nspin->get_value(irow, icol); - upper_mat->get_value(2*irow+1, 2*icol) = mat_nspin->get_value(irow, icol); - break; - case 2: - upper_mat->get_value(2*irow, 2*icol+1) += std::complex(0.0, 1.0) * mat_nspin->get_value(irow, icol); - upper_mat->get_value(2*irow+1, 2*icol) -= std::complex(0.0, 1.0) * mat_nspin->get_value(irow, icol); - break; - } - } + upper_mat->get_value(irow, icol) = mat_nspin1->get_value(irow, icol) + + std::complex(clx_i[is], clx_j[is]) * mat_nspin2->get_value(irow, icol); } } - - // fill the lower triangle matrix - if(is == 3){ + //fill the lower triangle matrix + if (PARAM.globalv.domag){ if (iat1 < iat2) { auto lower_mat = lower_ap->find_matrix(-R_index); @@ -191,15 +150,41 @@ void transfer_hr_gint_to_hR_nspin4(std::vector>& hRGint_tmp, } } } - + + hR_tmp->set_zero(); + hamilt::transferSerials2Parallels( *hRGint_tmpCd, hR_tmp); + for (int iap = 0; iap < hR.size_atom_pairs(); iap++) + { + auto* ap = &hR.get_atom_pair(iap); + const int iat1 = ap->get_atom_i(); + const int iat2 = ap->get_atom_j(); + auto* ap_nspin = hR_tmp ->find_pair(iat1, iat2); + for (int ir = 0; ir < ap->get_R_size(); ir++) + { + const auto R_index = ap->get_R_index(ir); + auto upper_mat = ap->find_matrix(R_index); + auto mat_nspin = ap_nspin->find_matrix(R_index); + + // The row size and the col size of upper_matrix is double that of matrix_nspin_0 + for (int irow = 0; irow < mat_nspin->get_row_size(); ++irow) + { + for (int icol = 0; icol < mat_nspin->get_col_size(); ++icol) + { + upper_mat->get_value(2*irow+row_set[is], 2*icol+col_set[is]) = + mat_nspin->get_value(irow, icol); + } + } + } + } + delete hRGint_tmpCd; } delete[] iat2iwt; - delete pv; - delete hR_tmp; #else #endif - ModuleBase::timer::tick("Gint", "transfer_hr_gint_to_hR_nspin4"); + + + ModuleBase::timer::tick("Gint", "merge_hR_n4"); return; } @@ -231,6 +216,9 @@ void transfer_dm_2d_to_gint( } else // NSPIN=4 case { #ifdef __MPI + // is=0:↑↑, 1:↑↓, 2:↓↑, 3:↓↓ + const int row_set[4] = {0, 0, 1, 1}; + const int col_set[4] = {0, 1, 0, 1}; int mg = dm[0]->get_paraV()->get_global_row_size()/2; int ng = dm[0]->get_paraV()->get_global_col_size()/2; int nb = dm[0]->get_paraV()->get_block_size()/2; @@ -246,43 +234,20 @@ void transfer_dm_2d_to_gint( auto ijr_info = dm[0]->get_ijr_info(); HContainer* DM2D_tmp = new hamilt::HContainer(pv, nullptr, &ijr_info); //ModuleBase::Memory::record("Gint::DM2D_tmp", this->DM2D_tmp->get_memory_size()); - for (int is = 0; is < 4; is++){ + for (int is = 0; is < 4; is++){ for (int iap = 0; iap < dm[0]->size_atom_pairs(); ++iap) { auto& ap = dm[0]->get_atom_pair(iap); int iat1 = ap.get_atom_i(); int iat2 = ap.get_atom_j(); for (int ir = 0; ir < ap.get_R_size(); ++ir) { const ModuleBase::Vector3 r_index = ap.get_R_index(ir); - T* tmp_pointer = DM2D_tmp -> find_matrix(iat1, iat2, r_index)->get_pointer(); - T* data_full = ap.get_pointer(ir); - for (int irow = 0; irow < ap.get_row_size(); irow += 2) { - switch (is) {//todo: It can be written more compactly - case 0: - for (int icol = 0; icol < ap.get_col_size(); icol += 2) { - *(tmp_pointer)++ = data_full[icol]; - } - data_full += ap.get_col_size() * 2; - break; - case 1: - for (int icol = 0; icol < ap.get_col_size(); icol += 2) { - *(tmp_pointer)++ = data_full[icol + 1]; - } - data_full += ap.get_col_size() * 2; - break; - case 2: - data_full += ap.get_col_size(); - for (int icol = 0; icol < ap.get_col_size(); icol += 2) { - *(tmp_pointer)++ = data_full[icol]; - } - data_full += ap.get_col_size(); - break; - case 3: - data_full += ap.get_col_size(); - for (int icol = 0; icol < ap.get_col_size(); icol += 2) { - *(tmp_pointer)++ = data_full[icol + 1]; - } - data_full += ap.get_col_size(); - break; + T* matrix_out = DM2D_tmp -> find_matrix(iat1, iat2, r_index)->get_pointer(); + T* matrix_in = ap.get_pointer(ir); + for (int irow = 0; irow < ap.get_row_size()/2; irow ++) { + for (int icol = 0; icol < ap.get_col_size()/2; icol ++) { + int index_i = irow* ap.get_col_size()/2 + icol; + int index_j = (irow*2+row_set[is]) * ap.get_col_size() + icol*2+col_set[is]; + matrix_out[index_i] = matrix_in[index_j]; } } } diff --git a/source/source_lcao/module_gint/temp_gint/gint_common.h b/source/source_lcao/module_gint/temp_gint/gint_common.h index c6872ccc9c..366a5058e5 100644 --- a/source/source_lcao/module_gint/temp_gint/gint_common.h +++ b/source/source_lcao/module_gint/temp_gint/gint_common.h @@ -11,7 +11,7 @@ namespace ModuleGint template void transfer_hr_gint_to_hR(const HContainer& hr_gint, HContainer& hR); // for nspin=4 case - void transfer_hr_gint_to_hR_nspin4(std::vector>& hRGint_tmp, + void merge_hR_n4(std::vector>& hRGint_tmp, HContainer>& hR, const GintInfo& gint_info); diff --git a/source/source_lcao/module_gint/temp_gint/gint_vl_metagga_nspin4.cpp b/source/source_lcao/module_gint/temp_gint/gint_vl_metagga_nspin4.cpp index a0c78a2c39..9ef358feb8 100644 --- a/source/source_lcao/module_gint/temp_gint/gint_vl_metagga_nspin4.cpp +++ b/source/source_lcao/module_gint/temp_gint/gint_vl_metagga_nspin4.cpp @@ -14,7 +14,7 @@ void Gint_vl_metagga_nspin4::cal_gint() ModuleBase::timer::tick("Gint", "cal_gint_vl"); init_hr_gint_(); cal_hr_gint_(); - transfer_hr_gint_to_hR_nspin4(hr_gint_part_, *hR_, *gint_info_); + merge_hR_n4(hr_gint_part_, *hR_, *gint_info_); ModuleBase::timer::tick("Gint", "cal_gint_vl"); } diff --git a/source/source_lcao/module_gint/temp_gint/gint_vl_metagga_nspin4_gpu.cpp b/source/source_lcao/module_gint/temp_gint/gint_vl_metagga_nspin4_gpu.cpp index 570b872292..e3c2c7e36c 100644 --- a/source/source_lcao/module_gint/temp_gint/gint_vl_metagga_nspin4_gpu.cpp +++ b/source/source_lcao/module_gint/temp_gint/gint_vl_metagga_nspin4_gpu.cpp @@ -13,7 +13,7 @@ void Gint_vl_metagga_nspin4_gpu::cal_gint() ModuleBase::timer::tick("Gint", "cal_gint_vl"); init_hr_gint_(); cal_hr_gint_(); - transfer_hr_gint_to_hR_nspin4(hr_gint_part_, *hR_, *gint_info_); + merge_hR_n4(hr_gint_part_, *hR_, *gint_info_); ModuleBase::timer::tick("Gint", "cal_gint_vl"); } diff --git a/source/source_lcao/module_gint/temp_gint/gint_vl_nspin4.cpp b/source/source_lcao/module_gint/temp_gint/gint_vl_nspin4.cpp index cda7bc8afc..c7562a4aae 100644 --- a/source/source_lcao/module_gint/temp_gint/gint_vl_nspin4.cpp +++ b/source/source_lcao/module_gint/temp_gint/gint_vl_nspin4.cpp @@ -13,7 +13,7 @@ void Gint_vl_nspin4::cal_gint() ModuleBase::timer::tick("Gint", "cal_gint_vl"); init_hr_gint_(); cal_hr_gint_(); - transfer_hr_gint_to_hR_nspin4(hr_gint_part_, *hR_, *gint_info_); + merge_hR_n4(hr_gint_part_, *hR_, *gint_info_); ModuleBase::timer::tick("Gint", "cal_gint_vl"); } diff --git a/source/source_lcao/module_gint/temp_gint/gint_vl_nspin4_gpu.cpp b/source/source_lcao/module_gint/temp_gint/gint_vl_nspin4_gpu.cpp index 37a58a645f..8bb71ff76c 100644 --- a/source/source_lcao/module_gint/temp_gint/gint_vl_nspin4_gpu.cpp +++ b/source/source_lcao/module_gint/temp_gint/gint_vl_nspin4_gpu.cpp @@ -13,7 +13,7 @@ void Gint_vl_nspin4_gpu::cal_gint() ModuleBase::timer::tick("Gint", "cal_gint_vl"); init_hr_gint_(); cal_hr_gint_(); - transfer_hr_gint_to_hR_nspin4(hr_gint_part_, *hR_, *gint_info_); + merge_hR_n4(hr_gint_part_, *hR_, *gint_info_); ModuleBase::timer::tick("Gint", "cal_gint_vl"); }