diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/spar_hsr.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/spar_hsr.cpp index e2ab76647b..282404b74c 100644 --- a/source/module_hamilt_lcao/hamilt_lcaodft/spar_hsr.cpp +++ b/source/module_hamilt_lcao/hamilt_lcaodft/spar_hsr.cpp @@ -1,12 +1,72 @@ #include "spar_hsr.h" -#include "module_parameter/parameter.h" #include "module_hamilt_lcao/module_hcontainer/hcontainer.h" #include "module_hamilt_lcao/module_tddft/td_velocity.h" +#include "module_parameter/parameter.h" #include "spar_dh.h" #include "spar_exx.h" #include "spar_u.h" +#ifdef __MPI +void sparse_format::sync_all_R_coor(std::set>& all_R_coor, MPI_Comm comm) +{ + int my_rank, nproc; + MPI_Comm_rank(comm, &my_rank); + MPI_Comm_size(comm, &nproc); + + // Step 1: Gather the number of R coordinates from each process + int local_size = all_R_coor.size(); + std::vector recv_counts(nproc, 0); + MPI_Allgather(&local_size, 1, MPI_INT, recv_counts.data(), 1, MPI_INT, comm); + + // Step 2: Calculate the number of integers sent by each process (each R coordinate contains 3 integers) + std::vector recv_counts_elements(nproc); + std::vector displacements(nproc, 0); + int total_size_elements = 0; + + for (int i = 0; i < nproc; ++i) + { + recv_counts_elements[i] = recv_counts[i] * 3; // Number of integers sent by each process + displacements[i] = total_size_elements; + total_size_elements += recv_counts_elements[i]; + } + + // Step 3: Gather the raw data of all R coordinates (each R coordinate is stored as 3 integers) + std::vector local_R_data; + local_R_data.reserve(local_size * 3); + for (const auto& R: all_R_coor) + { + local_R_data.push_back(R.x); + local_R_data.push_back(R.y); + local_R_data.push_back(R.z); + } + + // Step 4: Allocate the receive buffer and call MPI_Allgatherv + std::vector global_R_data(total_size_elements); + MPI_Allgatherv(local_R_data.data(), + local_size * 3, + MPI_INT, + global_R_data.data(), + recv_counts_elements.data(), + displacements.data(), + MPI_INT, + comm); + + // Step 5: Merge to create a global set of R coordinates + std::set> global_R_coor; + for (int i = 0; i < total_size_elements; i += 3) + { + int x = global_R_data[i]; + int y = global_R_data[i + 1]; + int z = global_R_data[i + 2]; + global_R_coor.insert(Abfs::Vector3_Order(x, y, z)); + } + + // Step 6: Update all processes' all_R_coor + all_R_coor = std::move(global_R_coor); +} +#endif // __MPI + void sparse_format::cal_HSR(const UnitCell& ucell, const Parallel_Orbitals& pv, LCAO_HS_Arrays& HS_Arrays, @@ -20,7 +80,8 @@ void sparse_format::cal_HSR(const UnitCell& ucell, const std::vector>>>* Hexxd, const std::vector>>>>* Hexxc #endif -) { +) +{ ModuleBase::TITLE("sparse_format", "cal_HSR"); sparse_format::set_R_range(HS_Arrays.all_R_coor, grid); @@ -28,19 +89,24 @@ void sparse_format::cal_HSR(const UnitCell& ucell, const int nspin = PARAM.inp.nspin; // cal_STN_R_sparse(current_spin, sparse_thr); - if (nspin == 1 || nspin == 2) { + if (nspin == 1 || nspin == 2) + { hamilt::HamiltLCAO, double>* p_ham_lcao - = dynamic_cast, double>*>( - p_ham); - - if (TD_Velocity::tddft_velocity) { - sparse_format::cal_HContainer_td( - pv, - current_spin, - sparse_thr, - *(p_ham_lcao->getHR()), - TD_Velocity::td_vel_op->HR_sparse_td_vel[current_spin]); - } else { + = dynamic_cast, double>*>(p_ham); + + HS_Arrays.all_R_coor = get_R_range(*(p_ham_lcao->getHR())); + + if (TD_Velocity::tddft_velocity) + { + sparse_format::cal_HContainer_td(pv, + current_spin, + sparse_thr, + *(p_ham_lcao->getHR()), + TD_Velocity::td_vel_op->HR_sparse_td_vel[current_spin]); + } + else + { + sparse_format::cal_HContainer_d(pv, current_spin, sparse_thr, @@ -48,49 +114,42 @@ void sparse_format::cal_HSR(const UnitCell& ucell, HS_Arrays.HR_sparse[current_spin]); } - sparse_format::cal_HContainer_d(pv, - current_spin, - sparse_thr, - *(p_ham_lcao->getSR()), - HS_Arrays.SR_sparse); - } else if (nspin == 4) { - hamilt::HamiltLCAO, std::complex>* - p_ham_lcao - = dynamic_cast, - std::complex>*>(p_ham); - - sparse_format::cal_HContainer_cd(pv, - current_spin, - sparse_thr, - *(p_ham_lcao->getHR()), - HS_Arrays.HR_soc_sparse); - - sparse_format::cal_HContainer_cd(pv, - current_spin, - sparse_thr, - *(p_ham_lcao->getSR()), - HS_Arrays.SR_soc_sparse); - } else { + sparse_format::cal_HContainer_d(pv, current_spin, sparse_thr, *(p_ham_lcao->getSR()), HS_Arrays.SR_sparse); + } + else if (nspin == 4) + { + hamilt::HamiltLCAO, std::complex>* p_ham_lcao + = dynamic_cast, std::complex>*>(p_ham); + + HS_Arrays.all_R_coor = get_R_range(*(p_ham_lcao->getHR())); + + sparse_format::cal_HContainer_cd(pv, current_spin, sparse_thr, *(p_ham_lcao->getHR()), HS_Arrays.HR_soc_sparse); + + sparse_format::cal_HContainer_cd(pv, current_spin, sparse_thr, *(p_ham_lcao->getSR()), HS_Arrays.SR_soc_sparse); + } + else + { ModuleBase::WARNING_QUIT("cal_HSR", "check the value of nspin."); } // only old DFT+U method need to cal extra contribution to HR - if (PARAM.inp.dft_plus_u == 2) { - if (nspin == 1 || nspin == 2) { - cal_HR_dftu(pv, - HS_Arrays.all_R_coor, - HS_Arrays.SR_sparse, - HS_Arrays.HR_sparse, - current_spin, - sparse_thr); - } else if (nspin == 4) { + if (PARAM.inp.dft_plus_u == 2) + { + if (nspin == 1 || nspin == 2) + { + cal_HR_dftu(pv, HS_Arrays.all_R_coor, HS_Arrays.SR_sparse, HS_Arrays.HR_sparse, current_spin, sparse_thr); + } + else if (nspin == 4) + { cal_HR_dftu_soc(pv, HS_Arrays.all_R_coor, HS_Arrays.SR_soc_sparse, HS_Arrays.HR_soc_sparse, current_spin, sparse_thr); - } else { + } + else + { ModuleBase::WARNING_QUIT("cal_HSR", "check the value of nspin."); } } @@ -98,26 +157,18 @@ void sparse_format::cal_HSR(const UnitCell& ucell, #ifdef __EXX #ifdef __MPI // if EXX is considered - if (GlobalC::exx_info.info_global.cal_exx) { + if (GlobalC::exx_info.info_global.cal_exx) + { + if (Hexxd && GlobalC::exx_info.info_ri.real_number) { - sparse_format::cal_HR_exx(ucell, - pv, - HS_Arrays, - current_spin, - sparse_thr, - nmp, - *Hexxd); + + sparse_format::cal_HR_exx(ucell, pv, HS_Arrays, current_spin, sparse_thr, nmp, *Hexxd); } else if (Hexxc && !GlobalC::exx_info.info_ri.real_number) { - sparse_format::cal_HR_exx(ucell, - pv, - HS_Arrays, - current_spin, - sparse_thr, - nmp, - *Hexxc); + + sparse_format::cal_HR_exx(ucell, pv, HS_Arrays, current_spin, sparse_thr, nmp, *Hexxc); } } #endif // __MPI @@ -133,30 +184,34 @@ void sparse_format::cal_HContainer_d( const int& current_spin, const double& sparse_thr, const hamilt::HContainer& hR, - std::map, - std::map>>& target) { + std::map, std::map>>& target) +{ ModuleBase::TITLE("sparse_format", "cal_HContainer_d"); auto row_indexes = pv.get_indexes_row(); auto col_indexes = pv.get_indexes_col(); - for (int iap = 0; iap < hR.size_atom_pairs(); ++iap) { + for (int iap = 0; iap < hR.size_atom_pairs(); ++iap) + { int atom_i = hR.get_atom_pair(iap).get_atom_i(); int atom_j = hR.get_atom_pair(iap).get_atom_j(); int start_i = pv.atom_begin_row[atom_i]; int start_j = pv.atom_begin_col[atom_j]; int row_size = pv.get_row_size(atom_i); int col_size = pv.get_col_size(atom_j); - for (int iR = 0; iR < hR.get_atom_pair(iap).get_R_size(); ++iR) { + for (int iR = 0; iR < hR.get_atom_pair(iap).get_R_size(); ++iR) + { auto& matrix = hR.get_atom_pair(iap).get_HR_values(iR); - const ModuleBase::Vector3 r_index - = hR.get_atom_pair(iap).get_R_index(iR); + const ModuleBase::Vector3 r_index = hR.get_atom_pair(iap).get_R_index(iR); Abfs::Vector3_Order dR(r_index.x, r_index.y, r_index.z); - for (int i = 0; i < row_size; ++i) { + for (int i = 0; i < row_size; ++i) + { int mu = row_indexes[start_i + i]; - for (int j = 0; j < col_size; ++j) { + for (int j = 0; j < col_size; ++j) + { int nu = col_indexes[start_j + j]; const auto& value_tmp = matrix.get_value(i, j); - if (std::abs(value_tmp) > sparse_thr) { + if (std::abs(value_tmp) > sparse_thr) + { target[dR][mu][nu] = value_tmp; } } @@ -172,31 +227,34 @@ void sparse_format::cal_HContainer_cd( const int& current_spin, const double& sparse_thr, const hamilt::HContainer>& hR, - std::map, - std::map>>>& - target) { + std::map, std::map>>>& target) +{ ModuleBase::TITLE("sparse_format", "cal_HContainer_cd"); auto row_indexes = pv.get_indexes_row(); auto col_indexes = pv.get_indexes_col(); - for (int iap = 0; iap < hR.size_atom_pairs(); ++iap) { + for (int iap = 0; iap < hR.size_atom_pairs(); ++iap) + { int atom_i = hR.get_atom_pair(iap).get_atom_i(); int atom_j = hR.get_atom_pair(iap).get_atom_j(); int start_i = pv.atom_begin_row[atom_i]; int start_j = pv.atom_begin_col[atom_j]; int row_size = pv.get_row_size(atom_i); int col_size = pv.get_col_size(atom_j); - for (int iR = 0; iR < hR.get_atom_pair(iap).get_R_size(); ++iR) { + for (int iR = 0; iR < hR.get_atom_pair(iap).get_R_size(); ++iR) + { auto& matrix = hR.get_atom_pair(iap).get_HR_values(iR); - const ModuleBase::Vector3 r_index - = hR.get_atom_pair(iap).get_R_index(iR); + const ModuleBase::Vector3 r_index = hR.get_atom_pair(iap).get_R_index(iR); Abfs::Vector3_Order dR(r_index.x, r_index.y, r_index.z); - for (int i = 0; i < row_size; ++i) { + for (int i = 0; i < row_size; ++i) + { int mu = row_indexes[start_i + i]; - for (int j = 0; j < col_size; ++j) { + for (int j = 0; j < col_size; ++j) + { int nu = col_indexes[start_j + j]; const auto& value_tmp = matrix.get_value(i, j); - if (std::abs(value_tmp) > sparse_thr) { + if (std::abs(value_tmp) > sparse_thr) + { target[dR][mu][nu] = value_tmp; } } @@ -212,32 +270,34 @@ void sparse_format::cal_HContainer_td( const int& current_spin, const double& sparse_thr, const hamilt::HContainer& hR, - std::map, - std::map>>>& - target) { + std::map, std::map>>>& target) +{ ModuleBase::TITLE("sparse_format", "cal_HContainer_td"); auto row_indexes = pv.get_indexes_row(); auto col_indexes = pv.get_indexes_col(); - for (int iap = 0; iap < hR.size_atom_pairs(); ++iap) { + for (int iap = 0; iap < hR.size_atom_pairs(); ++iap) + { int atom_i = hR.get_atom_pair(iap).get_atom_i(); int atom_j = hR.get_atom_pair(iap).get_atom_j(); int start_i = pv.atom_begin_row[atom_i]; int start_j = pv.atom_begin_col[atom_j]; int row_size = pv.get_row_size(atom_i); int col_size = pv.get_col_size(atom_j); - for (int iR = 0; iR < hR.get_atom_pair(iap).get_R_size(); ++iR) { + for (int iR = 0; iR < hR.get_atom_pair(iap).get_R_size(); ++iR) + { auto& matrix = hR.get_atom_pair(iap).get_HR_values(iR); - const ModuleBase::Vector3 r_index - = hR.get_atom_pair(iap).get_R_index(iR); + const ModuleBase::Vector3 r_index = hR.get_atom_pair(iap).get_R_index(iR); Abfs::Vector3_Order dR(r_index.x, r_index.y, r_index.z); - for (int i = 0; i < row_size; ++i) { + for (int i = 0; i < row_size; ++i) + { int mu = row_indexes[start_i + i]; - for (int j = 0; j < col_size; ++j) { + for (int j = 0; j < col_size; ++j) + { int nu = col_indexes[start_j + j]; - const auto& value_tmp - = std::complex(matrix.get_value(i, j), 0.0); - if (std::abs(value_tmp) > sparse_thr) { + const auto& value_tmp = std::complex(matrix.get_value(i, j), 0.0); + if (std::abs(value_tmp) > sparse_thr) + { target[dR][mu][nu] += value_tmp; } } @@ -249,35 +309,47 @@ void sparse_format::cal_HContainer_td( } // in case there are elements smaller than the threshold -void sparse_format::clear_zero_elements(LCAO_HS_Arrays& HS_Arrays, - const int& current_spin, - const double& sparse_thr) { +void sparse_format::clear_zero_elements(LCAO_HS_Arrays& HS_Arrays, const int& current_spin, const double& sparse_thr) +{ ModuleBase::TITLE("sparse_format", "clear_zero_elements"); - if (PARAM.inp.nspin != 4) { - for (auto& R_loop: HS_Arrays.HR_sparse[current_spin]) { - for (auto& row_loop: R_loop.second) { + if (PARAM.inp.nspin != 4) + { + for (auto& R_loop: HS_Arrays.HR_sparse[current_spin]) + { + for (auto& row_loop: R_loop.second) + { auto& col_map = row_loop.second; auto iter = col_map.begin(); - while (iter != col_map.end()) { - if (std::abs(iter->second) <= sparse_thr) { + while (iter != col_map.end()) + { + if (std::abs(iter->second) <= sparse_thr) + { col_map.erase(iter++); - } else { + } + else + { iter++; } } } } - if (TD_Velocity::tddft_velocity) { - for (auto& R_loop: - TD_Velocity::td_vel_op->HR_sparse_td_vel[current_spin]) { - for (auto& row_loop: R_loop.second) { + if (TD_Velocity::tddft_velocity) + { + for (auto& R_loop: TD_Velocity::td_vel_op->HR_sparse_td_vel[current_spin]) + { + for (auto& row_loop: R_loop.second) + { auto& col_map = row_loop.second; auto iter = col_map.begin(); - while (iter != col_map.end()) { - if (std::abs(iter->second) <= sparse_thr) { + while (iter != col_map.end()) + { + if (std::abs(iter->second) <= sparse_thr) + { col_map.erase(iter++); - } else { + } + else + { iter++; } } @@ -285,74 +357,90 @@ void sparse_format::clear_zero_elements(LCAO_HS_Arrays& HS_Arrays, } } - for (auto& R_loop: HS_Arrays.SR_sparse) { - for (auto& row_loop: R_loop.second) { + for (auto& R_loop: HS_Arrays.SR_sparse) + { + for (auto& row_loop: R_loop.second) + { auto& col_map = row_loop.second; auto iter = col_map.begin(); - while (iter != col_map.end()) { - if (std::abs(iter->second) <= sparse_thr) { + while (iter != col_map.end()) + { + if (std::abs(iter->second) <= sparse_thr) + { col_map.erase(iter++); - } else { + } + else + { iter++; } } } } - } else { - for (auto& R_loop: HS_Arrays.HR_soc_sparse) { - for (auto& row_loop: R_loop.second) { + } + else + { + for (auto& R_loop: HS_Arrays.HR_soc_sparse) + { + for (auto& row_loop: R_loop.second) + { auto& col_map = row_loop.second; auto iter = col_map.begin(); - while (iter != col_map.end()) { - if (std::abs(iter->second) <= sparse_thr) { + while (iter != col_map.end()) + { + if (std::abs(iter->second) <= sparse_thr) + { col_map.erase(iter++); - } else { + } + else + { iter++; } } // end while iter - } // end row loop - } // end R loop + } // end row loop + } // end R loop - for (auto& R_loop: HS_Arrays.SR_soc_sparse) { - for (auto& row_loop: R_loop.second) { + for (auto& R_loop: HS_Arrays.SR_soc_sparse) + { + for (auto& row_loop: R_loop.second) + { auto& col_map = row_loop.second; auto iter = col_map.begin(); - while (iter != col_map.end()) { - if (std::abs(iter->second) <= sparse_thr) { + while (iter != col_map.end()) + { + if (std::abs(iter->second) <= sparse_thr) + { col_map.erase(iter++); - } else { + } + else + { iter++; } } // end while iter - } // end row_loop - } // end R_loop + } // end row_loop + } // end R_loop } return; } -void sparse_format::destroy_HS_R_sparse(LCAO_HS_Arrays& HS_Arrays) { +void sparse_format::destroy_HS_R_sparse(LCAO_HS_Arrays& HS_Arrays) +{ ModuleBase::TITLE("sparse_format", "destroy_HS_R_sparse"); - if (PARAM.inp.nspin != 4) { - std::map, - std::map>> - empty_HR_sparse_up; - std::map, - std::map>> - empty_HR_sparse_down; - std::map, - std::map>> - empty_SR_sparse; + if (PARAM.inp.nspin != 4) + { + std::map, std::map>> empty_HR_sparse_up; + std::map, std::map>> empty_HR_sparse_down; + std::map, std::map>> empty_SR_sparse; HS_Arrays.HR_sparse[0].swap(empty_HR_sparse_up); HS_Arrays.HR_sparse[1].swap(empty_HR_sparse_down); HS_Arrays.SR_sparse.swap(empty_SR_sparse); - } else { - std::map, - std::map>>> + } + else + { + std::map, std::map>>> empty_HR_soc_sparse; - std::map, - std::map>>> + std::map, std::map>>> empty_SR_soc_sparse; HS_Arrays.HR_soc_sparse.swap(empty_HR_soc_sparse); HS_Arrays.SR_soc_sparse.swap(empty_SR_soc_sparse); diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/spar_hsr.h b/source/module_hamilt_lcao/hamilt_lcaodft/spar_hsr.h index bf80386678..8b0e89dd7a 100644 --- a/source/module_hamilt_lcao/hamilt_lcaodft/spar_hsr.h +++ b/source/module_hamilt_lcao/hamilt_lcaodft/spar_hsr.h @@ -3,50 +3,75 @@ #include "module_hamilt_lcao/hamilt_lcaodft/hamilt_lcao.h" -namespace sparse_format { - using TAC = std::pair>; - void cal_HSR(const UnitCell& ucell, - const Parallel_Orbitals& pv, - LCAO_HS_Arrays& HS_Arrays, - const Grid_Driver& grid, - const int& current_spin, - const double& sparse_thr, - const int (&nmp)[3], - hamilt::Hamilt>* p_ham +namespace sparse_format +{ +#ifdef __MPI +// Synchronize all processes' R coordinates, +// otherwise HS_Arrays.all_R_coor would have different sizes on different processes, +// causing MPI communication errors. +void sync_all_R_coor(std::set>& all_R_coor, MPI_Comm comm); +#endif + +template +std::set> get_R_range(const hamilt::HContainer& hR) +{ + std::set> all_R_coor; + for (int iap = 0; iap < hR.size_atom_pairs(); ++iap) + { + const hamilt::AtomPair& atom_pair = hR.get_atom_pair(iap); + for (int iR = 0; iR < atom_pair.get_R_size(); ++iR) + { + const auto& r_index = atom_pair.get_R_index(iR); + Abfs::Vector3_Order dR(r_index.x, r_index.y, r_index.z); + all_R_coor.insert(dR); + } + } + +#ifdef __MPI + // Fix: Sync all_R_coor across processes + sparse_format::sync_all_R_coor(all_R_coor, MPI_COMM_WORLD); +#endif + + return all_R_coor; +}; + +using TAC = std::pair>; +void cal_HSR(const UnitCell& ucell, + const Parallel_Orbitals& pv, + LCAO_HS_Arrays& HS_Arrays, + const Grid_Driver& grid, + const int& current_spin, + const double& sparse_thr, + const int (&nmp)[3], + hamilt::Hamilt>* p_ham #ifdef __EXX - , - const std::vector>>>* Hexxd = nullptr, - const std::vector>>>>* Hexxc = nullptr + , + const std::vector>>>* Hexxd = nullptr, + const std::vector>>>>* Hexxc = nullptr #endif - ); +); -void cal_HContainer_d( - const Parallel_Orbitals& pv, - const int& current_spin, - const double& sparse_threshold, - const hamilt::HContainer& hR, - std::map, - std::map>>& target); +void cal_HContainer_d(const Parallel_Orbitals& pv, + const int& current_spin, + const double& sparse_threshold, + const hamilt::HContainer& hR, + std::map, std::map>>& target); void cal_HContainer_cd( const Parallel_Orbitals& pv, const int& current_spin, const double& sparse_threshold, const hamilt::HContainer>& hR, - std::map, - std::map>>>& target); + std::map, std::map>>>& target); void cal_HContainer_td( const Parallel_Orbitals& pv, const int& current_spin, const double& sparse_threshold, const hamilt::HContainer& hR, - std::map, - std::map>>>& target); + std::map, std::map>>>& target); -void clear_zero_elements(LCAO_HS_Arrays& HS_Arrays, - const int& current_spin, - const double& sparse_thr); +void clear_zero_elements(LCAO_HS_Arrays& HS_Arrays, const int& current_spin, const double& sparse_thr); void destroy_HS_R_sparse(LCAO_HS_Arrays& HS_Arrays);