diff --git a/source/module_base/global_variable.h b/source/module_base/global_variable.h index 9063298947..56629444d3 100644 --- a/source/module_base/global_variable.h +++ b/source/module_base/global_variable.h @@ -47,20 +47,9 @@ extern int GSIZE; extern int KPAR_LCAO; //========================================================== -// EXPLAIN : readin file dir, output file std::ofstream -// GLOBAL VARIABLES : -// NAME : global_in_card -// NAME : stru_file -// NAME : global_kpoint_card -// NAME : global_wannier_card -// NAME : global_pseudo_dir -// NAME : global_pseudo_type // mohan add 2013-05-20 (xiaohui add 2013-06-23) -// NAME : global_out_dir // NAME : ofs_running( contain information during runnnig) // NAME : ofs_warning( contain warning information, including error) //========================================================== -// extern std::string global_pseudo_type; // mohan add 2013-05-20 (xiaohui add -// 2013-06-23) extern std::ofstream ofs_running; extern std::ofstream ofs_warning; extern std::ofstream ofs_info; diff --git a/source/module_base/parallel_comm.cpp b/source/module_base/parallel_comm.cpp index 15504b15ad..7ede7efe4f 100644 --- a/source/module_base/parallel_comm.cpp +++ b/source/module_base/parallel_comm.cpp @@ -1,12 +1,55 @@ #if defined __MPI #include "mpi.h" +#include "parallel_global.h" MPI_Comm POOL_WORLD; -MPI_Comm INTER_POOL = MPI_COMM_NULL; // communicator among different pools +MPI_Comm INTER_POOL; // communicator among different pools MPI_Comm STO_WORLD; MPI_Comm PARAPW_WORLD; MPI_Comm GRID_WORLD; // mohan add 2012-01-13 MPI_Comm DIAG_WORLD; // mohan add 2012-01-13 +MPICommGroup::MPICommGroup(MPI_Comm parent_comm) + : parent_comm(parent_comm) +{ + MPI_Comm_size(parent_comm, &this->gsize); + MPI_Comm_rank(parent_comm, &this->grank); +} + +MPICommGroup::~MPICommGroup() +{ + if (group_comm != MPI_COMM_NULL) + { + MPI_Comm_free(&group_comm); + } + if (inter_comm != MPI_COMM_NULL) + { + MPI_Comm_free(&inter_comm); + } +} + +void 
MPICommGroup::divide_group_comm(const int& ngroup, const bool assert_even) +{ + this->ngroups = ngroup; + Parallel_Global::divide_mpi_groups(this->gsize, + ngroup, + this->grank, + this->nprocs_in_group, + this->my_group, + this->rank_in_group, + assert_even); + + MPI_Comm_split(parent_comm, my_group, rank_in_group, &group_comm); + if(this->gsize % ngroup == 0) + { + this->is_even = true; + } + + if (this->is_even) + { + MPI_Comm_split(parent_comm, my_inter, rank_in_inter, &inter_comm); + } +} + #endif \ No newline at end of file diff --git a/source/module_base/parallel_comm.h b/source/module_base/parallel_comm.h index 8feee297a4..c05772fa92 100644 --- a/source/module_base/parallel_comm.h +++ b/source/module_base/parallel_comm.h @@ -2,7 +2,6 @@ #define PARALLEL_COMM_H #ifdef __MPI - #include "mpi.h" extern MPI_Comm POOL_WORLD; extern MPI_Comm INTER_POOL; // communicator among different pools @@ -11,6 +10,33 @@ extern MPI_Comm PARAPW_WORLD; extern MPI_Comm GRID_WORLD; // mohan add 2012-01-13 extern MPI_Comm DIAG_WORLD; // mohan add 2012-01-13 + +class MPICommGroup +{ +public: + MPICommGroup(MPI_Comm parent_comm); + ~MPICommGroup(); + void divide_group_comm(const int& ngroup, const bool assert_even = true); +public: + bool is_even = false; ///< whether the group is even + + MPI_Comm parent_comm = MPI_COMM_NULL; ///< parent communicator + int gsize = 0; ///< size of parent communicator + int grank = 0; ///< rank of parent communicator + + MPI_Comm group_comm = MPI_COMM_NULL; ///< group communicator + int ngroups = 0; ///< number of groups + int nprocs_in_group = 0; ///< number of processes in the group + int my_group = 0; ///< the group index + int rank_in_group = 0; ///< the rank in the group + + MPI_Comm inter_comm = MPI_COMM_NULL; ///< inter communicator + bool has_inter_comm = false; ///< whether has inter communicator + int& nprocs_in_inter = ngroups; ///< number of processes in the inter communicator + int& my_inter = rank_in_group; ///< the index of the inter
communicator (split color) + int& rank_in_inter = my_group; ///< the rank in the inter communicator +}; + #endif #endif // PARALLEL_COMM_H \ No newline at end of file diff --git a/source/module_base/parallel_global.cpp b/source/module_base/parallel_global.cpp index b4ce7016aa..4081fd7207 100644 --- a/source/module_base/parallel_global.cpp +++ b/source/module_base/parallel_global.cpp @@ -251,7 +251,7 @@ void Parallel_Global::finalize_mpi() void Parallel_Global::init_pools(const int& NPROC, const int& MY_RANK, - const int& NSTOGROUP, + const int& BNDPAR, const int& KPAR, int& NPROC_IN_STOGROUP, int& RANK_IN_STOGROUP, @@ -266,7 +266,7 @@ void Parallel_Global::init_pools(const int& NPROC, //---------------------------------------------------------- Parallel_Global::divide_pools(NPROC, MY_RANK, - NSTOGROUP, + BNDPAR, KPAR, NPROC_IN_STOGROUP, RANK_IN_STOGROUP, @@ -314,7 +314,7 @@ void Parallel_Global::init_pools(const int& NPROC, #ifdef __MPI void Parallel_Global::divide_pools(const int& NPROC, const int& MY_RANK, - const int& NSTOGROUP, + const int& BNDPAR, const int& KPAR, int& NPROC_IN_STOGROUP, int& RANK_IN_STOGROUP, @@ -323,30 +323,55 @@ void Parallel_Global::divide_pools(const int& NPROC, int& RANK_IN_POOL, int& MY_POOL) { - // Divide the global communicator into stogroups. - divide_mpi_groups(NPROC, NSTOGROUP, MY_RANK, NPROC_IN_STOGROUP, MY_STOGROUP, RANK_IN_STOGROUP, true); - - // (2) per process in each pool - divide_mpi_groups(NPROC_IN_STOGROUP, KPAR, RANK_IN_STOGROUP, NPROC_IN_POOL, MY_POOL, RANK_IN_POOL); - - int key = 1; - MPI_Comm_split(MPI_COMM_WORLD, MY_STOGROUP, key, &STO_WORLD); - - //======================================================== - // MPI_Comm_Split: Creates new communicators based on - // colors(2nd parameter) and keys(3rd parameter) - // Note: The color must be non-negative or MPI_UNDEFINED.
- //======================================================== - MPI_Comm_split(STO_WORLD, MY_POOL, key, &POOL_WORLD); - - if (NPROC_IN_STOGROUP % KPAR == 0) + // note: the order of k-point parallelization and band parallelization is important + // The order will not change the behavior of INTER_POOL or PARAPW_WORLD, and MY_POOL + // and MY_STOGROUP will be the same as well. + if(BNDPAR > 1 && NPROC %(BNDPAR * KPAR) != 0) { - MPI_Comm_split(STO_WORLD, RANK_IN_POOL, key, &INTER_POOL); + std::cout << "Error: When BNDPAR = " << BNDPAR << " > 1, number of processes (" << NPROC << ") must be divisible by the number of groups (" + << BNDPAR * KPAR << ")." << std::endl; + exit(1); + } + // k-point parallelization + MPICommGroup kpar_group(MPI_COMM_WORLD); + kpar_group.divide_group_comm(KPAR, false); + + // band parallelization + MPICommGroup bndpar_group(kpar_group.group_comm); + bndpar_group.divide_group_comm(BNDPAR, true); + + // Set parallel index. + // In previous versions, the order of k-point parallelization and band parallelization is reversed. + // So we need to keep some variables for compatibility. 
+ NPROC_IN_POOL = bndpar_group.nprocs_in_group; + RANK_IN_POOL = bndpar_group.rank_in_group; + MY_POOL = kpar_group.my_group; + MPI_Comm_dup(bndpar_group.group_comm, &POOL_WORLD); + if(kpar_group.inter_comm != MPI_COMM_NULL) + { + MPI_Comm_dup(kpar_group.inter_comm, &INTER_POOL); + } + else + { + INTER_POOL = MPI_COMM_NULL; + } + + if(BNDPAR > 1) + { + NPROC_IN_STOGROUP = kpar_group.ngroups * bndpar_group.nprocs_in_group; + RANK_IN_STOGROUP = kpar_group.my_group * bndpar_group.nprocs_in_group + bndpar_group.rank_in_group; + MY_STOGROUP = bndpar_group.my_group; + MPI_Comm_split(MPI_COMM_WORLD, MY_STOGROUP, RANK_IN_STOGROUP, &STO_WORLD); + MPI_Comm_dup(bndpar_group.inter_comm, &PARAPW_WORLD); + } + else + { + NPROC_IN_STOGROUP = NPROC; + RANK_IN_STOGROUP = MY_RANK; + MY_STOGROUP = 0; + MPI_Comm_dup(MPI_COMM_WORLD, &STO_WORLD); + MPI_Comm_split(MPI_COMM_WORLD, MY_RANK, 0, &PARAPW_WORLD); } - - int color = MY_RANK % NPROC_IN_STOGROUP; - MPI_Comm_split(MPI_COMM_WORLD, color, key, &PARAPW_WORLD); - return; } @@ -380,31 +405,17 @@ void Parallel_Global::divide_mpi_groups(const int& procs, exit(1); } - int* nproc_group_ = new int[num_groups]; - - for (int i = 0; i < num_groups; i++) + if(rank < extra_procs) { - nproc_group_[i] = procs_in_group; - if (i < extra_procs) - { - ++nproc_group_[i]; - } + procs_in_group++; + my_group = rank / procs_in_group; + rank_in_group = rank % procs_in_group; } - - int np_now = 0; - for (int i = 0; i < num_groups; i++) + else { - np_now += nproc_group_[i]; - if (rank < np_now) - { - my_group = i; - procs_in_group = nproc_group_[i]; - rank_in_group = rank - (np_now - procs_in_group); - break; - } + my_group = (rank - extra_procs) / procs_in_group; + rank_in_group = (rank - extra_procs) % procs_in_group; } - - delete[] nproc_group_; } #endif diff --git a/source/module_base/parallel_global.h b/source/module_base/parallel_global.h index b87aae5b5d..1fcf933f7b 100644 --- a/source/module_base/parallel_global.h +++ 
b/source/module_base/parallel_global.h @@ -46,7 +46,7 @@ void split_grid_world(const int diag_np, const int& nproc, const int& my_rank, i */ void init_pools(const int& NPROC, const int& MY_RANK, - const int& NSTOGROUP, + const int& BNDPAR, const int& KPAR, int& NPROC_IN_STOGROUP, int& RANK_IN_STOGROUP, @@ -57,7 +57,7 @@ void init_pools(const int& NPROC, void divide_pools(const int& NPROC, const int& MY_RANK, - const int& NSTOGROUP, + const int& BNDPAR, const int& KPAR, int& NPROC_IN_STOGROUP, int& RANK_IN_STOGROUP, diff --git a/source/module_base/test_parallel/parallel_global_test.cpp b/source/module_base/test_parallel/parallel_global_test.cpp index 473ee4aab9..9c0972ebe8 100644 --- a/source/module_base/test_parallel/parallel_global_test.cpp +++ b/source/module_base/test_parallel/parallel_global_test.cpp @@ -4,6 +4,7 @@ #include "mpi.h" #include "gtest/gtest.h" +#include "gmock/gmock.h" #include #include #include @@ -165,8 +166,8 @@ TEST_F(ParaGlobal, InitPools) mpi.kpar = 3; mpi.nstogroup = 3; my_rank = 5; - - Parallel_Global::init_pools(nproc, + testing::internal::CaptureStdout(); + EXPECT_EXIT(Parallel_Global::init_pools(nproc, my_rank, mpi.nstogroup, mpi.kpar, @@ -175,45 +176,11 @@ TEST_F(ParaGlobal, InitPools) mpi.my_stogroup, mpi.nproc_in_pool, mpi.rank_in_pool, - mpi.my_pool); - EXPECT_EQ(mpi.nproc_in_stogroup, 4); - EXPECT_EQ(mpi.my_stogroup, 1); - EXPECT_EQ(mpi.rank_in_stogroup, 1); - EXPECT_EQ(mpi.my_pool, 0); - EXPECT_EQ(mpi.rank_in_pool, 1); - EXPECT_EQ(mpi.nproc_in_pool, 2); - EXPECT_EQ(MPI_COMM_WORLD != STO_WORLD, true); - EXPECT_EQ(STO_WORLD != POOL_WORLD, true); - EXPECT_EQ(MPI_COMM_WORLD != PARAPW_WORLD, true); + mpi.my_pool), ::testing::ExitedWithCode(1), ""); + std::string output = testing::internal::GetCapturedStdout(); + EXPECT_THAT(output, testing::HasSubstr("Error:")); } -TEST_F(ParaGlobal, DividePools) -{ - nproc = 12; - mpi.kpar = 3; - mpi.nstogroup = 3; - this->my_rank = 5; - - Parallel_Global::divide_pools(nproc, - this->my_rank, - 
mpi.nstogroup, - mpi.kpar, - mpi.nproc_in_stogroup, - mpi.rank_in_stogroup, - mpi.my_stogroup, - mpi.nproc_in_pool, - mpi.rank_in_pool, - mpi.my_pool); - EXPECT_EQ(mpi.nproc_in_stogroup, 4); - EXPECT_EQ(mpi.my_stogroup, 1); - EXPECT_EQ(mpi.rank_in_stogroup, 1); - EXPECT_EQ(mpi.my_pool, 0); - EXPECT_EQ(mpi.rank_in_pool, 1); - EXPECT_EQ(mpi.nproc_in_pool, 2); - EXPECT_EQ(MPI_COMM_WORLD != STO_WORLD, true); - EXPECT_EQ(STO_WORLD != POOL_WORLD, true); - EXPECT_EQ(MPI_COMM_WORLD != PARAPW_WORLD, true); -} TEST_F(ParaGlobal, DivideMPIPools) { diff --git a/source/module_elecstate/module_charge/charge_mpi.cpp b/source/module_elecstate/module_charge/charge_mpi.cpp index 76646b7895..61f01ea58b 100644 --- a/source/module_elecstate/module_charge/charge_mpi.cpp +++ b/source/module_elecstate/module_charge/charge_mpi.cpp @@ -8,7 +8,7 @@ #ifdef __MPI void Charge::init_chgmpi() { - if (GlobalV::NPROC_IN_STOGROUP % GlobalV::KPAR == 0) + if (INTER_POOL != MPI_COMM_NULL) { this->use_intel_pool = true; } diff --git a/source/module_elecstate/test_mpi/charge_mpi_test.cpp b/source/module_elecstate/test_mpi/charge_mpi_test.cpp index 9e748151eb..040f5953d6 100644 --- a/source/module_elecstate/test_mpi/charge_mpi_test.cpp +++ b/source/module_elecstate/test_mpi/charge_mpi_test.cpp @@ -66,7 +66,7 @@ TEST_F(ChargeMpiTest, reduce_diff_pools1) if (GlobalV::NPROC >= 2 && GlobalV::NPROC % 2 == 0) { GlobalV::KPAR = 2; - Parallel_Global::divide_pools(GlobalV::NPROC, + Parallel_Global::init_pools(GlobalV::NPROC, GlobalV::MY_RANK, PARAM.input.bndpar, GlobalV::KPAR, diff --git a/source/module_hamilt_pw/hamilt_stodft/sto_wf.cpp b/source/module_hamilt_pw/hamilt_stodft/sto_wf.cpp index 7ad8fdcc36..6d5accd3b0 100644 --- a/source/module_hamilt_pw/hamilt_stodft/sto_wf.cpp +++ b/source/module_hamilt_pw/hamilt_stodft/sto_wf.cpp @@ -72,7 +72,7 @@ void Stochastic_WF::init_sto_orbitals(const int seed_in) } else { - srand((unsigned)std::abs(seed_in) + GlobalV::MY_RANK * 10000); + srand((unsigned)std::abs(seed_in) + 
(GlobalV::MY_STOGROUP * GlobalV::NPROC_IN_STOGROUP + GlobalV::RANK_IN_STOGROUP) * 10000); } this->allocate_chi0();