@@ -251,7 +251,7 @@ void Parallel_Global::finalize_mpi()
251251
252252void Parallel_Global::init_pools (const int & NPROC,
253253 const int & MY_RANK,
254- const int & NSTOGROUP ,
254+ const int & BNDPAR ,
255255 const int & KPAR,
256256 int & NPROC_IN_STOGROUP,
257257 int & RANK_IN_STOGROUP,
@@ -266,7 +266,7 @@ void Parallel_Global::init_pools(const int& NPROC,
266266 // ----------------------------------------------------------
267267 Parallel_Global::divide_pools (NPROC,
268268 MY_RANK,
269- NSTOGROUP ,
269+ BNDPAR ,
270270 KPAR,
271271 NPROC_IN_STOGROUP,
272272 RANK_IN_STOGROUP,
@@ -314,7 +314,7 @@ void Parallel_Global::init_pools(const int& NPROC,
314314#ifdef __MPI
315315void Parallel_Global::divide_pools (const int & NPROC,
316316 const int & MY_RANK,
317- const int & NSTOGROUP ,
317+ const int & BNDPAR ,
318318 const int & KPAR,
319319 int & NPROC_IN_STOGROUP,
320320 int & RANK_IN_STOGROUP,
@@ -323,30 +323,55 @@ void Parallel_Global::divide_pools(const int& NPROC,
323323 int & RANK_IN_POOL,
324324 int & MY_POOL)
325325{
326- // Divide the global communicator into stogroups.
327- divide_mpi_groups (NPROC, NSTOGROUP, MY_RANK, NPROC_IN_STOGROUP, MY_STOGROUP, RANK_IN_STOGROUP, true );
328-
329- // (2) per process in each pool
330- divide_mpi_groups (NPROC_IN_STOGROUP, KPAR, RANK_IN_STOGROUP, NPROC_IN_POOL, MY_POOL, RANK_IN_POOL);
331-
332- int key = 1 ;
333- MPI_Comm_split (MPI_COMM_WORLD, MY_STOGROUP, key, &STO_WORLD);
334-
335- // ========================================================
336- // MPI_Comm_Split: Creates new communicators based on
337- // colors(2nd parameter) and keys(3rd parameter)
338- // Note: The color must be non-negative or MPI_UNDEFINED.
339- // ========================================================
340- MPI_Comm_split (STO_WORLD, MY_POOL, key, &POOL_WORLD);
341-
342- if (NPROC_IN_STOGROUP % KPAR == 0 )
326+ // Note: the order of k-point parallelization and band parallelization is important.
327+ // The order does not change the behavior of INTER_POOL or PARAPW_WORLD, and MY_POOL
328+ // and MY_STOGROUP remain the same as well.
329+ if (BNDPAR > 1 && NPROC %(BNDPAR * KPAR) != 0 )
343330 {
344- MPI_Comm_split (STO_WORLD, RANK_IN_POOL, key, &INTER_POOL);
331+ std::cout << " Error: When BNDPAR = " << BNDPAR << " > 1, number of processes (" << NPROC << " ) must be divisible by the number of groups ("
332+ << BNDPAR * KPAR << " )." << std::endl;
333+ exit (1 );
334+ }
335+ // k-point parallelization
336+ MPICommGroup kpar_group (MPI_COMM_WORLD);
337+ kpar_group.divide_group_comm (KPAR, false );
338+
339+ // band parallelization
340+ MPICommGroup bndpar_group (kpar_group.group_comm );
341+ bndpar_group.divide_group_comm (BNDPAR, true );
342+
343+ // Set parallel index.
344+ // In previous versions, the order of k-point parallelization and band parallelization was reversed,
345+ // so some variables need to be kept for compatibility.
346+ NPROC_IN_POOL = bndpar_group.nprocs_in_group ;
347+ RANK_IN_POOL = bndpar_group.rank_in_group ;
348+ MY_POOL = kpar_group.my_group ;
349+ MPI_Comm_dup (bndpar_group.group_comm , &POOL_WORLD);
350+ if (kpar_group.inter_comm != MPI_COMM_NULL)
351+ {
352+ MPI_Comm_dup (kpar_group.inter_comm , &INTER_POOL);
353+ }
354+ else
355+ {
356+ INTER_POOL = MPI_COMM_NULL;
357+ }
358+
359+ if (BNDPAR > 1 )
360+ {
361+ NPROC_IN_STOGROUP = kpar_group.ngroups * bndpar_group.nprocs_in_group ;
362+ RANK_IN_STOGROUP = kpar_group.my_group * bndpar_group.nprocs_in_group + bndpar_group.rank_in_group ;
363+ MY_STOGROUP = bndpar_group.my_group ;
364+ MPI_Comm_split (MPI_COMM_WORLD, MY_STOGROUP, RANK_IN_STOGROUP, &STO_WORLD);
365+ MPI_Comm_dup (bndpar_group.inter_comm , &PARAPW_WORLD);
366+ }
367+ else
368+ {
369+ NPROC_IN_STOGROUP = NPROC;
370+ RANK_IN_STOGROUP = MY_RANK;
371+ MY_STOGROUP = 0 ;
372+ MPI_Comm_dup (MPI_COMM_WORLD, &STO_WORLD);
373+ MPI_Comm_split (MPI_COMM_WORLD, MY_RANK, 0 , &PARAPW_WORLD);
345374 }
346-
347- int color = MY_RANK % NPROC_IN_STOGROUP;
348- MPI_Comm_split (MPI_COMM_WORLD, color, key, &PARAPW_WORLD);
349-
350375 return ;
351376}
352377
@@ -380,31 +405,17 @@ void Parallel_Global::divide_mpi_groups(const int& procs,
380405 exit (1 );
381406 }
382407
383- int * nproc_group_ = new int [num_groups];
384-
385- for (int i = 0 ; i < num_groups; i++)
408+ if (rank < extra_procs)
386409 {
387- nproc_group_[i] = procs_in_group;
388- if (i < extra_procs)
389- {
390- ++nproc_group_[i];
391- }
410+ procs_in_group++;
411+ my_group = rank / procs_in_group;
412+ rank_in_group = rank % procs_in_group;
392413 }
393-
394- int np_now = 0 ;
395- for (int i = 0 ; i < num_groups; i++)
414+ else
396415 {
397- np_now += nproc_group_[i];
398- if (rank < np_now)
399- {
400- my_group = i;
401- procs_in_group = nproc_group_[i];
402- rank_in_group = rank - (np_now - procs_in_group);
403- break ;
404- }
416+ my_group = (rank - extra_procs) / procs_in_group;
417+ rank_in_group = (rank - extra_procs) % procs_in_group;
405418 }
406-
407- delete[] nproc_group_;
408419}
409420
410421#endif
0 commit comments