Skip to content

Commit c59ae3f

Browse files
committed
Fix write_cube and pgrid reduce logic
1 parent 7f40c6b commit c59ae3f

File tree

3 files changed

+67
-31
lines changed

3 files changed

+67
-31
lines changed

source/module_hamilt_pw/hamilt_pwdft/parallel_grid.cpp

Lines changed: 61 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,12 @@ Parallel_Grid::~Parallel_Grid()
1616
delete[] numz[ip];
1717
delete[] startz[ip];
1818
delete[] whichpro[ip];
19+
delete[] whichpro_loc[ip];
1920
}
2021
delete[] numz;
2122
delete[] startz;
2223
delete[] whichpro;
24+
delete[] whichpro_loc;
2325
delete[] nproc_in_pool;
2426
}
2527
}
@@ -70,10 +72,12 @@ void Parallel_Grid::init(
7072
delete[] numz[ip];
7173
delete[] startz[ip];
7274
delete[] whichpro[ip];
75+
delete[] whichpro_loc[ip];
7376
}
7477
delete[] numz;
7578
delete[] startz;
7679
delete[] whichpro;
80+
delete[] whichpro_loc;
7781
delete[] nproc_in_pool;
7882
this->allocate = false;
7983
}
@@ -99,16 +103,19 @@ void Parallel_Grid::init(
99103
this->numz = new int*[GlobalV::KPAR];
100104
this->startz = new int*[GlobalV::KPAR];
101105
this->whichpro = new int*[GlobalV::KPAR];
106+
this->whichpro_loc = new int*[GlobalV::KPAR];
102107

103108
for(int ip=0; ip<GlobalV::KPAR; ip++)
104109
{
105110
const int nproc = nproc_in_pool[ip];
106111
this->numz[ip] = new int[nproc];
107112
this->startz[ip] = new int[nproc];
108113
this->whichpro[ip] = new int[this->ncz];
114+
this->whichpro_loc[ip] = new int[this->ncz];
109115
ModuleBase::GlobalFunc::ZEROS(this->numz[ip], nproc);
110116
ModuleBase::GlobalFunc::ZEROS(this->startz[ip], nproc);
111117
ModuleBase::GlobalFunc::ZEROS(this->whichpro[ip], this->ncz);
118+
ModuleBase::GlobalFunc::ZEROS(this->whichpro_loc[ip], this->ncz);
112119
}
113120

114121
this->allocate = true;
@@ -163,11 +170,13 @@ void Parallel_Grid::z_distribution()
163170
if(iz>=startz[ip][nproc-1])
164171
{
165172
whichpro[ip][iz] = startp[ip] + nproc-1;
173+
whichpro_loc[ip][iz] = nproc-1;
166174
break;
167175
}
168176
else if(iz>=startz[ip][proc] && iz<startz[ip][proc+1])
169177
{
170178
whichpro[ip][iz] = startp[ip] + proc;
179+
whichpro_loc[ip][iz] = proc;
171180
break;
172181
}
173182
}
@@ -353,49 +362,72 @@ void Parallel_Grid::reduce(double* rhotot, const double* const rhoin)const
353362

354363
// if not the first pool, wait here until processor 0
355364
// send the Barrier command.
356-
if(GlobalV::MY_POOL!=0)
357-
{
358-
return;
359-
}
365+
366+
// if(GlobalV::MY_POOL!=0)
367+
// {
368+
// return;
369+
// }
360370

361371
double* zpiece = new double[this->ncxy];
362372

363373
for(int iz=0; iz<this->ncz; iz++)
364374
{
365375
const int znow = iz - this->startz[GlobalV::MY_POOL][GlobalV::RANK_IN_POOL];
366376
const int proc = this->whichpro[GlobalV::MY_POOL][iz];
377+
const int proc_loc = this->whichpro_loc[GlobalV::MY_POOL][iz]; // Obtain the local processor index in the pool
367378
ModuleBase::GlobalFunc::ZEROS(zpiece, this->ncxy);
368379
int tag = iz;
369380
MPI_Status ierror;
370381

371-
// case 1: the first part of rho in processor 0.
372-
if(proc == 0 && GlobalV::RANK_IN_POOL ==0)
373-
{
374-
for(int ir=0; ir<ncxy; ir++)
375-
{
376-
zpiece[ir] = rhoin[ir*this->nczp + znow];
377-
}
378-
}
382+
// // case 1: the first part of rho in processor 0.
383+
// if(proc == 0 && GlobalV::RANK_IN_POOL ==0)
384+
// {
385+
// for(int ir=0; ir<ncxy; ir++)
386+
// {
387+
// zpiece[ir] = rhoin[ir*this->nczp + znow];
388+
// }
389+
// }
390+
391+
// // case 2: > first part rho: send the rho to
392+
// // processor 0.
393+
// else if(proc == GlobalV::RANK_IN_POOL )
394+
// {
395+
// for(int ir=0; ir<ncxy; ir++)
396+
// {
397+
// zpiece[ir] = rhoin[ir*this->nczp + znow];
398+
// }
399+
// MPI_Send(zpiece, ncxy, MPI_DOUBLE, 0, tag, POOL_WORLD);
400+
// }
401+
402+
// Local processor 0 collects data from all other processors in the pool
403+
if (proc_loc == GlobalV::RANK_IN_POOL)
404+
{
405+
for(int ir=0; ir<ncxy; ir++)
406+
{
407+
zpiece[ir] = rhoin[ir*this->nczp + znow];
408+
}
409+
// Send data to the root of the pool
410+
if (GlobalV::RANK_IN_POOL != 0)
411+
{
412+
MPI_Send(zpiece, ncxy, MPI_DOUBLE, 0, tag, POOL_WORLD);
413+
}
414+
}
379415

380-
// case 2: > first part rho: send the rho to
381-
// processor 0.
382-
else if(proc == GlobalV::RANK_IN_POOL )
383-
{
384-
for(int ir=0; ir<ncxy; ir++)
385-
{
386-
zpiece[ir] = rhoin[ir*this->nczp + znow];
387-
}
388-
MPI_Send(zpiece, ncxy, MPI_DOUBLE, 0, tag, POOL_WORLD);
389-
}
416+
// // case 2: > first part rho: processor 0 receive the rho
417+
// // from other processors
418+
// else if(GlobalV::RANK_IN_POOL==0)
419+
// {
420+
// MPI_Recv(zpiece, ncxy, MPI_DOUBLE, proc, tag, POOL_WORLD, &ierror);
421+
// }
390422

391-
// case 2: > first part rho: processor 0 receives the rho
392-
// from other processors
393-
else if(GlobalV::RANK_IN_POOL==0)
394-
{
395-
MPI_Recv(zpiece, ncxy, MPI_DOUBLE, proc, tag, POOL_WORLD, &ierror);
396-
}
423+
// The root of the pool receives data from other processors
424+
if (GlobalV::RANK_IN_POOL == 0 && proc_loc != GlobalV::RANK_IN_POOL)
425+
{
426+
MPI_Recv(zpiece, ncxy, MPI_DOUBLE, proc_loc, tag, POOL_WORLD, &ierror);
427+
}
397428

398-
if(GlobalV::MY_RANK==0)
429+
// if(GlobalV::MY_RANK==0)
430+
if (GlobalV::RANK_IN_POOL == 0)
399431
{
400432
for (int ixy = 0; ixy < this->ncxy;++ixy)
401433
{

source/module_hamilt_pw/hamilt_pwdft/parallel_grid.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ class Parallel_Grid
4848
int **numz = nullptr;
4949
int **startz = nullptr;
5050
int **whichpro = nullptr;
51+
int **whichpro_loc = nullptr;
5152

5253
int ncx=0;
5354
int ncy=0;

source/module_io/write_cube.cpp

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ void ModuleIO::write_vdata_palgrid(
2626

2727
const int my_rank = GlobalV::MY_RANK;
2828
const int my_pool = GlobalV::MY_POOL;
29+
const int rank_in_pool = GlobalV::RANK_IN_POOL;
2930

3031
time_t start;
3132
time_t end;
@@ -41,7 +42,8 @@ void ModuleIO::write_vdata_palgrid(
4142
// reduce
4243
std::vector<double> data_xyz_full(nxyz); // data to be written
4344
#ifdef __MPI // reduce to rank 0
44-
if (my_pool == 0 && GlobalV::MY_BNDGROUP == 0)
45+
// if (my_pool == 0 && GlobalV::MY_BNDGROUP == 0)
46+
if (GlobalV::MY_BNDGROUP == 0)
4547
{
4648
pgrid.reduce(data_xyz_full.data(), data);
4749
}
@@ -51,7 +53,8 @@ void ModuleIO::write_vdata_palgrid(
5153
#endif
5254

5355
// build the info structure
54-
if (my_rank == 0)
56+
// if (my_rank == 0)
57+
if (rank_in_pool == 0)
5558
{
5659
/// output header for cube file
5760
ss << "STEP: " << iter << " Cubefile created from ABACUS. Inner loop is z, followed by y and x" << std::endl;

0 commit comments

Comments
 (0)