@@ -16,10 +16,12 @@ Parallel_Grid::~Parallel_Grid()
1616 delete[] numz[ip];
1717 delete[] startz[ip];
1818 delete[] whichpro[ip];
19+ delete[] whichpro_loc[ip];
1920 }
2021 delete[] numz;
2122 delete[] startz;
2223 delete[] whichpro;
24+ delete[] whichpro_loc;
2325 delete[] nproc_in_pool;
2426 }
2527}
@@ -70,10 +72,12 @@ void Parallel_Grid::init(
7072 delete[] numz[ip];
7173 delete[] startz[ip];
7274 delete[] whichpro[ip];
75+ delete[] whichpro_loc[ip];
7376 }
7477 delete[] numz;
7578 delete[] startz;
7679 delete[] whichpro;
80+ delete[] whichpro_loc;
7781 delete[] nproc_in_pool;
7882 this ->allocate = false ;
7983 }
@@ -99,16 +103,19 @@ void Parallel_Grid::init(
99103 this ->numz = new int *[GlobalV::KPAR];
100104 this ->startz = new int *[GlobalV::KPAR];
101105 this ->whichpro = new int *[GlobalV::KPAR];
106+ this ->whichpro_loc = new int *[GlobalV::KPAR];
102107
103108 for (int ip=0 ; ip<GlobalV::KPAR; ip++)
104109 {
105110 const int nproc = nproc_in_pool[ip];
106111 this ->numz [ip] = new int [nproc];
107112 this ->startz [ip] = new int [nproc];
108113 this ->whichpro [ip] = new int [this ->ncz ];
114+ this ->whichpro_loc [ip] = new int [this ->ncz ];
109115 ModuleBase::GlobalFunc::ZEROS (this ->numz [ip], nproc);
110116 ModuleBase::GlobalFunc::ZEROS (this ->startz [ip], nproc);
111117 ModuleBase::GlobalFunc::ZEROS (this ->whichpro [ip], this ->ncz );
118+ ModuleBase::GlobalFunc::ZEROS (this ->whichpro_loc [ip], this ->ncz );
112119 }
113120
114121 this ->allocate = true ;
@@ -163,11 +170,13 @@ void Parallel_Grid::z_distribution()
163170 if (iz>=startz[ip][nproc-1 ])
164171 {
165172 whichpro[ip][iz] = startp[ip] + nproc-1 ;
173+ whichpro_loc[ip][iz] = nproc-1 ;
166174 break ;
167175 }
168176 else if (iz>=startz[ip][proc] && iz<startz[ip][proc+1 ])
169177 {
170178 whichpro[ip][iz] = startp[ip] + proc;
179+ whichpro_loc[ip][iz] = proc;
171180 break ;
172181 }
173182 }
@@ -353,49 +362,72 @@ void Parallel_Grid::reduce(double* rhotot, const double* const rhoin)const
353362
354363 // if not the first pool, wait here until processor 0
355364 // send the Barrier command.
356- if (GlobalV::MY_POOL!=0 )
357- {
358- return ;
359- }
365+
366+ // if(GlobalV::MY_POOL!=0)
367+ // {
368+ // return;
369+ // }
360370
361371 double * zpiece = new double [this ->ncxy ];
362372
363373 for (int iz=0 ; iz<this ->ncz ; iz++)
364374 {
365375 const int znow = iz - this ->startz [GlobalV::MY_POOL][GlobalV::RANK_IN_POOL];
366376 const int proc = this ->whichpro [GlobalV::MY_POOL][iz];
377+ const int proc_loc = this ->whichpro_loc [GlobalV::MY_POOL][iz]; // Obtain the local processor index in the pool
367378 ModuleBase::GlobalFunc::ZEROS (zpiece, this ->ncxy );
368379 int tag = iz;
369380 MPI_Status ierror;
370381
371- // case 1: the first part of rho in processor 0.
372- if (proc == 0 && GlobalV::RANK_IN_POOL ==0 )
373- {
374- for (int ir=0 ; ir<ncxy; ir++)
375- {
376- zpiece[ir] = rhoin[ir*this ->nczp + znow];
377- }
378- }
382+ // // case 1: the first part of rho in processor 0.
383+ // if(proc == 0 && GlobalV::RANK_IN_POOL ==0)
384+ // {
385+ // for(int ir=0; ir<ncxy; ir++)
386+ // {
387+ // zpiece[ir] = rhoin[ir*this->nczp + znow];
388+ // }
389+ // }
390+
391+ // // case 2: > first part rho: send the rho to
392+ // // processor 0.
393+ // else if(proc == GlobalV::RANK_IN_POOL )
394+ // {
395+ // for(int ir=0; ir<ncxy; ir++)
396+ // {
397+ // zpiece[ir] = rhoin[ir*this->nczp + znow];
398+ // }
399+ // MPI_Send(zpiece, ncxy, MPI_DOUBLE, 0, tag, POOL_WORLD);
400+ // }
401+
402+ // The processor in the pool that owns plane iz copies it into zpiece
403+ if (proc_loc == GlobalV::RANK_IN_POOL)
404+ {
405+ for (int ir=0 ; ir<ncxy; ir++)
406+ {
407+ zpiece[ir] = rhoin[ir*this ->nczp + znow];
408+ }
409+ // Send data to the root of the pool
410+ if (GlobalV::RANK_IN_POOL != 0 )
411+ {
412+ MPI_Send (zpiece, ncxy, MPI_DOUBLE, 0 , tag, POOL_WORLD);
413+ }
414+ }
379415
380- // case 2: > first part rho: send the rho to
381- // processor 0.
382- else if (proc == GlobalV::RANK_IN_POOL )
383- {
384- for (int ir=0 ; ir<ncxy; ir++)
385- {
386- zpiece[ir] = rhoin[ir*this ->nczp + znow];
387- }
388- MPI_Send (zpiece, ncxy, MPI_DOUBLE, 0 , tag, POOL_WORLD);
389- }
416+ // // case 2: > first part rho: processor 0 receive the rho
417+ // // from other processors
418+ // else if(GlobalV::RANK_IN_POOL==0)
419+ // {
420+ // MPI_Recv(zpiece, ncxy, MPI_DOUBLE, proc, tag, POOL_WORLD, &ierror);
421+ // }
390422
391- // case 2: > first part rho: processor 0 receive the rho
392- // from other processors
393- else if (GlobalV::RANK_IN_POOL==0 )
394- {
395- MPI_Recv (zpiece, ncxy, MPI_DOUBLE, proc, tag, POOL_WORLD, &ierror);
396- }
423+ // The root of the pool receives data from other processors
424+ if (GlobalV::RANK_IN_POOL == 0 && proc_loc != GlobalV::RANK_IN_POOL)
425+ {
426+ MPI_Recv (zpiece, ncxy, MPI_DOUBLE, proc_loc, tag, POOL_WORLD, &ierror);
427+ }
397428
398- if (GlobalV::MY_RANK==0 )
429+ // if(GlobalV::MY_RANK==0)
430+ if (GlobalV::RANK_IN_POOL == 0 )
399431 {
400432 for (int ixy = 0 ; ixy < this ->ncxy ;++ixy)
401433 {
0 commit comments