 // DATE : 2009-11-08
 // ==========================================================
 #include "parallel_global.h"
+#include "mpi.h"
 #include "parallel_common.h"
 #include "parallel_reduce.h"
 #include "../module_base/global_function.h"
+#include <iostream>
 
 #ifdef _OPENMP
-#include <omp.h> // Peize Lin add 2018-02-13
+#include <omp.h>
+#include <thread>
 #endif
 
-using namespace std;
-
 #if defined __MPI
 MPI_Datatype mpicomplex;
 MPI_Op myOp;
@@ -147,21 +148,7 @@ void Parallel_Global::split_grid_world(const int &diag_np)
 
 void Parallel_Global::read_mpi_parameters(int argc, char **argv)
 {
-#if defined __MPI
-    // for test
-    /*
-    std::cout << "\n Hello! Test MPI NOW : argc = " << argc << std::endl;
-    for (int i=0; i<argc; i++)
-    {
-        std::cout << "\n argv[" << i << "]=" << argv[i];
-    }
-    std::cout << std::endl;
-    */
-
-#ifdef _OPENMP
-    // omp_set_nested(true); // Peize Lin add 2018-02-13
-#endif
-
+#ifdef __MPI
 #ifdef _OPENMP
     int provided;
     MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &provided);
@@ -171,67 +158,71 @@ void Parallel_Global::read_mpi_parameters(int argc,char **argv)
     // MPI_THREAD_FUNNELED is enough for ABACUS. Using MPI_THREAD_SERIALIZED for elpa.
 #else
     MPI_Init(&argc, &argv); // Peize Lin change 2018-07-12
-#endif
-    // ----------------------------------------------------------
-    // int atoi ( const char * str );
-    // atoi : Convert string to int type
-    // atof : Convert string to double type
-    // atol : Convert string to long int type
-    // ----------------------------------------------------------
-    // GlobalV::KPAR = atoi(argv[1]); // mohan abandon 2010-06-09
+#endif // _OPENMP
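
Background on the MPI_THREAD_SERIALIZED request above: MPI_Init_thread reports the thread-support level the library actually granted, which may be lower than requested. A minimal standalone sketch of that check (illustrative only, not part of this commit):

#include <mpi.h>
#include <cstdio>

int main(int argc, char **argv)
{
    int provided;
    // SERIALIZED: multiple threads may exist, but only one calls MPI at a time
    MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &provided);
    if (provided < MPI_THREAD_SERIALIZED)
        std::fprintf(stderr, "warning: MPI granted thread level %d only\n", provided);
    MPI_Finalize();
    return 0;
}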
+
+    // GlobalV::KPAR = atoi(argv[1]); // mohan abandon 2010-06-09
 
     // get the size --> GlobalV::NPROC
     // get the rank --> GlobalV::MY_RANK
     MPI_Comm_size(MPI_COMM_WORLD,&GlobalV::NPROC);
-    MPI_Comm_rank(MPI_COMM_WORLD,&GlobalV::MY_RANK);
+    MPI_Comm_rank(MPI_COMM_WORLD, &GlobalV::MY_RANK);
 
+    // determine an appropriate thread number for OpenMP
+#ifdef _OPENMP
+    const int max_thread_num = std::thread::hardware_concurrency(); // hardware threads per node; assumes hyperthreading is disabled
+    int current_thread_num = omp_get_max_threads();
+    MPI_Comm shmcomm;
+    MPI_Comm_split_type(MPI_COMM_WORLD, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL, &shmcomm); // ranks sharing this node
+    int process_num, local_rank;
+    MPI_Comm_size(shmcomm, &process_num);
+    MPI_Comm_rank(shmcomm, &local_rank);
+    MPI_Comm_free(&shmcomm);
+    const int desired_thread_num = (max_thread_num > process_num) ? max_thread_num / process_num : 1; // at least one thread per rank
+    if (desired_thread_num != current_thread_num && current_thread_num == max_thread_num)
+    {
+        // the OpenMP thread number was not set by the user: divide the node evenly
+        omp_set_num_threads(desired_thread_num);
+        current_thread_num = omp_get_max_threads();
+    }
+    if (current_thread_num * process_num != max_thread_num && local_rank == 0)
+    {
+        // only print the warning once per node (local rank 0)
+        std::cerr << " WARNING: The total thread number on this node does not match the hardware availability."
+                     " This may cause poor performance." << std::endl;
+        std::cerr << " Info: Local MPI proc number: " << process_num << ","
+                  << " OpenMP thread number: " << current_thread_num << ","
+                  << " Total thread number: " << current_thread_num * process_num << ","
+                  << " Local thread limit: " << max_thread_num << std::endl;
+    }
+#endif
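
The block added above splits MPI_COMM_WORLD into per-node shared-memory communicators to count the ranks on each node, then divides the node's hardware threads evenly among them. A minimal standalone sketch of the same technique (illustrative, outside this commit):

#include <mpi.h>
#include <thread>
#include <cstdio>

int main(int argc, char **argv)
{
    MPI_Init(&argc, &argv);
    // group the ranks that share this node's memory into one communicator
    MPI_Comm shmcomm;
    MPI_Comm_split_type(MPI_COMM_WORLD, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL, &shmcomm);
    int procs_on_node;
    MPI_Comm_size(shmcomm, &procs_on_node);
    MPI_Comm_free(&shmcomm);
    // split the node's hardware threads evenly among its MPI processes
    int threads_each = (int)std::thread::hardware_concurrency() / procs_on_node;
    if (threads_each < 1) threads_each = 1; // oversubscribed node: fall back to one thread
    std::printf("%d rank(s) on this node, %d thread(s) each\n", procs_on_node, threads_each);
    MPI_Finalize();
    return 0;
}

MPI_COMM_TYPE_SHARED is the portable way to count ranks per node; it avoids fragile alternatives such as comparing hostnames.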
 
+    if (GlobalV::MY_RANK == 0)
+    {
+        std::cout << " *********************************************************" << std::endl;
+        std::cout << " *                                                       *" << std::endl;
+        std::cout << " *                  WELCOME TO ABACUS                    *" << std::endl;
+        std::cout << " *                                                       *" << std::endl;
+        std::cout << " *            'Atomic-orbital Based Ab-initio            *" << std::endl;
+        std::cout << " *                 Computation at UStc'                  *" << std::endl;
+        std::cout << " *                                                       *" << std::endl;
+        std::cout << " *          Website: http://abacus.ustc.edu.cn/          *" << std::endl;
+        std::cout << " *                                                       *" << std::endl;
+        std::cout << " *********************************************************" << std::endl;
+        time_t time_now = time(NULL);
+        std::cout << " " << ctime(&time_now);
+    }
 
-    // for test
+    // for test
+    /*
     for (int i=0; i<GlobalV::NPROC; i++)
     {
         if (GlobalV::MY_RANK == i)
         {
-            if (i==0)
-            {
-                /*
-                printf("\n\e[33m%s\e[0m\n", " ===================================================");
-                printf("\e[33m%s\e[0m",     " WELCOME");
-                printf("\e[33m%s\e[0m",     " TO");
-                printf("\e[33m%s\e[0m",     " ESP");
-                printf("\e[33m%s\e[0m\n",   " WORLD ");
-                printf("\e[33m%s\e[0m\n",   " ===================================================");
-                */
-                // xiaohui modify 2015-03-25
-                /*
-                std::cout << " *********************************************************" << std::endl;
-                std::cout << " *                                                       *" << std::endl;
-                std::cout << " *                  WELCOME TO MESIA                     *" << std::endl;
-                std::cout << " *                                                       *" << std::endl;
-                std::cout << " *        'Massive Electronic simulation based on        *" << std::endl;
-                std::cout << " *        Systematically Improvable Atomic bases'        *" << std::endl;
-                std::cout << " *                                                       *" << std::endl;
-                std::cout << " *********************************************************" << std::endl;
-                */
-                std::cout << " *********************************************************" << std::endl;
-                std::cout << " *                                                       *" << std::endl;
-                std::cout << " *                  WELCOME TO ABACUS                    *" << std::endl;
-                std::cout << " *                                                       *" << std::endl;
-                std::cout << " *            'Atomic-orbital Based Ab-initio            *" << std::endl;
-                std::cout << " *                 Computation at UStc'                  *" << std::endl;
-                std::cout << " *                                                       *" << std::endl;
-                std::cout << " *          Website: http://abacus.ustc.edu.cn/          *" << std::endl;
-                std::cout << " *                                                       *" << std::endl;
-                std::cout << " *********************************************************" << std::endl;
-
-                // std::cout << " <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<" << std::endl;
-                time_t time_now = time(NULL);
-                std::cout << " " << ctime(&time_now);
-            }
-            // std::cout << " PROCESSOR " << std::setw(4) << GlobalV::MY_RANK+1 << " IS READY." << std::endl;
+            std::cout << " PROCESSOR " << std::setw(4) << GlobalV::MY_RANK+1 << " IS READY." << std::endl;
         }
         MPI_Barrier(MPI_COMM_WORLD);
     }
+    */
 
     // This section can be chosen !!
     // mohan 2011-03-15
@@ -260,6 +251,6 @@ void Parallel_Global::read_mpi_parameters(int argc,char **argv)
     MPI_Type_commit(&mpicomplex);
     MPI_Op_create((MPI_User_function *)Parallel_Global::myProd, 1, &myOp);
 
-#endif
+#endif // __MPI
     return;
 }
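
For reference, the mpicomplex/myOp pair committed above follows the standard MPI recipe for a user-defined reduction over a custom datatype. A standalone sketch of that recipe, with two stated assumptions: the myProd body (an element-wise complex product) and the MPI_Type_contiguous construction are illustrative, since the diff shows neither:

#include <mpi.h>
#include <complex>
#include <iostream>

// signature required by MPI_Op_create: combine 'in' into 'inout', element by element
void myProd(void *in, void *inout, int *len, MPI_Datatype *dptr)
{
    std::complex<double> *a = static_cast<std::complex<double> *>(in);
    std::complex<double> *b = static_cast<std::complex<double> *>(inout);
    for (int i = 0; i < *len; ++i)
        b[i] *= a[i];
}

int main(int argc, char **argv)
{
    MPI_Init(&argc, &argv);
    MPI_Datatype mpicomplex;
    MPI_Type_contiguous(2, MPI_DOUBLE, &mpicomplex); // {real, imag} as two doubles
    MPI_Type_commit(&mpicomplex);
    MPI_Op myOp;
    MPI_Op_create((MPI_User_function *)myProd, 1 /* commutative */, &myOp);

    int rank;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    std::complex<double> local(1.0, 1.0), product;
    MPI_Allreduce(&local, &product, 1, mpicomplex, myOp, MPI_COMM_WORLD);
    if (rank == 0) std::cout << "product = " << product << std::endl;

    MPI_Op_free(&myOp);
    MPI_Type_free(&mpicomplex);
    MPI_Finalize();
    return 0;
}

The commute flag passed to MPI_Op_create is 1, telling MPI the operation is commutative and freeing it to combine partial results in any order.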