@@ -160,7 +160,9 @@ void test_performance(int lda, int nb, int nbands, MPI_Comm comm,int case_numb,
160160 MPI_Comm_size (comm, &nproc);
161161
162162 std::vector<T> h_mat, s_mat, wfc, h_psi, s_psi;
163+ #ifdef __ELPA
163164 std::vector<typename GetTypeReal<T>::type> ekb_elpa (lda);
165+ #endif
164166 std::vector<typename GetTypeReal<T>::type> ekb_scalap (lda);
165167 std::vector<typename GetTypeReal<T>::type> ekb_lapack (lda);
166168
@@ -176,32 +178,36 @@ void test_performance(int lda, int nb, int nbands, MPI_Comm comm,int case_numb,
176178 }
177179
178180 // store all the times in a vector
181+ #ifdef __ELPA
179182 std::vector<double > time_elpa (case_numb, 0 );
183+ #endif
180184 std::vector<double > time_scalap (case_numb, 0 );
181185 std::vector<double > time_lapack (case_numb, 0 );
182186
183187 if (my_rank == 0 ) { std::cout << " Random matrix " ;
184188}
185- for (int randomi = 0 ; randomi < case_numb; ++randomi)
189+ for (int randomi = 0 ; randomi < case_numb; ++randomi)
186190 {
187-
191+
188192 if (my_rank == 0 ) {
189193 std::cout << randomi << " " ;
190194 generate_random_hs (lda, randomi, h_mat, s_mat);
191195 }
192-
196+ auto start = std::chrono::high_resolution_clock::now ();
197+ auto end = std::chrono::high_resolution_clock::now ();
198+ #ifdef __ELPA
193199 // ELPA
194200 MPI_Barrier (comm);
195- auto start = std::chrono::high_resolution_clock::now ();
201+ start = std::chrono::high_resolution_clock::now ();
196202 for (int j=0 ;j<loop_numb;j++)
197203 {
198204 hsolver::diago_hs_para<T>(h_mat.data (), s_mat.data (), lda, nbands,ekb_elpa.data (), wfc.data (), comm, 1 , nb);
199205 MPI_Barrier (comm);
200206 }
201207 MPI_Barrier (comm);
202- auto end = std::chrono::high_resolution_clock::now ();
208+ end = std::chrono::high_resolution_clock::now ();
203209 time_elpa[randomi] = std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count ();
204-
210+ # endif
205211
206212 // scalapack
207213 start = std::chrono::high_resolution_clock::now ();
@@ -215,8 +221,8 @@ void test_performance(int lda, int nb, int nbands, MPI_Comm comm,int case_numb,
215221 time_scalap[randomi] = std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count ();
216222
217223 // LAPACK
218- if (my_rank == 0 )
219- {
224+ if (my_rank == 0 )
225+ {
220226 std::vector<T> h_tmp, s_tmp;
221227 start = std::chrono::high_resolution_clock::now ();
222228 base_device::DEVICE_CPU* ctx = {};
@@ -239,26 +245,34 @@ void test_performance(int lda, int nb, int nbands, MPI_Comm comm,int case_numb,
239245
240246 // COMPARE EKB
241247 for (int i = 0 ; i < nbands; ++i) {
242- typename GetTypeReal<T>::type diff_elpa_lapack = std::abs (ekb_elpa[i] - ekb_lapack[i]);
243248 typename GetTypeReal<T>::type diff_scalap_lapack = std::abs (ekb_scalap[i] - ekb_lapack[i]);
249+ #ifdef __ELPA
250+ typename GetTypeReal<T>::type diff_elpa_lapack = std::abs (ekb_elpa[i] - ekb_lapack[i]);
244251 if (diff_elpa_lapack > 1e-6 || diff_scalap_lapack > 1e-6 )
252+ #else
253+ if (diff_scalap_lapack > 1e-6 )
254+ #endif
245255 {
256+ #ifdef __ELPA
246257 std::cout << " eigenvalue " << i << " by ELPA: " << ekb_elpa[i] << std::endl;
258+ #endif
247259 std::cout << " eigenvalue " << i << " by Scalapack: " << ekb_scalap[i] << std::endl;
248260 std::cout << " eigenvalue " << i << " by Lapack: " << ekb_lapack[i] << std::endl;
249261 }
250262 }
251263 }
252- MPI_Barrier (comm);
264+ MPI_Barrier (comm);
253265
254266 }
255267
256268 if (my_rank == 0 )
257269 {
270+ #ifdef __ELPA
258271 std::cout << " \n ELPA Time : " ;
259272 for (int i=0 ; i < case_numb;i++)
260273 {std::cout << time_elpa[i] << " " ;}
261274 std::cout << std::endl;
275+ #endif
262276
263277 std::cout << " scalapack Time: " ;
264278 for (int i=0 ; i < case_numb;i++)
@@ -271,21 +285,29 @@ void test_performance(int lda, int nb, int nbands, MPI_Comm comm,int case_numb,
271285 std::cout << std::endl;
272286
273287 // print out the average time and speedup
288+ #ifdef __ELPA
274289 double avg_time_elpa = 0 ;
290+ #endif
275291 double avg_time_scalap = 0 ;
276292 double avg_time_lapack = 0 ;
277293 for (int i=0 ; i < case_numb;i++)
278294 {
295+ #ifdef __ELPA
279296 avg_time_elpa += time_elpa[i];
297+ #endif
280298 avg_time_scalap += time_scalap[i];
281299 avg_time_lapack += time_lapack[i];
282300 }
283301
302+ #ifdef __ELPA
284303 avg_time_elpa /= case_numb;
304+ #endif
285305 avg_time_scalap /= case_numb;
286306 avg_time_lapack /= case_numb;
287307 std::cout << " Average Lapack Time : " << avg_time_lapack << " ms" << std::endl;
308+ #ifdef __ELPA
288309 std::cout << " Average ELPA Time : " << avg_time_elpa << " ms, Speedup: " << avg_time_lapack / avg_time_elpa << std::endl;
310+ #endif
289311 std::cout << " Average Scalapack Time: " << avg_time_scalap << " ms, Speedup: " << avg_time_lapack / avg_time_scalap << std::endl;
290312 }
291313}
0 commit comments