Skip to content

Commit 9183cd2

Browse files
Fix: resolve compile error with USE_ELPA=OFF + BUILD_TESTING=ON and switch to nvtx3 headers when CUDA_VERSION >= 12090 (#6497)
* Fix: switch to nvtx3 headers when CUDA_VERSION >= 12090. Signed-off-by: Tianxiang Wang <[email protected]>, Contributed under MetaX Integrated Circuits (Shanghai) Co., Ltd. * Fix: resolve compile error with USE_ELPA=OFF + BUILD_TESTING=ON. Signed-off-by: Tianxiang Wang <[email protected]>, Contributed under MetaX Integrated Circuits (Shanghai) Co., Ltd.
1 parent 8f7d319 commit 9183cd2

File tree

3 files changed

+50
-17
lines changed

3 files changed

+50
-17
lines changed

source/source_base/timer.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,11 @@
 #include "source_base/formatter.h"

 #if defined(__CUDA) && defined(__USE_NVTX)
-#include <nvToolsExt.h>
+#if CUDA_VERSION < 12090
+#include "nvToolsExt.h"
+#else
+#include "nvtx3/nvToolsExt.h"
+#endif
 #include "source_io/module_parameter/parameter.h"
 #endif


source/source_hsolver/test/CMakeLists.txt

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -153,12 +153,19 @@ install(FILES diago_pexsi_parallel_test.sh DESTINATION ${CMAKE_CURRENT_BINARY_DIR})
 install(FILES parallel_k2d_test.sh DESTINATION ${CMAKE_CURRENT_BINARY_DIR})

-
-AddTest(
-  TARGET MODULE_HSOLVER_diago_hs_parallel
-  LIBS parameter ${math_libs} ELPA::ELPA base device MPI::MPI_CXX genelpa psi
-  SOURCES test_diago_hs_para.cpp ../diag_hs_para.cpp ../diago_pxxxgvx.cpp ../diago_elpa.cpp ../diago_scalapack.cpp
-)
+if (USE_ELPA)
+  AddTest(
+    TARGET MODULE_HSOLVER_diago_hs_parallel
+    LIBS parameter ${math_libs} ELPA::ELPA base device MPI::MPI_CXX genelpa psi
+    SOURCES test_diago_hs_para.cpp ../diag_hs_para.cpp ../diago_pxxxgvx.cpp ../diago_elpa.cpp ../diago_scalapack.cpp
+  )
+else()
+  AddTest(
+    TARGET MODULE_HSOLVER_diago_hs_parallel
+    LIBS parameter ${math_libs} base device MPI::MPI_CXX psi
+    SOURCES test_diago_hs_para.cpp ../diag_hs_para.cpp ../diago_pxxxgvx.cpp ../diago_scalapack.cpp
+  )
+endif()

 AddTest(
   TARGET MODULE_HSOLVER_linear_trans

source/source_hsolver/test/test_diago_hs_para.cpp

Lines changed: 32 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -160,7 +160,9 @@ void test_performance(int lda, int nb, int nbands, MPI_Comm comm,int case_numb,
     MPI_Comm_size(comm, &nproc);

     std::vector<T> h_mat, s_mat, wfc, h_psi, s_psi;
+#ifdef __ELPA
     std::vector<typename GetTypeReal<T>::type> ekb_elpa(lda);
+#endif
     std::vector<typename GetTypeReal<T>::type> ekb_scalap(lda);
     std::vector<typename GetTypeReal<T>::type> ekb_lapack(lda);

@@ -176,32 +178,36 @@ void test_performance(int lda, int nb, int nbands, MPI_Comm comm,int case_numb,
     }

     // store all the times in a vector
+#ifdef __ELPA
     std::vector<double> time_elpa(case_numb, 0);
+#endif
     std::vector<double> time_scalap(case_numb, 0);
     std::vector<double> time_lapack(case_numb, 0);

     if (my_rank == 0) { std::cout << "Random matrix ";
     }
-    for (int randomi = 0; randomi < case_numb; ++randomi)
+    for (int randomi = 0; randomi < case_numb; ++randomi)
     {
-
+
         if (my_rank == 0) {
             std::cout << randomi << " ";
             generate_random_hs(lda, randomi, h_mat, s_mat);
         }
-
+        auto start = std::chrono::high_resolution_clock::now();
+        auto end = std::chrono::high_resolution_clock::now();
+#ifdef __ELPA
         // ELPA
         MPI_Barrier(comm);
-        auto start = std::chrono::high_resolution_clock::now();
+        start = std::chrono::high_resolution_clock::now();
         for (int j=0;j<loop_numb;j++)
         {
             hsolver::diago_hs_para<T>(h_mat.data(), s_mat.data(), lda, nbands,ekb_elpa.data(), wfc.data(), comm, 1, nb);
             MPI_Barrier(comm);
         }
         MPI_Barrier(comm);
-        auto end = std::chrono::high_resolution_clock::now();
+        end = std::chrono::high_resolution_clock::now();
         time_elpa[randomi] = std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count();
-
+#endif

         // scalapack
         start = std::chrono::high_resolution_clock::now();
@@ -215,8 +221,8 @@ void test_performance(int lda, int nb, int nbands, MPI_Comm comm,int case_numb,
         time_scalap[randomi] = std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count();

         //LApack
-        if (my_rank == 0)
-        {
+        if (my_rank == 0)
+        {
             std::vector<T> h_tmp, s_tmp;
             start = std::chrono::high_resolution_clock::now();
             base_device::DEVICE_CPU* ctx = {};
@@ -239,26 +245,34 @@ void test_performance(int lda, int nb, int nbands, MPI_Comm comm,int case_numb,

             //COMPARE EKB
             for (int i = 0; i < nbands; ++i) {
-                typename GetTypeReal<T>::type diff_elpa_lapack = std::abs(ekb_elpa[i] - ekb_lapack[i]);
                 typename GetTypeReal<T>::type diff_scalap_lapack = std::abs(ekb_scalap[i] - ekb_lapack[i]);
+#ifdef __ELPA
+                typename GetTypeReal<T>::type diff_elpa_lapack = std::abs(ekb_elpa[i] - ekb_lapack[i]);
                 if (diff_elpa_lapack > 1e-6 || diff_scalap_lapack > 1e-6)
+#else
+                if (diff_scalap_lapack > 1e-6)
+#endif
                 {
+#ifdef __ELPA
                     std::cout << "eigenvalue " << i << " by ELPA: " << ekb_elpa[i] << std::endl;
+#endif
                     std::cout << "eigenvalue " << i << " by Scalapack: " << ekb_scalap[i] << std::endl;
                     std::cout << "eigenvalue " << i << " by Lapack: " << ekb_lapack[i] << std::endl;
                 }
             }
         }
-        MPI_Barrier(comm);
+        MPI_Barrier(comm);

     }

     if (my_rank == 0)
     {
+#ifdef __ELPA
         std::cout << "\nELPA Time : ";
         for (int i=0; i < case_numb;i++)
         {std::cout << time_elpa[i] << " ";}
         std::cout << std::endl;
+#endif

         std::cout << "scalapack Time: ";
         for (int i=0; i < case_numb;i++)
@@ -271,21 +285,29 @@ void test_performance(int lda, int nb, int nbands, MPI_Comm comm,int case_numb,
         std::cout << std::endl;

         // print out the average time and speedup
+#ifdef __ELPA
         double avg_time_elpa = 0;
+#endif
         double avg_time_scalap = 0;
         double avg_time_lapack = 0;
         for (int i=0; i < case_numb;i++)
         {
+#ifdef __ELPA
             avg_time_elpa += time_elpa[i];
+#endif
             avg_time_scalap += time_scalap[i];
             avg_time_lapack += time_lapack[i];
         }

+#ifdef __ELPA
         avg_time_elpa /= case_numb;
+#endif
         avg_time_scalap /= case_numb;
         avg_time_lapack /= case_numb;
         std::cout << "Average Lapack Time : " << avg_time_lapack << " ms" << std::endl;
+#ifdef __ELPA
         std::cout << "Average ELPA Time : " << avg_time_elpa << " ms, Speedup: " << avg_time_lapack / avg_time_elpa << std::endl;
+#endif
         std::cout << "Average Scalapack Time: " << avg_time_scalap << " ms, Speedup: " << avg_time_lapack / avg_time_scalap << std::endl;
     }
 }

0 commit comments

Comments
 (0)