From 200a9cf5a17bb08df8458e3716f19e1c47d5151a Mon Sep 17 00:00:00 2001 From: root Date: Thu, 21 Nov 2024 10:33:44 +0800 Subject: [PATCH 01/12] feature: parallel solve subspace diagonalization in dav_subspace --- docs/advanced/input_files/input-main.md | 14 +- .../src/hsolver/py_diago_dav_subspace.hpp | 4 +- source/module_base/blacs_connector.h | 2 +- source/module_base/scalapack_connector.h | 11 + source/module_hsolver/CMakeLists.txt | 3 + source/module_hsolver/diag_hs_para.cpp | 201 ++++++++++ source/module_hsolver/diag_hs_para.h | 47 +++ source/module_hsolver/diago_dav_subspace.cpp | 126 ++++-- source/module_hsolver/diago_dav_subspace.h | 7 +- source/module_hsolver/diago_pxxxgvx.cpp | 344 +++++++++++++++++ source/module_hsolver/diago_pxxxgvx.h | 37 ++ source/module_hsolver/hsolver_pw.cpp | 4 +- source/module_hsolver/test/CMakeLists.txt | 20 +- .../test/test_diago_hs_para.cpp | 359 ++++++++++++++++++ source/module_io/read_input_item_system.cpp | 6 + source/module_lr/hsolver_lrtd.hpp | 4 +- source/module_parameter/input_parameter.h | 1 + .../integrate/102_PW_DS_davsubspace_sca/INPUT | 29 ++ tests/integrate/102_PW_DS_davsubspace_sca/KPT | 4 + .../integrate/102_PW_DS_davsubspace_sca/STRU | 19 + tests/integrate/102_PW_DS_davsubspace_sca/jd | 1 + .../102_PW_DS_davsubspace_sca/result.ref | 6 + tests/integrate/CASES_CPU.txt | 1 + 23 files changed, 1202 insertions(+), 48 deletions(-) create mode 100644 source/module_hsolver/diag_hs_para.cpp create mode 100644 source/module_hsolver/diag_hs_para.h create mode 100644 source/module_hsolver/diago_pxxxgvx.cpp create mode 100644 source/module_hsolver/diago_pxxxgvx.h create mode 100644 source/module_hsolver/test/test_diago_hs_para.cpp create mode 100644 tests/integrate/102_PW_DS_davsubspace_sca/INPUT create mode 100644 tests/integrate/102_PW_DS_davsubspace_sca/KPT create mode 100644 tests/integrate/102_PW_DS_davsubspace_sca/STRU create mode 100644 tests/integrate/102_PW_DS_davsubspace_sca/jd create mode 100644 tests/integrate/102_PW_DS_davsubspace_sca/result.ref diff --git a/docs/advanced/input_files/input-main.md b/docs/advanced/input_files/input-main.md index d8491bbf71..789e7428b5 100644 --- a/docs/advanced/input_files/input-main.md +++ b/docs/advanced/input_files/input-main.md @@ -39,6 +39,7 @@ - [pw\_diag\_thr](#pw_diag_thr) - [pw\_diag\_nmax](#pw_diag_nmax) - [pw\_diag\_ndim](#pw_diag_ndim) + - [diag\_subspace\_method](#diag_subspace_method) - [erf\_ecut](#erf_ecut) - [fft\_mode](#fft_mode) - [erf\_height](#erf_height) @@ -783,7 +784,18 @@ These variables are used to control the plane wave related parameters. - **Type**: Integer - **Description**: Only useful when you use `ks_solver = dav` or `ks_solver = dav_subspace`. It indicates dimension of workspace(number of wavefunction packets, at least 2 needed) for the Davidson method. A larger value may yield a smaller number of iterations in the algorithm but uses more memory and more CPU time in subspace diagonalization. -- **Default**: 4 +- **Default**: 4 + +### diag_subspace_method + +- **Type**: Integer +- **Description**: The method to diagonalize subspace in dav_subspace method. The available options are: + - 0: by LAPACK + - 1: by GenELPA + - 2: by ScaLAPACK + LAPACK only solve in one core, GenELPA and ScaLAPACK can solve in parallel. If the system is small (such as the band number is less than 100), LAPACK is recommended. If the system is large and MPI parallel is used, then GenELPA or ScaLAPACK is recommended, and GenELPA usually has better performance. For GenELPA and ScaLAPACK, the block size can be set by [nb2d](#nb2d). + +- **Default**: 0 ### erf_ecut diff --git a/python/pyabacus/src/hsolver/py_diago_dav_subspace.hpp b/python/pyabacus/src/hsolver/py_diago_dav_subspace.hpp index 8ef539902e..0eff2467fe 100644 --- a/python/pyabacus/src/hsolver/py_diago_dav_subspace.hpp +++ b/python/pyabacus/src/hsolver/py_diago_dav_subspace.hpp @@ -138,7 +138,9 @@ class PyDiagoDavSubspace tol, max_iter, need_subspace, - comm_info + comm_info, + PARAM.inp.diag_subspace_method, + PARAM.inp.nb2d ); return obj->diag(hpsi_func, psi, nbasis, eigenvalue, diag_ethr.data(), scf_type); diff --git a/source/module_base/blacs_connector.h b/source/module_base/blacs_connector.h index 3bcc43811a..61c67324e8 100644 --- a/source/module_base/blacs_connector.h +++ b/source/module_base/blacs_connector.h @@ -39,7 +39,7 @@ extern "C" // Informational and Miscellaneous void Cblacs_gridinfo(int icontxt, int* nprow, int *npcol, int *myprow, int *mypcol); void Cblacs_gridinit(int* icontxt, char* layout, int nprow, int npcol); - void Cblacs_gridexit(int* icontxt); + void Cblacs_gridexit(int icontxt); int Cblacs_pnum(int icontxt, int prow, int pcol); void Cblacs_pcoord(int icontxt, int pnum, int *prow, int *pcol); void Cblacs_exit(int icontxt); diff --git a/source/module_base/scalapack_connector.h b/source/module_base/scalapack_connector.h index 8e0798b0d8..210cef58a1 100644 --- a/source/module_base/scalapack_connector.h +++ b/source/module_base/scalapack_connector.h @@ -85,6 +85,17 @@ extern "C" const double* vl, const double* vu, const int* il, const int* iu, const double* abstol, int* m, int* nz, double* w, const double*orfac, std::complex* Z, const int* iz, const int* jz, const int*descz, std::complex* work, int* lwork, double* rwork, int* lrwork, int*iwork, int*liwork, int* ifail, int*iclustr, double*gap, int* info); + void pssygvx_(const int* itype, const char* jobz, const char* range, const char* uplo, + const int* n, float* A, const int* ia, const int* ja, const int*desca, float* B, const int* ib, const int* jb, const int*descb, + const float* vl, const float* vu, const int* il, const int* iu, + const float* abstol, int* m, int* nz, float* w, const float*orfac, float* Z, const int* iz, const int* jz, const int*descz, + float* work, int* lwork, int*iwork, int*liwork, int* ifail, int*iclustr, float*gap, int* info); + void pchegvx_(const int* itype, const char* jobz, const char* range, const char* uplo, + const int* n, std::complex* A, const int* ia, const int* ja, const int*desca, std::complex* B, const int* ib, const int* jb, const int*descb, + const float* vl, const float* vu, const int* il, const int* iu, + const float* abstol, int* m, int* nz, float* w, const float*orfac, std::complex* Z, const int* iz, const int* jz, const int*descz, + std::complex* work, int* lwork, float* rwork, int* lrwork, int*iwork, int*liwork, int* ifail, int*iclustr, float*gap, int* info); + void pzgetri_( const int *n, diff --git a/source/module_hsolver/CMakeLists.txt b/source/module_hsolver/CMakeLists.txt index 8e1ff69905..93a708f21d 100644 --- a/source/module_hsolver/CMakeLists.txt +++ b/source/module_hsolver/CMakeLists.txt @@ -9,6 +9,9 @@ list(APPEND objects hsolver_pw_sdft.cpp diago_iter_assist.cpp hsolver.cpp + diago_pxxxgvx.cpp + diag_hs_para.cpp + ) if(ENABLE_LCAO) diff --git a/source/module_hsolver/diag_hs_para.cpp b/source/module_hsolver/diag_hs_para.cpp new file mode 100644 index 0000000000..d227c9c53b --- /dev/null +++ b/source/module_hsolver/diag_hs_para.cpp @@ -0,0 +1,201 @@ +#include +#include "module_hsolver/diag_hs_para.h" +#include "module_basis/module_ao/parallel_2d.h" +#include "module_hsolver/diago_pxxxgvx.h" +#include "module_base/scalapack_connector.h" +#include "module_hsolver/genelpa/elpa_solver.h" + +namespace hsolver +{ + +#ifdef __ELPA + void elpa_diag(MPI_Comm comm, + const int nband, + std::complex* h_local, + std::complex* s_local, + double* ekb, + std::complex* wfc_2d, + Parallel_2D& para2d_local) + { + int DecomposedState = 0; + ELPA_Solver es(false, + comm, + nband, + para2d_local.get_row_size(), + para2d_local.get_col_size(), + para2d_local.desc); + es.generalized_eigenvector(h_local, s_local, DecomposedState, ekb, wfc_2d); + es.exit(); + } + + void elpa_diag(MPI_Comm comm, + const int nband, + double* h_local, + double* s_local, + double* ekb, + double* wfc_2d, + Parallel_2D& para2d_local) + { + int DecomposedState = 0; + ELPA_Solver es(true, + comm, + nband, + para2d_local.get_row_size(), + para2d_local.get_col_size(), + para2d_local.desc); + es.generalized_eigenvector(h_local, s_local, DecomposedState, ekb, wfc_2d); + es.exit(); + } + + void elpa_diag(MPI_Comm comm, + const int nband, + std::complex* h_local, + std::complex* s_local, + float* ekb, + std::complex* wfc_2d, + Parallel_2D& para2d_local) + { + std::cout << "Error: ELPA do not support single precision. " << std::endl; + exit(1); + } + + void elpa_diag(MPI_Comm comm, + const int nband, + float* h_local, + float* s_local, + float* ekb, + float* wfc_2d, + Parallel_2D& para2d_local) + { + std::cout << "Error: ELPA do not support single precision. " << std::endl; + exit(1); + } + +#endif + + +#ifdef __MPI + +template +void Diago_HS_para( + T* h, + T* s, + const int lda, + const int nband, + typename GetTypeReal::type *const ekb, + T *const wfc, + const MPI_Comm& comm, + const int diag_subspace_method, + const int block_size) +{ + int myrank; + MPI_Comm_rank(comm, &myrank); + Parallel_2D para2d_global; + Parallel_2D para2d_local; + para2d_global.init(lda,lda,lda,comm); + + int max_nb = block_size; + if (block_size == 0) + { + if (nband > 500) + { + max_nb = 32; + } + else + { + max_nb = 16; + } + } + else if (block_size < 0) + { + std::cout << "Error: block_size in diago_subspace should be a positive integer. " << std::endl; + exit(1); + } + + // for genelpa, if the block size is too large that some cores have no data, then it will cause error. + if (diag_subspace_method == 1) + { + if (max_nb * (std::max(para2d_global.dim0, para2d_global.dim1) - 1) >= lda) + { + max_nb = lda / std::max(para2d_global.dim0, para2d_global.dim1); + } + } + + para2d_local.init(lda,lda,max_nb,comm); + std::vector h_local(para2d_local.get_col_size() * para2d_local.get_row_size()); + std::vector s_local(para2d_local.get_col_size() * para2d_local.get_row_size()); + std::vector wfc_2d(para2d_local.get_col_size() * para2d_local.get_row_size()); + + // distribute h and s to 2D + Cpxgemr2d(lda,lda,h,1,1,para2d_global.desc,h_local.data(),1,1,para2d_local.desc,para2d_local.blacs_ctxt); + Cpxgemr2d(lda,lda,s,1,1,para2d_global.desc,s_local.data(),1,1,para2d_local.desc,para2d_local.blacs_ctxt); + + if (diag_subspace_method == 1) + { +#ifdef __ELPA + elpa_diag(comm, nband, h_local.data(), s_local.data(), ekb, wfc_2d.data(), para2d_local); +#else + std::cout << "ERROR: try to use ELPA to solve the generalized eigenvalue problem, but ELPA is not compiled. " << std::endl; + exit(1); +#endif + } + else if (diag_subspace_method == 2) + { + hsolver::pxxxgvx_diag(para2d_local.desc, para2d_local.get_row_size(), para2d_local.get_col_size(),nband, h_local.data(), s_local.data(), ekb, wfc_2d.data()); + } + else{ + std::cout << "Error: parallel diagonalization method is not supported. " << "diag_subspace_method = " << diag_subspace_method << std::endl; + exit(1); + } + + // gather wfc + Cpxgemr2d(lda,lda,wfc_2d.data(),1,1,para2d_local.desc,wfc,1,1,para2d_global.desc,para2d_local.blacs_ctxt); + + // free the context + Cblacs_gridexit(para2d_local.blacs_ctxt); + Cblacs_gridexit(para2d_global.blacs_ctxt); +} + +// template instantiation +template void Diago_HS_para(double* h, + double* s, + const int lda, + const int nband, + typename GetTypeReal::type *const ekb, + double *const wfc, + const MPI_Comm& comm, + const int diag_subspace_method, + const int block_size); +template void Diago_HS_para>(std::complex* h, + std::complex* s, + const int lda, + const int nband, + typename GetTypeReal>::type *const ekb, + std::complex *const wfc, + const MPI_Comm& comm, + const int diag_subspace_method, + const int block_size); +template void Diago_HS_para(float* h, + float* s, + const int lda, + const int nband, + typename GetTypeReal::type *const ekb, + float *const wfc, + const MPI_Comm& comm, + const int diag_subspace_method, + const int block_size); +template void Diago_HS_para>(std::complex* h, + std::complex* s, + const int lda, + const int nband, + typename GetTypeReal>::type *const ekb, + std::complex *const wfc, + const MPI_Comm& comm, + const int diag_subspace_method, + const int block_size); + + + +#endif + +} // namespace hsolver \ No newline at end of file diff --git a/source/module_hsolver/diag_hs_para.h b/source/module_hsolver/diag_hs_para.h new file mode 100644 index 0000000000..ce73199a00 --- /dev/null +++ b/source/module_hsolver/diag_hs_para.h @@ -0,0 +1,47 @@ +#include "module_basis/module_ao/parallel_2d.h" +#include "module_base/macros.h" + +#ifdef __MPI +#include +#endif + +namespace hsolver +{ + + +#ifdef __MPI + +/** + * @brief Parallel do the generalized eigenvalue problem + * + * @tparam T double or complex or float or complex + * @param H the hermitian matrix H. + * @param S the overlap matrix S. + * @param lda the leading dimension of H and S + * @param nband the number of bands to be calculated + * @param ekb to store the eigenvalues. + * @param wfc to store the eigenvectors + * @param comm the communicator + * @param diag_subspace_method the method to solve the generalized eigenvalue problem + * @param block_size the block size in 2d block cyclic distribution if use elpa or scalapack. + * + * @note 1. h and s should be full matrix in rank 0 of the communicator, and the other ranks is not concerned. + * @note 2. wfc is complete in rank 0, and not store in other ranks. + * @note 3. diag_subspace_method should be 1: by elpa, 2: by scalapack + * @note 4. block_size should be 0 or a positive integer. If it is 0, then will use a value as large as possible that is allowed + */ +template +void Diago_HS_para( + T* h, + T* s, + const int lda, + const int nband, + typename GetTypeReal::type *const ekb, + T *const wfc, + const MPI_Comm& comm, + const int diag_subspace_method, + const int block_size=0); +#endif + +} // namespace hsolver + \ No newline at end of file diff --git a/source/module_hsolver/diago_dav_subspace.cpp b/source/module_hsolver/diago_dav_subspace.cpp index d11d7093f1..3aa4b8c45b 100644 --- a/source/module_hsolver/diago_dav_subspace.cpp +++ b/source/module_hsolver/diago_dav_subspace.cpp @@ -8,7 +8,10 @@ #include "module_hsolver/kernels/math_kernel_op.h" #include "module_base/kernels/dsp/dsp_connector.h" +#include "module_hsolver/diag_hs_para.h" + #include +#include using namespace hsolver; @@ -20,9 +23,12 @@ Diago_DavSubspace::Diago_DavSubspace(const std::vector& precond const double& diag_thr_in, const int& diag_nmax_in, const bool& need_subspace_in, - const diag_comm_info& diag_comm_in) + const diag_comm_info& diag_comm_in, + const int diag_subspace_method_in, + const int diago_subspace_bs_in) : precondition(precondition_in), n_band(nband_in), dim(nbasis_in), nbase_x(nband_in * david_ndim_in), - diag_thr(diag_thr_in), iter_nmax(diag_nmax_in), is_subspace(need_subspace_in), diag_comm(diag_comm_in) + diag_thr(diag_thr_in), iter_nmax(diag_nmax_in), is_subspace(need_subspace_in), diag_comm(diag_comm_in), + diag_subspace_method(diag_subspace_method_in), diago_subspace_bs(diago_subspace_bs_in) { this->device = base_device::get_device_type(this->ctx); @@ -32,6 +38,7 @@ Diago_DavSubspace::Diago_DavSubspace(const std::vector& precond assert(david_ndim_in > 1); assert(david_ndim_in * nband_in < nbasis_in * this->diag_comm.nproc); + assert(diag_subspace_method >= 0 && diag_subspace_method < 3); // TODO: Added memory usage statistics @@ -526,14 +533,13 @@ void Diago_DavSubspace::diag_zhegvx(const int& nbase, T* vcc) { ModuleBase::timer::tick("Diago_DavSubspace", "diag_zhegvx"); + assert(nbase_x >= std::max(1, nbase)); - if (this->diag_comm.rank == 0) + if (this->device == base_device::GpuDevice) { - assert(nbase_x >= std::max(1, nbase)); - - if (this->device == base_device::GpuDevice) - { #if defined(__CUDA) || defined(__ROCM) + if (this->diag_comm.rank == 0) + { Real* eigenvalue_gpu = nullptr; resmem_real_op()(this->ctx, eigenvalue_gpu, this->nbase_x); @@ -562,46 +568,96 @@ void Diago_DavSubspace::diag_zhegvx(const int& nbase, syncmem_var_d2h_op()(this->cpu_ctx, this->ctx, (*eigenvalue_iter).data(), eigenvalue_gpu, this->nbase_x); delmem_real_op()(this->ctx, eigenvalue_gpu); -#endif } - else +#endif + } + else + { + if (this->diag_subspace_method == 0) { - std::vector> h_diag(nbase, std::vector(nbase, *this->zero)); - std::vector> s_diag(nbase, std::vector(nbase, *this->zero)); - - for (size_t i = 0; i < nbase; i++) + if (this->diag_comm.rank == 0) { - for (size_t j = 0; j < nbase; j++) + std::vector> h_diag(nbase, std::vector(nbase, *this->zero)); + std::vector> s_diag(nbase, std::vector(nbase, *this->zero)); + + for (size_t i = 0; i < nbase; i++) + { + for (size_t j = 0; j < nbase; j++) + { + h_diag[i][j] = hcc[i * this->nbase_x + j]; + s_diag[i][j] = scc[i * this->nbase_x + j]; + } + } + dngvx_op()(this->ctx, + nbase, + this->nbase_x, + this->hcc, + this->scc, + nband, + (*eigenvalue_iter).data(), + this->vcc); + // reset: + for (size_t i = 0; i < nbase; i++) { - h_diag[i][j] = hcc[i * this->nbase_x + j]; - s_diag[i][j] = scc[i * this->nbase_x + j]; + for (size_t j = 0; j < nbase; j++) + { + hcc[i * this->nbase_x + j] = h_diag[i][j]; + scc[i * this->nbase_x + j] = s_diag[i][j]; + } + + for (size_t j = nbase; j < this->nbase_x; j++) + { + hcc[i * this->nbase_x + j] = *this->zero; + hcc[j * this->nbase_x + i] = *this->zero; + scc[i * this->nbase_x + j] = *this->zero; + scc[j * this->nbase_x + i] = *this->zero; + } } } - dngvx_op()(this->ctx, - nbase, - this->nbase_x, - this->hcc, - this->scc, - nband, - (*eigenvalue_iter).data(), - this->vcc); - // reset: - for (size_t i = 0; i < nbase; i++) + } + else + { +#ifdef __MPI + std::vector h_diag; + std::vector s_diag; + std::vector vcc_tmp; + if (this->diag_comm.rank == 0) { - for (size_t j = 0; j < nbase; j++) + h_diag.resize(nbase * nbase, *this->zero); + s_diag.resize(nbase * nbase, *this->zero); + vcc_tmp.resize(nbase * nbase, *this->zero); + for (size_t i = 0; i < nbase; i++) { - hcc[i * this->nbase_x + j] = h_diag[i][j]; - scc[i * this->nbase_x + j] = s_diag[i][j]; + for (size_t j = 0; j < nbase; j++) + { + h_diag[i * nbase + j] = hcc[i * this->nbase_x + j]; + s_diag[i * nbase + j] = scc[i * this->nbase_x + j]; + } } - - for (size_t j = nbase; j < this->nbase_x; j++) + } + Diago_HS_para(h_diag.data(), + s_diag.data(), + nbase, + nband, + (*eigenvalue_iter).data(), + vcc_tmp.data(), + this->diag_comm.comm, + this->diag_subspace_method, + this->diago_subspace_bs); + if (this->diag_comm.rank == 0) + { + for (size_t i = 0; i < nband; i++) { - hcc[i * this->nbase_x + j] = *this->zero; - hcc[j * this->nbase_x + i] = *this->zero; - scc[i * this->nbase_x + j] = *this->zero; - scc[j * this->nbase_x + i] = *this->zero; + for (size_t j = 0; j < nbase; j++) + { + vcc[i * this->nbase_x + j] = vcc_tmp[i * nbase + j]; + } } } +#else + std::cout << "Error: parallel diagonalization is not supported in serial mode." << std::endl; + exit(1); +#endif } } diff --git a/source/module_hsolver/diago_dav_subspace.h b/source/module_hsolver/diago_dav_subspace.h index d93c252602..baf984d999 100644 --- a/source/module_hsolver/diago_dav_subspace.h +++ b/source/module_hsolver/diago_dav_subspace.h @@ -31,7 +31,9 @@ class Diago_DavSubspace const double& diag_thr_in, const int& diag_nmax_in, const bool& need_subspace_in, - const diag_comm_info& diag_comm_in); + const diag_comm_info& diag_comm_in, + const int diago_dav_method_in, + const int block_size_in); ~Diago_DavSubspace(); @@ -139,6 +141,9 @@ class Diago_DavSubspace bool test_exit_cond(const int& ntry, const int& notconv, const bool& scf); + int diag_subspace_method; // 0: LAPACK, 1: Gen-ELPA, 2: ScaLAPACK + int diago_subspace_bs = 0; // the block size in 2d block cyclic distribution if use elpa or scalapack + #ifdef __DSP using resmem_complex_op = base_device::memory::resize_memory_op_mt; using delmem_complex_op = base_device::memory::delete_memory_op_mt; diff --git a/source/module_hsolver/diago_pxxxgvx.cpp b/source/module_hsolver/diago_pxxxgvx.cpp new file mode 100644 index 0000000000..ac639d7996 --- /dev/null +++ b/source/module_hsolver/diago_pxxxgvx.cpp @@ -0,0 +1,344 @@ +#include +#include +#include "string.h" +#include + +#include "diago_pxxxgvx.h" +#include "module_base/scalapack_connector.h" +#include "module_base/blacs_connector.h" + +namespace hsolver +{ + +#ifdef __MPI + +void pxxxgvx_(const int* itype, const char* jobz, const char* range, const char* uplo, + const int* n, double* A, const int* ia, const int* ja, const int*desca, double* B, const int* ib, const int* jb, const int*descb, + const double* vl, const double* vu, const int* il, const int* iu, + const double* abstol, int* m, int* nz, double* w, const double*orfac, double* Z, const int* iz, const int* jz, const int*descz, + double* work, int* lwork, double* rwork, int* lrwork, int*iwork, int*liwork, int* ifail, int*iclustr, double*gap, int* info) +{ + // for a uniform interface, rwork and lrwork are input arguments, but not used in pdsygvx_/pssygvx_ + pdsygvx_(itype, jobz, range, uplo, + n, A, ia, ja, desca, B, ib, jb, descb, + vl, vu, il, iu, + abstol, m, nz, w, orfac, Z, iz, jz, descz, + work, lwork, iwork, liwork, ifail, iclustr, gap, info); +} + +void pxxxgvx_(const int* itype, const char* jobz, const char* range, const char* uplo, + const int* n, std::complex* A, const int* ia, const int* ja, const int*desca, std::complex* B, const int* ib, const int* jb, const int*descb, + const double* vl, const double* vu, const int* il, const int* iu, + const double* abstol, int* m, int* nz, double* w, const double*orfac, std::complex* Z, const int* iz, const int* jz, const int*descz, + std::complex* work, int* lwork, double* rwork, int* lrwork, int*iwork, int*liwork, int* ifail, int*iclustr, double*gap, int* info) +{ + pzhegvx_(itype, jobz, range, uplo, + n, A, ia, ja, desca, B, ib, jb, descb, + vl, vu, il, iu, + abstol, m, nz, w, orfac, Z, iz, jz, descz, + work, lwork,rwork, lrwork, iwork, liwork, ifail, iclustr, gap, info); +} + +void pxxxgvx_(const int* itype, const char* jobz, const char* range, const char* uplo, + const int* n, float* A, const int* ia, const int* ja, const int*desca, float* B, const int* ib, const int* jb, const int*descb, + const float* vl, const float* vu, const int* il, const int* iu, + const float* abstol, int* m, int* nz, float* w, const float*orfac, float* Z, const int* iz, const int* jz, const int*descz, + float* work, int* lwork, float* rwork, int* lrwork, int*iwork, int*liwork, int* ifail, int*iclustr, float*gap, int* info) +{ + pssygvx_(itype, jobz, range, uplo, + n, A, ia, ja, desca, B, ib, jb, descb, + vl, vu, il, iu, + abstol, m, nz, w, orfac, Z, iz, jz, descz, + work, lwork, iwork, liwork, ifail, iclustr, gap, info); +} + +void pxxxgvx_(const int* itype, const char* jobz, const char* range, const char* uplo, + const int* n, std::complex* A, const int* ia, const int* ja, const int*desca, std::complex* B, const int* ib, const int* jb, const int*descb, + const float* vl, const float* vu, const int* il, const int* iu, + const float* abstol, int* m, int* nz, float* w, const float*orfac, std::complex* Z, const int* iz, const int* jz, const int*descz, + std::complex* work, int* lwork, float* rwork, int* lrwork, int*iwork, int*liwork, int* ifail, int*iclustr, float*gap, int* info) +{ + pchegvx_(itype, jobz, range, uplo, + n, A, ia, ja, desca, B, ib, jb, descb, + vl, vu, il, iu, + abstol, m, nz, w, orfac, Z, iz, jz, descz, + work, lwork,rwork, lrwork, iwork, liwork, ifail, iclustr, gap, info); +} + +// post processing for pdsygvx/pzhegvx/pdsygvx/pzhegvx +void pxxxgvx_post_processing(const int info, + const std::vector& ifail, + const std::vector& iclustr, + const int M, + const int NZ, + const int nbands, + int& degeneracy_max) +{ + const std::string str_info = "Scalapack diagonalization: \n info = " + std::to_string(info) + ".\n"; + + if (info == 0) + { + return; + } + else if (info < 0) + { + const int info_negative = -info; + const std::string str_index + = (info_negative > 100) + ? std::to_string(info_negative / 100) + "-th argument " + + std::to_string(info_negative % 100) + "-entry is illegal.\n" + : std::to_string(info_negative) + "-th argument is illegal.\n"; + throw std::runtime_error(str_info + str_index); + } + else if (info % 2) + { + std::string str_ifail = "ifail = "; + for (const int i: ifail) + { + str_ifail += std::to_string(i) + " "; + } + throw std::runtime_error(str_info + str_ifail); + } + else if (info / 2 % 2) + { + int degeneracy_need = 0; + for (int irank = 0; irank < iclustr.size()/2; ++irank) + { + degeneracy_need = std::max(degeneracy_need, iclustr[2 * irank + 1] - iclustr[2 * irank]); + } + const std::string str_need = "degeneracy_need = " + std::to_string(degeneracy_need) + ".\n"; + const std::string str_saved + = "degeneracy_saved = " + std::to_string(degeneracy_max) + ".\n"; + if (degeneracy_need <= degeneracy_max) + { + throw std::runtime_error(str_info + str_need + str_saved); + } + else + { + std::cout << str_need << str_saved; + degeneracy_max = degeneracy_need; + return; + } + } + else if (info / 4 % 2) + { + const std::string str_M = "M = " + std::to_string(M) + ".\n"; + const std::string str_NZ = "NZ = " + std::to_string(NZ) + ".\n"; + const std::string str_NBANDS + = "Number of eigenvalues solved = " + std::to_string(nbands) + ".\n"; + throw std::runtime_error(str_info + str_M + str_NZ + str_NBANDS); + } + else if (info / 16 % 2) + { + const std::string str_npos = "Not positive definite = " + std::to_string(ifail[0]) + ".\n"; + throw std::runtime_error(str_info + str_npos); + } + else + { + throw std::runtime_error(str_info); + } +} + +void get_lwork(int& lwork, std::vector& work) +{ + lwork = work[0]; +} + +void get_lwork(int& lwork, std::vector& work) +{ + lwork = work[0]; +} + +void get_lwork(int& lwork, std::vector>& work) +{ + lwork = work[0].real(); +} + +void get_lwork(int& lwork, std::vector>& work) +{ + lwork = work[0].real(); +} + +template +void pxxxgvx_diag(const int *const desc, + const int ncol, + const int nrow, + const int nbands, + const T *const h_mat, + const T *const s_mat, + typename GetTypeReal::type *const ekb, + T *const wfc_2d) +{ + int nprow, npcol, myprow, mypcol; + Cblacs_gridinfo(desc[1], &nprow, &npcol, &myprow, &mypcol); + int dsize = nprow * npcol; + + int degeneracy_max = 12; // only used for complex and complex + while (true) + { + std::vector h_tmp(ncol * nrow); + std::vector s_tmp(ncol * nrow); + memcpy(h_tmp.data(), h_mat, sizeof(T) * ncol * nrow); + memcpy(s_tmp.data(), s_mat, sizeof(T) * ncol * nrow); + + int ndim_global = desc[2]; + const char jobz = 'V', range = 'I', uplo = 'U'; + const int itype = 1, il = 1, iu = nbands, one = 1; + int M = 0, NZ = 0, lwork = -1, lrwork = -1, liwork = -1, info = 0; + const typename GetTypeReal::type abstol = 0, orfac = -1; + const typename GetTypeReal::type vl = 0, vu = 0; + std::vector work(1, 0); + std::vector::type> rwork(3, 0); // only used for complex and complex + std::vector iwork(1, 0); + std::vector ifail(ndim_global, 0); + std::vector iclustr(2 * dsize); + std::vector::type> gap(dsize); + + pxxxgvx_(&itype, + &jobz, + &range, + &uplo, + &ndim_global, + h_tmp.data(), + &one, + &one, + desc, + s_tmp.data(), + &one, + &one, + desc, + &vl, + &vu, + &il, + &iu, + &abstol, + &M, + &NZ, + ekb, + &orfac, + wfc_2d, + &one, + &one, + desc, + work.data(), + &lwork, // is not used for real data type + rwork.data(), // is not used for real data type + &lrwork, + iwork.data(), + &liwork, + ifail.data(), + iclustr.data(), + gap.data(), + &info); + + if (info) + { + throw std::runtime_error("Scalapack diagonalization: \n info = " + std::to_string(info) + ".\n"); + } + + if (std::is_same>::value || std::is_same>::value) + { + get_lwork(lwork, work); + work.resize(lwork, 0); + liwork = iwork[0]; + iwork.resize(liwork, 0); + lrwork = rwork[0] + degeneracy_max * ndim_global; + int maxlrwork = std::max(lrwork, 3); + rwork.resize(maxlrwork, 0); + } + else + { + get_lwork(lwork, work); + work.resize(std::max(lwork, 3), 0); + liwork = iwork[0]; + iwork.resize(liwork, 0); + } + + pxxxgvx_(&itype, + &jobz, + &range, + &uplo, + &ndim_global, + h_tmp.data(), + &one, + &one, + desc, + s_tmp.data(), + &one, + &one, + desc, + &vl, + &vu, + &il, + &iu, + &abstol, + &M, + &NZ, + ekb, + &orfac, + wfc_2d, + &one, + &one, + desc, + work.data(), + &lwork, + rwork.data(), // is not used for real data type + &lrwork, // is not used for real data type + iwork.data(), + &liwork, + ifail.data(), + iclustr.data(), + gap.data(), + &info); + + if (info == 0) + { + return; + } + pxxxgvx_post_processing(info, ifail,iclustr, M,NZ,nbands,degeneracy_max); + + // break the loop for real data type + if (std::is_same::value || std::is_same::value) + { + return; + } + } +} + +// template instantiation +template void pxxxgvx_diag(const int *const desc, + const int ncol, + const int nrow, + const int nbands, + const double *const h_mat, + const double *const s_mat, + double *const ekb, + double *const wfc_2d); +template void pxxxgvx_diag(const int *const desc, + const int ncol, + const int nrow, + const int nbands, + const std::complex *const h_mat, + const std::complex *const s_mat, + double *const ekb, + std::complex *const wfc_2d); +template void pxxxgvx_diag(const int *const desc, + const int ncol, + const int nrow, + const int nbands, + const float *const h_mat, + const float *const s_mat, + float *const ekb, + float *const wfc_2d); +template void pxxxgvx_diag(const int *const desc, + const int ncol, + const int nrow, + const int nbands, + const std::complex *const h_mat, + const std::complex *const s_mat, + float *const ekb, + std::complex *const wfc_2d); + +#endif + +} // namespace \ No newline at end of file diff --git a/source/module_hsolver/diago_pxxxgvx.h b/source/module_hsolver/diago_pxxxgvx.h new file mode 100644 index 0000000000..6684a5ea6f --- /dev/null +++ b/source/module_hsolver/diago_pxxxgvx.h @@ -0,0 +1,37 @@ +#ifndef HSOLVER_DIAGO_PXXXGVX_H +#define HSOLVER_DIAGO_PXXXGVX_H +#include +#include "module_base/macros.h" + +namespace hsolver +{ + +#ifdef __MPI +/** + * @brief generalized eigenvalue problem using pdsygvx/pzhegvx/pdsygvx/pzhegvx + * + * @param desc the descriptor of scalapack descriptor + * @param ncol the number of columns of the H/S matrix in current processor + * @param nrow the number of rows of the H/S matrix in current processor + * @param nbands the number of bands to be solved + * @param h_mat the Hamiltonian matrix + * @param s_mat the overlap matrix + * @param ekb the eigenvalues + * @param wfc_2d the eigenvectors in 2D block cyclic distribution + * + */ + +template +void pxxxgvx_diag(const int* const desc, + const int ncol, + const int nrow, + const int nbands, + const T* const h_mat, + const T* const s_mat, + typename GetTypeReal::type* const ekb, + T* const wfc_2d); +#endif + +} // namespace hsolver + +#endif // HSOLVER_DIAGO_PXXXGVX_H \ No newline at end of file diff --git a/source/module_hsolver/hsolver_pw.cpp b/source/module_hsolver/hsolver_pw.cpp index 05a4a893eb..83bac220f8 100644 --- a/source/module_hsolver/hsolver_pw.cpp +++ b/source/module_hsolver/hsolver_pw.cpp @@ -500,7 +500,9 @@ void HSolverPW::hamiltSolvePsiK(hamilt::Hamilt* hm, this->diag_thr, this->diag_iter_max, this->need_subspace, - comm_info); + comm_info, + PARAM.inp.diag_subspace_method, + PARAM.inp.nb2d); DiagoIterAssist::avg_iter += static_cast( dav_subspace.diag(hpsi_func, psi.get_pointer(), psi.get_nbasis(), eigenvalue, this->ethr_band.data(), scf)); diff --git a/source/module_hsolver/test/CMakeLists.txt b/source/module_hsolver/test/CMakeLists.txt index 53698c37cb..7ed27cb6c2 100644 --- a/source/module_hsolver/test/CMakeLists.txt +++ b/source/module_hsolver/test/CMakeLists.txt @@ -153,6 +153,12 @@ AddTest( SOURCES parallel_k2d_test.cpp ../parallel_k2d.cpp ../../module_cell/parallel_kpoints.cpp ../../module_basis/module_ao/parallel_2d.cpp ) +AddTest( + TARGET hsolver_diago_hs_parallel + LIBS parameter ${math_libs} ELPA::ELPA base device MPI::MPI_CXX genelpa psi + SOURCES test_diago_hs_para.cpp ../diag_hs_para.cpp ../diago_pxxxgvx.cpp ../../module_basis/module_ao/parallel_2d.cpp ../diago_elpa.cpp ../diago_scalapack.cpp +) + find_program(BASH bash) if (ENABLE_MPI) add_test(NAME HSolver_cg_parallel @@ -172,11 +178,11 @@ if (ENABLE_MPI) COMMAND ${BASH} diago_lcao_parallel_test.sh WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} ) - if(ENABLE_PEXSI) - add_test(NAME HSolver_LCAO_PEXSI_parallel - COMMAND ${BASH} diago_pexsi_parallel_test.sh - WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} - ) -endif() -endif() + if(ENABLE_PEXSI) + add_test(NAME HSolver_LCAO_PEXSI_parallel + COMMAND ${BASH} diago_pexsi_parallel_test.sh + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} + ) + endif() + endif() endif() \ No newline at end of file diff --git a/source/module_hsolver/test/test_diago_hs_para.cpp b/source/module_hsolver/test/test_diago_hs_para.cpp new file mode 100644 index 0000000000..29ed7e30a1 --- /dev/null +++ b/source/module_hsolver/test/test_diago_hs_para.cpp @@ -0,0 +1,359 @@ +#ifndef TEST_DIAGO_PXXXGVX_H +#define TEST_DIAGO_PXXXGVX_H + +#include +#include +#include +#include +#include +#include +#include + +#include "../diag_hs_para.h" +#include "module_hsolver/kernels/dngvd_op.h" + +template +typename std::enable_if::value || std::is_same::value>::type +generate_random_hs_impl(int d, std::mt19937& gen, std::uniform_real_distribution::type>& dis, std::vector& h_mat, std::vector& s_mat) { + // For S matrix, firstly we generate a random symmetric matrix s_tmp, then we set S = s_tmp * s_tmp^T + n * I + std::vector s_tmp(d*d,0); + for (int i = 0; i < d; ++i) { + for (int j = i; j < d; ++j) { + typename GetTypeReal::type value1 = static_cast::type>(dis(gen)); + h_mat[i * d + j] = value1; + h_mat[j * d + i] = value1; + + // construct a random overlap matrix + typename GetTypeReal::type value2 = static_cast::type>(dis(gen)); + s_tmp[i * d + j] = value2; + } + } + + // set S = s_tmp * s_tmp^T + n * I + for (int i = 0; i < d; ++i) { + for (int j = 0; j < d; ++j) { + s_mat[i * d + j] = 0; + for (int k = 0; k < d; ++k) { + s_mat[i * d + j] += s_tmp[i * d + k] * s_tmp[j * d + k]; + } + if (i == j) { + s_mat[i * d + j] += 2.0; + } + } + } +} + +template +typename std::enable_if>::value || std::is_same>::value>::type +generate_random_hs_impl(int d, std::mt19937& gen, std::uniform_real_distribution::type>& dis, std::vector& h_mat, std::vector& s_mat) { + std::vector s_tmp(d*d,0); + for (int i = 0; i < d; ++i) { + for (int j = i; j < d; ++j) { + typename GetTypeReal::type value1 = static_cast::type>(dis(gen)); + typename GetTypeReal::type value2 = static_cast::type>(dis(gen)); + h_mat[i * d + j] = T(value1, value2); + if (i != j) + { + h_mat[j * d + i] = T(value1, -value2); + } + else{ + h_mat[j * d + i] = T(value1, 0); + } + + // construct a random overlap matrix + value1 = static_cast::type>(dis(gen)); + value2 = static_cast::type>(dis(gen)); + s_tmp[i * d + j] = T(value1, value2); + } + } + + // set S = s_tmp * s_tmp^T + n * I + for (int i = 0; i < d; ++i) { + for (int j = 0; j < d; ++j) { + s_mat[i * d + j] = T(0, 0); + for (int k = 0; k < d; ++k) { + s_mat[i * d + j] += s_tmp[i * d + k] * std::conj(s_tmp[j * d + k]); + } + if (i == j) { + s_mat[i * d + j] += T(2.0, 0); + } + } + } +} + +template +void generate_random_hs(int d, int random_seed ,std::vector& h_mat, std::vector& s_mat) { + std::mt19937 gen(random_seed); + std::uniform_real_distribution::type> dis(-1.0,1.0); + + h_mat.resize(d * d); + s_mat.resize(d * d); + generate_random_hs_impl(d, gen, dis, h_mat, s_mat); +} + + +template +typename std::enable_if::value || std::is_same::value>::type +verify_results(const std::vector& h_psi, const std::vector& s_psi, const std::vector::type>& ekb, int lda, int nbands, double threshold) { + for (int i = 0; i < lda; ++i) { + for (int j = 0; j < nbands; ++j) { + ASSERT_NEAR(h_psi[j * lda + i], ekb[j] * s_psi[j * lda + i], threshold); + } + } +} + +template +typename std::enable_if>::value || std::is_same>::value>::type +verify_results(const std::vector& h_psi, const std::vector& s_psi, const std::vector::type>& ekb, int lda, int nbands, double threshold) { + for (int i = 0; i < lda; ++i) { + for (int j = 0; j < nbands; ++j) { + ASSERT_NEAR(h_psi[j * lda + i].real(), ekb[j] * s_psi[j * lda + i].real(), threshold); + ASSERT_NEAR(h_psi[j * lda + i].imag(), ekb[j] * s_psi[j * lda + i].imag(), threshold); + } + } +} + +template +void test_diago_hs(int lda, int nb, int random_seed, int nbands, int diag_type, MPI_Comm comm) { + // diag_type should be 1 (for elpa) or 2 (for scalapack) + int my_rank; + MPI_Comm_rank(comm, &my_rank); + + std::vector h_mat, s_mat, wfc, h_psi, s_psi; + std::vector::type> ekb(lda); + if (my_rank==0) + { + h_mat.resize(lda * lda); + s_mat.resize(lda * lda); + wfc.resize(lda * lda); + //generate_random_hs(lda, random_seed, h_mat, s_mat); + //read h from h.dat and s from s.dat, + std::ifstream hfile("h.dat"); + hfile.read((char*)h_mat.data(), lda * lda * sizeof(T)); + hfile.close(); + std::ifstream sfile("s.dat"); + sfile.read((char*)s_mat.data(), lda * lda * sizeof(T)); + sfile.close(); + } + hsolver::Diago_HS_para(h_mat.data(), s_mat.data(), lda, nbands,ekb.data(), wfc.data(), comm, diag_type, nb); + + // Verify results + if (my_rank == 0){ + double threshold = 1e-6; + if (std::is_same>::value || std::is_same::value) { + threshold = 1e-12; + } + + h_psi.resize(lda * nbands, 0); + s_psi.resize(lda * nbands, 0); + + for (int i = 0; i < lda; ++i) { + for (int j = 0; j < nbands; ++j) { + for (int k = 0; k < lda; ++k) { + h_psi[j * lda + i] += h_mat[k * lda + i] * wfc[j * lda + k]; + s_psi[j * lda + i] += s_mat[k * lda + i] * wfc[j * lda + k]; + } + } + } + verify_results(h_psi, s_psi, ekb, lda, nbands, threshold); + } +} + +template +void test_performance(int lda, int nb, int nbands, MPI_Comm comm,int case_numb, int loop_numb) { + // generate 10 random H/S, and do the diagonalization 100 times by using elpa/scalapack and lapack. + int my_rank, nproc; + MPI_Comm_rank(comm, &my_rank); + MPI_Comm_size(comm, &nproc); + + std::vector h_mat, s_mat, wfc, h_psi, s_psi; + std::vector::type> ekb_elpa(lda); + std::vector::type> ekb_scalap(lda); + std::vector::type> ekb_lapack(lda); + + if (my_rank==0) + { + std::cout << "\nMatrix size: " << lda << " x " << lda << std::endl; + std::cout << "Number of bands: " << nbands << std::endl; + std::cout << "Number of processors: " << nproc << std::endl; + std::cout << "Block size of 2D distribution: " << nb << std::endl; + h_mat.resize(lda * lda); + s_mat.resize(lda * lda); + wfc.resize(lda * lda); + } + + // store all the times in a vector + std::vector time_elpa(case_numb, 0); + std::vector time_scalap(case_numb, 0); + std::vector time_lapack(case_numb, 0); + + if (my_rank == 0) std::cout << "Random matrix "; + for (int randomi = 0; randomi < case_numb; ++randomi) + { + + if (my_rank == 0) { + std::cout << randomi << " "; + generate_random_hs(lda, randomi, h_mat, s_mat); + } + + // ELPA + MPI_Barrier(comm); + auto start = std::chrono::high_resolution_clock::now(); + for (int j=0;j(h_mat.data(), s_mat.data(), lda, nbands,ekb_elpa.data(), wfc.data(), comm, 1, nb); + MPI_Barrier(comm); + } + MPI_Barrier(comm); + auto end = std::chrono::high_resolution_clock::now(); + time_elpa[randomi] = std::chrono::duration_cast(end - start).count(); + + + // scalapack + start = std::chrono::high_resolution_clock::now(); + for (int j=0;j(h_mat.data(), s_mat.data(), lda, nbands,ekb_scalap.data(), wfc.data(), comm, 2, nb); + MPI_Barrier(comm); + } + MPI_Barrier(comm); + end = std::chrono::high_resolution_clock::now(); + time_scalap[randomi] = std::chrono::duration_cast(end - start).count(); + + //LApack + if (my_rank == 0) + { + std::vector h_tmp, s_tmp; + start = std::chrono::high_resolution_clock::now(); + base_device::DEVICE_CPU* ctx = {}; + + for (int j=0;j()(ctx, + lda, + lda, + h_tmp.data(), + s_tmp.data(), + nbands, + ekb_lapack.data(), + wfc.data()); + } + end = std::chrono::high_resolution_clock::now(); + time_lapack[randomi] = std::chrono::duration_cast(end - start).count(); + + //COMPARE EKB + for (int i = 0; i < nbands; ++i) { + typename GetTypeReal::type diff_elpa_lapack = std::abs(ekb_elpa[i] - ekb_lapack[i]); + typename GetTypeReal::type diff_scalap_lapack = std::abs(ekb_scalap[i] - ekb_lapack[i]); + if (diff_elpa_lapack > 1e-6 || diff_scalap_lapack > 1e-6) + { + std::cout << "eigenvalue " << i << " by ELPA: " << ekb_elpa[i] << std::endl; + std::cout << "eigenvalue " << i << " by Scalapack: " << ekb_scalap[i] << std::endl; + std::cout << "eigenvalue " << i << " by Lapack: " << ekb_lapack[i] << std::endl; + } + } + } + MPI_Barrier(comm); + + } + + if (my_rank == 0) + { + std::cout << "\nELPA Time : "; + for (int i=0; i < case_numb;i++) + {std::cout << time_elpa[i] << " ";} + std::cout << std::endl; + + std::cout << "scalapack Time: "; + for (int i=0; i < case_numb;i++) + {std::cout << time_scalap[i] << " ";} + std::cout << std::endl; + + std::cout << "lapack Time : "; + for (int i=0; i < case_numb;i++) + {std::cout << time_lapack[i] << " ";} + std::cout << std::endl; + + // print out the average time and speedup + double avg_time_elpa = 0; + double avg_time_scalap = 0; + double avg_time_lapack = 0; + for (int i=0; i < case_numb;i++) + { + avg_time_elpa += time_elpa[i]; + avg_time_scalap += time_scalap[i]; + avg_time_lapack += time_lapack[i]; + } + + avg_time_elpa /= case_numb; + avg_time_scalap /= case_numb; + avg_time_lapack /= case_numb; + std::cout << "Average Lapack Time : " << avg_time_lapack << " ms" << std::endl; + std::cout << "Average ELPA Time : " << avg_time_elpa << " ms, Speedup: " << avg_time_lapack / avg_time_elpa << std::endl; + std::cout << "Average Scalapack Time: " << avg_time_scalap << " ms, Speedup: " << avg_time_lapack / avg_time_scalap << std::endl; + } +} + +//test_diago_hs(int lda, int nb, int random_seed, int nbands, int diag_type, MPI_Comm comm) +TEST(DiagoPxxxgvxElpaTest, Double) { + test_diago_hs(16, 4, 0, 10, 1, MPI_COMM_WORLD); + test_diago_hs(20, 6, 0, 18, 1, MPI_COMM_WORLD); +} + +TEST(DiagoPxxxgvxElpaTest, ComplexDouble) { + test_diago_hs>(16, 6, 0, 10, 1, MPI_COMM_WORLD); + test_diago_hs>(20, 8, 0, 18, 1, MPI_COMM_WORLD); +} + +TEST(DiagoPxxxgvxScalapackTest, Double) { + test_diago_hs(16, 4, 0, 10, 2,MPI_COMM_WORLD); + test_diago_hs(20, 6, 0, 18, 2,MPI_COMM_WORLD); +} + +TEST(DiagoPxxxgvxScalapackTest, ComplexDouble) { + test_diago_hs>(16, 4, 0, 10, 2, MPI_COMM_WORLD); +} +TEST(DiagoPxxxgvxScalapackTest, Float) { + test_diago_hs(16, 4, 0, 10,2,MPI_COMM_WORLD); +} + +TEST(DiagoPxxxgvxScalapackTest, ComplexFloat) { + test_diago_hs>(16, 4, 0, 10,2,MPI_COMM_WORLD); +} + +//TEST(DiagoPxxxgvxPerformanceTest, Double) { +// int ndim = 200; +// int nband = 180; +// int case_numb = 10; +// int loop_numb = 10; +// test_performance>(ndim, 32, nband, MPI_COMM_WORLD, case_numb, loop_numb); +//} + +int main(int argc, char** argv) { + MPI_Init(&argc, &argv); + int myrank; + MPI_Comm_rank(MPI_COMM_WORLD, &myrank); + + testing::InitGoogleTest(&argc, argv); + ::testing::TestEventListeners& listeners = ::testing::UnitTest::GetInstance()->listeners(); + + if (myrank != 0) { + delete listeners.Release(listeners.default_result_printer()); + } + + int result = RUN_ALL_TESTS(); + if (myrank == 0 && result != 0) + { + std::cout << "ERROR:some tests are not passed" << std::endl; + MPI_Finalize(); + return result; + } + + MPI_Finalize(); + + return 0; +} + +#endif // TEST_DIAGO_PXXXGVX_H \ No newline at end of file diff --git a/source/module_io/read_input_item_system.cpp b/source/module_io/read_input_item_system.cpp index 7398dc2e6a..3fd2e48bd5 100644 --- a/source/module_io/read_input_item_system.cpp +++ b/source/module_io/read_input_item_system.cpp @@ -477,6 +477,12 @@ void ReadInput::item_system() read_sync_int(input.fft_mode); this->add_item(item); } + { + Input_Item item("diag_subspace_method"); + item.annotation = "method of subspace diagonalization in dav_subspace. 0:LaPack; 1:genelpa, 2:scalapack"; + read_sync_int(input.diag_subspace_method); + this->add_item(item); + } { Input_Item item("init_wfc"); item.annotation = "start wave functions are from 'atomic', " diff --git a/source/module_lr/hsolver_lrtd.hpp b/source/module_lr/hsolver_lrtd.hpp index 6c8be619eb..3694631cfc 100644 --- a/source/module_lr/hsolver_lrtd.hpp +++ b/source/module_lr/hsolver_lrtd.hpp @@ -95,7 +95,9 @@ namespace LR diag_ethr, maxiter, false, //always do the subspace diag (check the implementation) - comm_info); + comm_info, + PARAM.inp.diag_subspace_method, + PARAM.inp.nb2d); std::vector ethr_band(nband, diag_ethr); hsolver::DiagoIterAssist::avg_iter += static_cast(dav_subspace.diag( diff --git a/source/module_parameter/input_parameter.h b/source/module_parameter/input_parameter.h index db71d32b63..dfdf33b704 100644 --- a/source/module_parameter/input_parameter.h +++ b/source/module_parameter/input_parameter.h @@ -88,6 +88,7 @@ struct Input_para double pw_diag_thr = 0.01; ///< used in cg method int pw_diag_ndim = 4; ///< dimension of workspace for Davidson diagonalization int diago_cg_prec = 1; ///< mohan add 2012-03-31 + int diag_subspace_method = 0; // 0: Lapack, 1: elpa, 2: scalapack std::string smearing_method = "gauss"; ///< "gauss", ///< "mp","methfessel-paxton" diff --git a/tests/integrate/102_PW_DS_davsubspace_sca/INPUT b/tests/integrate/102_PW_DS_davsubspace_sca/INPUT new file mode 100644 index 0000000000..0cbffad0f8 --- /dev/null +++ b/tests/integrate/102_PW_DS_davsubspace_sca/INPUT @@ -0,0 +1,29 @@ +INPUT_PARAMETERS +#Parameters (1.General) +suffix autotest +calculation scf + +nbands 6 +symmetry 1 +pseudo_dir ../../PP_ORB + +#Parameters (2.Iteration) +ecutwfc 20 +scf_thr 1e-8 +scf_nmax 100 + + +#Parameters (3.Basis) +basis_type pw + +#Parameters (4.Smearing) +smearing_method gauss +smearing_sigma 0.002 + +#Parameters (5.Mixing) +mixing_type plain +mixing_beta 0.5 + +ks_solver dav_subspace +diag_subspace_method 2 +nb2d 4 diff --git a/tests/integrate/102_PW_DS_davsubspace_sca/KPT b/tests/integrate/102_PW_DS_davsubspace_sca/KPT new file mode 100644 index 0000000000..c289c0158a --- /dev/null +++ b/tests/integrate/102_PW_DS_davsubspace_sca/KPT @@ -0,0 +1,4 @@ +K_POINTS +0 +Gamma +1 1 1 0 0 0 diff --git a/tests/integrate/102_PW_DS_davsubspace_sca/STRU b/tests/integrate/102_PW_DS_davsubspace_sca/STRU new file mode 100644 index 0000000000..cdb8157cce --- /dev/null +++ b/tests/integrate/102_PW_DS_davsubspace_sca/STRU @@ -0,0 +1,19 @@ +ATOMIC_SPECIES +Si 14 Si.pz-vbc.UPF + +LATTICE_CONSTANT +10.2 // add lattice constant + +LATTICE_VECTORS +0.5 0.5 0.0 +0.5 0.0 0.5 +0.0 0.5 0.5 + +ATOMIC_POSITIONS +Direct + +Si // Element type +0.0 // magnetism +2 +0.00 0.00 0.00 1 1 1 +0.25 0.25 0.25 1 1 1 diff --git a/tests/integrate/102_PW_DS_davsubspace_sca/jd b/tests/integrate/102_PW_DS_davsubspace_sca/jd new file mode 100644 index 0000000000..70376ea55e --- /dev/null +++ b/tests/integrate/102_PW_DS_davsubspace_sca/jd @@ -0,0 +1 @@ +test subspace diagonalization with scalapack \ No newline at end of file diff --git a/tests/integrate/102_PW_DS_davsubspace_sca/result.ref b/tests/integrate/102_PW_DS_davsubspace_sca/result.ref new file mode 100644 index 0000000000..46352c76f5 --- /dev/null +++ b/tests/integrate/102_PW_DS_davsubspace_sca/result.ref @@ -0,0 +1,6 @@ +etotref -198.2238296207179 +etotperatomref -99.1119148104 +pointgroupref T_d +spacegroupref O_h +nksibzref 1 +totaltimeref diff --git a/tests/integrate/CASES_CPU.txt b/tests/integrate/CASES_CPU.txt index 822bc4b126..07de643103 100644 --- a/tests/integrate/CASES_CPU.txt +++ b/tests/integrate/CASES_CPU.txt @@ -25,6 +25,7 @@ 102_PW_BPCG 102_PW_CG 102_PW_DS_davsubspace +102_PW_DS_davsubspace_sca 102_PW_PINT_RKS 102_PW_PINT_UKS 103_PW_15_CF_CS_S1_smallg From 2d528b1319f510b2cacadc93649dfd59289291ef Mon Sep 17 00:00:00 2001 From: "pre-commit-ci-lite[bot]" <117423508+pre-commit-ci-lite[bot]@users.noreply.github.com> Date: Thu, 21 Nov 2024 03:23:43 +0000 Subject: [PATCH 02/12] [pre-commit.ci lite] apply automatic fixes --- source/module_hsolver/diago_pxxxgvx.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/module_hsolver/diago_pxxxgvx.cpp b/source/module_hsolver/diago_pxxxgvx.cpp index ac639d7996..24168894dd 100644 --- a/source/module_hsolver/diago_pxxxgvx.cpp +++ b/source/module_hsolver/diago_pxxxgvx.cpp @@ -1,6 +1,6 @@ #include #include -#include "string.h" +#include #include #include "diago_pxxxgvx.h" From 2c3bb1b64a4bfbbfbd40eee0265945d78d76ccb3 Mon Sep 17 00:00:00 2001 From: root Date: Thu, 21 Nov 2024 15:33:18 +0800 Subject: [PATCH 03/12] fix Makefile --- source/Makefile.Objects | 2 ++ 1 file changed, 2 insertions(+) diff --git a/source/Makefile.Objects b/source/Makefile.Objects index 3f240d9b52..29b6df8073 100644 --- a/source/Makefile.Objects +++ b/source/Makefile.Objects @@ -338,6 +338,8 @@ OBJS_HSOLVER=diago_cg.o\ math_kernel_op.o\ dngvd_op.o\ diag_const_nums.o\ + diag_hs_para.o\ + diago_pxxxgvx.o\ OBJS_HSOLVER_LCAO=hsolver_lcao.o\ diago_scalapack.o\ From 2cf60e2677869bee58f55267ec8be19dfa3323fe Mon Sep 17 00:00:00 2001 From: root Date: Fri, 22 Nov 2024 18:29:02 +0800 Subject: [PATCH 04/12] fix ut --- docs/advanced/input_files/input-main.md | 4 +- python/pyabacus/src/hsolver/CMakeLists.txt | 3 + .../src/hsolver/py_diago_dav_subspace.hpp | 2 +- source/module_base/scalapack_connector.h | 3 + source/module_hsolver/diag_hs_para.cpp | 251 ++++---- source/module_hsolver/diag_hs_para.h | 23 +- source/module_hsolver/diago_dav_subspace.cpp | 24 +- source/module_hsolver/diago_dav_subspace.h | 2 +- source/module_hsolver/diago_pxxxgvx.cpp | 546 ++++++++++++++---- source/module_hsolver/diago_pxxxgvx.h | 2 +- source/module_hsolver/hsolver_pw.cpp | 2 +- .../module_hsolver/test/test_hsolver_pw.cpp | 56 ++ .../module_hsolver/test/test_hsolver_sdft.cpp | 52 ++ source/module_io/read_input_item_system.cpp | 4 +- source/module_lr/hsolver_lrtd.hpp | 2 +- source/module_parameter/input_parameter.h | 2 +- 16 files changed, 709 insertions(+), 269 deletions(-) diff --git a/docs/advanced/input_files/input-main.md b/docs/advanced/input_files/input-main.md index 019e28829a..798da7c9e8 100644 --- a/docs/advanced/input_files/input-main.md +++ b/docs/advanced/input_files/input-main.md @@ -39,7 +39,7 @@ - [pw\_diag\_thr](#pw_diag_thr) - [pw\_diag\_nmax](#pw_diag_nmax) - [pw\_diag\_ndim](#pw_diag_ndim) - - [diag\_subspace\_method](#diag_subspace_method) + - [diag\_subspace\_method](#diag_subspace) - [erf\_ecut](#erf_ecut) - [fft\_mode](#fft_mode) - [erf\_height](#erf_height) @@ -787,7 +787,7 @@ These variables are used to control the plane wave related parameters. - **Description**: Only useful when you use `ks_solver = dav` or `ks_solver = dav_subspace`. It indicates dimension of workspace(number of wavefunction packets, at least 2 needed) for the Davidson method. A larger value may yield a smaller number of iterations in the algorithm but uses more memory and more CPU time in subspace diagonalization. - **Default**: 4 -### diag_subspace_method +### diag_subspace - **Type**: Integer - **Description**: The method to diagonalize subspace in dav_subspace method. The available options are: diff --git a/python/pyabacus/src/hsolver/CMakeLists.txt b/python/pyabacus/src/hsolver/CMakeLists.txt index 853973e653..f0f04f97a7 100644 --- a/python/pyabacus/src/hsolver/CMakeLists.txt +++ b/python/pyabacus/src/hsolver/CMakeLists.txt @@ -5,6 +5,9 @@ list(APPEND _diago ${HSOLVER_PATH}/diago_cg.cpp ${HSOLVER_PATH}/diag_const_nums.cpp ${HSOLVER_PATH}/diago_iter_assist.cpp + ${HSOLVER_PATH}/diag_hs_para.cpp + ${HSOLVER_PATH}/diago_pxxxgvx.cpp + ${HSOLVER_PATH}/kernels/dngvd_op.cpp ${HSOLVER_PATH}/kernels/math_kernel_op.cpp diff --git a/python/pyabacus/src/hsolver/py_diago_dav_subspace.hpp b/python/pyabacus/src/hsolver/py_diago_dav_subspace.hpp index 0eff2467fe..5374ac29a8 100644 --- a/python/pyabacus/src/hsolver/py_diago_dav_subspace.hpp +++ b/python/pyabacus/src/hsolver/py_diago_dav_subspace.hpp @@ -139,7 +139,7 @@ class PyDiagoDavSubspace max_iter, need_subspace, comm_info, - PARAM.inp.diag_subspace_method, + PARAM.inp.diag_subspace, PARAM.inp.nb2d ); diff --git a/source/module_base/scalapack_connector.h b/source/module_base/scalapack_connector.h index 210cef58a1..ebd1946aac 100644 --- a/source/module_base/scalapack_connector.h +++ b/source/module_base/scalapack_connector.h @@ -80,16 +80,19 @@ extern "C" const double* vl, const double* vu, const int* il, const int* iu, const double* abstol, int* m, int* nz, double* w, const double*orfac, double* Z, const int* iz, const int* jz, const int*descz, double* work, int* lwork, int*iwork, int*liwork, int* ifail, int*iclustr, double*gap, int* info); + void pzhegvx_(const int* itype, const char* jobz, const char* range, const char* uplo, const int* n, std::complex* A, const int* ia, const int* ja, const int*desca, std::complex* B, const int* ib, const int* jb, const int*descb, const double* vl, const double* vu, const int* il, const int* iu, const double* abstol, int* m, int* nz, double* w, const double*orfac, std::complex* Z, const int* iz, const int* jz, const int*descz, std::complex* work, int* lwork, double* rwork, int* lrwork, int*iwork, int*liwork, int* ifail, int*iclustr, double*gap, int* info); + void pssygvx_(const int* itype, const char* jobz, const char* range, const char* uplo, const int* n, float* A, const int* ia, const int* ja, const int*desca, float* B, const int* ib, const int* jb, const int*descb, const float* vl, const float* vu, const int* il, const int* iu, const float* abstol, int* m, int* nz, float* w, const float*orfac, float* Z, const int* iz, const int* jz, const int*descz, float* work, int* lwork, int*iwork, int*liwork, int* ifail, int*iclustr, float*gap, int* info); + void pchegvx_(const int* itype, const char* jobz, const char* range, const char* uplo, const int* n, std::complex* A, const int* ia, const int* ja, const int*desca, std::complex* B, const int* ib, const int* jb, const int*descb, const float* vl, const float* vu, const int* il, const int* iu, diff --git a/source/module_hsolver/diag_hs_para.cpp b/source/module_hsolver/diag_hs_para.cpp index d227c9c53b..ed558e492d 100644 --- a/source/module_hsolver/diag_hs_para.cpp +++ b/source/module_hsolver/diag_hs_para.cpp @@ -1,98 +1,88 @@ -#include #include "module_hsolver/diag_hs_para.h" + +#include "module_base/scalapack_connector.h" #include "module_basis/module_ao/parallel_2d.h" #include "module_hsolver/diago_pxxxgvx.h" -#include "module_base/scalapack_connector.h" #include "module_hsolver/genelpa/elpa_solver.h" +#include + namespace hsolver { -#ifdef __ELPA - void elpa_diag(MPI_Comm comm, - const int nband, - std::complex* h_local, - std::complex* s_local, - double* ekb, - std::complex* wfc_2d, - Parallel_2D& para2d_local) - { - int DecomposedState = 0; - ELPA_Solver es(false, - comm, - nband, - para2d_local.get_row_size(), - para2d_local.get_col_size(), - para2d_local.desc); - es.generalized_eigenvector(h_local, s_local, DecomposedState, ekb, wfc_2d); - es.exit(); - } +#ifdef __ELPA +void elpa_diag(MPI_Comm comm, + const int nband, + std::complex* h_local, + std::complex* s_local, + double* ekb, + std::complex* wfc_2d, + Parallel_2D& para2d_local) +{ + int DecomposedState = 0; + ELPA_Solver es(false, comm, nband, para2d_local.get_row_size(), para2d_local.get_col_size(), para2d_local.desc); + es.generalized_eigenvector(h_local, s_local, DecomposedState, ekb, wfc_2d); + es.exit(); +} - void elpa_diag(MPI_Comm comm, - const int nband, - double* h_local, - double* s_local, - double* ekb, - double* wfc_2d, - Parallel_2D& para2d_local) - { - int DecomposedState = 0; - ELPA_Solver es(true, - comm, - nband, - para2d_local.get_row_size(), - para2d_local.get_col_size(), - para2d_local.desc); - es.generalized_eigenvector(h_local, s_local, DecomposedState, ekb, wfc_2d); - es.exit(); - } +void elpa_diag(MPI_Comm comm, + const int nband, + double* h_local, + double* s_local, + double* ekb, + double* wfc_2d, + Parallel_2D& para2d_local) +{ + int DecomposedState = 0; + ELPA_Solver es(true, comm, nband, para2d_local.get_row_size(), para2d_local.get_col_size(), para2d_local.desc); + es.generalized_eigenvector(h_local, s_local, DecomposedState, ekb, wfc_2d); + es.exit(); +} - void elpa_diag(MPI_Comm comm, - const int nband, - std::complex* h_local, - std::complex* s_local, - float* ekb, - std::complex* wfc_2d, - Parallel_2D& para2d_local) - { - std::cout << "Error: ELPA do not support single precision. " << std::endl; - exit(1); - } +void elpa_diag(MPI_Comm comm, + const int nband, + std::complex* h_local, + std::complex* s_local, + float* ekb, + std::complex* wfc_2d, + Parallel_2D& para2d_local) +{ + std::cout << "Error: ELPA do not support single precision. " << std::endl; + exit(1); +} - void elpa_diag(MPI_Comm comm, - const int nband, - float* h_local, - float* s_local, - float* ekb, - float* wfc_2d, - Parallel_2D& para2d_local) - { - std::cout << "Error: ELPA do not support single precision. " << std::endl; - exit(1); - } +void elpa_diag(MPI_Comm comm, + const int nband, + float* h_local, + float* s_local, + float* ekb, + float* wfc_2d, + Parallel_2D& para2d_local) +{ + std::cout << "Error: ELPA do not support single precision. " << std::endl; + exit(1); +} #endif - #ifdef __MPI template -void Diago_HS_para( - T* h, - T* s, - const int lda, - const int nband, - typename GetTypeReal::type *const ekb, - T *const wfc, - const MPI_Comm& comm, - const int diag_subspace_method, - const int block_size) +void Diago_HS_para(T* h, + T* s, + const int lda, + const int nband, + typename GetTypeReal::type* const ekb, + T* const wfc, + const MPI_Comm& comm, + const int diag_subspace, + const int block_size) { - int myrank; + int myrank = 0; MPI_Comm_rank(comm, &myrank); Parallel_2D para2d_global; Parallel_2D para2d_local; - para2d_global.init(lda,lda,lda,comm); + para2d_global.init(lda, lda, lda, comm); int max_nb = block_size; if (block_size == 0) @@ -113,43 +103,53 @@ void Diago_HS_para( } // for genelpa, if the block size is too large that some cores have no data, then it will cause error. - if (diag_subspace_method == 1) + if (diag_subspace == 1) { if (max_nb * (std::max(para2d_global.dim0, para2d_global.dim1) - 1) >= lda) { max_nb = lda / std::max(para2d_global.dim0, para2d_global.dim1); } } - - para2d_local.init(lda,lda,max_nb,comm); + + para2d_local.init(lda, lda, max_nb, comm); std::vector h_local(para2d_local.get_col_size() * para2d_local.get_row_size()); std::vector s_local(para2d_local.get_col_size() * para2d_local.get_row_size()); std::vector wfc_2d(para2d_local.get_col_size() * para2d_local.get_row_size()); - + // distribute h and s to 2D - Cpxgemr2d(lda,lda,h,1,1,para2d_global.desc,h_local.data(),1,1,para2d_local.desc,para2d_local.blacs_ctxt); - Cpxgemr2d(lda,lda,s,1,1,para2d_global.desc,s_local.data(),1,1,para2d_local.desc,para2d_local.blacs_ctxt); + Cpxgemr2d(lda, lda, h, 1, 1, para2d_global.desc, h_local.data(), 1, 1, para2d_local.desc, para2d_local.blacs_ctxt); + Cpxgemr2d(lda, lda, s, 1, 1, para2d_global.desc, s_local.data(), 1, 1, para2d_local.desc, para2d_local.blacs_ctxt); - if (diag_subspace_method == 1) + if (diag_subspace == 1) { -#ifdef __ELPA +#ifdef __ELPA elpa_diag(comm, nband, h_local.data(), s_local.data(), ekb, wfc_2d.data(), para2d_local); #else - std::cout << "ERROR: try to use ELPA to solve the generalized eigenvalue problem, but ELPA is not compiled. " << std::endl; + std::cout << "ERROR: try to use ELPA to solve the generalized eigenvalue problem, but ELPA is not compiled. " + << std::endl; exit(1); -#endif +#endif } - else if (diag_subspace_method == 2) + else if (diag_subspace == 2) { - hsolver::pxxxgvx_diag(para2d_local.desc, para2d_local.get_row_size(), para2d_local.get_col_size(),nband, h_local.data(), s_local.data(), ekb, wfc_2d.data()); + hsolver::pxxxgvx_diag(para2d_local.desc, + para2d_local.get_row_size(), + para2d_local.get_col_size(), + nband, + h_local.data(), + s_local.data(), + ekb, + wfc_2d.data()); } - else{ - std::cout << "Error: parallel diagonalization method is not supported. " << "diag_subspace_method = " << diag_subspace_method << std::endl; + else + { + std::cout << "Error: parallel diagonalization method is not supported. " << "diag_subspace = " << diag_subspace + << std::endl; exit(1); } // gather wfc - Cpxgemr2d(lda,lda,wfc_2d.data(),1,1,para2d_local.desc,wfc,1,1,para2d_global.desc,para2d_local.blacs_ctxt); + Cpxgemr2d(lda, lda, wfc_2d.data(), 1, 1, para2d_local.desc, wfc, 1, 1, para2d_global.desc, para2d_local.blacs_ctxt); // free the context Cblacs_gridexit(para2d_local.blacs_ctxt); @@ -157,44 +157,45 @@ void Diago_HS_para( } // template instantiation -template void Diago_HS_para(double* h, - double* s, - const int lda, - const int nband, - typename GetTypeReal::type *const ekb, - double *const wfc, - const MPI_Comm& comm, - const int diag_subspace_method, - const int block_size); -template void Diago_HS_para>(std::complex* h, - std::complex* s, - const int lda, - const int nband, - typename GetTypeReal>::type *const ekb, - std::complex *const wfc, - const MPI_Comm& comm, - const int diag_subspace_method, - const int block_size); -template void Diago_HS_para(float* h, - float* s, - const int lda, - const int nband, - typename GetTypeReal::type *const ekb, - float *const wfc, - const MPI_Comm& comm, - const int diag_subspace_method, +template void Diago_HS_para(double* h, + double* s, + const int lda, + const int nband, + typename GetTypeReal::type* const ekb, + double* const wfc, + const MPI_Comm& comm, + const int diag_subspace, const int block_size); -template void Diago_HS_para>(std::complex* h, - std::complex* s, - const int lda, - const int nband, - typename GetTypeReal>::type *const ekb, - std::complex *const wfc, - const MPI_Comm& comm, - const int diag_subspace_method, - const int block_size); - +template void Diago_HS_para>(std::complex* h, + std::complex* s, + const int lda, + const int nband, + typename GetTypeReal>::type* const ekb, + std::complex* const wfc, + const MPI_Comm& comm, + const int diag_subspace, + const int block_size); + +template void Diago_HS_para(float* h, + float* s, + const int lda, + const int nband, + typename GetTypeReal::type* const ekb, + float* const wfc, + const MPI_Comm& comm, + const int diag_subspace, + const int block_size); + +template void Diago_HS_para>(std::complex* h, + std::complex* s, + const int lda, + const int nband, + typename GetTypeReal>::type* const ekb, + std::complex* const wfc, + const MPI_Comm& comm, + const int diag_subspace, + const int block_size); #endif diff --git a/source/module_hsolver/diag_hs_para.h b/source/module_hsolver/diag_hs_para.h index ce73199a00..fd54ebcae1 100644 --- a/source/module_hsolver/diag_hs_para.h +++ b/source/module_hsolver/diag_hs_para.h @@ -22,25 +22,24 @@ namespace hsolver * @param ekb to store the eigenvalues. * @param wfc to store the eigenvectors * @param comm the communicator - * @param diag_subspace_method the method to solve the generalized eigenvalue problem + * @param diag_subspace the method to solve the generalized eigenvalue problem * @param block_size the block size in 2d block cyclic distribution if use elpa or scalapack. * * @note 1. h and s should be full matrix in rank 0 of the communicator, and the other ranks is not concerned. * @note 2. wfc is complete in rank 0, and not store in other ranks. - * @note 3. diag_subspace_method should be 1: by elpa, 2: by scalapack + * @note 3. diag_subspace should be 1: by elpa, 2: by scalapack * @note 4. block_size should be 0 or a positive integer. If it is 0, then will use a value as large as possible that is allowed */ template -void Diago_HS_para( - T* h, - T* s, - const int lda, - const int nband, - typename GetTypeReal::type *const ekb, - T *const wfc, - const MPI_Comm& comm, - const int diag_subspace_method, - const int block_size=0); +void Diago_HS_para(T* h, + T* s, + const int lda, + const int nband, + typename GetTypeReal::type* const ekb, + T* const wfc, + const MPI_Comm& comm, + const int diag_subspace, + const int block_size = 0); #endif } // namespace hsolver diff --git a/source/module_hsolver/diago_dav_subspace.cpp b/source/module_hsolver/diago_dav_subspace.cpp index 3aa4b8c45b..83d9b50db3 100644 --- a/source/module_hsolver/diago_dav_subspace.cpp +++ b/source/module_hsolver/diago_dav_subspace.cpp @@ -24,11 +24,11 @@ Diago_DavSubspace::Diago_DavSubspace(const std::vector& precond const int& diag_nmax_in, const bool& need_subspace_in, const diag_comm_info& diag_comm_in, - const int diag_subspace_method_in, + const int diag_subspace_in, const int diago_subspace_bs_in) : precondition(precondition_in), n_band(nband_in), dim(nbasis_in), nbase_x(nband_in * david_ndim_in), diag_thr(diag_thr_in), iter_nmax(diag_nmax_in), is_subspace(need_subspace_in), diag_comm(diag_comm_in), - diag_subspace_method(diag_subspace_method_in), diago_subspace_bs(diago_subspace_bs_in) + diag_subspace(diag_subspace_in), diago_subspace_bs(diago_subspace_bs_in) { this->device = base_device::get_device_type(this->ctx); @@ -38,7 +38,7 @@ Diago_DavSubspace::Diago_DavSubspace(const std::vector& precond assert(david_ndim_in > 1); assert(david_ndim_in * nband_in < nbasis_in * this->diag_comm.nproc); - assert(diag_subspace_method >= 0 && diag_subspace_method < 3); + assert(diag_subspace >= 0 && diag_subspace < 3); // TODO: Added memory usage statistics @@ -573,7 +573,7 @@ void Diago_DavSubspace::diag_zhegvx(const int& nbase, } else { - if (this->diag_subspace_method == 0) + if (this->diag_subspace == 0) { if (this->diag_comm.rank == 0) { @@ -636,14 +636,14 @@ void Diago_DavSubspace::diag_zhegvx(const int& nbase, } } Diago_HS_para(h_diag.data(), - s_diag.data(), - nbase, - nband, - (*eigenvalue_iter).data(), - vcc_tmp.data(), - this->diag_comm.comm, - this->diag_subspace_method, - this->diago_subspace_bs); + s_diag.data(), + nbase, + nband, + (*eigenvalue_iter).data(), + vcc_tmp.data(), + this->diag_comm.comm, + this->diag_subspace, + this->diago_subspace_bs); if (this->diag_comm.rank == 0) { for (size_t i = 0; i < nband; i++) diff --git a/source/module_hsolver/diago_dav_subspace.h b/source/module_hsolver/diago_dav_subspace.h index baf984d999..7d9fb185fc 100644 --- a/source/module_hsolver/diago_dav_subspace.h +++ b/source/module_hsolver/diago_dav_subspace.h @@ -141,7 +141,7 @@ class Diago_DavSubspace bool test_exit_cond(const int& ntry, const int& notconv, const bool& scf); - int diag_subspace_method; // 0: LAPACK, 1: Gen-ELPA, 2: ScaLAPACK + int diag_subspace; // 0: LAPACK, 1: Gen-ELPA, 2: ScaLAPACK int diago_subspace_bs = 0; // the block size in 2d block cyclic distribution if use elpa or scalapack #ifdef __DSP diff --git a/source/module_hsolver/diago_pxxxgvx.cpp b/source/module_hsolver/diago_pxxxgvx.cpp index 24168894dd..51e29f27ff 100644 --- a/source/module_hsolver/diago_pxxxgvx.cpp +++ b/source/module_hsolver/diago_pxxxgvx.cpp @@ -1,78 +1,385 @@ +#include "diago_pxxxgvx.h" + +#include "module_base/blacs_connector.h" +#include "module_base/scalapack_connector.h" + #include -#include #include #include - -#include "diago_pxxxgvx.h" -#include "module_base/scalapack_connector.h" -#include "module_base/blacs_connector.h" +#include namespace hsolver { #ifdef __MPI -void pxxxgvx_(const int* itype, const char* jobz, const char* range, const char* uplo, - const int* n, double* A, const int* ia, const int* ja, const int*desca, double* B, const int* ib, const int* jb, const int*descb, - const double* vl, const double* vu, const int* il, const int* iu, - const double* abstol, int* m, int* nz, double* w, const double*orfac, double* Z, const int* iz, const int* jz, const int*descz, - double* work, int* lwork, double* rwork, int* lrwork, int*iwork, int*liwork, int* ifail, int*iclustr, double*gap, int* info) +/** + * @file diago_pxxxgvx.cpp + * @brief This file contains functions for performing parallel diagonalization using Scalapack. + * + * The functions in this file are designed to handle different data types (double, float, std::complex, + * std::complex) and perform diagonalization using Scalapack routines (pdsygvx_, pzhegvx_, pssygvx_, pchegvx_). + * + * The main functions are: + * - pxxxgvx_: Wrapper functions for Scalapack diagonalization routines. + * - pxxxgvx_post_processing: Post-processing function to handle errors and adjust parameters based on Scalapack's info + * output. + * - get_lwork: Helper functions to retrieve the optimal workspace size. + * - pxxxgvx_diag: Template function to perform diagonalization for different data types. + * + * Template instantiations for pxxxgvx_diag are provided for double, float, std::complex, and + * std::complex. + */ + +/** + * @brief Wrapper function for Scalapack's generalized eigensolver routines. + * + * @param itype Specifies the problem type to be solved. + * @param jobz Specifies whether to compute eigenvectors. + * @param range Specifies the range of eigenvalues to be found. + * @param uplo Specifies whether the upper or lower triangular part of the matrices is referenced. + * @param n The order of the matrices A and B. + * @param A The array containing the matrix A. + * @param ia The row index in the global array A. + * @param ja The column index in the global array A. + * @param desca The array descriptor for the distributed matrix A. + * @param B The array containing the matrix B. + * @param ib The row index in the global array B. + * @param jb The column index in the global array B. + * @param descb The array descriptor for the distributed matrix B. + * @param vl Lower bound of the interval to be searched for eigenvalues. + * @param vu Upper bound of the interval to be searched for eigenvalues. + * @param il Index of the smallest eigenvalue to be returned. + * @param iu Index of the largest eigenvalue to be returned. + * @param abstol The absolute error tolerance for the eigenvalues. + * @param m The total number of eigenvalues found. + * @param nz The total number of eigenvalues found in the interval (vl, vu]. + * @param w The array to store the eigenvalues. + * @param orfac The orthogonality factor. + * @param Z The array to store the eigenvectors. + * @param iz The row index in the global array Z. + * @param jz The column index in the global array Z. + * @param descz The array descriptor for the distributed matrix Z. + * @param work Workspace array. + * @param lwork The dimension of the array work. + * @param rwork Workspace array (not used in this function). + * @param lrwork The dimension of the array rwork (not used in this function). + * @param iwork Workspace array. + * @param liwork The dimension of the array iwork. + * @param ifail The array to store the indices of the eigenvectors that failed to converge. + * @param iclustr The array to store the indices of the eigenvalue clusters. + * @param gap The array to store the gaps between eigenvalue clusters. + * @param info Output status of the computation. + * + * @note for a uniform interface, rwork and lrwork are input arguments, but not used in pdsygvx_/pssygvx_ + */ +void pxxxgvx_(const int* itype, + const char* jobz, + const char* range, + const char* uplo, + const int* n, + double* A, + const int* ia, + const int* ja, + const int* desca, + double* B, + const int* ib, + const int* jb, + const int* descb, + const double* vl, + const double* vu, + const int* il, + const int* iu, + const double* abstol, + int* m, + int* nz, + double* w, + const double* orfac, + double* Z, + const int* iz, + const int* jz, + const int* descz, + double* work, + int* lwork, + double* rwork, + int* lrwork, + int* iwork, + int* liwork, + int* ifail, + int* iclustr, + double* gap, + int* info) { - // for a uniform interface, rwork and lrwork are input arguments, but not used in pdsygvx_/pssygvx_ - pdsygvx_(itype, jobz, range, uplo, - n, A, ia, ja, desca, B, ib, jb, descb, - vl, vu, il, iu, - abstol, m, nz, w, orfac, Z, iz, jz, descz, - work, lwork, iwork, liwork, ifail, iclustr, gap, info); + // double + pdsygvx_(itype, + jobz, + range, + uplo, + n, + A, + ia, + ja, + desca, + B, + ib, + jb, + descb, + vl, + vu, + il, + iu, + abstol, + m, + nz, + w, + orfac, + Z, + iz, + jz, + descz, + work, + lwork, + iwork, + liwork, + ifail, + iclustr, + gap, + info); } -void pxxxgvx_(const int* itype, const char* jobz, const char* range, const char* uplo, - const int* n, std::complex* A, const int* ia, const int* ja, const int*desca, std::complex* B, const int* ib, const int* jb, const int*descb, - const double* vl, const double* vu, const int* il, const int* iu, - const double* abstol, int* m, int* nz, double* w, const double*orfac, std::complex* Z, const int* iz, const int* jz, const int*descz, - std::complex* work, int* lwork, double* rwork, int* lrwork, int*iwork, int*liwork, int* ifail, int*iclustr, double*gap, int* info) +void pxxxgvx_(const int* itype, + const char* jobz, + const char* range, + const char* uplo, + const int* n, + std::complex* A, + const int* ia, + const int* ja, + const int* desca, + std::complex* B, + const int* ib, + const int* jb, + const int* descb, + const double* vl, + const double* vu, + const int* il, + const int* iu, + const double* abstol, + int* m, + int* nz, + double* w, + const double* orfac, + std::complex* Z, + const int* iz, + const int* jz, + const int* descz, + std::complex* work, + int* lwork, + double* rwork, + int* lrwork, + int* iwork, + int* liwork, + int* ifail, + int* iclustr, + double* gap, + int* info) { - pzhegvx_(itype, jobz, range, uplo, - n, A, ia, ja, desca, B, ib, jb, descb, - vl, vu, il, iu, - abstol, m, nz, w, orfac, Z, iz, jz, descz, - work, lwork,rwork, lrwork, iwork, liwork, ifail, iclustr, gap, info); + // std::complex + pzhegvx_(itype, + jobz, + range, + uplo, + n, + A, + ia, + ja, + desca, + B, + ib, + jb, + descb, + vl, + vu, + il, + iu, + abstol, + m, + nz, + w, + orfac, + Z, + iz, + jz, + descz, + work, + lwork, + rwork, + lrwork, + iwork, + liwork, + ifail, + iclustr, + gap, + info); } -void pxxxgvx_(const int* itype, const char* jobz, const char* range, const char* uplo, - const int* n, float* A, const int* ia, const int* ja, const int*desca, float* B, const int* ib, const int* jb, const int*descb, - const float* vl, const float* vu, const int* il, const int* iu, - const float* abstol, int* m, int* nz, float* w, const float*orfac, float* Z, const int* iz, const int* jz, const int*descz, - float* work, int* lwork, float* rwork, int* lrwork, int*iwork, int*liwork, int* ifail, int*iclustr, float*gap, int* info) +void pxxxgvx_(const int* itype, + const char* jobz, + const char* range, + const char* uplo, + const int* n, + float* A, + const int* ia, + const int* ja, + const int* desca, + float* B, + const int* ib, + const int* jb, + const int* descb, + const float* vl, + const float* vu, + const int* il, + const int* iu, + const float* abstol, + int* m, + int* nz, + float* w, + const float* orfac, + float* Z, + const int* iz, + const int* jz, + const int* descz, + float* work, + int* lwork, + float* rwork, + int* lrwork, + int* iwork, + int* liwork, + int* ifail, + int* iclustr, + float* gap, + int* info) { - pssygvx_(itype, jobz, range, uplo, - n, A, ia, ja, desca, B, ib, jb, descb, - vl, vu, il, iu, - abstol, m, nz, w, orfac, Z, iz, jz, descz, - work, lwork, iwork, liwork, ifail, iclustr, gap, info); + // float + pssygvx_(itype, + jobz, + range, + uplo, + n, + A, + ia, + ja, + desca, + B, + ib, + jb, + descb, + vl, + vu, + il, + iu, + abstol, + m, + nz, + w, + orfac, + Z, + iz, + jz, + descz, + work, + lwork, + iwork, + liwork, + ifail, + iclustr, + gap, + info); } -void pxxxgvx_(const int* itype, const char* jobz, const char* range, const char* uplo, - const int* n, std::complex* A, const int* ia, const int* ja, const int*desca, std::complex* B, const int* ib, const int* jb, const int*descb, - const float* vl, const float* vu, const int* il, const int* iu, - const float* abstol, int* m, int* nz, float* w, const float*orfac, std::complex* Z, const int* iz, const int* jz, const int*descz, - std::complex* work, int* lwork, float* rwork, int* lrwork, int*iwork, int*liwork, int* ifail, int*iclustr, float*gap, int* info) +void pxxxgvx_(const int* itype, + const char* jobz, + const char* range, + const char* uplo, + const int* n, + std::complex* A, + const int* ia, + const int* ja, + const int* desca, + std::complex* B, + const int* ib, + const int* jb, + const int* descb, + const float* vl, + const float* vu, + const int* il, + const int* iu, + const float* abstol, + int* m, + int* nz, + float* w, + const float* orfac, + std::complex* Z, + const int* iz, + const int* jz, + const int* descz, + std::complex* work, + int* lwork, + float* rwork, + int* lrwork, + int* iwork, + int* liwork, + int* ifail, + int* iclustr, + float* gap, + int* info) { - pchegvx_(itype, jobz, range, uplo, - n, A, ia, ja, desca, B, ib, jb, descb, - vl, vu, il, iu, - abstol, m, nz, w, orfac, Z, iz, jz, descz, - work, lwork,rwork, lrwork, iwork, liwork, ifail, iclustr, gap, info); + // std::complex + pchegvx_(itype, + jobz, + range, + uplo, + n, + A, + ia, + ja, + desca, + B, + ib, + jb, + descb, + vl, + vu, + il, + iu, + abstol, + m, + nz, + w, + orfac, + Z, + iz, + jz, + descz, + work, + lwork, + rwork, + lrwork, + iwork, + liwork, + ifail, + iclustr, + gap, + info); } -// post processing for pdsygvx/pzhegvx/pdsygvx/pzhegvx -void pxxxgvx_post_processing(const int info, - const std::vector& ifail, - const std::vector& iclustr, - const int M, - const int NZ, - const int nbands, - int& degeneracy_max) +void pxxxgvx_post_processing(const int info, + const std::vector& ifail, + const std::vector& iclustr, + const int M, + const int NZ, + const int nbands, + int& degeneracy_max) { const std::string str_info = "Scalapack diagonalization: \n info = " + std::to_string(info) + ".\n"; @@ -83,11 +390,10 @@ void pxxxgvx_post_processing(const int info, else if (info < 0) { const int info_negative = -info; - const std::string str_index - = (info_negative > 100) - ? std::to_string(info_negative / 100) + "-th argument " - + std::to_string(info_negative % 100) + "-entry is illegal.\n" - : std::to_string(info_negative) + "-th argument is illegal.\n"; + const std::string str_index = (info_negative > 100) + ? std::to_string(info_negative / 100) + "-th argument " + + std::to_string(info_negative % 100) + "-entry is illegal.\n" + : std::to_string(info_negative) + "-th argument is illegal.\n"; throw std::runtime_error(str_info + str_index); } else if (info % 2) @@ -102,13 +408,12 @@ void pxxxgvx_post_processing(const int info, else if (info / 2 % 2) { int degeneracy_need = 0; - for (int irank = 0; irank < iclustr.size()/2; ++irank) + for (int irank = 0; irank < iclustr.size() / 2; ++irank) { degeneracy_need = std::max(degeneracy_need, iclustr[2 * irank + 1] - iclustr[2 * irank]); } const std::string str_need = "degeneracy_need = " + std::to_string(degeneracy_need) + ".\n"; - const std::string str_saved - = "degeneracy_saved = " + std::to_string(degeneracy_max) + ".\n"; + const std::string str_saved = "degeneracy_saved = " + std::to_string(degeneracy_max) + ".\n"; if (degeneracy_need <= degeneracy_max) { throw std::runtime_error(str_info + str_need + str_saved); @@ -124,8 +429,7 @@ void pxxxgvx_post_processing(const int info, { const std::string str_M = "M = " + std::to_string(M) + ".\n"; const std::string str_NZ = "NZ = " + std::to_string(NZ) + ".\n"; - const std::string str_NBANDS - = "Number of eigenvalues solved = " + std::to_string(nbands) + ".\n"; + const std::string str_NBANDS = "Number of eigenvalues solved = " + std::to_string(nbands) + ".\n"; throw std::runtime_error(str_info + str_M + str_NZ + str_NBANDS); } else if (info / 16 % 2) @@ -160,33 +464,48 @@ void get_lwork(int& lwork, std::vector>& work) } template -void pxxxgvx_diag(const int *const desc, - const int ncol, - const int nrow, - const int nbands, - const T *const h_mat, - const T *const s_mat, - typename GetTypeReal::type *const ekb, - T *const wfc_2d) +void pxxxgvx_diag(const int* const desc, + const int ncol, + const int nrow, + const int nbands, + const T* const h_mat, + const T* const s_mat, + typename GetTypeReal::type* const ekb, + T* const wfc_2d) { - int nprow, npcol, myprow, mypcol; + int nprow = 1; + int npcol = 1; + int myprow = 0; + int mypcol = 0; Cblacs_gridinfo(desc[1], &nprow, &npcol, &myprow, &mypcol); int dsize = nprow * npcol; int degeneracy_max = 12; // only used for complex and complex while (true) { - std::vector h_tmp(ncol * nrow); - std::vector s_tmp(ncol * nrow); + std::vector h_tmp(ncol * nrow, 0); + std::vector s_tmp(ncol * nrow, 0); memcpy(h_tmp.data(), h_mat, sizeof(T) * ncol * nrow); memcpy(s_tmp.data(), s_mat, sizeof(T) * ncol * nrow); int ndim_global = desc[2]; - const char jobz = 'V', range = 'I', uplo = 'U'; - const int itype = 1, il = 1, iu = nbands, one = 1; - int M = 0, NZ = 0, lwork = -1, lrwork = -1, liwork = -1, info = 0; - const typename GetTypeReal::type abstol = 0, orfac = -1; - const typename GetTypeReal::type vl = 0, vu = 0; + const char jobz = 'V'; + const char range = 'I'; + const char uplo = 'U'; + const int itype = 1; + const int il = 1; + const int iu = nbands; + const int one = 1; + int M = 0; + int NZ = 0; + int lwork = -1; + int lrwork = -1; + int liwork = -1; + int info = 0; + const typename GetTypeReal::type abstol = 0; + const typename GetTypeReal::type orfac = -1; + const typename GetTypeReal::type vl = 0; + const typename GetTypeReal::type vu = 0; std::vector work(1, 0); std::vector::type> rwork(3, 0); // only used for complex and complex std::vector iwork(1, 0); @@ -221,7 +540,7 @@ void pxxxgvx_diag(const int *const desc, &one, desc, work.data(), - &lwork, // is not used for real data type + &lwork, // is not used for real data type rwork.data(), // is not used for real data type &lrwork, iwork.data(), @@ -295,10 +614,10 @@ void pxxxgvx_diag(const int *const desc, { return; } - pxxxgvx_post_processing(info, ifail,iclustr, M,NZ,nbands,degeneracy_max); + pxxxgvx_post_processing(info, ifail, iclustr, M, NZ, nbands, degeneracy_max); // break the loop for real data type - if (std::is_same::value || std::is_same::value) + if (std::is_same::value || std::is_same::value) { return; } @@ -306,39 +625,46 @@ void pxxxgvx_diag(const int *const desc, } // template instantiation -template void pxxxgvx_diag(const int *const desc, +// double +template void pxxxgvx_diag(const int* const desc, + const int ncol, + const int nrow, + const int nbands, + const double* const h_mat, + const double* const s_mat, + double* const ekb, + double* const wfc_2d); + +// std::complex +template void pxxxgvx_diag(const int* const desc, const int ncol, const int nrow, const int nbands, - const double *const h_mat, - const double *const s_mat, - double *const ekb, - double *const wfc_2d); -template void pxxxgvx_diag(const int *const desc, + const std::complex* const h_mat, + const std::complex* const s_mat, + double* const ekb, + std::complex* const wfc_2d); + +// float +template void pxxxgvx_diag(const int* const desc, + const int ncol, + const int nrow, + const int nbands, + const float* const h_mat, + const float* const s_mat, + float* const ekb, + float* const wfc_2d); + +// std::complex +template void pxxxgvx_diag(const int* const desc, const int ncol, const int nrow, const int nbands, - const std::complex *const h_mat, - const std::complex *const s_mat, - double *const ekb, - std::complex *const wfc_2d); -template void pxxxgvx_diag(const int *const desc, - const int ncol, - const int nrow, - const int nbands, - const float *const h_mat, - const float *const s_mat, - float *const ekb, - float *const wfc_2d); -template void pxxxgvx_diag(const int *const desc, - const int ncol, - const int nrow, - const int nbands, - const std::complex *const h_mat, - const std::complex *const s_mat, - float *const ekb, - std::complex *const wfc_2d); + const std::complex* const h_mat, + const std::complex* const s_mat, + float* const ekb, + std::complex* const wfc_2d); #endif -} // namespace \ No newline at end of file +} // namespace hsolver \ No newline at end of file diff --git a/source/module_hsolver/diago_pxxxgvx.h b/source/module_hsolver/diago_pxxxgvx.h index 6684a5ea6f..e4f9ba7896 100644 --- a/source/module_hsolver/diago_pxxxgvx.h +++ b/source/module_hsolver/diago_pxxxgvx.h @@ -8,7 +8,7 @@ namespace hsolver #ifdef __MPI /** - * @brief generalized eigenvalue problem using pdsygvx/pzhegvx/pdsygvx/pzhegvx + * @brief Wrapper function for Scalapack's generalized eigensolver routines: pdsygvx_, pzhegvx_, pssygvx_, pchegvx_. * * @param desc the descriptor of scalapack descriptor * @param ncol the number of columns of the H/S matrix in current processor diff --git a/source/module_hsolver/hsolver_pw.cpp b/source/module_hsolver/hsolver_pw.cpp index 83bac220f8..eb71f90b5a 100644 --- a/source/module_hsolver/hsolver_pw.cpp +++ b/source/module_hsolver/hsolver_pw.cpp @@ -501,7 +501,7 @@ void HSolverPW::hamiltSolvePsiK(hamilt::Hamilt* hm, this->diag_iter_max, this->need_subspace, comm_info, - PARAM.inp.diag_subspace_method, + PARAM.inp.diag_subspace, PARAM.inp.nb2d); DiagoIterAssist::avg_iter += static_cast( diff --git a/source/module_hsolver/test/test_hsolver_pw.cpp b/source/module_hsolver/test/test_hsolver_pw.cpp index 8c5da0d1c5..505f00a9df 100644 --- a/source/module_hsolver/test/test_hsolver_pw.cpp +++ b/source/module_hsolver/test/test_hsolver_pw.cpp @@ -32,6 +32,62 @@ * - 8. solve() * - lcao_in_pw specific implementation */ + +// mock Diago_HS_para +namespace hsolver { +template +void Diago_HS_para(T* h, + T* s, + const int lda, + const int nband, + typename GetTypeReal::type* const ekb, + T* const wfc, + const MPI_Comm& comm, + const int diag_subspace, + const int block_size = 0) +{} +template void Diago_HS_para(double* h, + double* s, + const int lda, + const int nband, + typename GetTypeReal::type* const ekb, + double* const wfc, + const MPI_Comm& comm, + const int diag_subspace, + const int block_size); + +template void Diago_HS_para>(std::complex* h, + std::complex* s, + const int lda, + const int nband, + typename GetTypeReal>::type* const ekb, + std::complex* const wfc, + const MPI_Comm& comm, + const int diag_subspace, + const int block_size); + +template void Diago_HS_para(float* h, + float* s, + const int lda, + const int nband, + typename GetTypeReal::type* const ekb, + float* const wfc, + const MPI_Comm& comm, + const int diag_subspace, + const int block_size); + +template void Diago_HS_para>(std::complex* h, + std::complex* s, + const int lda, + const int nband, + typename GetTypeReal>::type* const ekb, + std::complex* const wfc, + const MPI_Comm& comm, + const int diag_subspace, + const int block_size); + +} + class TestHSolverPW : public ::testing::Test { public: ModulePW::PW_Basis_K pwbk; diff --git a/source/module_hsolver/test/test_hsolver_sdft.cpp b/source/module_hsolver/test/test_hsolver_sdft.cpp index 7b234e953e..5bf2c650d4 100644 --- a/source/module_hsolver/test/test_hsolver_sdft.cpp +++ b/source/module_hsolver/test/test_hsolver_sdft.cpp @@ -23,6 +23,58 @@ Sto_Func::Sto_Func() } template class Sto_Func; +// mock Diago_HS_para +namespace hsolver { +template +void Diago_HS_para(T* h, + T* s, + const int lda, + const int nband, + typename GetTypeReal::type* const ekb, + T* const wfc, + const MPI_Comm& comm, + const int diag_subspace, + const int block_size = 0) +{} +template void Diago_HS_para(double* h, + double* s, + const int lda, + const int nband, + typename GetTypeReal::type* const ekb, + double* const wfc, + const MPI_Comm& comm, + const int diag_subspace, + const int block_size); +template void Diago_HS_para>(std::complex* h, + std::complex* s, + const int lda, + const int nband, + typename GetTypeReal>::type* const ekb, + std::complex* const wfc, + const MPI_Comm& comm, + const int diag_subspace, + const int block_size); +template void Diago_HS_para(float* h, + float* s, + const int lda, + const int nband, + typename GetTypeReal::type* const ekb, + float* const wfc, + const MPI_Comm& comm, + const int diag_subspace, + const int block_size); +template void Diago_HS_para>(std::complex* h, + std::complex* s, + const int lda, + const int nband, + typename GetTypeReal>::type* const ekb, + std::complex* const wfc, + const MPI_Comm& comm, + const int diag_subspace, + const int block_size); + +} + template <> elecstate::ElecStatePW, base_device::DEVICE_CPU>::ElecStatePW(ModulePW::PW_Basis_K* wfc_basis_in, diff --git a/source/module_io/read_input_item_system.cpp b/source/module_io/read_input_item_system.cpp index 3fd2e48bd5..184bafd363 100644 --- a/source/module_io/read_input_item_system.cpp +++ b/source/module_io/read_input_item_system.cpp @@ -478,9 +478,9 @@ void ReadInput::item_system() this->add_item(item); } { - Input_Item item("diag_subspace_method"); + Input_Item item("diag_subspace"); item.annotation = "method of subspace diagonalization in dav_subspace. 0:LaPack; 1:genelpa, 2:scalapack"; - read_sync_int(input.diag_subspace_method); + read_sync_int(input.diag_subspace); this->add_item(item); } { diff --git a/source/module_lr/hsolver_lrtd.hpp b/source/module_lr/hsolver_lrtd.hpp index 3694631cfc..1a96991f47 100644 --- a/source/module_lr/hsolver_lrtd.hpp +++ b/source/module_lr/hsolver_lrtd.hpp @@ -96,7 +96,7 @@ namespace LR maxiter, false, //always do the subspace diag (check the implementation) comm_info, - PARAM.inp.diag_subspace_method, + PARAM.inp.diag_subspace, PARAM.inp.nb2d); std::vector ethr_band(nband, diag_ethr); hsolver::DiagoIterAssist::avg_iter diff --git a/source/module_parameter/input_parameter.h b/source/module_parameter/input_parameter.h index a593223f43..6d897888fd 100644 --- a/source/module_parameter/input_parameter.h +++ b/source/module_parameter/input_parameter.h @@ -88,7 +88,7 @@ struct Input_para double pw_diag_thr = 0.01; ///< used in cg method int pw_diag_ndim = 4; ///< dimension of workspace for Davidson diagonalization int diago_cg_prec = 1; ///< mohan add 2012-03-31 - int diag_subspace_method = 0; // 0: Lapack, 1: elpa, 2: scalapack + int diag_subspace = 0; // 0: Lapack, 1: elpa, 2: scalapack std::string smearing_method = "gauss"; ///< "gauss", ///< "mp","methfessel-paxton" From d729dd55c012056ddc417b2574eb5dbe6617ba82 Mon Sep 17 00:00:00 2001 From: root Date: Fri, 22 Nov 2024 18:37:36 +0800 Subject: [PATCH 05/12] fix --- source/module_hsolver/diago_dav_subspace.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/source/module_hsolver/diago_dav_subspace.cpp b/source/module_hsolver/diago_dav_subspace.cpp index 83d9b50db3..d4947e7715 100644 --- a/source/module_hsolver/diago_dav_subspace.cpp +++ b/source/module_hsolver/diago_dav_subspace.cpp @@ -11,7 +11,10 @@ #include "module_hsolver/diag_hs_para.h" #include + +#ifdef __MPI #include +#endif using namespace hsolver; From 7bb7414560fa3c5292910b39a30b1d9ad11bc6f1 Mon Sep 17 00:00:00 2001 From: root Date: Fri, 22 Nov 2024 19:09:30 +0800 Subject: [PATCH 06/12] fix --- source/module_hsolver/diag_hs_para.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/source/module_hsolver/diag_hs_para.cpp b/source/module_hsolver/diag_hs_para.cpp index ed558e492d..0ac87b9eaf 100644 --- a/source/module_hsolver/diag_hs_para.cpp +++ b/source/module_hsolver/diag_hs_para.cpp @@ -3,7 +3,10 @@ #include "module_base/scalapack_connector.h" #include "module_basis/module_ao/parallel_2d.h" #include "module_hsolver/diago_pxxxgvx.h" + +#ifdef __ELPA #include "module_hsolver/genelpa/elpa_solver.h" +#endif #include @@ -186,7 +189,7 @@ template void Diago_HS_para(float* h, const MPI_Comm& comm, const int diag_subspace, const int block_size); - + template void Diago_HS_para>(std::complex* h, std::complex* s, const int lda, From 68cb71c27b3f9087fb7353b22efaf4e882f2691c Mon Sep 17 00:00:00 2001 From: root Date: Mon, 25 Nov 2024 10:24:45 +0800 Subject: [PATCH 07/12] fix test --- source/module_hsolver/test/test_diago_hs_para.cpp | 9 +-------- tests/integrate/102_PW_DS_davsubspace_sca/INPUT | 2 +- 2 files changed, 2 insertions(+), 9 deletions(-) diff --git a/source/module_hsolver/test/test_diago_hs_para.cpp b/source/module_hsolver/test/test_diago_hs_para.cpp index 29ed7e30a1..6115d930c2 100644 --- a/source/module_hsolver/test/test_diago_hs_para.cpp +++ b/source/module_hsolver/test/test_diago_hs_para.cpp @@ -126,14 +126,7 @@ void test_diago_hs(int lda, int nb, int random_seed, int nbands, int diag_type, h_mat.resize(lda * lda); s_mat.resize(lda * lda); wfc.resize(lda * lda); - //generate_random_hs(lda, random_seed, h_mat, s_mat); - //read h from h.dat and s from s.dat, - std::ifstream hfile("h.dat"); - hfile.read((char*)h_mat.data(), lda * lda * sizeof(T)); - hfile.close(); - std::ifstream sfile("s.dat"); - sfile.read((char*)s_mat.data(), lda * lda * sizeof(T)); - sfile.close(); + generate_random_hs(lda, random_seed, h_mat, s_mat); } hsolver::Diago_HS_para(h_mat.data(), s_mat.data(), lda, nbands,ekb.data(), wfc.data(), comm, diag_type, nb); diff --git a/tests/integrate/102_PW_DS_davsubspace_sca/INPUT b/tests/integrate/102_PW_DS_davsubspace_sca/INPUT index 0cbffad0f8..b060c92bdd 100644 --- a/tests/integrate/102_PW_DS_davsubspace_sca/INPUT +++ b/tests/integrate/102_PW_DS_davsubspace_sca/INPUT @@ -25,5 +25,5 @@ mixing_type plain mixing_beta 0.5 ks_solver dav_subspace -diag_subspace_method 2 +diag_subspace 2 nb2d 4 From ed488e7a2176fb033ada236b6daeed78dae5c454 Mon Sep 17 00:00:00 2001 From: root Date: Fri, 13 Dec 2024 16:35:37 +0800 Subject: [PATCH 08/12] fix --- source/module_hsolver/diag_hs_para.cpp | 2 +- source/module_hsolver/diag_hs_para.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/source/module_hsolver/diag_hs_para.cpp b/source/module_hsolver/diag_hs_para.cpp index 0ac87b9eaf..2722a02c07 100644 --- a/source/module_hsolver/diag_hs_para.cpp +++ b/source/module_hsolver/diag_hs_para.cpp @@ -1,7 +1,7 @@ #include "module_hsolver/diag_hs_para.h" #include "module_base/scalapack_connector.h" -#include "module_basis/module_ao/parallel_2d.h" +#include "module_base/parallel_2d.h" #include "module_hsolver/diago_pxxxgvx.h" #ifdef __ELPA diff --git a/source/module_hsolver/diag_hs_para.h b/source/module_hsolver/diag_hs_para.h index fd54ebcae1..222adca0f5 100644 --- a/source/module_hsolver/diag_hs_para.h +++ b/source/module_hsolver/diag_hs_para.h @@ -1,4 +1,4 @@ -#include "module_basis/module_ao/parallel_2d.h" +#include "module_base/parallel_2d.h" #include "module_base/macros.h" #ifdef __MPI From 32066231f6ae129787674fa6cc63ebb2d510fb8c Mon Sep 17 00:00:00 2001 From: root Date: Fri, 13 Dec 2024 17:26:20 +0800 Subject: [PATCH 09/12] fix pyabacus --- .../pyabacus/src/hsolver/py_diago_dav_subspace.hpp | 8 +++++--- python/pyabacus/src/hsolver/py_hsolver.cpp | 11 ++++++++++- python/pyabacus/src/pyabacus/hsolver/_hsolver.py | 13 +++++++++++-- source/module_hsolver/test/CMakeLists.txt | 2 +- 4 files changed, 27 insertions(+), 7 deletions(-) diff --git a/python/pyabacus/src/hsolver/py_diago_dav_subspace.hpp b/python/pyabacus/src/hsolver/py_diago_dav_subspace.hpp index 5af16e11d4..897d8d668e 100644 --- a/python/pyabacus/src/hsolver/py_diago_dav_subspace.hpp +++ b/python/pyabacus/src/hsolver/py_diago_dav_subspace.hpp @@ -108,7 +108,9 @@ class PyDiagoDavSubspace bool need_subspace, std::vector& diag_ethr, bool scf_type, - hsolver::diag_comm_info comm_info + hsolver::diag_comm_info comm_info, + int diag_subspace, + int nb2d ) { auto hpsi_func = [mm_op] ( std::complex *psi_in, @@ -139,8 +141,8 @@ class PyDiagoDavSubspace max_iter, need_subspace, comm_info, - PARAM.inp.diag_subspace, - PARAM.inp.nb2d + diag_subspace, + nb2d ); return obj->diag(hpsi_func, psi, nbasis, eigenvalue, diag_ethr, scf_type); diff --git a/python/pyabacus/src/hsolver/py_hsolver.cpp b/python/pyabacus/src/hsolver/py_hsolver.cpp index dfe7063ea4..e791fe9f09 100644 --- a/python/pyabacus/src/hsolver/py_hsolver.cpp +++ b/python/pyabacus/src/hsolver/py_hsolver.cpp @@ -67,6 +67,13 @@ void bind_hsolver(py::module& m) where the initial precision of eigenvalue calculation can be coarse. If false, it indicates a non-self-consistent field (non-SCF) calculation, where high precision in eigenvalue calculation is required from the start. + comm_info : diag_comm_info + The communicator information. + diago_subspace : int + The method to solve the generalized eigenvalue problem. + 0: LAPACK, 1: Gen-ELPA, 2: ScaLAPACK + nb2d : int + The block size in 2d block cyclic distribution if use elpa or scalapack. )pbdoc", "mm_op"_a, "precond_vec"_a, @@ -76,7 +83,9 @@ void bind_hsolver(py::module& m) "need_subspace"_a, "diag_ethr"_a, "scf_type"_a, - "comm_info"_a) + "comm_info"_a, + "diago_subspace"_a, + "nb2d"_a) .def("set_psi", &py_hsolver::PyDiagoDavSubspace::set_psi, R"pbdoc( Set the initial guess of the eigenvectors, i.e. the wave functions. )pbdoc", "psi_in"_a) diff --git a/python/pyabacus/src/pyabacus/hsolver/_hsolver.py b/python/pyabacus/src/pyabacus/hsolver/_hsolver.py index 5d2b11d99a..7c60ce538c 100644 --- a/python/pyabacus/src/pyabacus/hsolver/_hsolver.py +++ b/python/pyabacus/src/pyabacus/hsolver/_hsolver.py @@ -34,7 +34,9 @@ def dav_subspace( max_iter: int = 1000, need_subspace: bool = False, diag_ethr: Union[List[float], None] = None, - scf_type: bool = False + scf_type: bool = False, + diag_subspace: int = 0, + nb2d: int = 0 ) -> Tuple[NDArray[np.float64], NDArray[np.complex128]]: """ A function to diagonalize a matrix using the Davidson-Subspace method. @@ -67,6 +69,11 @@ def dav_subspace( If True, the initial precision of eigenvalue calculation can be coarse. If False, it indicates a non-self-consistent field (non-SCF) calculation, where high precision in eigenvalue calculation is required from the start. + diag_subspace : int, optional + The method to do the diagonalization, by default 0. + 0: LAPACK, 1: Gen-elpa, 2: Scalapack + nb2d : int, optional + The block size for 2D decomposition, by default 0, which will be automatically set. Returns ------- @@ -101,7 +108,9 @@ def dav_subspace( need_subspace, diag_ethr, scf_type, - comm_info + comm_info, + diag_subspace, + nb2d ) e = _diago_obj_dav_subspace.get_eigenvalue() diff --git a/source/module_hsolver/test/CMakeLists.txt b/source/module_hsolver/test/CMakeLists.txt index fd93894e3e..fdb447a09d 100644 --- a/source/module_hsolver/test/CMakeLists.txt +++ b/source/module_hsolver/test/CMakeLists.txt @@ -157,7 +157,7 @@ install(FILES parallel_k2d_test.sh DESTINATION ${CMAKE_CURRENT_BINARY_DIR}) AddTest( TARGET hsolver_diago_hs_parallel LIBS parameter ${math_libs} ELPA::ELPA base device MPI::MPI_CXX genelpa psi - SOURCES test_diago_hs_para.cpp ../diag_hs_para.cpp ../diago_pxxxgvx.cpp ../../module_basis/module_ao/parallel_2d.cpp ../diago_elpa.cpp ../diago_scalapack.cpp + SOURCES test_diago_hs_para.cpp ../diag_hs_para.cpp ../diago_pxxxgvx.cpp ../diago_elpa.cpp ../diago_scalapack.cpp ) find_program(BASH bash) From f4d1ea869f009ce63eccf46cf46c7c904e417ae6 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci-lite[bot]" <117423508+pre-commit-ci-lite[bot]@users.noreply.github.com> Date: Fri, 13 Dec 2024 10:14:19 +0000 Subject: [PATCH 10/12] [pre-commit.ci lite] apply automatic fixes --- source/module_hsolver/test/test_diago_hs_para.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/source/module_hsolver/test/test_diago_hs_para.cpp b/source/module_hsolver/test/test_diago_hs_para.cpp index 6115d930c2..0196622049 100644 --- a/source/module_hsolver/test/test_diago_hs_para.cpp +++ b/source/module_hsolver/test/test_diago_hs_para.cpp @@ -180,7 +180,8 @@ void test_performance(int lda, int nb, int nbands, MPI_Comm comm,int case_numb, std::vector time_scalap(case_numb, 0); std::vector time_lapack(case_numb, 0); - if (my_rank == 0) std::cout << "Random matrix "; + if (my_rank == 0) { std::cout << "Random matrix "; +} for (int randomi = 0; randomi < case_numb; ++randomi) { From 124cdf35b9fd34d7d514824bf0016ab698c1be8c Mon Sep 17 00:00:00 2001 From: root Date: Sat, 14 Dec 2024 10:00:19 +0800 Subject: [PATCH 11/12] fix doc --- docs/advanced/input_files/input-main.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/advanced/input_files/input-main.md b/docs/advanced/input_files/input-main.md index 2e1ebea719..bfa5879899 100644 --- a/docs/advanced/input_files/input-main.md +++ b/docs/advanced/input_files/input-main.md @@ -39,7 +39,7 @@ - [pw\_diag\_thr](#pw_diag_thr) - [pw\_diag\_nmax](#pw_diag_nmax) - [pw\_diag\_ndim](#pw_diag_ndim) - - [diag\_subspace\_method](#diag_subspace) + - [diag\_subspace](#diag_subspace) - [erf\_ecut](#erf_ecut) - [fft\_mode](#fft_mode) - [erf\_height](#erf_height) From 9825563c76beb5aa4eb7ab7359059c9730a48f5c Mon Sep 17 00:00:00 2001 From: root Date: Wed, 18 Dec 2024 23:09:07 +0800 Subject: [PATCH 12/12] update the doc --- docs/advanced/input_files/input-main.md | 24 +++++++++++-------- source/module_hsolver/diag_hs_para.cpp | 10 ++++---- source/module_hsolver/diag_hs_para.h | 2 +- source/module_hsolver/diago_dav_subspace.cpp | 2 +- .../test/test_diago_hs_para.cpp | 6 ++--- .../module_hsolver/test/test_hsolver_pw.cpp | 12 +++++----- .../module_hsolver/test/test_hsolver_sdft.cpp | 12 +++++----- 7 files changed, 36 insertions(+), 32 deletions(-) diff --git a/docs/advanced/input_files/input-main.md b/docs/advanced/input_files/input-main.md index bfa5879899..3d8f82f98d 100644 --- a/docs/advanced/input_files/input-main.md +++ b/docs/advanced/input_files/input-main.md @@ -21,6 +21,7 @@ - [kspacing](#kspacing) - [min\_dist\_coef](#min_dist_coef) - [device](#device) + - [nb2d](#nb2d) - [precision](#precision) - [Variables related to input files](#variables-related-to-input-files) - [stru\_file](#stru_file) @@ -45,7 +46,6 @@ - [erf\_height](#erf_height) - [erf\_sigma](#erf_sigma) - [Numerical atomic orbitals related variables](#numerical-atomic-orbitals-related-variables) - - [nb2d](#nb2d) - [lmaxmax](#lmaxmax) - [lcao\_ecut](#lcao_ecut) - [lcao\_dk](#lcao_dk) @@ -667,6 +667,19 @@ If only one value is set (such as `kspacing 0.5`), then kspacing values of a/b/c - cg/bpcg/dav ks_solver: required by the `single` precision options - **Default**: double +### nb2d + +- **Type**: Integer +- **Description**: When using elpa or scalapack to solver the eigenvalue problem, the data should be distributed by the two-dimensional block-cyclic distribution. This paramter specifies the size of the block. It is valid for: + - [ks_solver](#ks_solver) is genelpa or scalapack_gvx. If nb2d is set to 0, then it will be automatically set in the program according to the size of atomic orbital basis: + - if size <= 500: nb2d = 1 + - if 500 < size <= 1000: nb2d = 32 + - if size > 1000: nb2d = 64; + - [ks_solver](#ks_solver) is dav_subspace, and [diag_subspace](#diag_subspace) is 1 or 2. It is the block size for the diagonization of subspace. If it is set to 0, then it will be automatically set in the program according to the number of band: + - if number of band > 500: nb2d = 32 + - if number of band < 500: nb2d = 16 +- **Default**: 0 + [back to top](#full-list-of-input-keywords) ## Variables related to input files @@ -842,15 +855,6 @@ These variables are used to control the plane wave related parameters. These variables are used to control the numerical atomic orbitals related parameters. -### nb2d - -- **Type**: Integer -- **Description**: In LCAO calculations, we arrange the total number of processors in an 2D array, so that we can partition the wavefunction matrix (number of bands*total size of atomic orbital basis) and distribute them in this 2D array. When the system is large, we group processors into sizes of nb2d, so that multiple processors take care of one row block (a group of atomic orbitals) in the wavefunction matrix. If set to 0, nb2d will be automatically set in the program according to the size of atomic orbital basis: - - if size <= 500 : nb2d = 1 - - if 500 < size <= 1000 : nb2d = 32 - - if size > 1000 : nb2d = 64; -- **Default**: 0 - ### lmaxmax - **Type**: Integer diff --git a/source/module_hsolver/diag_hs_para.cpp b/source/module_hsolver/diag_hs_para.cpp index 2722a02c07..43d25a3112 100644 --- a/source/module_hsolver/diag_hs_para.cpp +++ b/source/module_hsolver/diag_hs_para.cpp @@ -71,7 +71,7 @@ void elpa_diag(MPI_Comm comm, #ifdef __MPI template -void Diago_HS_para(T* h, +void diago_hs_para(T* h, T* s, const int lda, const int nband, @@ -160,7 +160,7 @@ void Diago_HS_para(T* h, } // template instantiation -template void Diago_HS_para(double* h, +template void diago_hs_para(double* h, double* s, const int lda, const int nband, @@ -170,7 +170,7 @@ template void Diago_HS_para(double* h, const int diag_subspace, const int block_size); -template void Diago_HS_para>(std::complex* h, +template void diago_hs_para>(std::complex* h, std::complex* s, const int lda, const int nband, @@ -180,7 +180,7 @@ template void Diago_HS_para>(std::complex* h, const int diag_subspace, const int block_size); -template void Diago_HS_para(float* h, +template void diago_hs_para(float* h, float* s, const int lda, const int nband, @@ -190,7 +190,7 @@ template void Diago_HS_para(float* h, const int diag_subspace, const int block_size); -template void Diago_HS_para>(std::complex* h, +template void diago_hs_para>(std::complex* h, std::complex* s, const int lda, const int nband, diff --git a/source/module_hsolver/diag_hs_para.h b/source/module_hsolver/diag_hs_para.h index 222adca0f5..659f3b0781 100644 --- a/source/module_hsolver/diag_hs_para.h +++ b/source/module_hsolver/diag_hs_para.h @@ -31,7 +31,7 @@ namespace hsolver * @note 4. block_size should be 0 or a positive integer. If it is 0, then will use a value as large as possible that is allowed */ template -void Diago_HS_para(T* h, +void diago_hs_para(T* h, T* s, const int lda, const int nband, diff --git a/source/module_hsolver/diago_dav_subspace.cpp b/source/module_hsolver/diago_dav_subspace.cpp index b398b52676..82dadcb0d0 100644 --- a/source/module_hsolver/diago_dav_subspace.cpp +++ b/source/module_hsolver/diago_dav_subspace.cpp @@ -638,7 +638,7 @@ void Diago_DavSubspace::diag_zhegvx(const int& nbase, } } } - Diago_HS_para(h_diag.data(), + diago_hs_para(h_diag.data(), s_diag.data(), nbase, nband, diff --git a/source/module_hsolver/test/test_diago_hs_para.cpp b/source/module_hsolver/test/test_diago_hs_para.cpp index 0196622049..51efda5e05 100644 --- a/source/module_hsolver/test/test_diago_hs_para.cpp +++ b/source/module_hsolver/test/test_diago_hs_para.cpp @@ -128,7 +128,7 @@ void test_diago_hs(int lda, int nb, int random_seed, int nbands, int diag_type, wfc.resize(lda * lda); generate_random_hs(lda, random_seed, h_mat, s_mat); } - hsolver::Diago_HS_para(h_mat.data(), s_mat.data(), lda, nbands,ekb.data(), wfc.data(), comm, diag_type, nb); + hsolver::diago_hs_para(h_mat.data(), s_mat.data(), lda, nbands,ekb.data(), wfc.data(), comm, diag_type, nb); // Verify results if (my_rank == 0){ @@ -195,7 +195,7 @@ void test_performance(int lda, int nb, int nbands, MPI_Comm comm,int case_numb, auto start = std::chrono::high_resolution_clock::now(); for (int j=0;j(h_mat.data(), s_mat.data(), lda, nbands,ekb_elpa.data(), wfc.data(), comm, 1, nb); + hsolver::diago_hs_para(h_mat.data(), s_mat.data(), lda, nbands,ekb_elpa.data(), wfc.data(), comm, 1, nb); MPI_Barrier(comm); } MPI_Barrier(comm); @@ -207,7 +207,7 @@ void test_performance(int lda, int nb, int nbands, MPI_Comm comm,int case_numb, start = std::chrono::high_resolution_clock::now(); for (int j=0;j(h_mat.data(), s_mat.data(), lda, nbands,ekb_scalap.data(), wfc.data(), comm, 2, nb); + hsolver::diago_hs_para(h_mat.data(), s_mat.data(), lda, nbands,ekb_scalap.data(), wfc.data(), comm, 2, nb); MPI_Barrier(comm); } MPI_Barrier(comm); diff --git a/source/module_hsolver/test/test_hsolver_pw.cpp b/source/module_hsolver/test/test_hsolver_pw.cpp index 87849b3e32..530491a669 100644 --- a/source/module_hsolver/test/test_hsolver_pw.cpp +++ b/source/module_hsolver/test/test_hsolver_pw.cpp @@ -33,10 +33,10 @@ * - lcao_in_pw specific implementation */ -// mock Diago_HS_para +// mock diago_hs_para namespace hsolver { template -void Diago_HS_para(T* h, +void diago_hs_para(T* h, T* s, const int lda, const int nband, @@ -46,7 +46,7 @@ void Diago_HS_para(T* h, const int diag_subspace, const int block_size = 0) {} -template void Diago_HS_para(double* h, +template void diago_hs_para(double* h, double* s, const int lda, const int nband, @@ -56,7 +56,7 @@ template void Diago_HS_para(double* h, const int diag_subspace, const int block_size); -template void Diago_HS_para>(std::complex* h, +template void diago_hs_para>(std::complex* h, std::complex* s, const int lda, const int nband, @@ -66,7 +66,7 @@ template void Diago_HS_para>(std::complex* h, const int diag_subspace, const int block_size); -template void Diago_HS_para(float* h, +template void diago_hs_para(float* h, float* s, const int lda, const int nband, @@ -76,7 +76,7 @@ template void Diago_HS_para(float* h, const int diag_subspace, const int block_size); -template void Diago_HS_para>(std::complex* h, +template void diago_hs_para>(std::complex* h, std::complex* s, const int lda, const int nband, diff --git a/source/module_hsolver/test/test_hsolver_sdft.cpp b/source/module_hsolver/test/test_hsolver_sdft.cpp index 3d60f21c4d..274a619993 100644 --- a/source/module_hsolver/test/test_hsolver_sdft.cpp +++ b/source/module_hsolver/test/test_hsolver_sdft.cpp @@ -23,10 +23,10 @@ Sto_Func::Sto_Func() } template class Sto_Func; -// mock Diago_HS_para +// mock diago_hs_para namespace hsolver { template -void Diago_HS_para(T* h, +void diago_hs_para(T* h, T* s, const int lda, const int nband, @@ -36,7 +36,7 @@ void Diago_HS_para(T* h, const int diag_subspace, const int block_size = 0) {} -template void Diago_HS_para(double* h, +template void diago_hs_para(double* h, double* s, const int lda, const int nband, @@ -45,7 +45,7 @@ template void Diago_HS_para(double* h, const MPI_Comm& comm, const int diag_subspace, const int block_size); -template void Diago_HS_para>(std::complex* h, +template void diago_hs_para>(std::complex* h, std::complex* s, const int lda, const int nband, @@ -54,7 +54,7 @@ template void Diago_HS_para>(std::complex* h, const MPI_Comm& comm, const int diag_subspace, const int block_size); -template void Diago_HS_para(float* h, +template void diago_hs_para(float* h, float* s, const int lda, const int nband, @@ -63,7 +63,7 @@ template void Diago_HS_para(float* h, const MPI_Comm& comm, const int diag_subspace, const int block_size); -template void Diago_HS_para>(std::complex* h, +template void diago_hs_para>(std::complex* h, std::complex* s, const int lda, const int nband,