diff --git a/source/source_hsolver/diago_dav_subspace.cpp b/source/source_hsolver/diago_dav_subspace.cpp index 6e4d31403c..c180155713 100644 --- a/source/source_hsolver/diago_dav_subspace.cpp +++ b/source/source_hsolver/diago_dav_subspace.cpp @@ -542,7 +542,7 @@ void Diago_DavSubspace::diag_zhegvx(const int& nbase, if (this->diag_comm.rank == 0) { base_device::memory::synchronize_memory_op()(this->d_scc, scc, nbase * this->nbase_x); - dngvd_op()(this->ctx, nbase, this->nbase_x, this->hcc, this->d_scc, this->d_eigenvalue, this->vcc); + hegvd_op()(this->ctx, nbase, this->nbase_x, this->hcc, this->d_scc, this->d_eigenvalue, this->vcc); syncmem_var_d2h_op()((*eigenvalue_iter).data(), this->d_eigenvalue, this->nbase_x); } #endif @@ -564,7 +564,7 @@ void Diago_DavSubspace::diag_zhegvx(const int& nbase, s_diag[i][j] = scc[i * this->nbase_x + j]; } } - dngvx_op()(this->ctx, + hegvx_op()(this->ctx, nbase, this->nbase_x, this->hcc, diff --git a/source/source_hsolver/diago_david.cpp b/source/source_hsolver/diago_david.cpp index ef7dd07423..0fec378e14 100644 --- a/source/source_hsolver/diago_david.cpp +++ b/source/source_hsolver/diago_david.cpp @@ -622,7 +622,7 @@ void DiagoDavid::diag_zhegvx(const int& nbase, resmem_var_op()(eigenvalue_gpu, nbase_x); syncmem_var_h2d_op()(eigenvalue_gpu, this->eigenvalue, nbase_x); - dnevx_op()(this->ctx, nbase, nbase_x, hcc, nband, eigenvalue_gpu, vcc); + heevx_op()(this->ctx, nbase, nbase_x, hcc, nband, eigenvalue_gpu, vcc); syncmem_var_d2h_op()(this->eigenvalue, eigenvalue_gpu, nbase_x); delmem_var_op()(eigenvalue_gpu); @@ -630,7 +630,7 @@ void DiagoDavid::diag_zhegvx(const int& nbase, } else { - dnevx_op()(this->ctx, nbase, nbase_x, hcc, nband, this->eigenvalue, vcc); + heevx_op()(this->ctx, nbase, nbase_x, hcc, nband, this->eigenvalue, vcc); } } diff --git a/source/source_hsolver/diago_iter_assist.cpp b/source/source_hsolver/diago_iter_assist.cpp index 916ea0d3fc..419b8b2b65 100644 --- a/source/source_hsolver/diago_iter_assist.cpp +++ 
b/source/source_hsolver/diago_iter_assist.cpp @@ -372,7 +372,7 @@ void DiagoIterAssist::diagH_LAPACK(const int nstart, resmem_var_op()(eigenvalues, nstart); setmem_var_op()(eigenvalues, 0, nstart); - dngvd_op()(ctx, nstart, ldh, hcc, scc, eigenvalues, vcc); + hegvd_op()(ctx, nstart, ldh, hcc, scc, eigenvalues, vcc); if (base_device::get_device_type(ctx) == base_device::GpuDevice) { diff --git a/source/source_hsolver/kernels/cuda/dngvd_op.cu b/source/source_hsolver/kernels/cuda/dngvd_op.cu index 5c1ef8ccab..232c769b2c 100644 --- a/source/source_hsolver/kernels/cuda/dngvd_op.cu +++ b/source/source_hsolver/kernels/cuda/dngvd_op.cu @@ -205,7 +205,7 @@ void xheevd_wrapper ( } template -struct dngvd_op +struct hegvd_op { using Real = typename GetTypeReal::type; void operator()(const base_device::DEVICE_GPU* d, @@ -225,7 +225,7 @@ struct dngvd_op }; template -struct dnevx_op +struct heevx_op { using Real = typename GetTypeReal::type; void operator()(const base_device::DEVICE_GPU* d, @@ -244,7 +244,7 @@ struct dnevx_op }; template -struct dngvx_op +struct hegvx_op { using Real = typename GetTypeReal::type; void operator()(const base_device::DEVICE_GPU* d, @@ -260,18 +260,18 @@ struct dngvx_op } }; -template struct dngvd_op, base_device::DEVICE_GPU>; -template struct dnevx_op, base_device::DEVICE_GPU>; -template struct dngvx_op, base_device::DEVICE_GPU>; +template struct hegvd_op, base_device::DEVICE_GPU>; +template struct heevx_op, base_device::DEVICE_GPU>; +template struct hegvx_op, base_device::DEVICE_GPU>; -template struct dngvd_op, base_device::DEVICE_GPU>; -template struct dnevx_op, base_device::DEVICE_GPU>; -template struct dngvx_op, base_device::DEVICE_GPU>; +template struct hegvd_op, base_device::DEVICE_GPU>; +template struct heevx_op, base_device::DEVICE_GPU>; +template struct hegvx_op, base_device::DEVICE_GPU>; #ifdef __LCAO -template struct dngvd_op; -template struct dnevx_op; -template struct dngvx_op; +template struct hegvd_op; +template struct heevx_op; 
+template struct hegvx_op; #endif } // namespace hsolver \ No newline at end of file diff --git a/source/source_hsolver/kernels/dngvd_op.cpp b/source/source_hsolver/kernels/dngvd_op.cpp index 66cb3c1233..0a700a89d9 100644 --- a/source/source_hsolver/kernels/dngvd_op.cpp +++ b/source/source_hsolver/kernels/dngvd_op.cpp @@ -6,9 +6,9 @@ namespace hsolver { - +// hegvd and sygvd; dn for dense? template -struct dngvd_op +struct hegvd_op { using Real = typename GetTypeReal::type; void operator()(const base_device::DEVICE_CPU* d, @@ -83,7 +83,7 @@ struct dngvd_op }; template -struct dngv_op +struct hegv_op { using Real = typename GetTypeReal::type; void operator()(const base_device::DEVICE_CPU* d, @@ -139,8 +139,16 @@ struct dngv_op } }; +// heevx and syevx +/** + * @brief heevx computes the first m eigenvalues and their corresponding eigenvectors of + * a Hermitian (complex) or symmetric (real) standard eigenproblem; no overlap matrix is involved. + * + * Both heevx and syevx are implemented through the `evx` interface of LAPACK, + * wrapped by LapackWrapper::xheevx. + */ template -struct dnevx_op +struct heevx_op { using Real = typename GetTypeReal::type; void operator()(const base_device::DEVICE_CPU* /*ctx*/, @@ -235,7 +243,7 @@ struct dnevx_op }; template -struct dngvx_op +struct hegvx_op { using Real = typename GetTypeReal::type; void operator()(const base_device::DEVICE_CPU* d, @@ -321,21 +329,21 @@ struct dngvx_op } }; -template struct dngvd_op, base_device::DEVICE_CPU>; -template struct dngvd_op, base_device::DEVICE_CPU>; +template struct hegvd_op, base_device::DEVICE_CPU>; +template struct hegvd_op, base_device::DEVICE_CPU>; -template struct dnevx_op, base_device::DEVICE_CPU>; -template struct dnevx_op, base_device::DEVICE_CPU>; +template struct heevx_op, base_device::DEVICE_CPU>; +template struct heevx_op, base_device::DEVICE_CPU>; -template struct dngvx_op, base_device::DEVICE_CPU>; -template struct dngvx_op, base_device::DEVICE_CPU>; +template struct hegvx_op, base_device::DEVICE_CPU>; +template 
struct hegvx_op, base_device::DEVICE_CPU>; -template struct dngv_op, base_device::DEVICE_CPU>; -template struct dngv_op, base_device::DEVICE_CPU>; +template struct hegv_op, base_device::DEVICE_CPU>; +template struct hegv_op, base_device::DEVICE_CPU>; #ifdef __LCAO -template struct dngvd_op; -template struct dnevx_op; -template struct dngvx_op; -template struct dngv_op; +template struct hegvd_op; +template struct heevx_op; +template struct hegvx_op; +template struct hegv_op; #endif } // namespace hsolver \ No newline at end of file diff --git a/source/source_hsolver/kernels/dngvd_op.h b/source/source_hsolver/kernels/dngvd_op.h index c48cd576b5..bfd59ebdf2 100644 --- a/source/source_hsolver/kernels/dngvd_op.h +++ b/source/source_hsolver/kernels/dngvd_op.h @@ -1,5 +1,16 @@ // TODO: This is a temperary location for these functions. // And will be moved to a global module(module base) later. + +// Naming key; the legacy name dngvx reads as DeNse Generalized eigenValue eXtended +// he stands for Hermitian +// sy stands for Symmetric +// gv stands for Generalized eigenValue problem +// ev stands for EigenValues +// dn presumably stands for dense (unconfirmed) +// x stands for computing a subset of the eigenvalues and, optionally, +// their corresponding eigenvectors +// d (divide and conquer) computes all eigenpairs, x computes a selected subset + #ifndef MODULE_HSOLVER_DNGVD_H #define MODULE_HSOLVER_DNGVD_H @@ -21,10 +32,10 @@ inline float get_real(const float &x) { return x; } template -struct dngvd_op +struct hegvd_op { using Real = typename GetTypeReal::type; - /// @brief DNGVD computes all the eigenvalues and eigenvectors of a complex generalized + /// @brief HEGVD computes all the eigenvalues and eigenvectors of a complex generalized /// Hermitian-definite eigenproblem. If eigenvectors are desired, it uses a divide and conquer algorithm. 
/// /// In this op, the CPU version is implemented through the `gvd` interface, and the CUDA version @@ -47,10 +58,10 @@ struct dngvd_op }; template -struct dngv_op +struct hegv_op { using Real = typename GetTypeReal::type; - /// @brief DNGVX computes first m eigenvalues and eigenvectors of a complex generalized + /// @brief HEGV computes first m eigenvalues and eigenvectors of a complex generalized /// Input Parameters /// @param d : the type of device /// @param nbase : the number of dim of the matrix @@ -64,10 +75,10 @@ struct dngv_op }; template -struct dngvx_op +struct hegvx_op { using Real = typename GetTypeReal::type; - /// @brief DNGVX computes first m eigenvalues and eigenvectors of a complex generalized + /// @brief HEGVX computes first m eigenvalues and eigenvectors of a complex generalized /// Input Parameters /// @param d : the type of device /// @param nbase : the number of dim of the matrix @@ -82,10 +93,10 @@ struct dngvx_op }; template -struct dnevx_op +struct heevx_op { using Real = typename GetTypeReal::type; - /// @brief DNEVX computes the first m eigenvalues and their corresponding eigenvectors of + /// @brief heevx computes the first m eigenvalues and their corresponding eigenvectors of /// a complex generalized Hermitian-definite eigenproblem /// /// In this op, the CPU version is implemented through the `evx` interface, and the CUDA version diff --git a/source/source_hsolver/kernels/rocm/dngvd_op.hip.cu b/source/source_hsolver/kernels/rocm/dngvd_op.hip.cu index 6730e0f936..2b17394b60 100644 --- a/source/source_hsolver/kernels/rocm/dngvd_op.hip.cu +++ b/source/source_hsolver/kernels/rocm/dngvd_op.hip.cu @@ -5,7 +5,7 @@ namespace hsolver { -// NOTE: mimicked from ../cuda/dngvd_op.cu for three dngvd_op +// NOTE: mimicked from ../cuda/dngvd_op.cu for three hegvd_op static hipsolverHandle_t hipsolver_H = nullptr; // Test on DCU platform. When nstart is greater than 234, code on DCU performs better. 
@@ -28,7 +28,7 @@ void destroyGpuSolverHandle() { #ifdef __LCAO template <> -void dngvd_op::operator()(const base_device::DEVICE_GPU* ctx, +void hegvd_op::operator()(const base_device::DEVICE_GPU* ctx, const int nstart, const int ldh, const double* _hcc, @@ -36,7 +36,7 @@ void dngvd_op::operator()(const base_device::DE double* _eigenvalue, double* _vcc) { - // copied from ../cuda/dngvd_op.cu, "dngvd_op" + // copied from ../cuda/dngvd_op.cu, "hegvd_op" assert(nstart == ldh); if (nstart > N_DCU){ @@ -86,7 +86,7 @@ void dngvd_op::operator()(const base_device::DE hipErrcheck(hipMemcpy(hcc.data(), _hcc, sizeof(double) * hcc.size(), hipMemcpyDeviceToHost)); hipErrcheck(hipMemcpy(scc.data(), _scc, sizeof(double) * scc.size(), hipMemcpyDeviceToHost)); base_device::DEVICE_CPU* cpu_ctx = {}; - dngvd_op()(cpu_ctx, + hegvd_op()(cpu_ctx, nstart, ldh, hcc.data(), @@ -102,7 +102,7 @@ void dngvd_op::operator()(const base_device::DE #endif // __LCAO template <> -void dngvd_op, base_device::DEVICE_GPU>::operator()(const base_device::DEVICE_GPU* ctx, +void hegvd_op, base_device::DEVICE_GPU>::operator()(const base_device::DEVICE_GPU* ctx, const int nstart, const int ldh, const std::complex* _hcc, @@ -110,7 +110,7 @@ void dngvd_op, base_device::DEVICE_GPU>::operator()(const ba float* _eigenvalue, std::complex* _vcc) { - // copied from ../cuda/dngvd_op.cu, "dngvd_op" + // copied from ../cuda/dngvd_op.cu, "hegvd_op" assert(nstart == ldh); if (nstart > N_DCU){ @@ -159,7 +159,7 @@ void dngvd_op, base_device::DEVICE_GPU>::operator()(const ba hipErrcheck(hipMemcpy(hcc.data(), _hcc, sizeof(std::complex) * hcc.size(), hipMemcpyDeviceToHost)); hipErrcheck(hipMemcpy(scc.data(), _scc, sizeof(std::complex) * scc.size(), hipMemcpyDeviceToHost)); base_device::DEVICE_CPU* cpu_ctx = {}; - dngvd_op, base_device::DEVICE_CPU>()(cpu_ctx, + hegvd_op, base_device::DEVICE_CPU>()(cpu_ctx, nstart, ldh, hcc.data(), @@ -174,7 +174,7 @@ void dngvd_op, base_device::DEVICE_GPU>::operator()(const ba } template <> 
-void dngvd_op, base_device::DEVICE_GPU>::operator()(const base_device::DEVICE_GPU* ctx, +void hegvd_op, base_device::DEVICE_GPU>::operator()(const base_device::DEVICE_GPU* ctx, const int nstart, const int ldh, const std::complex* _hcc, @@ -183,7 +183,7 @@ void dngvd_op, base_device::DEVICE_GPU>::operator()(const b std::complex* _vcc ) { - // copied from ../cuda/dngvd_op.cu, "dngvd_op" + // copied from ../cuda/dngvd_op.cu, "hegvd_op" // assert(nstart == ldh); // save a copy of scc in case the diagonalization fails @@ -237,7 +237,7 @@ void dngvd_op, base_device::DEVICE_GPU>::operator()(const b hipErrcheck(hipMemcpy(hcc.data(), _hcc, sizeof(std::complex) * hcc.size(), hipMemcpyDeviceToHost)); hipErrcheck(hipMemcpy(scc.data(), _scc, sizeof(std::complex) * scc.size(), hipMemcpyDeviceToHost)); base_device::DEVICE_CPU* cpu_ctx = {}; - dngvd_op, base_device::DEVICE_CPU>()(cpu_ctx, + hegvd_op, base_device::DEVICE_CPU>()(cpu_ctx, nstart, ldh, hcc.data(), @@ -258,7 +258,7 @@ void dngvd_op, base_device::DEVICE_GPU>::operator()(const b #ifdef __LCAO template <> -void dnevx_op::operator()(const base_device::DEVICE_GPU* ctx, +void heevx_op::operator()(const base_device::DEVICE_GPU* ctx, const int nstart, const int ldh, const double* _hcc, @@ -271,14 +271,14 @@ void dnevx_op::operator()(const base_device::DE std::vector eigenvalue(ldh, 0); hipErrcheck(hipMemcpy(hcc.data(), _hcc, sizeof(double) * hcc.size(), hipMemcpyDeviceToHost)); base_device::DEVICE_CPU* cpu_ctx = {}; - dnevx_op()(cpu_ctx, nstart, ldh, hcc.data(), m, eigenvalue.data(), vcc.data()); + heevx_op()(cpu_ctx, nstart, ldh, hcc.data(), m, eigenvalue.data(), vcc.data()); hipErrcheck(hipMemcpy(_vcc, vcc.data(), sizeof(double) * vcc.size(), hipMemcpyHostToDevice)); hipErrcheck(hipMemcpy(_eigenvalue, eigenvalue.data(), sizeof(double) * eigenvalue.size(), hipMemcpyHostToDevice)); } #endif // __LCAO template <> -void dnevx_op, base_device::DEVICE_GPU>::operator()(const base_device::DEVICE_GPU* ctx, +void heevx_op, 
base_device::DEVICE_GPU>::operator()(const base_device::DEVICE_GPU* ctx, const int nstart, const int ldh, const std::complex* _hcc, @@ -291,7 +291,7 @@ void dnevx_op, base_device::DEVICE_GPU>::operator()(const ba std::vector eigenvalue(ldh, 0); hipErrcheck(hipMemcpy(hcc.data(), _hcc, sizeof(std::complex) * hcc.size(), hipMemcpyDeviceToHost)); base_device::DEVICE_CPU* cpu_ctx = {}; - dnevx_op, base_device::DEVICE_CPU>()(cpu_ctx, + heevx_op, base_device::DEVICE_CPU>()(cpu_ctx, nstart, ldh, hcc.data(), @@ -303,7 +303,7 @@ void dnevx_op, base_device::DEVICE_GPU>::operator()(const ba } template <> -void dnevx_op, base_device::DEVICE_GPU>::operator()(const base_device::DEVICE_GPU* ctx, +void heevx_op, base_device::DEVICE_GPU>::operator()(const base_device::DEVICE_GPU* ctx, const int nstart, const int ldh, const std::complex* _hcc, @@ -316,7 +316,7 @@ void dnevx_op, base_device::DEVICE_GPU>::operator()(const b std::vector eigenvalue(ldh, 0); hipErrcheck(hipMemcpy(hcc.data(), _hcc, sizeof(std::complex) * hcc.size(), hipMemcpyDeviceToHost)); base_device::DEVICE_CPU* cpu_ctx = {}; - dnevx_op, base_device::DEVICE_CPU>()(cpu_ctx, + heevx_op, base_device::DEVICE_CPU>()(cpu_ctx, nstart, ldh, hcc.data(), @@ -328,7 +328,7 @@ void dnevx_op, base_device::DEVICE_GPU>::operator()(const b } template <> -void dngvx_op, base_device::DEVICE_GPU>::operator()(const base_device::DEVICE_GPU* d, +void hegvx_op, base_device::DEVICE_GPU>::operator()(const base_device::DEVICE_GPU* d, const int nbase, const int ldh, std::complex* hcc, @@ -340,7 +340,7 @@ void dngvx_op, base_device::DEVICE_GPU>::operator()(const ba } template <> -void dngvx_op, base_device::DEVICE_GPU>::operator()(const base_device::DEVICE_GPU* d, +void hegvx_op, base_device::DEVICE_GPU>::operator()(const base_device::DEVICE_GPU* d, const int nbase, const int ldh, std::complex* hcc, @@ -353,7 +353,7 @@ void dngvx_op, base_device::DEVICE_GPU>::operator()(const b #ifdef __LCAO template <> -void dngvx_op::operator()(const 
base_device::DEVICE_GPU* d, +void hegvx_op::operator()(const base_device::DEVICE_GPU* d, const int nbase, const int ldh, double* hcc, diff --git a/source/source_hsolver/test/test_diago_hs_para.cpp b/source/source_hsolver/test/test_diago_hs_para.cpp index ad7d05c716..a751d72d80 100644 --- a/source/source_hsolver/test/test_diago_hs_para.cpp +++ b/source/source_hsolver/test/test_diago_hs_para.cpp @@ -231,7 +231,7 @@ void test_performance(int lda, int nb, int nbands, MPI_Comm comm,int case_numb, { h_tmp = h_mat; s_tmp = s_mat; - hsolver::dngvx_op()(ctx, + hsolver::hegvx_op()(ctx, lda, lda, h_tmp.data(),