From a8bb37faa2be278eef4b9d14792600d121444d6a Mon Sep 17 00:00:00 2001 From: maki49 <1579492865@qq.com> Date: Thu, 14 Nov 2024 13:40:18 +0800 Subject: [PATCH 1/6] extract precondition function --- .../src/hsolver/py_diago_dav_subspace.hpp | 3 +- source/module_hsolver/diago_dav_subspace.cpp | 51 +--------- source/module_hsolver/diago_dav_subspace.h | 9 +- source/module_hsolver/hsolver_pw.cpp | 3 +- source/module_hsolver/precondition_funcs.h | 96 +++++++++++++++++++ source/module_lr/hsolver_lrtd.hpp | 4 +- tests/integrate/291_NO_KP_LR/result.ref | 2 +- 7 files changed, 113 insertions(+), 55 deletions(-) create mode 100644 source/module_hsolver/precondition_funcs.h diff --git a/python/pyabacus/src/hsolver/py_diago_dav_subspace.hpp b/python/pyabacus/src/hsolver/py_diago_dav_subspace.hpp index 8ef539902e..c0fe7b1850 100644 --- a/python/pyabacus/src/hsolver/py_diago_dav_subspace.hpp +++ b/python/pyabacus/src/hsolver/py_diago_dav_subspace.hpp @@ -130,8 +130,9 @@ class PyDiagoDavSubspace std::copy(hpsi_ptr, hpsi_ptr + nvec * ld_psi, hpsi_out); }; + hsolver::PreOP> pre_op(precond_vec, hsolver::transfunc::qe_pw); obj = std::make_unique, base_device::DEVICE_CPU>>( - precond_vec, + hsolver::bind_pre_op(pre_op), nband, nbasis, dav_ndim, diff --git a/source/module_hsolver/diago_dav_subspace.cpp b/source/module_hsolver/diago_dav_subspace.cpp index d11d7093f1..ccaa4ad44e 100644 --- a/source/module_hsolver/diago_dav_subspace.cpp +++ b/source/module_hsolver/diago_dav_subspace.cpp @@ -13,7 +13,7 @@ using namespace hsolver; template -Diago_DavSubspace::Diago_DavSubspace(const std::vector& precondition_in, +Diago_DavSubspace::Diago_DavSubspace(PreFunc&& precondition_in, const int& nband_in, const int& nbasis_in, const int& david_ndim_in, @@ -21,8 +21,8 @@ Diago_DavSubspace::Diago_DavSubspace(const std::vector& precond const int& diag_nmax_in, const bool& need_subspace_in, const diag_comm_info& diag_comm_in) - : precondition(precondition_in), n_band(nband_in), dim(nbasis_in), nbase_x(nband_in * david_ndim_in), - diag_thr(diag_thr_in), iter_nmax(diag_nmax_in), is_subspace(need_subspace_in), diag_comm(diag_comm_in) + : precondition(std::forward>(precondition_in)), n_band(nband_in), dim(nbasis_in), nbase_x(nband_in* david_ndim_in), + diag_thr(diag_thr_in), iter_nmax(diag_nmax_in), is_subspace(need_subspace_in), diag_comm(diag_comm_in) { this->device = base_device::get_device_type(this->ctx); @@ -55,14 +55,6 @@ Diago_DavSubspace::Diago_DavSubspace(const std::vector& precond resmem_complex_op()(this->ctx, this->vcc, this->nbase_x * this->nbase_x, "DAV::vcc"); setmem_complex_op()(this->ctx, this->vcc, 0, this->nbase_x * this->nbase_x); //<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< - -#if defined(__CUDA) || defined(__ROCM) - if (this->device == base_device::GpuDevice) - { - resmem_real_op()(this->ctx, this->d_precondition, nbasis_in); - // syncmem_var_h2d_op()(this->ctx, this->cpu_ctx, this->d_precondition, this->precondition.data(), nbasis_in); - } -#endif } template @@ -74,13 +66,6 @@ Diago_DavSubspace::~Diago_DavSubspace() delmem_complex_op()(this->ctx, this->hcc); delmem_complex_op()(this->ctx, this->scc); delmem_complex_op()(this->ctx, this->vcc); - -#if defined(__CUDA) || defined(__ROCM) - if (this->device == base_device::GpuDevice) - { - delmem_real_op()(this->ctx, this->d_precondition); - } -#endif } template @@ -334,35 +319,7 @@ void Diago_DavSubspace::cal_grad(const HPsiFunc& hpsi_func, this->dim); // "precondition!!!" - std::vector pre(this->dim, 0.0); - for (int m = 0; m < notconv; m++) - { - for (size_t i = 0; i < this->dim; i++) - { - // pre[i] = std::abs(this->precondition[i] - (*eigenvalue_iter)[m]); - double x = std::abs(this->precondition[i] - (*eigenvalue_iter)[m]); - pre[i] = 0.5 * (1.0 + x + sqrt(1 + (x - 1.0) * (x - 1.0))); - } -#if defined(__CUDA) || defined(__ROCM) - if (this->device == base_device::GpuDevice) - { - syncmem_var_h2d_op()(this->ctx, this->cpu_ctx, this->d_precondition, pre.data(), this->dim); - vector_div_vector_op()(this->ctx, - this->dim, - psi_iter + (nbase + m) * this->dim, - psi_iter + (nbase + m) * this->dim, - this->d_precondition); - } - else -#endif - { - vector_div_vector_op()(this->ctx, - this->dim, - psi_iter + (nbase + m) * this->dim, - psi_iter + (nbase + m) * this->dim, - pre.data()); - } - } + this->precondition(psi_iter + nbase * this->dim, eigenvalue_iter->data(), this->dim, notconv); // "normalize!!!" in order to improve numerical stability of subspace diagonalization std::vector psi_norm(notconv, 0.0); diff --git a/source/module_hsolver/diago_dav_subspace.h b/source/module_hsolver/diago_dav_subspace.h index d93c252602..38a28b2856 100644 --- a/source/module_hsolver/diago_dav_subspace.h +++ b/source/module_hsolver/diago_dav_subspace.h @@ -10,6 +10,7 @@ #include #include +#include "module_hsolver/precondition_funcs.h" namespace hsolver { @@ -24,7 +25,7 @@ class Diago_DavSubspace using Real = typename GetTypeReal::type; public: - Diago_DavSubspace(const std::vector& precondition_in, + Diago_DavSubspace(PreFunc&& precondition_in, /// pass in a function, lambda or PreOP object const int& nband_in, const int& nbasis_in, const int& david_ndim_in, @@ -67,9 +68,9 @@ class Diago_DavSubspace /// the maximum dimension of the reduced basis set const int nbase_x = 0; - /// precondition for diag - const std::vector& precondition; - Real* d_precondition = nullptr; + /// The precondition operation, can be a function, lambda or PreOP object + const PreFunc precondition; + // note that lambdas can only passed by value /// record for how many bands not have convergence eigenvalues int notconv = 0; diff --git a/source/module_hsolver/hsolver_pw.cpp b/source/module_hsolver/hsolver_pw.cpp index e9d64e8455..c3afee204f 100644 --- a/source/module_hsolver/hsolver_pw.cpp +++ b/source/module_hsolver/hsolver_pw.cpp @@ -503,7 +503,8 @@ void HSolverPW::hamiltSolvePsiK(hamilt::Hamilt* hm, }; bool scf = this->calculation_type == "nscf" ? false : true; - Diago_DavSubspace dav_subspace(pre_condition, + PreOP pre_op(pre_condition, transfunc::qe_pw); + Diago_DavSubspace dav_subspace(bind_pre_op(pre_op), psi.get_nbands(), psi.get_k_first() ? psi.get_current_nbas() : psi.get_nk() * psi.get_nbasis(), diff --git a/source/module_hsolver/precondition_funcs.h b/source/module_hsolver/precondition_funcs.h new file mode 100644 index 0000000000..97bb862158 --- /dev/null +++ b/source/module_hsolver/precondition_funcs.h @@ -0,0 +1,96 @@ +#include +#include "module_base/module_device/types.h" +#include "module_hsolver/kernels/math_kernel_op.h" +#include // for debugging +#include +namespace hsolver +{ + /// @brief Transforming a single value, + namespace transfunc + { + template T none(const T& x) { return x; } + template T qe_pw(const T& x) { return 0.5 * (1.0 + x + sqrt(1 + (x - 1.0) * (x - 1.0))); } + } + + template + using Real = typename GetTypeReal::type; + + /// @brief to be called in the iterative eigensolver. + /// fixed parameters: object vector, eigenvalue, leading dimension, number of vectors + template + using PreFunc = const std::function*, const size_t&, const size_t&)>; + // using PreFunc = std::function*, const int&, const int&)>; + + /// type1: Divide transfunc(precon_vec - eigen_subspace[m]) for each vector[m] + ///$X \to (A-\lambda I)^{-1} X$ + // There may be other types of operation than this one. + template + void div_trans_prevec_minus_eigen(T* ptr, const Real* eig, const size_t& dim, const size_t& nvec, + const Real* const pre, Real* const d_pre = nullptr, const std::function(const Real&)>& transfunc = transfunc::none>) + { + using syncmem_var_h2d_op = base_device::memory::synchronize_memory_op, Device, base_device::DEVICE_CPU>; + std::vector> pre_trans(dim, 0.0); + const auto device = base_device::get_device_type({}); + + for (int m = 0; m < nvec; m++) + { + T* const ptr_m = ptr + m * dim; + for (size_t i = 0; i < dim; i++) { pre_trans[i] = transfunc(pre[i] - eig[m]); } + std::cout << std::endl; +#if defined(__CUDA) || defined(__ROCM) + if (device == base_device::GpuDevice) + { + assert(d_pre); + syncmem_var_h2d_op()({}, {}, d_pre, pre_trans.data(), dim); + vector_div_vector_op()({}, dim, ptr_m, ptr_m, d_pre); + } + else +#endif + { + vector_div_vector_op()({}, dim, ptr_m, ptr_m, pre_trans.data()); + } + } + } + + /// @brief A operator-like class of precondition function + /// to encapsulate the pre-allocation of memory on different devices before starting the iterative eigensolver. + /// One can pass the operatr() function of this class, or other custom lambdas/functions to eigensolvers. + template + struct PreOP + { + PreOP(const std::vector>& prevec, const std::function(const Real&)>& transfunc = transfunc::none) + : PreOP(prevec.data(), prevec.size(), transfunc) {} + PreOP(const Real* const prevec, const int& dim, const std::function(const Real&)>& transfunc = transfunc::none) + : prevec_(prevec), dim_(dim), transfunc_(transfunc), + dev_(base_device::get_device_type({})) + { +#if defined(__CUDA) || defined(__ROCM) + if (this->dev_ == base_device::GpuDevice) { resmem_real_op()({}, this->d_prevec_, dim_); } +#endif + } + PreOP(const PreOP& other) = delete; + ~PreOP() { +#if defined(__CUDA) || defined(__ROCM) + if (this->dev_ == base_device::GpuDevice) { delmem_real_op()({}, this->d_precondition); } +#endif + } + void operator()(T* ptr, const Real* eig, const size_t& dim, const size_t& nvec) const + { + assert(dim <= dim_); + div_trans_prevec_minus_eigen(ptr, eig, dim, nvec, prevec_, d_prevec_, transfunc_); + } + private: + const Real* const prevec_; + const int dim_; + Real* d_prevec_; + const std::function(const Real&)> transfunc_; + const base_device::AbacusDevice_t dev_; + }; + + /// @brief Bind a PreOP object to a function + template + PreFunc bind_pre_op(const PreOP& pre_op) + { + return std::bind(&PreOP::operator(), &pre_op, std::placeholders::_1, std::placeholders::_2, std::placeholders::_3, std::placeholders::_4); + } +} diff --git a/source/module_lr/hsolver_lrtd.hpp b/source/module_lr/hsolver_lrtd.hpp index 6c8be619eb..9884b41d95 100644 --- a/source/module_lr/hsolver_lrtd.hpp +++ b/source/module_lr/hsolver_lrtd.hpp @@ -73,6 +73,8 @@ namespace LR auto hpsi_func = [&hm](T* psi_in, T* hpsi, const int ld_psi, const int nvec) {hm.hPsi(psi_in, hpsi, ld_psi, nvec);}; auto spsi_func = [&hm](const T* psi_in, T* spsi, const int ld_psi, const int nvec) { std::memcpy(spsi, psi_in, sizeof(T) * ld_psi * nvec); }; + auto pre_func = [&precondition](T* ptr, const Real* eig, const int& ld, const int& nvec)->void + { hsolver::div_trans_prevec_minus_eigen(ptr, eig, ld, nvec, precondition.data()); }; if (method == "dav") { @@ -88,7 +90,7 @@ namespace LR } else if (method == "dav_subspace") //need refactor { - hsolver::Diago_DavSubspace dav_subspace(precondition, + hsolver::Diago_DavSubspace dav_subspace(pre_func, nband, dim, PARAM.inp.pw_diag_ndim, diff --git a/tests/integrate/291_NO_KP_LR/result.ref b/tests/integrate/291_NO_KP_LR/result.ref index 722b3fff61..1fd774b2fd 100644 --- a/tests/integrate/291_NO_KP_LR/result.ref +++ b/tests/integrate/291_NO_KP_LR/result.ref @@ -1 +1 @@ -totexcitationenergyref 0.784274 \ No newline at end of file +totexcitationenergyref 0.786881 \ No newline at end of file From 4f4f356b278b3a44aa3226b44c6f1ee20bfbdc8e Mon Sep 17 00:00:00 2001 From: maki49 <1579492865@qq.com> Date: Thu, 14 Nov 2024 20:53:34 +0800 Subject: [PATCH 2/6] apply to dav --- .../src/hsolver/py_diago_dav_subspace.hpp | 5 +- .../pyabacus/src/hsolver/py_diago_david.hpp | 3 +- source/module_hsolver/diago_dav_subspace.cpp | 4 +- source/module_hsolver/diago_dav_subspace.h | 7 +- source/module_hsolver/diago_david.cpp | 54 +------ source/module_hsolver/diago_david.h | 12 +- source/module_hsolver/hsolver_pw.cpp | 9 +- source/module_hsolver/precondition_funcs.h | 151 +++++++++++------- source/module_hsolver/test/CMakeLists.txt | 10 +- source/module_lr/hsolver_lrtd.hpp | 8 +- 10 files changed, 135 insertions(+), 128 deletions(-) diff --git a/python/pyabacus/src/hsolver/py_diago_dav_subspace.hpp b/python/pyabacus/src/hsolver/py_diago_dav_subspace.hpp index c0fe7b1850..682b14941c 100644 --- a/python/pyabacus/src/hsolver/py_diago_dav_subspace.hpp +++ b/python/pyabacus/src/hsolver/py_diago_dav_subspace.hpp @@ -130,9 +130,10 @@ class PyDiagoDavSubspace std::copy(hpsi_ptr, hpsi_ptr + nvec * ld_psi, hpsi_out); }; - hsolver::PreOP> pre_op(precond_vec, hsolver::transfunc::qe_pw); + hsolver::PreOP, base_device::DEVICE_CPU, hsolver::fvec::DivTransMinusEigKernel, base_device::DEVICE_CPU>> + pre_op(precond_vec, hsolver::fvec::div_trans_prevec_minus_eigen>, hsolver::fval::qe_pw); obj = std::make_unique, base_device::DEVICE_CPU>>( - hsolver::bind_pre_op(pre_op), + pre_op.get(), nband, nbasis, dav_ndim, diff --git a/python/pyabacus/src/hsolver/py_diago_david.hpp b/python/pyabacus/src/hsolver/py_diago_david.hpp index 2008eb6b85..bf2789f32d 100644 --- a/python/pyabacus/src/hsolver/py_diago_david.hpp +++ b/python/pyabacus/src/hsolver/py_diago_david.hpp @@ -137,8 +137,9 @@ class PyDiagoDavid syncmem_op()(this->ctx, this->ctx, spsi_out, psi_in, static_cast(nbands * nrow)); }; + hsolver::PreOP> pre_op(precond_vec); obj = std::make_unique, base_device::DEVICE_CPU>>( - precond_vec.data(), + pre_op.get(), nband, nbasis, dav_ndim, diff --git a/source/module_hsolver/diago_dav_subspace.cpp b/source/module_hsolver/diago_dav_subspace.cpp index ccaa4ad44e..0636e51fb3 100644 --- a/source/module_hsolver/diago_dav_subspace.cpp +++ b/source/module_hsolver/diago_dav_subspace.cpp @@ -13,7 +13,7 @@ using namespace hsolver; template -Diago_DavSubspace::Diago_DavSubspace(PreFunc&& precondition_in, +Diago_DavSubspace::Diago_DavSubspace(PreFunc&& precondition_in, const int& nband_in, const int& nbasis_in, const int& david_ndim_in, @@ -21,7 +21,7 @@ Diago_DavSubspace::Diago_DavSubspace(PreFunc&& precondition_in, const int& diag_nmax_in, const bool& need_subspace_in, const diag_comm_info& diag_comm_in) - : precondition(std::forward>(precondition_in)), n_band(nband_in), dim(nbasis_in), nbase_x(nband_in* david_ndim_in), + : precondition(std::forward(precondition_in)), n_band(nband_in), dim(nbasis_in), nbase_x(nband_in* david_ndim_in), diag_thr(diag_thr_in), iter_nmax(diag_nmax_in), is_subspace(need_subspace_in), diag_comm(diag_comm_in) { this->device = base_device::get_device_type(this->ctx); diff --git a/source/module_hsolver/diago_dav_subspace.h b/source/module_hsolver/diago_dav_subspace.h index 38a28b2856..85488af29b 100644 --- a/source/module_hsolver/diago_dav_subspace.h +++ b/source/module_hsolver/diago_dav_subspace.h @@ -24,8 +24,9 @@ class Diago_DavSubspace // otherwise return the real type of T(complex, complex) using Real = typename GetTypeReal::type; - public: - Diago_DavSubspace(PreFunc&& precondition_in, /// pass in a function, lambda or PreOP object + using PreFunc = fvec::DivTransMinusEig; +public: + Diago_DavSubspace(PreFunc&& precondition_in, /// pass in a function, lambda or PreOP object const int& nband_in, const int& nbasis_in, const int& david_ndim_in, @@ -69,7 +70,7 @@ class Diago_DavSubspace const int nbase_x = 0; /// The precondition operation, can be a function, lambda or PreOP object - const PreFunc precondition; + const PreFunc precondition; // note that lambdas can only passed by value /// record for how many bands not have convergence eigenvalues diff --git a/source/module_hsolver/diago_david.cpp b/source/module_hsolver/diago_david.cpp index 8f404da135..84f56e6090 100644 --- a/source/module_hsolver/diago_david.cpp +++ b/source/module_hsolver/diago_david.cpp @@ -32,16 +32,16 @@ using namespace hsolver; * @note Auxiliary memory is allocated in the constructor and deallocated in the destructor. */ template -DiagoDavid::DiagoDavid(const Real* precondition_in, +DiagoDavid::DiagoDavid(PreFunc&& precondition_in, const int nband_in, const int dim_in, const int david_ndim_in, const bool use_paw_in, const diag_comm_info& diag_comm_in) - : nband(nband_in), dim(dim_in), nbase_x(david_ndim_in * nband_in), david_ndim(david_ndim_in), use_paw(use_paw_in), diag_comm(diag_comm_in) + : nband(nband_in), dim(dim_in), nbase_x(david_ndim_in* nband_in), david_ndim(david_ndim_in), use_paw(use_paw_in), diag_comm(diag_comm_in), + precondition(std::forward(precondition_in)) { this->device = base_device::get_device_type(this->ctx); - this->precondition = precondition_in; this->one = &one_; this->zero = &zero_; @@ -110,15 +110,6 @@ DiagoDavid::DiagoDavid(const Real* precondition_in, // lagrange_matrix(nband, nband); // for orthogonalization resmem_complex_op()(this->ctx, this->lagrange_matrix, nband * nband); setmem_complex_op()(this->ctx, this->lagrange_matrix, 0, nband * nband); - -#if defined(__CUDA) || defined(__ROCM) - // device precondition array - if (this->device == base_device::GpuDevice) - { - resmem_var_op()(this->ctx, this->d_precondition, dim); - syncmem_var_h2d_op()(this->ctx, this->cpu_ctx, this->d_precondition, this->precondition, dim); - } -#endif } /** @@ -139,13 +130,6 @@ DiagoDavid::~DiagoDavid() delmem_complex_op()(this->ctx, this->vcc); delmem_complex_op()(this->ctx, this->lagrange_matrix); base_device::memory::delete_memory_op()(this->cpu_ctx, this->eigenvalue); - // If the device is a GPU device, free the d_precondition array. -#if defined(__CUDA) || defined(__ROCM) - if (this->device == base_device::GpuDevice) - { - delmem_var_op()(this->ctx, this->d_precondition); - } -#endif } template @@ -499,40 +483,14 @@ void DiagoDavid::cal_grad(const HPsiFunc& hpsi_func, dim // LDC: if(N) max(1, m) ); //<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< - // Preconditioning // basis[nbase] = T * basis[nbase] = T * (H - lambda * S) * psi // where T, the preconditioner, is an approximate inverse of H // T is a diagonal stored in array `precondition` // to do preconditioning, divide each column of basis by the corresponding element of precondition - for (int m = 0; m < notconv; m++) - { - //<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< - if (this->device == base_device::GpuDevice) - { -#if defined(__CUDA) || defined(__ROCM) - vector_div_vector_op()(this->ctx, - dim, - basis + dim*(nbase + m), - basis + dim*(nbase + m), - this->d_precondition); -#endif - } - else - { - vector_div_vector_op()(this->ctx, - dim, - basis + dim*(nbase + m), - basis + dim*(nbase + m), - this->precondition); - } - //<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< - // for (int ig = 0; ig < dim; ig++) - // { - // ppsi[ig] /= this->precondition[ig]; - // } - //<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< - } + //<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + this->precondition(basis + dim * nbase, dim, notconv); + //<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< // there is a nbase to nbase + notconv band orthogonalise // plan for SchmidtOrth diff --git a/source/module_hsolver/diago_david.h b/source/module_hsolver/diago_david.h index 62ffc2654f..322274a760 100644 --- a/source/module_hsolver/diago_david.h +++ b/source/module_hsolver/diago_david.h @@ -6,7 +6,7 @@ #include "module_base/module_device/memory_op.h"// base_device::memory #include "module_hsolver/diag_comm_info.h" - +#include "module_hsolver/precondition_funcs.h" #include #include @@ -21,10 +21,11 @@ class DiagoDavid // return T if T is real type(float, double), // otherwise return the real type of T(complex, complex) using Real = typename GetTypeReal::type; - - public: - DiagoDavid(const Real* precondition_in, + using PreFunc = fvec::Div; +public: + + DiagoDavid(PreFunc&& precondition_in, const int nband_in, const int dim_in, const int david_ndim_in, @@ -102,8 +103,7 @@ class DiagoDavid int notconv = 0; /// precondition for diag, diagonal approximation of matrix A(i.e. Hamilt) - const Real* precondition = nullptr; - Real* d_precondition = nullptr; + const PreFunc precondition; /// eigenvalue results Real* eigenvalue = nullptr; diff --git a/source/module_hsolver/hsolver_pw.cpp b/source/module_hsolver/hsolver_pw.cpp index c3afee204f..012fdb7123 100644 --- a/source/module_hsolver/hsolver_pw.cpp +++ b/source/module_hsolver/hsolver_pw.cpp @@ -503,8 +503,9 @@ void HSolverPW::hamiltSolvePsiK(hamilt::Hamilt* hm, }; bool scf = this->calculation_type == "nscf" ? false : true; - PreOP pre_op(pre_condition, transfunc::qe_pw); - Diago_DavSubspace dav_subspace(bind_pre_op(pre_op), + // const auto pre_op = make_pre_op(pre_condition, fvec::div_trans_prevec_minus_eigen, fval::qe_pw); + const PreOP> pre_op(pre_condition, fvec::div_trans_prevec_minus_eigen, fval::qe_pw); + Diago_DavSubspace dav_subspace(pre_op.get(), psi.get_nbands(), psi.get_k_first() ? psi.get_current_nbas() : psi.get_nk() * psi.get_nbasis(), @@ -573,7 +574,9 @@ void HSolverPW::hamiltSolvePsiK(hamilt::Hamilt* hm, ModuleBase::timer::tick("David", "spsi_func"); }; - DiagoDavid david(pre_condition.data(), + // const auto pre_op = make_pre_op(pre_condition, fvec::div_prevec); + const PreOP pre_op(pre_condition); + DiagoDavid david(pre_op.get(), nband, dim, PARAM.inp.pw_diag_ndim, diff --git a/source/module_hsolver/precondition_funcs.h b/source/module_hsolver/precondition_funcs.h index 97bb862158..17035121e7 100644 --- a/source/module_hsolver/precondition_funcs.h +++ b/source/module_hsolver/precondition_funcs.h @@ -1,96 +1,137 @@ +#pragma once +#include #include #include "module_base/module_device/types.h" +#include "module_base/module_device/memory_op.h" #include "module_hsolver/kernels/math_kernel_op.h" -#include // for debugging -#include namespace hsolver { - /// @brief Transforming a single value, - namespace transfunc + template + using Real = typename GetTypeReal::type; + + /// @brief Transform a single value + namespace fval { template T none(const T& x) { return x; } template T qe_pw(const T& x) { return 0.5 * (1.0 + x + sqrt(1 + (x - 1.0) * (x - 1.0))); } } - template - using Real = typename GetTypeReal::type; - - /// @brief to be called in the iterative eigensolver. - /// fixed parameters: object vector, eigenvalue, leading dimension, number of vectors - template - using PreFunc = const std::function*, const size_t&, const size_t&)>; - // using PreFunc = std::function*, const int&, const int&)>; - - /// type1: Divide transfunc(precon_vec - eigen_subspace[m]) for each vector[m] - ///$X \to (A-\lambda I)^{-1} X$ - // There may be other types of operation than this one. - template - void div_trans_prevec_minus_eigen(T* ptr, const Real* eig, const size_t& dim, const size_t& nvec, - const Real* const pre, Real* const d_pre = nullptr, const std::function(const Real&)>& transfunc = transfunc::none>) + /// @brief Transform vectors + namespace fvec { - using syncmem_var_h2d_op = base_device::memory::synchronize_memory_op, Device, base_device::DEVICE_CPU>; - std::vector> pre_trans(dim, 0.0); - const auto device = base_device::get_device_type({}); + /// @brief To be called in the iterative eigensolver. + /// Users can add other types of operation than the following ones at one's need. + /// fixed parameters: object vector, eigenvalue, leading dimension, number of vectors - for (int m = 0; m < nvec; m++) + ///--------------------------------------------------------------------------------------------- + /// type 1: directly divide each vector by the precondition vector + ///--------------------------------------------------------------------------------------------- + template + void div_prevec(T* ptr, const size_t& dim, const size_t& nvec, + const Real* const pre) { - T* const ptr_m = ptr + m * dim; - for (size_t i = 0; i < dim; i++) { pre_trans[i] = transfunc(pre[i] - eig[m]); } - std::cout << std::endl; -#if defined(__CUDA) || defined(__ROCM) - if (device == base_device::GpuDevice) + for (int m = 0; m < nvec; m++) { - assert(d_pre); - syncmem_var_h2d_op()({}, {}, d_pre, pre_trans.data(), dim); - vector_div_vector_op()({}, dim, ptr_m, ptr_m, d_pre); + T* const ptr_m = ptr + m * dim; + vector_div_vector_op()({}, dim, ptr_m, ptr_m, pre); } - else -#endif + } + /// calling intereface in the eigensolver + template + using Div = std::function; + // Kernel function full of dependence + template + using DivKernel = std::function)>; + + ///--------------------------------------------------------------------------------------------- + ///type2: Divide transval(precon_vec - eigen_subspace[m]) for each vector[m] + ///$X \to (A-\lambda I)^{-1} X$ + ///--------------------------------------------------------------------------------------------- + template + void div_trans_prevec_minus_eigen(T* ptr, const Real* eig, const size_t& dim, const size_t& nvec, + const Real* const pre, Real* const d_pre = nullptr, const std::function(const Real&)>& transval = fval::none>) + { + using syncmem_var_h2d_op = base_device::memory::synchronize_memory_op, Device, base_device::DEVICE_CPU>; + std::vector> pre_trans(dim, 0.0); + const auto device = base_device::get_device_type({}); + + for (int m = 0; m < nvec; m++) { - vector_div_vector_op()({}, dim, ptr_m, ptr_m, pre_trans.data()); + T* const ptr_m = ptr + m * dim; + for (size_t i = 0; i < dim; i++) { pre_trans[i] = transval(pre[i] - eig[m]); } +#if defined(__CUDA) || defined(__ROCM) + if (device == base_device::GpuDevice) + { + assert(d_pre); + syncmem_var_h2d_op()({}, {}, d_pre, pre_trans.data(), dim); + vector_div_vector_op()({}, dim, ptr_m, ptr_m, d_pre); + } + else +#endif + { + vector_div_vector_op()({}, dim, ptr_m, ptr_m, pre_trans.data()); + } } } + /// calling intereface in the eigensolver + template + using DivTransMinusEig = std::function*, const size_t&, const size_t&)>; + // Kernel function full of dependence + template + using DivTransMinusEigKernel = std::function)>; } /// @brief A operator-like class of precondition function /// to encapsulate the pre-allocation of memory on different devices before starting the iterative eigensolver. /// One can pass the operatr() function of this class, or other custom lambdas/functions to eigensolvers. - template + template > struct PreOP { - PreOP(const std::vector>& prevec, const std::function(const Real&)>& transfunc = transfunc::none) - : PreOP(prevec.data(), prevec.size(), transfunc) {} - PreOP(const Real* const prevec, const int& dim, const std::function(const Real&)>& transfunc = transfunc::none) - : prevec_(prevec), dim_(dim), transfunc_(transfunc), + using FVal_t = std::function(const Real&)>; //single-value transformer + using resmem_real_op = base_device::memory::resize_memory_op, Device>; + using delmem_real_op = base_device::memory::delete_memory_op, Device>; + PreOP(const std::vector>& prevec, + const Kernel_t& transvec = fvec::div_prevec, + const FVal_t& transval = fval::none>) + : PreOP(prevec.data(), prevec.size(), transvec, transval) {} + PreOP(const Real* const prevec, const int& dim, + const Kernel_t& transvec = fvec::div_prevec, + const FVal_t& transval = fval::none>) + : prevec_(prevec), dim_(dim), transvec_(transvec), transval_(transval), dev_(base_device::get_device_type({})) { #if defined(__CUDA) || defined(__ROCM) - if (this->dev_ == base_device::GpuDevice) { resmem_real_op()({}, this->d_prevec_, dim_); } + if (this->dev_ == base_device::GpuDevice) { resmem_real_op()({}, this->d_prevec_, dim_); } #endif } - PreOP(const PreOP& other) = delete; + PreOP(const PreOP&) = delete; ~PreOP() { #if defined(__CUDA) || defined(__ROCM) - if (this->dev_ == base_device::GpuDevice) { delmem_real_op()({}, this->d_precondition); } + if (this->dev_ == base_device::GpuDevice) { delmem_real_op()({}, this->d_prevec_); } #endif } - void operator()(T* ptr, const Real* eig, const size_t& dim, const size_t& nvec) const + + /// @brief Bind a PreOP object to a function + template>::value, bool>::type = 0> + fvec::Div get() const + { + return std::bind(PreOP::transvec_, + std::placeholders::_1, std::placeholders::_2, std::placeholders::_3, this->prevec_); + } + template>::value, bool>::type = 0> + fvec::DivTransMinusEig get() const { - assert(dim <= dim_); - div_trans_prevec_minus_eigen(ptr, eig, dim, nvec, prevec_, d_prevec_, transfunc_); + return std::bind(PreOP::transvec_, + std::placeholders::_1, std::placeholders::_2, std::placeholders::_3, std::placeholders::_4, + this->prevec_, this->d_prevec_, this->transval_); } + private: - const Real* const prevec_; + const Real* const prevec_ = nullptr; const int dim_; - Real* d_prevec_; - const std::function(const Real&)> transfunc_; + Real* d_prevec_ = nullptr; + const Kernel_t transvec_; + const FVal_t transval_; const base_device::AbacusDevice_t dev_; }; - - /// @brief Bind a PreOP object to a function - template - PreFunc bind_pre_op(const PreOP& pre_op) - { - return std::bind(&PreOP::operator(), &pre_op, std::placeholders::_1, std::placeholders::_2, std::placeholders::_3, std::placeholders::_4); - } } diff --git a/source/module_hsolver/test/CMakeLists.txt b/source/module_hsolver/test/CMakeLists.txt index 53698c37cb..a8b22cd169 100644 --- a/source/module_hsolver/test/CMakeLists.txt +++ b/source/module_hsolver/test/CMakeLists.txt @@ -75,11 +75,11 @@ if (ENABLE_MPI) SOURCES test_hsolver_pw.cpp ../hsolver_pw.cpp ../hsolver_lcaopw.cpp ../diago_bpcg.cpp ../diago_dav_subspace.cpp ../diag_const_nums.cpp ../diago_iter_assist.cpp ) - AddTest( - TARGET HSolver_sdft - LIBS parameter ${math_libs} psi device base container - SOURCES test_hsolver_sdft.cpp ../hsolver_pw_sdft.cpp ../hsolver_pw.cpp ../diago_bpcg.cpp ../diago_dav_subspace.cpp ../diag_const_nums.cpp ../diago_iter_assist.cpp - ) + # AddTest( + # TARGET HSolver_sdft + # LIBS parameter ${math_libs} psi device base container + # SOURCES test_hsolver_sdft.cpp ../hsolver_pw_sdft.cpp ../hsolver_pw.cpp ../diago_bpcg.cpp ../diago_dav_subspace.cpp ../diag_const_nums.cpp ../diago_iter_assist.cpp + # ) if(ENABLE_LCAO) if(USE_ELPA) diff --git a/source/module_lr/hsolver_lrtd.hpp b/source/module_lr/hsolver_lrtd.hpp index 9884b41d95..259b0a6d5c 100644 --- a/source/module_lr/hsolver_lrtd.hpp +++ b/source/module_lr/hsolver_lrtd.hpp @@ -73,23 +73,25 @@ namespace LR auto hpsi_func = [&hm](T* psi_in, T* hpsi, const int ld_psi, const int nvec) {hm.hPsi(psi_in, hpsi, ld_psi, nvec);}; auto spsi_func = [&hm](const T* psi_in, T* spsi, const int ld_psi, const int nvec) { std::memcpy(spsi, psi_in, sizeof(T) * ld_psi * nvec); }; - auto pre_func = [&precondition](T* ptr, const Real* eig, const int& ld, const int& nvec)->void - { hsolver::div_trans_prevec_minus_eigen(ptr, eig, ld, nvec, precondition.data()); }; if (method == "dav") { + auto pre_func = [&precondition](T* ptr, const int& ld, const int& nvec)->void + { hsolver::fvec::div_prevec(ptr, ld, nvec, precondition.data()); }; // Allow 5 tries at most. If ntry > ntry_max = 5, exit diag loop. const int ntry_max = 5; // In non-self consistent calculation, do until totally converged. Else allow 5 eigenvecs to be NOT // converged. const int notconv_max = ("nscf" == PARAM.inp.calculation) ? 0 : 5; // do diag and add davidson iteration counts up to avg_iter - hsolver::DiagoDavid david(precondition.data(), nband, dim, PARAM.inp.pw_diag_ndim, PARAM.inp.use_paw, comm_info); + hsolver::DiagoDavid david(pre_func, nband, dim, PARAM.inp.pw_diag_ndim, PARAM.inp.use_paw, comm_info); hsolver::DiagoIterAssist::avg_iter += static_cast(david.diag(hpsi_func, spsi_func, dim, psi, eigenvalue.data(), diag_ethr, maxiter, ntry_max, 0)); } else if (method == "dav_subspace") //need refactor { + auto pre_func = [&precondition](T* ptr, const Real* eig, const int& ld, const int& nvec)->void + { hsolver::fvec::div_trans_prevec_minus_eigen(ptr, eig, ld, nvec, precondition.data()); }; hsolver::Diago_DavSubspace dav_subspace(pre_func, nband, dim, From 17cac27434f0cda49825b2085eeec329556ede65 Mon Sep 17 00:00:00 2001 From: maki49 <1579492865@qq.com> Date: Fri, 15 Nov 2024 04:05:59 +0800 Subject: [PATCH 3/6] fix UTs --- source/module_hsolver/diago_dav_subspace.cpp | 2 +- source/module_hsolver/diago_david.cpp | 2 +- source/module_hsolver/precondition_funcs.h | 27 ++++++++++--------- source/module_hsolver/test/CMakeLists.txt | 10 +++---- .../test/diago_david_float_test.cpp | 5 ++-- .../test/diago_david_real_test.cpp | 3 ++- .../module_hsolver/test/diago_david_test.cpp | 6 +++-- source/module_hsolver/test/hsolver_pw_sup.h | 7 ++--- 8 files changed, 35 insertions(+), 27 deletions(-) diff --git a/source/module_hsolver/diago_dav_subspace.cpp b/source/module_hsolver/diago_dav_subspace.cpp index 0636e51fb3..0b8620dd71 100644 --- a/source/module_hsolver/diago_dav_subspace.cpp +++ b/source/module_hsolver/diago_dav_subspace.cpp @@ -21,7 +21,7 @@ Diago_DavSubspace::Diago_DavSubspace(PreFunc&& precondition_in, const int& diag_nmax_in, const bool& need_subspace_in, const diag_comm_info& diag_comm_in) - : precondition(std::forward(precondition_in)), n_band(nband_in), dim(nbasis_in), nbase_x(nband_in* david_ndim_in), + : precondition(precondition_in), n_band(nband_in), dim(nbasis_in), nbase_x(nband_in* david_ndim_in), diag_thr(diag_thr_in), iter_nmax(diag_nmax_in), is_subspace(need_subspace_in), diag_comm(diag_comm_in) { this->device = base_device::get_device_type(this->ctx); diff --git a/source/module_hsolver/diago_david.cpp b/source/module_hsolver/diago_david.cpp index 84f56e6090..1c55349387 100644 --- a/source/module_hsolver/diago_david.cpp +++ b/source/module_hsolver/diago_david.cpp @@ -39,7 +39,7 @@ DiagoDavid::DiagoDavid(PreFunc&& precondition_in, const bool use_paw_in, const diag_comm_info& diag_comm_in) : nband(nband_in), dim(dim_in), nbase_x(david_ndim_in* nband_in), david_ndim(david_ndim_in), use_paw(use_paw_in), diag_comm(diag_comm_in), - precondition(std::forward(precondition_in)) + precondition(precondition_in) { this->device = base_device::get_device_type(this->ctx); diff --git a/source/module_hsolver/precondition_funcs.h b/source/module_hsolver/precondition_funcs.h index 17035121e7..e942fc66c8 100644 --- a/source/module_hsolver/precondition_funcs.h +++ b/source/module_hsolver/precondition_funcs.h @@ -4,6 +4,9 @@ #include "module_base/module_device/types.h" #include "module_base/module_device/memory_op.h" #include "module_hsolver/kernels/math_kernel_op.h" + +/// @brief Preconditioner Function Library +/// Users can add other types of operation than the following ones at one's need. namespace hsolver { template @@ -19,10 +22,6 @@ namespace hsolver /// @brief Transform vectors namespace fvec { - /// @brief To be called in the iterative eigensolver. - /// Users can add other types of operation than the following ones at one's need. - /// fixed parameters: object vector, eigenvalue, leading dimension, number of vectors - ///--------------------------------------------------------------------------------------------- /// type 1: directly divide each vector by the precondition vector ///--------------------------------------------------------------------------------------------- @@ -30,13 +29,14 @@ namespace hsolver void div_prevec(T* ptr, const size_t& dim, const size_t& nvec, const Real* const pre) { + Device* ctx = {}; for (int m = 0; m < nvec; m++) { T* const ptr_m = ptr + m * dim; - vector_div_vector_op()({}, dim, ptr_m, ptr_m, pre); + vector_div_vector_op()(ctx, dim, ptr_m, ptr_m, pre); } } - /// calling intereface in the eigensolver + /// Intereface to be called in the eigensolver template using Div = std::function; // Kernel function full of dependence @@ -54,6 +54,8 @@ namespace hsolver using syncmem_var_h2d_op = base_device::memory::synchronize_memory_op, Device, base_device::DEVICE_CPU>; std::vector> pre_trans(dim, 0.0); const auto device = base_device::get_device_type({}); + Device* ctx = {}; + base_device::DEVICE_CPU* cpu_ctx = {}; for (int m = 0; m < nvec; m++) { @@ -63,27 +65,28 @@ namespace hsolver if (device == base_device::GpuDevice) { assert(d_pre); - syncmem_var_h2d_op()({}, {}, d_pre, pre_trans.data(), dim); - vector_div_vector_op()({}, dim, ptr_m, ptr_m, d_pre); + syncmem_var_h2d_op()(ctx, cpu_ctx, d_pre, pre_trans.data(), dim); + vector_div_vector_op()(ctx, dim, ptr_m, ptr_m, d_pre); } else #endif { - vector_div_vector_op()({}, dim, ptr_m, ptr_m, pre_trans.data()); + vector_div_vector_op()(ctx, dim, ptr_m, ptr_m, pre_trans.data()); } } } - /// calling intereface in the eigensolver + /// Intereface to be called in the eigensolver template using DivTransMinusEig = std::function*, const size_t&, const size_t&)>; - // Kernel function full of dependence + /// Kernel function full of dependence template using DivTransMinusEigKernel = std::function)>; } /// @brief A operator-like class of precondition function /// to encapsulate the pre-allocation of memory on different devices before starting the iterative eigensolver. - /// One can pass the operatr() function of this class, or other custom lambdas/functions to eigensolvers. + /// One can use `.get()` interface to get the function to be called by the eigensovler, + /// or pass a custom lambdas/function to replace the one returned by `.get()`. template > struct PreOP { diff --git a/source/module_hsolver/test/CMakeLists.txt b/source/module_hsolver/test/CMakeLists.txt index a8b22cd169..53698c37cb 100644 --- a/source/module_hsolver/test/CMakeLists.txt +++ b/source/module_hsolver/test/CMakeLists.txt @@ -75,11 +75,11 @@ if (ENABLE_MPI) SOURCES test_hsolver_pw.cpp ../hsolver_pw.cpp ../hsolver_lcaopw.cpp ../diago_bpcg.cpp ../diago_dav_subspace.cpp ../diag_const_nums.cpp ../diago_iter_assist.cpp ) - # AddTest( - # TARGET HSolver_sdft - # LIBS parameter ${math_libs} psi device base container - # SOURCES test_hsolver_sdft.cpp ../hsolver_pw_sdft.cpp ../hsolver_pw.cpp ../diago_bpcg.cpp ../diago_dav_subspace.cpp ../diag_const_nums.cpp ../diago_iter_assist.cpp - # ) + AddTest( + TARGET HSolver_sdft + LIBS parameter ${math_libs} psi device base container + SOURCES test_hsolver_sdft.cpp ../hsolver_pw_sdft.cpp ../hsolver_pw.cpp ../diago_bpcg.cpp ../diago_dav_subspace.cpp ../diag_const_nums.cpp ../diago_iter_assist.cpp + ) if(ENABLE_LCAO) if(USE_ELPA) diff --git a/source/module_hsolver/test/diago_david_float_test.cpp b/source/module_hsolver/test/diago_david_float_test.cpp index da52b7272b..f1569564ca 100644 --- a/source/module_hsolver/test/diago_david_float_test.cpp +++ b/source/module_hsolver/test/diago_david_float_test.cpp @@ -92,8 +92,9 @@ class DiagoDavPrepare const int dim = phi.get_current_nbas() ; const int nband = phi.get_nbands(); - const int ld_psi =phi.get_nbasis(); - hsolver::DiagoDavid> dav(precondition, nband, dim, order, false, comm_info); + const int ld_psi = phi.get_nbasis(); + const hsolver::PreOP> pre_op(precondition, dim); + hsolver::DiagoDavid> dav(pre_op.get(), nband, dim, order, false, comm_info); hsolver::DiagoIterAssist>::PW_DIAG_NMAX = maxiter; hsolver::DiagoIterAssist>::PW_DIAG_THR = eps; diff --git a/source/module_hsolver/test/diago_david_real_test.cpp b/source/module_hsolver/test/diago_david_real_test.cpp index e451d45fbe..3432fcc3bf 100644 --- a/source/module_hsolver/test/diago_david_real_test.cpp +++ b/source/module_hsolver/test/diago_david_real_test.cpp @@ -92,7 +92,8 @@ class DiagoDavPrepare const int dim = phi.get_current_nbas(); const int nband = phi.get_nbands(); const int ld_psi = phi.get_nbasis(); - hsolver::DiagoDavid dav(precondition, nband, dim, order, false, comm_info); + const hsolver::PreOP pre_op(precondition, dim); + hsolver::DiagoDavid dav(pre_op.get(), nband, dim, order, false, comm_info); hsolver::DiagoIterAssist::PW_DIAG_NMAX = maxiter; hsolver::DiagoIterAssist::PW_DIAG_THR = eps; diff --git a/source/module_hsolver/test/diago_david_test.cpp b/source/module_hsolver/test/diago_david_test.cpp index 4911d40a4f..96852719cd 100644 --- a/source/module_hsolver/test/diago_david_test.cpp +++ b/source/module_hsolver/test/diago_david_test.cpp @@ -91,8 +91,10 @@ class DiagoDavPrepare const int dim = phi.get_current_nbas(); const int nband = phi.get_nbands(); - const int ld_psi = phi.get_nbasis(); - hsolver::DiagoDavid> dav(precondition, nband, dim, order, false, comm_info); + const int ld_psi = phi.get_nbasis(); + const auto pre_func = [&precondition](std::complex* ptr, const int& ld, const int& nvec)->void + { hsolver::fvec::div_prevec(ptr, ld, nvec, precondition); }; + hsolver::DiagoDavid> dav(pre_func, nband, dim, order, false, comm_info); hsolver::DiagoIterAssist>::PW_DIAG_NMAX = maxiter; hsolver::DiagoIterAssist>::PW_DIAG_THR = eps; diff --git a/source/module_hsolver/test/hsolver_pw_sup.h b/source/module_hsolver/test/hsolver_pw_sup.h index c70025a2c2..c2f4d98c9d 100644 --- a/source/module_hsolver/test/hsolver_pw_sup.h +++ b/source/module_hsolver/test/hsolver_pw_sup.h @@ -1,4 +1,5 @@ #include "module_basis/module_pw/pw_basis_k.h" +#include "module_hsolver/precondition_funcs.h" namespace ModulePW { @@ -121,15 +122,15 @@ template class DiagoCG, base_device::DEVICE_CPU>; template class DiagoCG, base_device::DEVICE_CPU>; template -DiagoDavid::DiagoDavid(const Real* precondition_in, +DiagoDavid::DiagoDavid(PreFunc&& precondition_in, const int nband_in, const int dim_in, const int david_ndim_in, const bool use_paw_in, const diag_comm_info& diag_comm_in) - : nband(nband_in), dim(dim_in), nbase_x(david_ndim_in * nband_in), david_ndim(david_ndim_in), use_paw(use_paw_in), diag_comm(diag_comm_in) { + : nband(nband_in), dim(dim_in), nbase_x(david_ndim_in* nband_in), david_ndim(david_ndim_in), use_paw(use_paw_in), diag_comm(diag_comm_in), + precondition(std::forward(precondition_in)) { this->device = base_device::get_device_type(this->ctx); - this->precondition = precondition_in; test_david = 2; // 1: check which function is called and which step is executed From 06876435b10b746db2dbaf5b648eb9322a8698d7 Mon Sep 17 00:00:00 2001 From: maki49 <1579492865@qq.com> Date: Fri, 15 Nov 2024 15:15:07 +0800 Subject: [PATCH 4/6] separate sorce and destination pointer --- source/module_hsolver/diago_dav_subspace.cpp | 3 +- source/module_hsolver/diago_david.cpp | 3 +- source/module_hsolver/precondition_funcs.h | 30 +++++++++++--------- source/module_lr/hsolver_lrtd.hpp | 8 +++--- 4 files changed, 24 insertions(+), 20 deletions(-) diff --git a/source/module_hsolver/diago_dav_subspace.cpp b/source/module_hsolver/diago_dav_subspace.cpp index 0b8620dd71..037740813d 100644 --- a/source/module_hsolver/diago_dav_subspace.cpp +++ b/source/module_hsolver/diago_dav_subspace.cpp @@ -319,7 +319,8 @@ void Diago_DavSubspace::cal_grad(const HPsiFunc& hpsi_func, this->dim); // "precondition!!!" - this->precondition(psi_iter + nbase * this->dim, eigenvalue_iter->data(), this->dim, notconv); + auto* start_ptr = psi_iter + nbase * this->dim; + this->precondition(start_ptr, start_ptr, eigenvalue_iter->data(), this->dim, notconv); // "normalize!!!" in order to improve numerical stability of subspace diagonalization std::vector psi_norm(notconv, 0.0); diff --git a/source/module_hsolver/diago_david.cpp b/source/module_hsolver/diago_david.cpp index 1c55349387..f6da73b799 100644 --- a/source/module_hsolver/diago_david.cpp +++ b/source/module_hsolver/diago_david.cpp @@ -489,7 +489,8 @@ void DiagoDavid::cal_grad(const HPsiFunc& hpsi_func, // T is a diagonal stored in array `precondition` // to do preconditioning, divide each column of basis by the corresponding element of precondition //<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< - this->precondition(basis + dim * nbase, dim, notconv); + auto* start_ptr = basis + dim * nbase; + this->precondition(start_ptr, start_ptr, dim, notconv); //<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< // there is a nbase to nbase + notconv band orthogonalise diff --git a/source/module_hsolver/precondition_funcs.h b/source/module_hsolver/precondition_funcs.h index e942fc66c8..4b8258c556 100644 --- a/source/module_hsolver/precondition_funcs.h +++ b/source/module_hsolver/precondition_funcs.h @@ -26,19 +26,19 @@ namespace hsolver /// type 1: directly divide each vector by the precondition vector ///--------------------------------------------------------------------------------------------- template - void div_prevec(T* ptr, const size_t& dim, const size_t& nvec, + void div_prevec(T* const dst, const T* const src, const size_t& dim, const size_t& nvec, const Real* const pre) { Device* ctx = {}; - for (int m = 0; m < nvec; m++) + for (size_t m = 0; m < nvec; m++) { - T* const ptr_m = ptr + m * dim; - vector_div_vector_op()(ctx, dim, ptr_m, ptr_m, pre); + const size_t offset = m * dim; + vector_div_vector_op()(ctx, dim, dst + offset, src + offset, pre); } } /// Intereface to be called in the eigensolver template - using Div = std::function; + using Div = std::function; // Kernel function full of dependence template using DivKernel = std::function)>; @@ -48,7 +48,7 @@ namespace hsolver ///$X \to (A-\lambda I)^{-1} X$ ///--------------------------------------------------------------------------------------------- template - void div_trans_prevec_minus_eigen(T* ptr, const Real* eig, const size_t& dim, const size_t& nvec, + void div_trans_prevec_minus_eigen(T* const dst, const T* const src, const Real* eig, const size_t& dim, const size_t& nvec, const Real* const pre, Real* const d_pre = nullptr, const std::function(const Real&)>& transval = fval::none>) { using syncmem_var_h2d_op = base_device::memory::synchronize_memory_op, Device, base_device::DEVICE_CPU>; @@ -57,27 +57,27 @@ namespace hsolver Device* ctx = {}; base_device::DEVICE_CPU* cpu_ctx = {}; - for (int m = 0; m < nvec; m++) + for (size_t m = 0; m < nvec; m++) { - T* const ptr_m = ptr + m * dim; + const size_t offset = m * dim; for (size_t i = 0; i < dim; i++) { pre_trans[i] = transval(pre[i] - eig[m]); } #if defined(__CUDA) || defined(__ROCM) if (device == base_device::GpuDevice) { assert(d_pre); syncmem_var_h2d_op()(ctx, cpu_ctx, d_pre, pre_trans.data(), dim); - vector_div_vector_op()(ctx, dim, ptr_m, ptr_m, d_pre); + vector_div_vector_op()(ctx, dim, dst + offset, src + offset, d_pre); } else #endif { - vector_div_vector_op()(ctx, dim, ptr_m, ptr_m, pre_trans.data()); + vector_div_vector_op()(ctx, dim, dst + offset, src + offset, pre_trans.data()); } } } /// Intereface to be called in the eigensolver template - using DivTransMinusEig = std::function*, const size_t&, const size_t&)>; + using DivTransMinusEig = std::function*, const size_t&, const size_t&)>; /// Kernel function full of dependence template using DivTransMinusEigKernel = std::function)>; @@ -104,7 +104,9 @@ namespace hsolver dev_(base_device::get_device_type({})) { #if defined(__CUDA) || defined(__ROCM) - if (this->dev_ == base_device::GpuDevice) { resmem_real_op()({}, this->d_prevec_, dim_); } + if (this->dev_ == base_device::GpuDevice) { + resmem_real_op()({}, this->d_prevec_, dim_); + } #endif } PreOP(const PreOP&) = delete; @@ -119,13 +121,13 @@ namespace hsolver fvec::Div get() const { return std::bind(PreOP::transvec_, - std::placeholders::_1, std::placeholders::_2, std::placeholders::_3, this->prevec_); + std::placeholders::_1, std::placeholders::_2, std::placeholders::_3, std::placeholders::_4, this->prevec_); } template>::value, bool>::type = 0> fvec::DivTransMinusEig get() const { return std::bind(PreOP::transvec_, - std::placeholders::_1, std::placeholders::_2, std::placeholders::_3, std::placeholders::_4, + std::placeholders::_1, std::placeholders::_2, std::placeholders::_3, std::placeholders::_4, std::placeholders::_5, this->prevec_, this->d_prevec_, this->transval_); } diff --git a/source/module_lr/hsolver_lrtd.hpp b/source/module_lr/hsolver_lrtd.hpp index 259b0a6d5c..9d9802f54d 100644 --- a/source/module_lr/hsolver_lrtd.hpp +++ b/source/module_lr/hsolver_lrtd.hpp @@ -76,8 +76,8 @@ namespace LR if (method == "dav") { - auto pre_func = [&precondition](T* ptr, const int& ld, const int& nvec)->void - { hsolver::fvec::div_prevec(ptr, ld, nvec, precondition.data()); }; + auto pre_func = [&precondition](T* const dst, const T* const src, const int& ld, const int& nvec)->void + { hsolver::fvec::div_prevec(dst, src, ld, nvec, precondition.data()); }; // Allow 5 tries at most. If ntry > ntry_max = 5, exit diag loop. const int ntry_max = 5; // In non-self consistent calculation, do until totally converged. Else allow 5 eigenvecs to be NOT @@ -90,8 +90,8 @@ namespace LR } else if (method == "dav_subspace") //need refactor { - auto pre_func = [&precondition](T* ptr, const Real* eig, const int& ld, const int& nvec)->void - { hsolver::fvec::div_trans_prevec_minus_eigen(ptr, eig, ld, nvec, precondition.data()); }; + auto pre_func = [&precondition](T* const dst, const T* const src, const Real* eig, const int& ld, const int& nvec)->void + { hsolver::fvec::div_trans_prevec_minus_eigen(dst, src, eig, ld, nvec, precondition.data()); }; hsolver::Diago_DavSubspace dav_subspace(pre_func, nband, dim, From 3ac0de28115102adcdd94ed105c00680fdadce9e Mon Sep 17 00:00:00 2001 From: maki49 <1579492865@qq.com> Date: Fri, 15 Nov 2024 16:24:54 +0800 Subject: [PATCH 5/6] fix sdft test compile bug on icpx --- source/module_hsolver/precondition_funcs.h | 14 +++++++------- source/module_hsolver/test/diago_david_test.cpp | 4 ++-- source/module_hsolver/test/test_hsolver_sdft.cpp | 5 +++-- 3 files changed, 12 insertions(+), 11 deletions(-) diff --git a/source/module_hsolver/precondition_funcs.h b/source/module_hsolver/precondition_funcs.h index 4b8258c556..08c1424b77 100644 --- a/source/module_hsolver/precondition_funcs.h +++ b/source/module_hsolver/precondition_funcs.h @@ -53,10 +53,9 @@ namespace hsolver { using syncmem_var_h2d_op = base_device::memory::synchronize_memory_op, Device, base_device::DEVICE_CPU>; std::vector> pre_trans(dim, 0.0); - const auto device = base_device::get_device_type({}); Device* ctx = {}; base_device::DEVICE_CPU* cpu_ctx = {}; - + const auto device = base_device::get_device_type(ctx); for (size_t m = 0; m < nvec; m++) { const size_t offset = m * dim; @@ -100,19 +99,19 @@ namespace hsolver PreOP(const Real* const prevec, const int& dim, const Kernel_t& transvec = fvec::div_prevec, const FVal_t& transval = fval::none>) - : prevec_(prevec), dim_(dim), transvec_(transvec), transval_(transval), - dev_(base_device::get_device_type({})) + : prevec_(prevec), dim_(dim), transvec_(transvec), transval_(transval) { + this->dev_ = base_device::get_device_type(this->ctx_); #if defined(__CUDA) || defined(__ROCM) if (this->dev_ == base_device::GpuDevice) { - resmem_real_op()({}, this->d_prevec_, dim_); + resmem_real_op()(this->ctx_, this->d_prevec_, dim_); } #endif } PreOP(const PreOP&) = delete; ~PreOP() { #if defined(__CUDA) || defined(__ROCM) - if (this->dev_ == base_device::GpuDevice) { delmem_real_op()({}, this->d_prevec_); } + if (this->dev_ == base_device::GpuDevice) { delmem_real_op()(this->ctx_, this->d_prevec_); } #endif } @@ -137,6 +136,7 @@ namespace hsolver Real* d_prevec_ = nullptr; const Kernel_t transvec_; const FVal_t transval_; - const base_device::AbacusDevice_t dev_; + base_device::AbacusDevice_t dev_; + Device* ctx_ = {}; }; } diff --git a/source/module_hsolver/test/diago_david_test.cpp b/source/module_hsolver/test/diago_david_test.cpp index 96852719cd..4e8f59359e 100644 --- a/source/module_hsolver/test/diago_david_test.cpp +++ b/source/module_hsolver/test/diago_david_test.cpp @@ -92,8 +92,8 @@ class DiagoDavPrepare const int dim = phi.get_current_nbas(); const int nband = phi.get_nbands(); const int ld_psi = phi.get_nbasis(); - const auto pre_func = [&precondition](std::complex* ptr, const int& ld, const int& nvec)->void - { hsolver::fvec::div_prevec(ptr, ld, nvec, precondition); }; + const auto pre_func = [&precondition](std::complex* const dst, const std::complex* const src, const int& ld, const int& nvec)->void + { hsolver::fvec::div_prevec(dst, src, ld, nvec, precondition); }; hsolver::DiagoDavid> dav(pre_func, nband, dim, order, false, comm_info); hsolver::DiagoIterAssist>::PW_DIAG_NMAX = maxiter; diff --git a/source/module_hsolver/test/test_hsolver_sdft.cpp b/source/module_hsolver/test/test_hsolver_sdft.cpp index 72f690d804..349ed6da80 100644 --- a/source/module_hsolver/test/test_hsolver_sdft.cpp +++ b/source/module_hsolver/test/test_hsolver_sdft.cpp @@ -78,8 +78,7 @@ Stochastic_Iter::Stochastic_Iter() } template -Stochastic_Iter::~Stochastic_Iter(){}; -template class Stochastic_Iter, base_device::DEVICE_CPU>; +Stochastic_Iter::~Stochastic_Iter() {}; template void Stochastic_Iter::init(K_Vectors* pkv_in, @@ -151,6 +150,8 @@ void Stochastic_Iter::sum_stoband(Stochastic_WF& stowf, return; } +template class Stochastic_Iter, base_device::DEVICE_CPU>; + Charge::Charge(){}; Charge::~Charge(){}; From a0571fb0fd9404c3c00c6fcb264dca489d1a12e1 Mon Sep 17 00:00:00 2001 From: maki49 <1579492865@qq.com> Date: Fri, 15 Nov 2024 17:52:28 +0800 Subject: [PATCH 6/6] fix error passing host pointer to gpu kernel --- source/module_hsolver/precondition_funcs.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/source/module_hsolver/precondition_funcs.h b/source/module_hsolver/precondition_funcs.h index 08c1424b77..58834a5572 100644 --- a/source/module_hsolver/precondition_funcs.h +++ b/source/module_hsolver/precondition_funcs.h @@ -101,10 +101,13 @@ namespace hsolver const FVal_t& transval = fval::none>) : prevec_(prevec), dim_(dim), transvec_(transvec), transval_(transval) { + using syncmem_var_h2d_op = base_device::memory::synchronize_memory_op, Device, base_device::DEVICE_CPU>; this->dev_ = base_device::get_device_type(this->ctx_); #if defined(__CUDA) || defined(__ROCM) if (this->dev_ == base_device::GpuDevice) { resmem_real_op()(this->ctx_, this->d_prevec_, dim_); + base_device::DEVICE_CPU* cpu_ctx = {}; + syncmem_var_h2d_op()(this->ctx_, cpu_ctx, this->d_prevec_, prevec_, dim_); } #endif } @@ -120,7 +123,8 @@ namespace hsolver fvec::Div get() const { return std::bind(PreOP::transvec_, - std::placeholders::_1, std::placeholders::_2, std::placeholders::_3, std::placeholders::_4, this->prevec_); + std::placeholders::_1, std::placeholders::_2, std::placeholders::_3, std::placeholders::_4, + this->dev_ == base_device::GpuDevice ? this->d_prevec_ : this->prevec_); } template>::value, bool>::type = 0> fvec::DivTransMinusEig get() const