|
#include "module_base/module_device/types.h"
#include "module_hsolver/kernels/math_kernel_op.h"

#include <cassert>
#include <cmath>
#include <cstddef>
#include <functional>
#include <iostream> // for debugging
#include <vector>
| 6 | +namespace hsolver |
| 7 | +{ |
/// @brief Scalar transformations applied to a single value before it is used
/// as a preconditioner denominator.
namespace transfunc
{
    /// @brief Identity transformation.
    /// @param x input value
    /// @return x unchanged
    template <typename T>
    T none(const T& x)
    {
        return x;
    }

    /// @brief Smooth transformation f(x) = (1 + x + sqrt(1 + (x - 1)^2)) / 2.
    /// NOTE(review): the name suggests the plane-wave preconditioner used by
    /// Quantum ESPRESSO - confirm against the reference implementation.
    /// @param x input value
    /// @return f(x), converted back to T
    template <typename T>
    T qe_pw(const T& x)
    {
        // std::sqrt is qualified on purpose: an unqualified call may bind to the
        // C function ::sqrt(double), which silently truncates long double input.
        const auto shifted = x - 1.0;
        return static_cast<T>(0.5 * (1.0 + x + std::sqrt(1.0 + shifted * shifted)));
    }
}
| 14 | + |
| 15 | + template <typename T> |
| 16 | + using Real = typename GetTypeReal<T>::type; |
| 17 | + |
| 18 | + /// @brief to be called in the iterative eigensolver. |
| 19 | + /// fixed parameters: object vector, eigenvalue, leading dimension, number of vectors |
| 20 | + template <typename T> |
| 21 | + using PreFunc = const std::function<void(T*, const Real<T>*, const size_t&, const size_t&)>; |
| 22 | + // using PreFunc = std::function<void(T*, const Real<T>*, const int&, const int&)>; |
| 23 | + |
| 24 | + /// type1: Divide transfunc(precon_vec - eigen_subspace[m]) for each vector[m] |
| 25 | + ///$X \to (A-\lambda I)^{-1} X$ |
| 26 | + // There may be other types of operation than this one. |
| 27 | + template <typename T, typename Device = base_device::DEVICE_CPU> |
| 28 | + void div_trans_prevec_minus_eigen(T* ptr, const Real<T>* eig, const size_t& dim, const size_t& nvec, |
| 29 | + const Real<T>* const pre, Real<T>* const d_pre = nullptr, const std::function<Real<T>(const Real<T>&)>& transfunc = transfunc::none<Real<T>>) |
| 30 | + { |
| 31 | + using syncmem_var_h2d_op = base_device::memory::synchronize_memory_op<Real<T>, Device, base_device::DEVICE_CPU>; |
| 32 | + std::vector<Real<T>> pre_trans(dim, 0.0); |
| 33 | + const auto device = base_device::get_device_type<Device>({}); |
| 34 | + |
| 35 | + for (int m = 0; m < nvec; m++) |
| 36 | + { |
| 37 | + T* const ptr_m = ptr + m * dim; |
| 38 | + for (size_t i = 0; i < dim; i++) { pre_trans[i] = transfunc(pre[i] - eig[m]); } |
| 39 | + std::cout << std::endl; |
| 40 | +#if defined(__CUDA) || defined(__ROCM) |
| 41 | + if (device == base_device::GpuDevice) |
| 42 | + { |
| 43 | + assert(d_pre); |
| 44 | + syncmem_var_h2d_op()({}, {}, d_pre, pre_trans.data(), dim); |
| 45 | + vector_div_vector_op<T, Device>()({}, dim, ptr_m, ptr_m, d_pre); |
| 46 | + } |
| 47 | + else |
| 48 | +#endif |
| 49 | + { |
| 50 | + vector_div_vector_op<T, Device>()({}, dim, ptr_m, ptr_m, pre_trans.data()); |
| 51 | + } |
| 52 | + } |
| 53 | + } |
| 54 | + |
| 55 | + /// @brief A operator-like class of precondition function |
| 56 | + /// to encapsulate the pre-allocation of memory on different devices before starting the iterative eigensolver. |
| 57 | + /// One can pass the operatr() function of this class, or other custom lambdas/functions to eigensolvers. |
| 58 | + template <typename T, typename Device = base_device::DEVICE_CPU> |
| 59 | + struct PreOP |
| 60 | + { |
| 61 | + PreOP(const std::vector<Real<T>>& prevec, const std::function<Real<T>(const Real<T>&)>& transfunc = transfunc::none) |
| 62 | + : PreOP<T, Device>(prevec.data(), prevec.size(), transfunc) {} |
| 63 | + PreOP(const Real<T>* const prevec, const int& dim, const std::function<Real<T>(const Real<T>&)>& transfunc = transfunc::none) |
| 64 | + : prevec_(prevec), dim_(dim), transfunc_(transfunc), |
| 65 | + dev_(base_device::get_device_type<Device>({})) |
| 66 | + { |
| 67 | +#if defined(__CUDA) || defined(__ROCM) |
| 68 | + if (this->dev_ == base_device::GpuDevice) { resmem_real_op<T, Device>()({}, this->d_prevec_, dim_); } |
| 69 | +#endif |
| 70 | + } |
| 71 | + PreOP(const PreOP& other) = delete; |
| 72 | + ~PreOP() { |
| 73 | +#if defined(__CUDA) || defined(__ROCM) |
| 74 | + if (this->dev_ == base_device::GpuDevice) { delmem_real_op<T, Device>()({}, this->d_precondition); } |
| 75 | +#endif |
| 76 | + } |
| 77 | + void operator()(T* ptr, const Real<T>* eig, const size_t& dim, const size_t& nvec) const |
| 78 | + { |
| 79 | + assert(dim <= dim_); |
| 80 | + div_trans_prevec_minus_eigen<T, Device>(ptr, eig, dim, nvec, prevec_, d_prevec_, transfunc_); |
| 81 | + } |
| 82 | + private: |
| 83 | + const Real<T>* const prevec_; |
| 84 | + const int dim_; |
| 85 | + Real<T>* d_prevec_; |
| 86 | + const std::function<Real<T>(const Real<T>&)> transfunc_; |
| 87 | + const base_device::AbacusDevice_t dev_; |
| 88 | + }; |
| 89 | + |
| 90 | + /// @brief Bind a PreOP object to a function |
| 91 | + template <typename T, typename Device> |
| 92 | + PreFunc<T> bind_pre_op(const PreOP<T, Device>& pre_op) |
| 93 | + { |
| 94 | + return std::bind(&PreOP<T, Device>::operator(), &pre_op, std::placeholders::_1, std::placeholders::_2, std::placeholders::_3, std::placeholders::_4); |
| 95 | + } |
| 96 | +} |
0 commit comments