Skip to content

Commit 0687643

Browse files
committed
separate source and destination pointer
1 parent 17cac27 commit 0687643

File tree

4 files changed

+24
-20
lines changed

4 files changed

+24
-20
lines changed

source/module_hsolver/diago_dav_subspace.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -319,7 +319,8 @@ void Diago_DavSubspace<T, Device>::cal_grad(const HPsiFunc& hpsi_func,
319319
this->dim);
320320

321321
// "precondition!!!"
322-
this->precondition(psi_iter + nbase * this->dim, eigenvalue_iter->data(), this->dim, notconv);
322+
auto* start_ptr = psi_iter + nbase * this->dim;
323+
this->precondition(start_ptr, start_ptr, eigenvalue_iter->data(), this->dim, notconv);
323324

324325
// "normalize!!!" in order to improve numerical stability of subspace diagonalization
325326
std::vector<Real> psi_norm(notconv, 0.0);

source/module_hsolver/diago_david.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -489,7 +489,8 @@ void DiagoDavid<T, Device>::cal_grad(const HPsiFunc& hpsi_func,
489489
// T is a diagonal stored in array `precondition`
490490
// to do preconditioning, divide each column of basis by the corresponding element of precondition
491491
//<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
492-
this->precondition(basis + dim * nbase, dim, notconv);
492+
auto* start_ptr = basis + dim * nbase;
493+
this->precondition(start_ptr, start_ptr, dim, notconv);
493494
//<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
494495

495496
// there is a nbase to nbase + notconv band orthogonalise

source/module_hsolver/precondition_funcs.h

Lines changed: 16 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -26,19 +26,19 @@ namespace hsolver
2626
/// type 1: directly divide each vector by the precondition vector
2727
///---------------------------------------------------------------------------------------------
2828
template <typename T, typename Device = base_device::DEVICE_CPU>
29-
void div_prevec(T* ptr, const size_t& dim, const size_t& nvec,
29+
void div_prevec(T* const dst, const T* const src, const size_t& dim, const size_t& nvec,
3030
const Real<T>* const pre)
3131
{
3232
Device* ctx = {};
33-
for (int m = 0; m < nvec; m++)
33+
for (size_t m = 0; m < nvec; m++)
3434
{
35-
T* const ptr_m = ptr + m * dim;
36-
vector_div_vector_op<T, Device>()(ctx, dim, ptr_m, ptr_m, pre);
35+
const size_t offset = m * dim;
36+
vector_div_vector_op<T, Device>()(ctx, dim, dst + offset, src + offset, pre);
3737
}
3838
}
3939
/// Interface to be called in the eigensolver
4040
template <typename T>
41-
using Div = std::function<void(T*, const size_t&, const size_t&)>;
41+
using Div = std::function<void(T* const, const T* const, const size_t&, const size_t&)>;
4242
// Kernel function full of dependence
4343
template <typename T, typename Device = base_device::DEVICE_CPU>
4444
using DivKernel = std::function<decltype(div_prevec<T, Device>)>;
@@ -48,7 +48,7 @@ namespace hsolver
4848
///$X \to (A-\lambda I)^{-1} X$
4949
///---------------------------------------------------------------------------------------------
5050
template <typename T, typename Device = base_device::DEVICE_CPU>
51-
void div_trans_prevec_minus_eigen(T* ptr, const Real<T>* eig, const size_t& dim, const size_t& nvec,
51+
void div_trans_prevec_minus_eigen(T* const dst, const T* const src, const Real<T>* eig, const size_t& dim, const size_t& nvec,
5252
const Real<T>* const pre, Real<T>* const d_pre = nullptr, const std::function<Real<T>(const Real<T>&)>& transval = fval::none<Real<T>>)
5353
{
5454
using syncmem_var_h2d_op = base_device::memory::synchronize_memory_op<Real<T>, Device, base_device::DEVICE_CPU>;
@@ -57,27 +57,27 @@ namespace hsolver
5757
Device* ctx = {};
5858
base_device::DEVICE_CPU* cpu_ctx = {};
5959

60-
for (int m = 0; m < nvec; m++)
60+
for (size_t m = 0; m < nvec; m++)
6161
{
62-
T* const ptr_m = ptr + m * dim;
62+
const size_t offset = m * dim;
6363
for (size_t i = 0; i < dim; i++) { pre_trans[i] = transval(pre[i] - eig[m]); }
6464
#if defined(__CUDA) || defined(__ROCM)
6565
if (device == base_device::GpuDevice)
6666
{
6767
assert(d_pre);
6868
syncmem_var_h2d_op()(ctx, cpu_ctx, d_pre, pre_trans.data(), dim);
69-
vector_div_vector_op<T, Device>()(ctx, dim, ptr_m, ptr_m, d_pre);
69+
vector_div_vector_op<T, Device>()(ctx, dim, dst + offset, src + offset, d_pre);
7070
}
7171
else
7272
#endif
7373
{
74-
vector_div_vector_op<T, Device>()(ctx, dim, ptr_m, ptr_m, pre_trans.data());
74+
vector_div_vector_op<T, Device>()(ctx, dim, dst + offset, src + offset, pre_trans.data());
7575
}
7676
}
7777
}
7878
/// Interface to be called in the eigensolver
7979
template <typename T>
80-
using DivTransMinusEig = std::function<void(T*, const Real<T>*, const size_t&, const size_t&)>;
80+
using DivTransMinusEig = std::function<void(T* const, const T* const, const Real<T>*, const size_t&, const size_t&)>;
8181
/// Kernel function full of dependence
8282
template <typename T, typename Device = base_device::DEVICE_CPU>
8383
using DivTransMinusEigKernel = std::function<decltype(div_trans_prevec_minus_eigen<T, Device>)>;
@@ -104,7 +104,9 @@ namespace hsolver
104104
dev_(base_device::get_device_type<Device>({}))
105105
{
106106
#if defined(__CUDA) || defined(__ROCM)
107-
if (this->dev_ == base_device::GpuDevice) { resmem_real_op()({}, this->d_prevec_, dim_); }
107+
if (this->dev_ == base_device::GpuDevice) {
108+
resmem_real_op()({}, this->d_prevec_, dim_);
109+
}
108110
#endif
109111
}
110112
PreOP(const PreOP&) = delete;
@@ -119,13 +121,13 @@ namespace hsolver
119121
fvec::Div<T> get() const
120122
{
121123
return std::bind(PreOP<T, Device, Kernel_t>::transvec_,
122-
std::placeholders::_1, std::placeholders::_2, std::placeholders::_3, this->prevec_);
124+
std::placeholders::_1, std::placeholders::_2, std::placeholders::_3, std::placeholders::_4, this->prevec_);
123125
}
124126
template<typename U = Kernel_t, typename std::enable_if<std::is_same<U, fvec::DivTransMinusEigKernel<T, Device>>::value, bool>::type = 0>
125127
fvec::DivTransMinusEig<T> get() const
126128
{
127129
return std::bind(PreOP<T, Device, Kernel_t>::transvec_,
128-
std::placeholders::_1, std::placeholders::_2, std::placeholders::_3, std::placeholders::_4,
130+
std::placeholders::_1, std::placeholders::_2, std::placeholders::_3, std::placeholders::_4, std::placeholders::_5,
129131
this->prevec_, this->d_prevec_, this->transval_);
130132
}
131133

source/module_lr/hsolver_lrtd.hpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -76,8 +76,8 @@ namespace LR
7676

7777
if (method == "dav")
7878
{
79-
auto pre_func = [&precondition](T* ptr, const int& ld, const int& nvec)->void
80-
{ hsolver::fvec::div_prevec(ptr, ld, nvec, precondition.data()); };
79+
auto pre_func = [&precondition](T* const dst, const T* const src, const int& ld, const int& nvec)->void
80+
{ hsolver::fvec::div_prevec(dst, src, ld, nvec, precondition.data()); };
8181
// Allow 5 tries at most. If ntry > ntry_max = 5, exit diag loop.
8282
const int ntry_max = 5;
8383
// In non-self consistent calculation, do until totally converged. Else allow 5 eigenvecs to be NOT
@@ -90,8 +90,8 @@ namespace LR
9090
}
9191
else if (method == "dav_subspace") //need refactor
9292
{
93-
auto pre_func = [&precondition](T* ptr, const Real<T>* eig, const int& ld, const int& nvec)->void
94-
{ hsolver::fvec::div_trans_prevec_minus_eigen(ptr, eig, ld, nvec, precondition.data()); };
93+
auto pre_func = [&precondition](T* const dst, const T* const src, const Real<T>* eig, const int& ld, const int& nvec)->void
94+
{ hsolver::fvec::div_trans_prevec_minus_eigen(dst, src, eig, ld, nvec, precondition.data()); };
9595
hsolver::Diago_DavSubspace<T> dav_subspace(pre_func,
9696
nband,
9797
dim,

0 commit comments

Comments
 (0)