Skip to content

Commit 196fc4e

Browse files
committed
fix the parallel copy of eigenvectors
1 parent 00f6b96 commit 196fc4e

File tree

4 files changed

+79
-27
lines changed

4 files changed

+79
-27
lines changed

source/module_lr/hamilt_casida.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ namespace LR
88
const int no = this->nocc[0];
99
const int nv = this->nvirt[0];
1010
const auto& px = this->pX[0];
11-
const int nloc_per_band = nk * px.get_local_size();
11+
const int ldim = nk * px.get_local_size();
1212
int npairs = no * nv;
1313
std::vector<T> Amat_full(this->nk * npairs * this->nk * npairs, 0.0);
1414
for (int ik = 0;ik < this->nk;++ik)
@@ -30,7 +30,7 @@ namespace LR
3030
hamilt::Operator<T>* node(this->ops);
3131
while (node != nullptr)
3232
{ // act() on and return the k1-first type of psi
33-
node->act(1, nloc_per_band, /*npol=*/1, X_bj.get_pointer(), A_aibj.get_pointer());
33+
node->act(1, ldim, /*npol=*/1, X_bj.get_pointer(), A_aibj.get_pointer());
3434
node = (hamilt::Operator<T>*)(node->next_op);
3535
}
3636
// reduce ai for a fixed bj

source/module_lr/hamilt_casida.h

Lines changed: 27 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -128,17 +128,11 @@ namespace LR
128128
for (int ik = 0;ik < nk;++ik) { this->DM_trans->set_DMK_pointer(ik, dm_trans_2d[ik].data<T>()); }
129129
};
130130
}
131-
~HamiltLR()
132-
{
133-
if (this->ops != nullptr)
134-
{
135-
delete this->ops;
136-
}
137-
};
131+
~HamiltLR() { delete this->ops; }
138132

139-
virtual std::vector<T> matrix()const;
133+
std::vector<T> matrix()const;
140134

141-
virtual void hPsi(const T* psi_in, T* hpsi, const int ld_psi, const int& nband) const
135+
void hPsi(const T* psi_in, T* hpsi, const int ld_psi, const int& nband) const
142136
{
143137
assert(ld_psi == nk * pX[0].get_local_size());
144138
for (int ib = 0;ib < nband;++ib)
@@ -154,6 +148,30 @@ namespace LR
154148
}
155149
}
156150

151+
void global2local(T* lvec, const T* gvec, const int& nband) const
152+
{
153+
const int npairs = nocc[0] * nvirt[0];
154+
for (int ib = 0;ib < nband;++ib)
155+
{
156+
const int loffset_b = ib * nk * pX[0].get_local_size();
157+
const int goffset_b = ib * nk * npairs;
158+
for (int ik = 0;ik < nk;++ik)
159+
{
160+
const int loffset = loffset_b + ik * pX[0].get_local_size();
161+
const int goffset = goffset_b + ik * npairs;
162+
for (int lo = 0;lo < pX[0].get_col_size();++lo)
163+
{
164+
const int go = pX[0].local2global_col(lo);
165+
for (int lv = 0;lv < pX[0].get_row_size();++lv)
166+
{
167+
const int gv = pX[0].local2global_row(lv);
168+
lvec[loffset + lo * pX[0].get_row_size() + lv] = gvec[goffset + go * nvirt[0] + gv];
169+
}
170+
}
171+
}
172+
}
173+
}
174+
157175
private:
158176
const std::vector<int>& nocc;
159177
const std::vector<int>& nvirt;

source/module_lr/hamilt_ulr.hpp

Lines changed: 43 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,8 @@ namespace LR
3535
const std::vector<Parallel_2D>& pX_in, ///< {up, down}
3636
const Parallel_2D& pc_in,
3737
const Parallel_Orbitals& pmat_in) :nocc(nocc), nvirt(nvirt), pX(pX_in), nk(kv_in.get_nks() / nspin),
38-
nloc_per_band(nk* pX[0].get_local_size() + nk * pX[1].get_local_size())
38+
ldim(nk* pX[0].get_local_size() + nk * pX[1].get_local_size()),
39+
gdim(nk* std::inner_product(nocc.begin(), nocc.end(), nvirt.begin(), 0))
3940
{
4041
ModuleBase::TITLE("HamiltULR", "HamiltULR");
4142
this->DM_trans = LR_Util::make_unique<elecstate::DensityMatrix<T, T>>(&pmat_in, 1, kv_in.kvec_d, nk);
@@ -92,7 +93,7 @@ namespace LR
9293
void hPsi(const T* psi_in, T* hpsi, const int ld_psi, const int& nband) const
9394
{
9495
ModuleBase::TITLE("HamiltULR", "hPsi");
95-
assert(ld_psi == this->nloc_per_band);
96+
assert(ld_psi == this->ldim);
9697
const std::vector<int64_t> xdim_is = { nk * pX[0].get_local_size(), nk * pX[1].get_local_size() };
9798
/// band-wise act (also works for close-shell, but not efficient)
9899
for (int ib = 0;ib < nband;++ib)
@@ -121,8 +122,7 @@ namespace LR
121122
const std::vector<int> npairs = { this->nocc[0] * this->nvirt[0], this->nocc[1] * this->nvirt[1] };
122123
const std::vector<int64_t> ldim_is = { nk * pX[0].get_local_size(), nk * pX[1].get_local_size() };
123124
const std::vector<int> gdim_is = { nk * npairs[0], nk * npairs[1] };
124-
const int global_size = this->nk * (npairs[0] + npairs[1]);
125-
std::vector<T> Amat_full(global_size * global_size);
125+
std::vector<T> Amat_full(gdim * gdim);
126126
for (int is_bj : {0, 1})
127127
{
128128
const int no = this->nocc[is_bj];
@@ -137,13 +137,13 @@ namespace LR
137137
for (int b = 0;b < nv;++b)
138138
{
139139
const int gcol = goffset_bj + ik_bj * npairs[is_bj] + j * nv + b;//global
140-
std::vector<T> X_bj(this->nloc_per_band, T(0));
140+
std::vector<T> X_bj(this->ldim, T(0));
141141
const int lj = px.global2local_col(j);
142142
const int lb = px.global2local_row(b);
143143
const int lcol = loffset_bj + ik_bj * px.get_local_size() + lj * px.get_row_size() + lb;//local
144144
if (px.in_this_processor(b, j)) { X_bj[lcol] = T(1); }
145145
this->cal_dm_trans(is_bj, X_bj.data() + loffset_bj);
146-
std::vector<T> Aloc_col(this->nloc_per_band, T(0)); // a col of A matrix (local)
146+
std::vector<T> Aloc_col(this->ldim, T(0)); // a col of A matrix (local)
147147
for (int is_ai : {0, 1})
148148
{
149149
const int goffset_ai = is_ai * gdim_is[0];
@@ -159,22 +159,54 @@ namespace LR
159159
for (int ik_ai = 0;ik_ai < this->nk;++ik_ai)
160160
{
161161
LR_Util::gather_2d_to_full(pax, Aloc_col.data() + loffset_ai + ik_ai * pax.get_local_size(),
162-
Amat_full.data() + gcol * global_size /*col, bj*/ + goffset_ai + ik_ai * npairs[is_ai]/*row, ai*/,
162+
Amat_full.data() + gcol * gdim /*col, bj*/ + goffset_ai + ik_ai * npairs[is_ai]/*row, ai*/,
163163
false, nv, no);
164164
}
165165
#else
166-
std::memcpy(Amat_full.data() + gcol * global_size + goffset_ai, Aloc_col.data() + goffset_ai, gdim_is[is_ai] * sizeof(T));
166+
std::memcpy(Amat_full.data() + gcol * gdim + goffset_ai, Aloc_col.data() + goffset_ai, gdim_is[is_ai] * sizeof(T));
167167
#endif
168168
}
169169
}
170170
}
171171
}
172172
}
173173
std::cout << "Full A matrix:" << std::endl;
174-
LR_Util::print_value(Amat_full.data(), global_size, global_size);
174+
LR_Util::print_value(Amat_full.data(), gdim, gdim);
175175
return Amat_full;
176176
}
177177

178+
/// copy global data (eigenvectors) to local memory
179+
void global2local(T* lvec, const T* gvec, const int& nband) const
180+
{
181+
const std::vector<int> npairs = { this->nocc[0] * this->nvirt[0], this->nocc[1] * this->nvirt[1] };
182+
const std::vector<int64_t> ldim_is = { nk * pX[0].get_local_size(), nk * pX[1].get_local_size() };
183+
const std::vector<int> gdim_is = { nk * npairs[0], nk * npairs[1] };
184+
for (int ib = 0;ib < nband;++ib)
185+
{
186+
const int loffset_b = ib * this->ldim;
187+
const int goffset_b = ib * this->gdim;
188+
for (int is : {0, 1})
189+
{
190+
const int loffset_bs = loffset_b + is * ldim_is[0];
191+
const int goffset_bs = goffset_b + is * gdim_is[0];
192+
for (int ik = 0;ik < nk;++ik)
193+
{
194+
const int loffset = loffset_bs + ik * pX[is].get_local_size();
195+
const int goffset = goffset_bs + ik * npairs[is];
196+
for (int lo = 0;lo < pX[is].get_col_size();++lo)
197+
{
198+
const int go = pX[is].local2global_col(lo);
199+
for (int lv = 0;lv < pX[is].get_row_size();++lv)
200+
{
201+
const int gv = pX[is].local2global_row(lv);
202+
lvec[loffset + lo * pX[is].get_row_size() + lv] = gvec[goffset + go * nvirt[is] + gv];
203+
}
204+
}
205+
}
206+
}
207+
}
208+
}
209+
178210
private:
179211
const std::vector<int>& nocc;
180212
const std::vector<int>& nvirt;
@@ -183,7 +215,8 @@ namespace LR
183215

184216

185217
const int nk = 1;
186-
const int nloc_per_band = 1;
218+
const int ldim = 1;
219+
const int gdim = 1;
187220

188221
/// 4 operator lists: uu, ud, du, dd
189222
std::vector<hamilt::Operator<T>*> ops;

source/module_lr/hsolver_lrtd.hpp

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -50,17 +50,18 @@ namespace LR
5050
if (method == "lapack")
5151
{
5252
std::vector<T> Amat_full = hm.matrix();
53-
eigenvalue.resize(dim);
54-
if (hermitian) { LR_Util::diag_lapack(dim, Amat_full.data(), eigenvalue.data()); }
53+
const int gdim = std::sqrt(Amat_full.size());
54+
eigenvalue.resize(gdim);
55+
if (hermitian) { LR_Util::diag_lapack(gdim, Amat_full.data(), eigenvalue.data()); }
5556
else
5657
{
57-
std::vector<std::complex<double>> eig_complex(dim);
58-
LR_Util::diag_lapack_nh(dim, Amat_full.data(), eig_complex.data());
58+
std::vector<std::complex<double>> eig_complex(gdim);
59+
LR_Util::diag_lapack_nh(gdim, Amat_full.data(), eig_complex.data());
5960
print_eigs(eig_complex, "Right eigenvalues: of the non-Hermitian matrix: (Ry)");
60-
for (int i = 0; i < dim; i++) { eigenvalue[i] = eig_complex[i].real(); }
61+
for (int i = 0; i < gdim; i++) { eigenvalue[i] = eig_complex[i].real(); }
6162
}
6263
// copy eigenvectors
63-
std::memcpy(psi, Amat_full.data(), sizeof(T) * dim * nband);
64+
hm.global2local(psi, Amat_full.data(), nband);
6465
}
6566
else
6667
{

0 commit comments

Comments
 (0)