Skip to content

Commit 6b415e3

Browse files
committed
Fix RT-TDDFT current calculation crash using >20 MPI processes
1 parent 7f9dc96 commit 6b415e3

File tree

1 file changed

+65
-61
lines changed

1 file changed

+65
-61
lines changed

source/module_hamilt_lcao/module_tddft/td_current.cpp

Lines changed: 65 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,8 @@
44
#include "module_base/tool_title.h"
55
#include "module_hamilt_lcao/module_tddft/snap_psibeta_half_tddft.h"
66
#ifdef _OPENMP
7-
#include <unordered_set>
87
#include <omp.h>
8+
#include <unordered_set>
99
#endif
1010

1111
TD_current::TD_current(const UnitCell* ucell_in,
@@ -14,28 +14,28 @@ TD_current::TD_current(const UnitCell* ucell_in,
1414
const LCAO_Orbitals& orb,
1515
const TwoCenterIntegrator* intor)
1616
: ucell(ucell_in), paraV(paraV), orb_(orb), Grid(GridD_in), intor_(intor)
17-
{
17+
{
1818
// for length gague, the A(t) = 0 for all the time.
19-
this->cart_At = ModuleBase::Vector3<double>(0,0,0);
19+
this->cart_At = ModuleBase::Vector3<double>(0, 0, 0);
2020
this->initialize_vcomm_r(GridD_in, paraV);
2121
this->initialize_grad_term(GridD_in, paraV);
2222
}
2323
TD_current::~TD_current()
2424
{
25-
for (int dir=0;dir<3;dir++)
25+
for (int dir = 0; dir < 3; dir++)
2626
{
2727
delete this->current_term[dir];
2828
}
2929
}
30-
//allocate space for current_term
30+
// allocate space for current_term
3131
void TD_current::initialize_vcomm_r(const Grid_Driver* GridD, const Parallel_Orbitals* paraV)
3232
{
3333
ModuleBase::TITLE("TD_current", "initialize_vcomm_r");
3434
ModuleBase::timer::tick("TD_current", "initialize_vcomm_r");
35-
for (int dir=0;dir<3;dir++)
35+
for (int dir = 0; dir < 3; dir++)
3636
{
3737
if (this->current_term[dir] == nullptr)
38-
this->current_term[dir] = new hamilt::HContainer<std::complex<double>>(paraV);
38+
this->current_term[dir] = new hamilt::HContainer<std::complex<double>>(paraV);
3939
}
4040

4141
this->adjs_vcommr.clear();
@@ -56,8 +56,8 @@ void TD_current::initialize_vcomm_r(const Grid_Driver* GridD, const Parallel_Orb
5656
const ModuleBase::Vector3<double>& tau1 = adjs.adjacent_tau[ad1];
5757
const ModuleBase::Vector3<int>& R_index1 = adjs.box[ad1];
5858
// choose the real adjacent atoms
59-
// Note: the distance of atoms should less than the cutoff radius,
60-
// When equal, the theoretical value of matrix element is zero,
59+
// Note: the distance of atoms should less than the cutoff radius,
60+
// When equal, the theoretical value of matrix element is zero,
6161
// but the calculated value is not zero due to the numerical error, which would lead to result changes.
6262
if (this->ucell->cal_dtau(iat0, iat1, R_index1).norm() * this->ucell->lat0
6363
< orb_.Phi[T1].getRcut() + this->ucell->infoNL.Beta[T0].get_rcut_max())
@@ -84,20 +84,20 @@ void TD_current::initialize_vcomm_r(const Grid_Driver* GridD, const Parallel_Orb
8484
continue;
8585
}
8686
hamilt::AtomPair<std::complex<double>> tmp(iat1,
87-
iat2,
88-
R_index2.x - R_index1.x,
89-
R_index2.y - R_index1.y,
90-
R_index2.z - R_index1.z,
91-
paraV);
92-
for (int dir=0;dir<3;dir++)
87+
iat2,
88+
R_index2.x - R_index1.x,
89+
R_index2.y - R_index1.y,
90+
R_index2.z - R_index1.z,
91+
paraV);
92+
for (int dir = 0; dir < 3; dir++)
9393
{
9494
this->current_term[dir]->insert_pair(tmp);
9595
}
9696
}
9797
}
9898
}
9999
// allocate the memory of BaseMatrix in cal_vcomm_r_IJR, and set the new values to zero
100-
for (int dir=0;dir<3;dir++)
100+
for (int dir = 0; dir < 3; dir++)
101101
{
102102
this->current_term[dir]->allocate(nullptr, true);
103103
}
@@ -108,10 +108,10 @@ void TD_current::initialize_grad_term(const Grid_Driver* GridD, const Parallel_O
108108
ModuleBase::TITLE("TD_current", "initialize_grad_term");
109109
ModuleBase::timer::tick("TD_current", "initialize_grad_term");
110110

111-
for (int dir=0;dir<3;dir++)
111+
for (int dir = 0; dir < 3; dir++)
112112
{
113113
if (this->current_term[dir] == nullptr)
114-
this->current_term[dir] = new hamilt::HContainer<std::complex<double>>(paraV);
114+
this->current_term[dir] = new hamilt::HContainer<std::complex<double>>(paraV);
115115
}
116116
for (int iat1 = 0; iat1 < ucell->nat; iat1++)
117117
{
@@ -132,8 +132,8 @@ void TD_current::initialize_grad_term(const Grid_Driver* GridD, const Parallel_O
132132
}
133133
const ModuleBase::Vector3<int>& R_index2 = adjs.box[ad1];
134134
// choose the real adjacent atoms
135-
// Note: the distance of atoms should less than the cutoff radius,
136-
// When equal, the theoretical value of matrix element is zero,
135+
// Note: the distance of atoms should less than the cutoff radius,
136+
// When equal, the theoretical value of matrix element is zero,
137137
// but the calculated value is not zero due to the numerical error, which would lead to result changes.
138138
if (this->ucell->cal_dtau(iat1, iat2, R_index2).norm() * this->ucell->lat0
139139
< orb_.Phi[T1].getRcut() + orb_.Phi[T2].getRcut())
@@ -150,14 +150,14 @@ void TD_current::initialize_grad_term(const Grid_Driver* GridD, const Parallel_O
150150
int iat2 = ucell->itia2iat(T2, I2);
151151
ModuleBase::Vector3<int>& R_index = adjs.box[ad];
152152
hamilt::AtomPair<std::complex<double>> tmp(iat1, iat2, R_index.x, R_index.y, R_index.z, paraV);
153-
for (int dir=0;dir<3;dir++)
153+
for (int dir = 0; dir < 3; dir++)
154154
{
155155
this->current_term[dir]->insert_pair(tmp);
156156
}
157157
}
158158
}
159159
// allocate the memory of BaseMatrix in HR, and set the new values to zero
160-
for (int dir=0;dir<3;dir++)
160+
for (int dir = 0; dir < 3; dir++)
161161
{
162162
this->current_term[dir]->allocate(nullptr, true);
163163
}
@@ -188,9 +188,9 @@ void TD_current::calculate_vcomm_r()
188188
nlm_tot[i].resize(4);
189189
}
190190

191-
#pragma omp parallel
191+
#pragma omp parallel
192192
{
193-
#pragma omp for schedule(dynamic)
193+
#pragma omp for schedule(dynamic)
194194
for (int ad = 0; ad < adjs.adj_num + 1; ++ad)
195195
{
196196
const int T1 = adjs.ntype[ad];
@@ -214,27 +214,27 @@ void TD_current::calculate_vcomm_r()
214214

215215
// snap_psibeta_half_tddft() are used to calculate <psi|exp(-iAr)|beta>
216216
// and <psi|rexp(-iAr)|beta> as well if current are needed
217-
217+
218218
module_tddft::snap_psibeta_half_tddft(orb_,
219-
this->ucell->infoNL,
220-
nlm,
221-
tau1 * this->ucell->lat0,
222-
T1,
223-
atom1->iw2l[iw1],
224-
atom1->iw2m[iw1],
225-
atom1->iw2n[iw1],
226-
tau0 * this->ucell->lat0,
227-
T0,
228-
this->cart_At,
229-
true);
219+
this->ucell->infoNL,
220+
nlm,
221+
tau1 * this->ucell->lat0,
222+
T1,
223+
atom1->iw2l[iw1],
224+
atom1->iw2m[iw1],
225+
atom1->iw2n[iw1],
226+
tau0 * this->ucell->lat0,
227+
T0,
228+
this->cart_At,
229+
true);
230230
for (int dir = 0; dir < 4; dir++)
231231
{
232232
nlm_tot[ad][dir].insert({all_indexes[iw1l], nlm[dir]});
233233
}
234234
}
235235
}
236236

237-
#ifdef _OPENMP
237+
#ifdef _OPENMP
238238
// record the iat number of the adjacent atoms
239239
std::set<int> ad_atom_set;
240240
for (int ad = 0; ad < adjs.adj_num + 1; ++ad)
@@ -250,28 +250,28 @@ void TD_current::calculate_vcomm_r()
250250
const int thread_id = omp_get_thread_num();
251251
std::set<int> ad_atom_set_thread;
252252
int i = 0;
253-
for(const auto iat1 : ad_atom_set)
253+
for (const auto iat1: ad_atom_set)
254254
{
255255
if (i % num_threads == thread_id)
256256
{
257257
ad_atom_set_thread.insert(iat1);
258258
}
259259
i++;
260260
}
261-
#endif
261+
#endif
262262

263-
// 2. calculate <psi_I|beta>D<beta|psi_{J,R}> for each pair of <IJR> atoms
263+
// 2. calculate <psi_I|beta>D<beta|psi_{J,R}> for each pair of <IJR> atoms
264264
for (int ad1 = 0; ad1 < adjs.adj_num + 1; ++ad1)
265265
{
266266
const int T1 = adjs.ntype[ad1];
267267
const int I1 = adjs.natom[ad1];
268268
const int iat1 = ucell->itia2iat(T1, I1);
269-
#ifdef _OPENMP
269+
#ifdef _OPENMP
270270
if (ad_atom_set_thread.find(iat1) == ad_atom_set_thread.end())
271-
{
272-
continue;
273-
}
274-
#endif
271+
{
272+
continue;
273+
}
274+
#endif
275275
ModuleBase::Vector3<int>& R_index1 = adjs.box[ad1];
276276
for (int ad2 = 0; ad2 < adjs.adj_num + 1; ++ad2)
277277
{
@@ -280,23 +280,22 @@ void TD_current::calculate_vcomm_r()
280280
const int iat2 = ucell->itia2iat(T2, I2);
281281
ModuleBase::Vector3<int>& R_index2 = adjs.box[ad2];
282282
ModuleBase::Vector3<int> R_vector(R_index2[0] - R_index1[0],
283-
R_index2[1] - R_index1[1],
284-
R_index2[2] - R_index1[2]);
283+
R_index2[1] - R_index1[1],
284+
R_index2[2] - R_index1[2]);
285285
std::complex<double>* tmp_c[3] = {nullptr, nullptr, nullptr};
286286
for (int i = 0; i < 3; i++)
287287
{
288-
tmp_c[i] = this->current_term[i]->find_matrix(iat1, iat2, R_vector[0], R_vector[1], R_vector[2])->get_pointer();
288+
hamilt::BaseMatrix<std::complex<double>>* matrix_ptr
289+
= this->current_term[i]->find_matrix(iat1, iat2, R_vector[0], R_vector[1], R_vector[2]);
290+
if (matrix_ptr != nullptr)
291+
{
292+
tmp_c[i] = matrix_ptr->get_pointer();
293+
}
289294
}
290295
// if not found , skip this pair of atoms
291296
if (tmp_c[0] != nullptr)
292297
{
293-
this->cal_vcomm_r_IJR(iat1,
294-
iat2,
295-
T0,
296-
paraV,
297-
nlm_tot[ad1],
298-
nlm_tot[ad2],
299-
tmp_c);
298+
this->cal_vcomm_r_IJR(iat1, iat2, T0, paraV, nlm_tot[ad1], nlm_tot[ad2], tmp_c);
300299
}
301300
}
302301
}
@@ -368,8 +367,8 @@ void TD_current::cal_vcomm_r_IJR(
368367
//<psi|rexp(-iAr)|beta><beta|exp(iAr)|psi>-<psi|exp(-iAr)|beta><beta|rexp(iAr)|psi>
369368
// multiply d in the end
370369
nlm_r_tmp += (nlm1[dir + 1]->at(p1) * std::conj(nlm2[0]->at(p2))
371-
- nlm1[0]->at(p1) * std::conj(nlm2[dir + 1]->at(p2)))
372-
* (*tmp_d);
370+
- nlm1[0]->at(p1) * std::conj(nlm2[dir + 1]->at(p2)))
371+
* (*tmp_d);
373372
}
374373
// -i[r,Vnl], 2.0 due to the unit transformation
375374
current_mat_p[dir][step_trace[is]] -= imag_unit * nlm_r_tmp / 2.0;
@@ -390,7 +389,7 @@ void TD_current::cal_vcomm_r_IJR(
390389
void TD_current::calculate_grad_term()
391390
{
392391
ModuleBase::TITLE("TD_current", "calculate_grad_term");
393-
if(this->current_term[0]==nullptr || this->current_term[0]->size_atom_pairs()<=0)
392+
if (this->current_term[0] == nullptr || this->current_term[0]->size_atom_pairs() <= 0)
394393
{
395394
ModuleBase::WARNING_QUIT("TD_current::calculate_grad_term", "grad_term is nullptr or empty");
396395
}
@@ -417,7 +416,12 @@ void TD_current::calculate_grad_term()
417416
std::complex<double>* tmp_c[3] = {nullptr, nullptr, nullptr};
418417
for (int i = 0; i < 3; i++)
419418
{
420-
tmp_c[i] = this->current_term[i]->find_matrix(iat1, iat2, R_index2)->get_pointer();
419+
hamilt::BaseMatrix<std::complex<double>>* matrix_ptr
420+
= this->current_term[i]->find_matrix(iat1, iat2, R_index2);
421+
if (matrix_ptr != nullptr)
422+
{
423+
tmp_c[i] = matrix_ptr->get_pointer();
424+
}
421425
}
422426
if (tmp_c[0] != nullptr)
423427
{
@@ -473,7 +477,7 @@ void TD_current::cal_grad_IJR(const int& iat1,
473477
auto row_indexes = paraV->get_indexes_row(iat1);
474478
auto col_indexes = paraV->get_indexes_col(iat2);
475479
const int step_trace = col_indexes.size() + 1;
476-
for(int iw1l = 0; iw1l < row_indexes.size(); iw1l += npol)
480+
for (int iw1l = 0; iw1l < row_indexes.size(); iw1l += npol)
477481
{
478482
const int iw1 = row_indexes[iw1l] / npol;
479483
const int L1 = iw2l1[iw1];
@@ -509,7 +513,7 @@ void TD_current::cal_grad_IJR(const int& iat1,
509513
for (int dir = 0; dir < 3; dir++)
510514
{
511515
current_mat_p[dir] += (npol - 1) * col_indexes.size();
512-
}
516+
}
513517
}
514518
}
515519

0 commit comments

Comments
 (0)