Skip to content

Commit 46b36ca

Browse files
wenfei-liwenfei-li
andauthored
Fix : force matrix not allocated but used (#1854)
Co-authored-by: wenfei-li <[email protected]>
1 parent 48d8625 commit 46b36ca

File tree

1 file changed

+130
-129
lines changed

1 file changed

+130
-129
lines changed

source/module_hamilt_lcao/hamilt_lcaodft/FORCE_k.cpp

Lines changed: 130 additions & 129 deletions
Original file line numberDiff line numberDiff line change
@@ -399,90 +399,91 @@ void Force_LCAO_k::cal_foverlap_k(const bool isforce,
399399
const int T1 = GlobalC::ucell.iat2it[iat];
400400
Atom* atom1 = &GlobalC::ucell.atoms[T1];
401401
const int I1 = GlobalC::ucell.iat2ia[iat];
402-
{
403-
double *foverlap_iat = &foverlap(iat, 0);
402+
403+
double *foverlap_iat;
404+
if(isforce) foverlap_iat = &foverlap(iat, 0);
405+
404406
#ifdef _OPENMP
405-
// using local stack to avoid false sharing in multi-threaded case
406-
double foverlap_temp[3] = {0.0, 0.0, 0.0};
407-
if (num_threads > 1)
408-
{
409-
foverlap_iat = foverlap_temp;
410-
}
407+
// using local stack to avoid false sharing in multi-threaded case
408+
double foverlap_temp[3] = {0.0, 0.0, 0.0};
409+
if (num_threads > 1)
410+
{
411+
foverlap_iat = foverlap_temp;
412+
}
411413
#endif
412-
int irr = pv->nlocstart[iat];
413-
const int start1 = GlobalC::ucell.itiaiw2iwt(T1, I1, 0);
414-
for (int cb = 0; cb < ra.na_each[iat]; ++cb)
414+
int irr = pv->nlocstart[iat];
415+
const int start1 = GlobalC::ucell.itiaiw2iwt(T1, I1, 0);
416+
for (int cb = 0; cb < ra.na_each[iat]; ++cb)
417+
{
418+
const int T2 = ra.info[iat][cb][3];
419+
const int I2 = ra.info[iat][cb][4];
420+
const int start2 = GlobalC::ucell.itiaiw2iwt(T2, I2, 0);
421+
422+
Atom* atom2 = &GlobalC::ucell.atoms[T2];
423+
424+
for (int jj = 0; jj < atom1->nw; jj++)
415425
{
416-
const int T2 = ra.info[iat][cb][3];
417-
const int I2 = ra.info[iat][cb][4];
418-
const int start2 = GlobalC::ucell.itiaiw2iwt(T2, I2, 0);
426+
const int iw1_all = start1 + jj;
419427

420-
Atom* atom2 = &GlobalC::ucell.atoms[T2];
428+
// HPSEPS
429+
const int mu = pv->trace_loc_row[iw1_all];
430+
if (mu < 0)
431+
continue;
421432

422-
for (int jj = 0; jj < atom1->nw; jj++)
433+
for (int kk = 0; kk < atom2->nw; kk++)
423434
{
424-
const int iw1_all = start1 + jj;
435+
const int iw2_all = start2 + kk;
425436

426437
// HPSEPS
427-
const int mu = pv->trace_loc_row[iw1_all];
428-
if (mu < 0)
438+
const int nu = pv->trace_loc_col[iw2_all];
439+
if (nu < 0)
429440
continue;
430-
431-
for (int kk = 0; kk < atom2->nw; kk++)
441+
//==============================================================
442+
// here we use 'minus', but in GlobalV::GAMMA_ONLY_LOCAL we use 'plus',
443+
// both are correct because the 'DSloc_Rx' is used in 'row' (-),
444+
// however, the 'DSloc_x' in GAMMA is used in 'col' (+),
445+
// mohan update 2011-06-16
446+
//==============================================================
447+
for (int is = 0; is < GlobalV::NSPIN; ++is)
432448
{
433-
const int iw2_all = start2 + kk;
434-
435-
// HPSEPS
436-
const int nu = pv->trace_loc_col[iw2_all];
437-
if (nu < 0)
438-
continue;
439-
//==============================================================
440-
// here we use 'minus', but in GlobalV::GAMMA_ONLY_LOCAL we use 'plus',
441-
// both are correct because the 'DSloc_Rx' is used in 'row' (-),
442-
// however, the 'DSloc_x' in GAMMA is used in 'col' (+),
443-
// mohan update 2011-06-16
444-
//==============================================================
445-
for (int is = 0; is < GlobalV::NSPIN; ++is)
449+
double edm2d2 = 2.0 * edm2d[is][irr];
450+
if (isforce)
446451
{
447-
double edm2d2 = 2.0 * edm2d[is][irr];
448-
if (isforce)
449-
{
450-
foverlap_iat[0] -= edm2d2 * this->UHM->LM->DSloc_Rx[irr];
451-
foverlap_iat[1] -= edm2d2 * this->UHM->LM->DSloc_Ry[irr];
452-
foverlap_iat[2] -= edm2d2 * this->UHM->LM->DSloc_Rz[irr];
453-
}
454-
if (isstress)
452+
foverlap_iat[0] -= edm2d2 * this->UHM->LM->DSloc_Rx[irr];
453+
foverlap_iat[1] -= edm2d2 * this->UHM->LM->DSloc_Ry[irr];
454+
foverlap_iat[2] -= edm2d2 * this->UHM->LM->DSloc_Rz[irr];
455+
}
456+
if (isstress)
457+
{
458+
for (int ipol = 0; ipol < 3; ipol++)
455459
{
456-
for (int ipol = 0; ipol < 3; ipol++)
457-
{
458-
local_soverlap(0, ipol) += edm2d[is][irr] * this->UHM->LM->DSloc_Rx[irr]
459-
* this->UHM->LM->DH_r[irr * 3 + ipol];
460-
if (ipol < 1)
461-
continue;
462-
local_soverlap(1, ipol) += edm2d[is][irr] * this->UHM->LM->DSloc_Ry[irr]
463-
* this->UHM->LM->DH_r[irr * 3 + ipol];
464-
if (ipol < 2)
465-
continue;
466-
local_soverlap(2, ipol) += edm2d[is][irr] * this->UHM->LM->DSloc_Rz[irr]
467-
* this->UHM->LM->DH_r[irr * 3 + ipol];
468-
}
460+
local_soverlap(0, ipol) += edm2d[is][irr] * this->UHM->LM->DSloc_Rx[irr]
461+
* this->UHM->LM->DH_r[irr * 3 + ipol];
462+
if (ipol < 1)
463+
continue;
464+
local_soverlap(1, ipol) += edm2d[is][irr] * this->UHM->LM->DSloc_Ry[irr]
465+
* this->UHM->LM->DH_r[irr * 3 + ipol];
466+
if (ipol < 2)
467+
continue;
468+
local_soverlap(2, ipol) += edm2d[is][irr] * this->UHM->LM->DSloc_Rz[irr]
469+
* this->UHM->LM->DH_r[irr * 3 + ipol];
469470
}
470471
}
471-
++local_total_irr;
472-
++irr;
473-
} // end kk
474-
} // end jj
475-
} // end cb
472+
}
473+
++local_total_irr;
474+
++irr;
475+
} // end kk
476+
} // end jj
477+
} // end cb
476478
#ifdef _OPENMP
477-
if (isforce && num_threads > 1)
478-
{
479-
foverlap(iat, 0) += foverlap_iat[0];
480-
foverlap(iat, 1) += foverlap_iat[1];
481-
foverlap(iat, 2) += foverlap_iat[2];
482-
}
483-
#endif
479+
if (isforce && num_threads > 1)
480+
{
481+
foverlap(iat, 0) += foverlap_iat[0];
482+
foverlap(iat, 1) += foverlap_iat[1];
483+
foverlap(iat, 2) += foverlap_iat[2];
484484
}
485-
}
485+
#endif
486+
} // end iat
486487
#ifdef _OPENMP
487488
#pragma omp critical(cal_foverlap_k_reduce)
488489
{
@@ -557,77 +558,77 @@ void Force_LCAO_k::cal_ftvnl_dphi_k(double** dm2d,
557558
const int T1 = GlobalC::ucell.iat2it[iat];
558559
Atom* atom1 = &GlobalC::ucell.atoms[T1];
559560
const int I1 = GlobalC::ucell.iat2ia[iat];
560-
{
561-
int irr = pv->nlocstart[iat];
562-
const int start1 = GlobalC::ucell.itiaiw2iwt(T1, I1, 0);
563-
double *ftvnl_dphi_iat = &ftvnl_dphi(iat, 0);
561+
562+
int irr = pv->nlocstart[iat];
563+
const int start1 = GlobalC::ucell.itiaiw2iwt(T1, I1, 0);
564+
double *ftvnl_dphi_iat;
565+
if(isforce) ftvnl_dphi_iat = &ftvnl_dphi(iat, 0);
564566
#ifdef _OPENMP
565-
// using local stack to avoid false sharing in multi-threaded case
566-
double ftvnl_dphi_temp[3] = {0.0, 0.0, 0.0};
567-
if (num_threads > 1)
568-
{
569-
ftvnl_dphi_iat = ftvnl_dphi_temp;
570-
}
567+
// using local stack to avoid false sharing in multi-threaded case
568+
double ftvnl_dphi_temp[3] = {0.0, 0.0, 0.0};
569+
if (num_threads > 1)
570+
{
571+
ftvnl_dphi_iat = ftvnl_dphi_temp;
572+
}
571573
#endif
572-
for (int cb = 0; cb < ra.na_each[iat]; ++cb)
573-
{
574-
const int T2 = ra.info[iat][cb][3];
575-
const int I2 = ra.info[iat][cb][4];
576-
const int start2 = GlobalC::ucell.itiaiw2iwt(T2, I2, 0);
577-
Atom* atom2 = &GlobalC::ucell.atoms[T2];
574+
for (int cb = 0; cb < ra.na_each[iat]; ++cb)
575+
{
576+
const int T2 = ra.info[iat][cb][3];
577+
const int I2 = ra.info[iat][cb][4];
578+
const int start2 = GlobalC::ucell.itiaiw2iwt(T2, I2, 0);
579+
Atom* atom2 = &GlobalC::ucell.atoms[T2];
578580

579-
for (int jj = 0; jj < atom1->nw; ++jj)
581+
for (int jj = 0; jj < atom1->nw; ++jj)
582+
{
583+
const int iw1_all = start1 + jj;
584+
const int mu = pv->trace_loc_row[iw1_all];
585+
if (mu < 0)
586+
continue;
587+
for (int kk = 0; kk < atom2->nw; ++kk)
580588
{
581-
const int iw1_all = start1 + jj;
582-
const int mu = pv->trace_loc_row[iw1_all];
583-
if (mu < 0)
589+
const int iw2_all = start2 + kk;
590+
const int nu = pv->trace_loc_col[iw2_all];
591+
if (nu < 0)
584592
continue;
585-
for (int kk = 0; kk < atom2->nw; ++kk)
593+
//==============================================================
594+
// here we use 'minus', but in GlobalV::GAMMA_ONLY_LOCAL we use 'plus',
595+
// both are correct because the 'DSloc_Rx' is used in 'row' (-),
596+
// however, the 'DSloc_x' is used in 'col' (+),
597+
// mohan update 2011-06-16
598+
//==============================================================
599+
for (int is = 0; is < GlobalV::NSPIN; ++is)
586600
{
587-
const int iw2_all = start2 + kk;
588-
const int nu = pv->trace_loc_col[iw2_all];
589-
if (nu < 0)
590-
continue;
591-
//==============================================================
592-
// here we use 'minus', but in GlobalV::GAMMA_ONLY_LOCAL we use 'plus',
593-
// both are correct because the 'DSloc_Rx' is used in 'row' (-),
594-
// however, the 'DSloc_x' is used in 'col' (+),
595-
// mohan update 2011-06-16
596-
//==============================================================
597-
for (int is = 0; is < GlobalV::NSPIN; ++is)
601+
double dm2d2 = 2.0 * dm2d[is][irr];
602+
if (isforce)
598603
{
599-
double dm2d2 = 2.0 * dm2d[is][irr];
600-
if (isforce)
601-
{
602-
ftvnl_dphi_iat[0] += dm2d2 * this->UHM->LM->DHloc_fixedR_x[irr];
603-
ftvnl_dphi_iat[1] += dm2d2 * this->UHM->LM->DHloc_fixedR_y[irr];
604-
ftvnl_dphi_iat[2] += dm2d2 * this->UHM->LM->DHloc_fixedR_z[irr];
605-
}
606-
if (isstress)
607-
{
608-
local_stvnl_dphi(0, 0) -= dm2d[is][irr] * this->UHM->LM->stvnl11[irr];
609-
local_stvnl_dphi(0, 1) -= dm2d[is][irr] * this->UHM->LM->stvnl12[irr];
610-
local_stvnl_dphi(0, 2) -= dm2d[is][irr] * this->UHM->LM->stvnl13[irr];
611-
local_stvnl_dphi(1, 1) -= dm2d[is][irr] * this->UHM->LM->stvnl22[irr];
612-
local_stvnl_dphi(1, 2) -= dm2d[is][irr] * this->UHM->LM->stvnl23[irr];
613-
local_stvnl_dphi(2, 2) -= dm2d[is][irr] * this->UHM->LM->stvnl33[irr];
614-
}
604+
ftvnl_dphi_iat[0] += dm2d2 * this->UHM->LM->DHloc_fixedR_x[irr];
605+
ftvnl_dphi_iat[1] += dm2d2 * this->UHM->LM->DHloc_fixedR_y[irr];
606+
ftvnl_dphi_iat[2] += dm2d2 * this->UHM->LM->DHloc_fixedR_z[irr];
615607
}
616-
++local_total_irr;
617-
++irr;
618-
} // end kk
619-
} // end jj
620-
} // end cb
608+
if (isstress)
609+
{
610+
local_stvnl_dphi(0, 0) -= dm2d[is][irr] * this->UHM->LM->stvnl11[irr];
611+
local_stvnl_dphi(0, 1) -= dm2d[is][irr] * this->UHM->LM->stvnl12[irr];
612+
local_stvnl_dphi(0, 2) -= dm2d[is][irr] * this->UHM->LM->stvnl13[irr];
613+
local_stvnl_dphi(1, 1) -= dm2d[is][irr] * this->UHM->LM->stvnl22[irr];
614+
local_stvnl_dphi(1, 2) -= dm2d[is][irr] * this->UHM->LM->stvnl23[irr];
615+
local_stvnl_dphi(2, 2) -= dm2d[is][irr] * this->UHM->LM->stvnl33[irr];
616+
}
617+
}
618+
++local_total_irr;
619+
++irr;
620+
} // end kk
621+
} // end jj
622+
} // end cb
621623
#ifdef _OPENMP
622-
if (isforce && num_threads > 1)
623-
{
624-
ftvnl_dphi(iat, 0) += ftvnl_dphi_iat[0];
625-
ftvnl_dphi(iat, 1) += ftvnl_dphi_iat[1];
626-
ftvnl_dphi(iat, 2) += ftvnl_dphi_iat[2];
627-
}
628-
#endif
624+
if (isforce && num_threads > 1)
625+
{
626+
ftvnl_dphi(iat, 0) += ftvnl_dphi_iat[0];
627+
ftvnl_dphi(iat, 1) += ftvnl_dphi_iat[1];
628+
ftvnl_dphi(iat, 2) += ftvnl_dphi_iat[2];
629629
}
630-
}
630+
#endif
631+
} // end iat
631632
#ifdef _OPENMP
632633
#pragma omp critical(cal_ftvnl_dphi_k_reduce)
633634
{

0 commit comments

Comments
 (0)