Skip to content

Commit 1dda41c

Browse files
authored
Fix: fix cal_force_scc (#1870)
* Fix: fix typo in force_scc * Fix: fix rhocgnt init * revert cal_force_scc to origin serial verion and do simple parallelization
1 parent e373a3d commit 1dda41c

File tree

1 file changed

+52
-101
lines changed

1 file changed

+52
-101
lines changed

source/module_hamilt_pw/hamilt_pwdft/forces.cpp

Lines changed: 52 additions & 101 deletions
Original file line numberDiff line numberDiff line change
@@ -1132,133 +1132,84 @@ void Forces<FPTYPE, Device>::cal_force_scc(ModuleBase::matrix& forcescc, ModuleP
11321132
}
11331133
}
11341134

1135+
// work space
1136+
double* rhocgnt = new double[rho_basis->ngg];
1137+
ModuleBase::GlobalFunc::ZEROS(rhocgnt, rho_basis->ngg);
1138+
11351139
rho_basis->real2recip(psic, psic);
11361140

11371141
int igg0 = 0;
11381142
const int ig0 = rho_basis->ig_gge0;
1139-
const int ig_gap = (ig0 >= 0 && ig0 < rho_basis->npw) ? ig0 : -1;
11401143
if (rho_basis->gg_uniq[0] < 1.0e-8)
11411144
igg0 = 1;
11421145

1143-
double* rhocgnt = new double[rho_basis->ngg];
1144-
1145-
#ifdef _OPENMP
1146-
#pragma omp parallel
1147-
{
1148-
int num_threads = omp_get_num_threads();
1149-
int thread_id = omp_get_thread_num();
1150-
#else
1151-
int num_threads = 1;
1152-
int thread_id = 0;
1153-
#endif
1154-
1155-
// thread local work space
1156-
double *aux = new double[ndm];
1157-
ModuleBase::GlobalFunc::ZEROS(aux, ndm);
1158-
1159-
int ig_beg, ig_length, ig_end;
1160-
ModuleBase::TASK_DIST_1D(num_threads, thread_id, rho_basis->ngg, ig_beg, ig_length);
1161-
ModuleBase::GlobalFunc::ZEROS(rhocgnt + ig_beg, ig_length);
1162-
ig_end = ig_beg + ig_length;
1163-
11641146
double fact = 2.0;
1165-
int last_it = -1;
1166-
1167-
for (int iat = 0; iat < GlobalC::ucell.nat; ++iat)
1147+
for (int nt = 0; nt < GlobalC::ucell.ntype; nt++)
11681148
{
1169-
int it = GlobalC::ucell.iat2it[iat];
1170-
int ia = GlobalC::ucell.iat2ia[iat];
1171-
1172-
// initialize rhocgnt when `it` is changed
1173-
if (it != last_it)
1149+
// Here we compute the G.ne.0 term
1150+
const int mesh = GlobalC::ucell.atoms[nt].ncpp.msh;
1151+
#ifdef _OPENMP
1152+
#pragma omp parallel for
1153+
#endif
1154+
for (int ig = igg0; ig < rho_basis->ngg; ++ig)
11741155
{
1175-
// Here we compute the G.ne.0 term
1176-
const int mesh = GlobalC::ucell.atoms[it].ncpp.msh;
1177-
1178-
for (int ig = std::max(ig_beg, igg0); ig < ig_end; ++ig)
1156+
double* aux = new double[mesh];
1157+
const double gx = sqrt(rho_basis->gg_uniq[ig]) * GlobalC::ucell.tpiba;
1158+
for (int ir = 0; ir < mesh; ir++)
11791159
{
1180-
const double gx = sqrt(rho_basis->gg_uniq[ig]) * GlobalC::ucell.tpiba;
1181-
for (int ir = 0; ir < mesh; ir++)
1160+
if (GlobalC::ucell.atoms[nt].ncpp.r[ir] < 1.0e-8)
11821161
{
1183-
if (GlobalC::ucell.atoms[it].ncpp.r[ir] < 1.0e-8)
1184-
{
1185-
aux[ir] = GlobalC::ucell.atoms[it].ncpp.rho_at[ir];
1186-
}
1187-
else
1188-
{
1189-
const double gxx = gx * GlobalC::ucell.atoms[it].ncpp.r[ir];
1190-
aux[ir] = GlobalC::ucell.atoms[it].ncpp.rho_at[ir] * ModuleBase::libm::sin(gxx) / gxx;
1191-
}
1162+
aux[ir] = GlobalC::ucell.atoms[nt].ncpp.rho_at[ir];
1163+
}
1164+
else
1165+
{
1166+
const double gxx = gx * GlobalC::ucell.atoms[nt].ncpp.r[ir];
1167+
aux[ir] = GlobalC::ucell.atoms[nt].ncpp.rho_at[ir] * ModuleBase::libm::sin(gxx) / gxx;
11921168
}
1193-
ModuleBase::Integral::Simpson_Integral(mesh, aux, GlobalC::ucell.atoms[it].ncpp.rab, rhocgnt[ig]);
11941169
}
1195-
1196-
// record it
1197-
last_it = it;
1198-
1199-
// wait for rhocgnt update
1200-
#ifdef _OPENMP
1201-
#pragma omp barrier
1202-
#endif
1170+
ModuleBase::Integral::Simpson_Integral(mesh, aux, GlobalC::ucell.atoms[nt].ncpp.rab, rhocgnt[ig]);
1171+
delete[] aux; // mohan fix bug 2012-03-22
12031172
}
12041173

1205-
const ModuleBase::Vector3<double> pos = GlobalC::ucell.atoms[it].tau[ia];
1206-
1207-
double force[3] = {0, 0, 0};
1208-
1209-
const auto ig_loop = [&](int start, int stop)
1174+
int iat = 0;
1175+
for (int it = 0; it < GlobalC::ucell.ntype; it++)
12101176
{
1211-
for (int ig = start; ig < stop; ++ig)
1177+
for (int ia = 0; ia < GlobalC::ucell.atoms[it].na; ia++)
12121178
{
1213-
const ModuleBase::Vector3<double> gv = rho_basis->gcar[ig];
1214-
const double rhocgntigg = rhocgnt[GlobalC::rhopw->ig2igg[ig]];
1215-
const double arg = ModuleBase::TWO_PI * (gv * pos);
1216-
double sinp, cosp;
1217-
ModuleBase::libm::sincos(arg, &sinp, &cosp);
1218-
const std::complex<double> cpm = std::complex<double>(sinp, cosp) * conj(psic[ig]);
1219-
1220-
force[0] += fact * rhocgntigg * GlobalC::ucell.tpiba * gv.x * cpm.real();
1221-
force[1] += fact * rhocgntigg * GlobalC::ucell.tpiba * gv.y * cpm.real();
1222-
force[2] += fact * rhocgntigg * GlobalC::ucell.tpiba * gv.z * cpm.real();
1223-
}
1224-
};
1225-
1226-
ig_loop(ig_beg, std::min(ig_gap, ig_end));
1227-
ig_loop(ig_gap + 1, ig_end);
1228-
1179+
if (nt == it)
1180+
{
1181+
const ModuleBase::Vector3<double> pos = GlobalC::ucell.atoms[it].tau[ia];
1182+
double &force0 = forcescc(iat, 0), &force1 = forcescc(iat, 1), &force2 = forcescc(iat, 2);
12291183
#ifdef _OPENMP
1230-
if (num_threads > 1)
1231-
{
1232-
#pragma omp atomic
1233-
forcescc(iat, 0) += force[0];
1234-
#pragma omp atomic
1235-
forcescc(iat, 1) += force[1];
1236-
#pragma omp atomic
1237-
forcescc(iat, 2) += force[2];
1238-
}
1239-
else
1184+
#pragma omp parallel for reduction(+:force0) reduction(+:force1) reduction(+:force2)
12401185
#endif
1241-
{
1242-
forcescc(iat, 0) += force[0];
1243-
forcescc(iat, 1) += force[1];
1244-
forcescc(iat, 2) += force[2];
1186+
for (int ig = 0; ig < rho_basis->npw; ++ig)
1187+
{
1188+
if (ig == ig0)
1189+
continue;
1190+
const ModuleBase::Vector3<double> gv = rho_basis->gcar[ig];
1191+
const double rhocgntigg = rhocgnt[GlobalC::rhopw->ig2igg[ig]];
1192+
const double arg = ModuleBase::TWO_PI * (gv * pos);
1193+
double sinp, cosp;
1194+
ModuleBase::libm::sincos(arg, &sinp, &cosp);
1195+
const std::complex<double> cpm = std::complex<double>(sinp, cosp) * conj(psic[ig]);
1196+
1197+
force0 += fact * rhocgntigg * GlobalC::ucell.tpiba * gv.x * cpm.real();
1198+
force1 += fact * rhocgntigg * GlobalC::ucell.tpiba * gv.y * cpm.real();
1199+
force2 += fact * rhocgntigg * GlobalC::ucell.tpiba * gv.z * cpm.real();
1200+
}
1201+
// std::cout << " forcescc = " << forcescc(iat,0) << " " << forcescc(iat,1) << " " <<
1202+
// forcescc(iat,2) << std::endl;
1203+
}
1204+
iat++;
1205+
}
12451206
}
1246-
1247-
// std::cout << " forcescc = " << forcescc(iat,0) << " " << forcescc(iat,1) << " " <<
1248-
// forcescc(iat,2) << std::endl;
12491207
}
12501208

1251-
delete[] aux;
1252-
1253-
#ifdef _OPENMP
1254-
}
1255-
#endif
1256-
1257-
delete[] rhocgnt;
1258-
12591209
Parallel_Reduce::reduce_double_pool(forcescc.c, forcescc.nr * forcescc.nc);
12601210

12611211
delete[] psic; // mohan fix bug 2012-03-22
1212+
delete[] rhocgnt; // mohan fix bug 2012-03-22
12621213

12631214
ModuleBase::timer::tick("Forces", "cal_force_scc");
12641215
return;

0 commit comments

Comments
 (0)