Skip to content

Commit 53a7d9d

Browse files
committed
Merge branch 'develop' of github.com:abacusmodeling/abacus-develop into develop
2 parents a4c6f5a + 3b891d1 commit 53a7d9d

File tree

11 files changed

+1075
-520
lines changed

11 files changed

+1075
-520
lines changed

source/module_neighbor/sltk_grid_driver.cpp

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -314,3 +314,34 @@ ModuleBase::Vector3<double> Grid_Driver::Calculate_adjacent_site
314314

315315
return adjacent_site;
316316
}
317+
318+
#include "../src_pw/global.h"
319+
std::vector<std::tuple<int, int, ModuleBase::Vector3<int>, ModuleBase::Vector3<double>>> Grid_Driver::get_adjs(const size_t &iat)
320+
{
321+
const int it = GlobalC::ucell.iat2it[iat];
322+
const int ia = GlobalC::ucell.iat2ia[iat];
323+
const ModuleBase::Vector3<double> &tau = GlobalC::ucell.atoms[it].tau[ia];
324+
325+
std::vector<std::tuple<int, int, ModuleBase::Vector3<int>, ModuleBase::Vector3<double>>> adjs;
326+
GlobalC::GridD.Find_atom(GlobalC::ucell, tau, it, ia);
327+
for(int ad=0; ad<GlobalC::GridD.getAdjacentNum()+1; ad++)
328+
{
329+
const size_t it_ad = GlobalC::GridD.getType(ad);
330+
const size_t ia_ad = GlobalC::GridD.getNatom(ad);
331+
const ModuleBase::Vector3<int> box_ad = GlobalC::GridD.getBox(ad);
332+
const ModuleBase::Vector3<double> tau_ad = GlobalC::GridD.getAdjacentTau(ad);
333+
334+
adjs.push_back(std::make_tuple(it_ad, ia_ad, box_ad, tau_ad));
335+
}
336+
return adjs;
337+
}
338+
339+
std::vector<std::vector<std::tuple<int, int, ModuleBase::Vector3<int>, ModuleBase::Vector3<double>>>> Grid_Driver::get_adjs()
340+
{
341+
std::vector<std::vector<std::tuple<int, int, ModuleBase::Vector3<int>, ModuleBase::Vector3<double>>>> adjs(GlobalC::ucell.nat);
342+
for(size_t iat=0; iat<GlobalC::ucell.nat; iat++)
343+
{
344+
adjs[iat] = Grid_Driver::get_adjs(iat);
345+
}
346+
return adjs;
347+
}

source/module_neighbor/sltk_grid_driver.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
#include "../module_base/global_variable.h"
1111
#include "../module_base/vector3.h"
1212
#include "../src_pw/pw_basis.h"
13+
#include <tuple>
1314

1415
class Grid_Driver : public Grid
1516
{
@@ -50,6 +51,9 @@ class Grid_Driver : public Grid
5051
const ModuleBase::Vector3<double>& getAdjacentTau(const int i) const { return adjacent_tau[i]; }
5152
const ModuleBase::Vector3<int>& getBox(const int i) const {return box[i];}
5253

54+
std::vector<std::tuple<int, int, ModuleBase::Vector3<int>, ModuleBase::Vector3<double>>> get_adjs(const size_t &iat);
55+
std::vector<std::vector<std::tuple<int, int, ModuleBase::Vector3<int>, ModuleBase::Vector3<double>>>> get_adjs();
56+
5357
private:
5458

5559
mutable int adj_num;

source/src_lcao/LCAO_gen_fixedH.cpp

Lines changed: 217 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,14 @@
88
#include <map>
99
#include "../module_base/timer.h"
1010

11+
#ifdef __MKL
12+
#include <mkl_service.h>
13+
#endif
14+
15+
#ifdef _OPENMP
16+
#include <omp.h>
17+
#endif
18+
1119
LCAO_gen_fixedH::LCAO_gen_fixedH()
1220
{}
1321

@@ -1135,6 +1143,214 @@ void LCAO_gen_fixedH::build_Nonlocal_beta_new() //update by liuyu 2021-04-07
11351143
ModuleBase::TITLE("LCAO_gen_fixedH","build_Nonlocal_beta_new");
11361144
ModuleBase::timer::tick ("LCAO_gen_fixedH","build_Nonlocal_beta_new");
11371145

1146+
#ifdef __MKL
1147+
const int mkl_threads = mkl_get_max_threads();
1148+
mkl_set_num_threads(1);
1149+
#endif
1150+
1151+
const std::vector<std::vector<std::tuple<int, int, ModuleBase::Vector3<int>, ModuleBase::Vector3<double>>>> adjs_all = GlobalC::GridD.get_adjs();
1152+
1153+
#ifdef _OPENMP
1154+
#pragma omp parallel
1155+
{
1156+
double* Nonlocal_thread;
1157+
Nonlocal_thread = new double[GlobalC::ParaO.nloc];
1158+
ModuleBase::GlobalFunc::ZEROS(Nonlocal_thread, GlobalC::ParaO.nloc);
1159+
#pragma omp for schedule(dynamic)
1160+
#endif
1161+
for(int iat=0; iat<GlobalC::ucell.nat; iat++)
1162+
{
1163+
const int T0 = GlobalC::ucell.iat2it[iat];
1164+
const int I0 = GlobalC::ucell.iat2ia[iat];
1165+
Atom* atom0 = &GlobalC::ucell.atoms[T0];
1166+
1167+
//=======================================================
1168+
//Step1:
1169+
//saves <beta|psi>, where beta runs over L0,M0 on atom I0
1170+
//and psi runs over atomic basis sets on the current core
1171+
//=======================================================
1172+
#ifdef _OPENMP
1173+
std::vector<std::unordered_map<int,std::vector<double>>> nlm_tot_thread;
1174+
nlm_tot_thread.resize(adjs_all[iat].size());
1175+
#else
1176+
std::vector<std::unordered_map<int,std::vector<double>>> nlm_tot;
1177+
nlm_tot.resize(GlobalC::GridD.getAdjacentNum()+1);
1178+
#endif
1179+
1180+
const ModuleBase::Vector3<double> tau0 = atom0->tau[I0];
1181+
const double Rcut_Beta = GlobalC::ucell.infoNL.Beta[T0].get_rcut_max();
1182+
1183+
//outermost loop : all adjacent atoms
1184+
for(int ad_count=0; ad_count < adjs_all[iat].size(); ad_count++)
1185+
{
1186+
const int T1 = std::get<0>(adjs_all[iat][ad_count]);
1187+
const int I1 = std::get<1>(adjs_all[iat][ad_count]);
1188+
const int start1 = GlobalC::ucell.itiaiw2iwt(T1, I1, 0);
1189+
const double Rcut_AO1 = GlobalC::ORB.Phi[T1].getRcut();
1190+
const ModuleBase::Vector3<double> tau1 = std::get<3>(adjs_all[iat][ad_count]);
1191+
const Atom* atom1 = &GlobalC::ucell.atoms[T1];
1192+
const int nw1_tot = atom1->nw*GlobalV::NPOL;
1193+
1194+
#ifdef _OPENMP
1195+
nlm_tot_thread[ad_count].clear();
1196+
#else
1197+
nlm_tot[ad_count].clear();
1198+
#endif
1199+
1200+
//middle loop : atomic basis on current processor (either row or column)
1201+
const double dist1 = (tau1-tau0).norm() * GlobalC::ucell.lat0;
1202+
if (dist1 > Rcut_Beta + Rcut_AO1)
1203+
{
1204+
continue;
1205+
}
1206+
1207+
for(int iw1=0; iw1<nw1_tot; ++iw1)
1208+
{
1209+
const int iw1_all = start1 + iw1;
1210+
const int iw1_local = GlobalC::ParaO.trace_loc_row[iw1_all];
1211+
const int iw2_local = GlobalC::ParaO.trace_loc_col[iw1_all];
1212+
1213+
if(iw1_local < 0 && iw2_local < 0) continue;
1214+
1215+
const int iw1_0 = iw1/GlobalV::NPOL;
1216+
std::vector<std::vector<double>> nlm;
1217+
//2D, but first dimension is only 1 here
1218+
//for force, the right hand side is the gradient
1219+
//and the first dimension is then 3
1220+
//inner loop : all projectors (L0,M0)
1221+
GlobalC::UOT.snap_psibeta_half(
1222+
GlobalC::ORB,
1223+
GlobalC::ucell.infoNL,
1224+
nlm, tau1, T1,
1225+
atom1->iw2l[ iw1_0 ], // L1
1226+
atom1->iw2m[ iw1_0 ], // m1
1227+
atom1->iw2n[ iw1_0 ], // N1
1228+
GlobalC::ucell.atoms[T0].tau[I0], T0, 0); //R0,T0
1229+
1230+
#ifdef _OPENMP
1231+
nlm_tot_thread[ad_count].insert({iw1_all,nlm[0]});
1232+
#else
1233+
nlm_tot[ad_count].insert({iw1_all,nlm[0]});
1234+
#endif
1235+
}//end iw
1236+
}//end ad
1237+
1238+
//=======================================================
1239+
//Step2:
1240+
//calculate sum_(L0,M0) beta<psi_i|beta><beta|psi_j>
1241+
//and accumulate the value to Hloc_fixed(i,j)
1242+
//=======================================================
1243+
for(int ad1_count=0; ad1_count < adjs_all[iat].size(); ad1_count++)
1244+
{
1245+
const int T1 = std::get<0>(adjs_all[iat][ad1_count]);
1246+
const int I1 = std::get<1>(adjs_all[iat][ad1_count]);
1247+
const int start1 = GlobalC::ucell.itiaiw2iwt(T1, I1, 0);
1248+
const ModuleBase::Vector3<double> tau1 = std::get<3>(adjs_all[iat][ad1_count]);
1249+
const Atom* atom1 = &GlobalC::ucell.atoms[T1];
1250+
const int nw1_tot = atom1->nw*GlobalV::NPOL;
1251+
const double Rcut_AO1 = GlobalC::ORB.Phi[T1].getRcut();
1252+
1253+
for (int ad2_count=0; ad2_count < adjs_all[iat].size(); ad2_count++)
1254+
{
1255+
const int T2 = std::get<0>(adjs_all[iat][ad2_count]);
1256+
const int I2 = std::get<1>(adjs_all[iat][ad2_count]);
1257+
const int start2 = GlobalC::ucell.itiaiw2iwt(T2, I2, 0);
1258+
const ModuleBase::Vector3<double> tau2 = std::get<3>(adjs_all[iat][ad2_count]);
1259+
const Atom* atom2 = &GlobalC::ucell.atoms[T2];
1260+
const int nw2_tot = atom2->nw*GlobalV::NPOL;
1261+
const double Rcut_AO2 = GlobalC::ORB.Phi[T2].getRcut();
1262+
const double dist1 = (tau1-tau0).norm() * GlobalC::ucell.lat0;
1263+
const double dist2 = (tau2-tau0).norm() * GlobalC::ucell.lat0;
1264+
1265+
if (dist1 > Rcut_Beta + Rcut_AO1
1266+
|| dist2 > Rcut_Beta + Rcut_AO2)
1267+
{
1268+
continue;
1269+
}
1270+
1271+
for(int iw1=0; iw1<nw1_tot; ++iw1)
1272+
{
1273+
const int iw1_all = start1 + iw1;
1274+
const int iw1_local = GlobalC::ParaO.trace_loc_row[iw1_all];
1275+
if(iw1_local < 0) continue;
1276+
const int iw1_0 = iw1/GlobalV::NPOL;
1277+
for(int iw2=0; iw2<nw2_tot; ++iw2)
1278+
{
1279+
const int iw2_all = start2 + iw2;
1280+
const int iw2_local = GlobalC::ParaO.trace_loc_col[iw2_all];
1281+
if(iw2_local < 0) continue;
1282+
const int iw2_0 = iw2/GlobalV::NPOL;
1283+
#ifdef _OPENMP
1284+
std::vector<double> nlm1 = nlm_tot_thread[ad1_count][iw1_all];
1285+
std::vector<double> nlm2 = nlm_tot_thread[ad2_count][iw2_all];
1286+
#else
1287+
std::vector<double> nlm1 = nlm_tot[ad1_count][iw1_all];
1288+
std::vector<double> nlm2 = nlm_tot[ad2_count][iw2_all];
1289+
#endif
1290+
1291+
assert(nlm1.size()==nlm2.size());
1292+
#ifdef _OPENMP
1293+
double nlm_thread=0.0;
1294+
#else
1295+
double nlm=0.0;
1296+
#endif
1297+
const int nproj = GlobalC::ucell.infoNL.nproj[T0];
1298+
int ib = 0;
1299+
for(int nb = 0; nb < nproj; nb++)
1300+
{
1301+
const int L0 = GlobalC::ucell.infoNL.Beta[T0].Proj[nb].getL();
1302+
for(int m=0;m<2*L0+1;m++)
1303+
{
1304+
#ifdef _OPENMP
1305+
nlm_thread += nlm1[ib]*nlm2[ib]*GlobalC::ucell.atoms[T0].dion(nb,nb);
1306+
#else
1307+
nlm += nlm1[ib]*nlm2[ib]*GlobalC::ucell.atoms[T0].dion(nb,nb);
1308+
#endif
1309+
ib+=1;
1310+
}
1311+
}
1312+
assert(ib==nlm1.size());
1313+
1314+
const int ir = GlobalC::ParaO.trace_loc_row[ iw1_all ];
1315+
const int ic = GlobalC::ParaO.trace_loc_col[ iw2_all ];
1316+
long index=0;
1317+
if(GlobalV::KS_SOLVER=="genelpa" || GlobalV::KS_SOLVER=="scalapack_gvx")
1318+
{
1319+
index=ic*GlobalC::ParaO.nrow+ir;
1320+
}
1321+
else
1322+
{
1323+
index=ir*GlobalC::ParaO.ncol+ic;
1324+
}
1325+
#ifdef _OPENMP
1326+
Nonlocal_thread[index] += nlm_thread;
1327+
#else
1328+
GlobalC::LM.set_HSgamma(iw1_all,iw2_all,nlm,'N');
1329+
#endif
1330+
}//iw2
1331+
}//iw1
1332+
}//ad2
1333+
}//ad1
1334+
}//end iat
1335+
1336+
#ifdef _OPENMP
1337+
#pragma omp critical(cal_nonlocal)
1338+
{
1339+
for(int i=0; i<GlobalC::ParaO.nloc; i++)
1340+
{
1341+
GlobalC::LM.Hloc_fixed[i] += Nonlocal_thread[i];
1342+
}
1343+
}
1344+
delete[] Nonlocal_thread;
1345+
#endif
1346+
#ifdef _OPENMP
1347+
}
1348+
#endif
1349+
1350+
#ifdef __MKL
1351+
mkl_set_num_threads(mkl_threads);
1352+
#endif
1353+
/*
11381354
for (int T0 = 0; T0 < GlobalC::ucell.ntype; T0++)
11391355
{
11401356
Atom* atom0 = &GlobalC::ucell.atoms[T0];
@@ -1274,6 +1490,7 @@ void LCAO_gen_fixedH::build_Nonlocal_beta_new() //update by liuyu 2021-04-07
12741490
12751491
}//end I0
12761492
}//end T0
1493+
*/
12771494

12781495
ModuleBase::timer::tick ("LCAO_gen_fixedH","build_Nonlocal_beta_new");
12791496
return;

0 commit comments

Comments
 (0)