@@ -1132,133 +1132,84 @@ void Forces<FPTYPE, Device>::cal_force_scc(ModuleBase::matrix& forcescc, ModuleP
11321132 }
11331133 }
11341134
1135+ // work space
1136+ double * rhocgnt = new double [rho_basis->ngg ];
1137+ ModuleBase::GlobalFunc::ZEROS (rhocgnt, rho_basis->ngg );
1138+
11351139 rho_basis->real2recip (psic, psic);
11361140
11371141 int igg0 = 0 ;
11381142 const int ig0 = rho_basis->ig_gge0 ;
1139- const int ig_gap = (ig0 >= 0 && ig0 < rho_basis->npw ) ? ig0 : -1 ;
11401143 if (rho_basis->gg_uniq [0 ] < 1.0e-8 )
11411144 igg0 = 1 ;
11421145
1143- double * rhocgnt = new double [rho_basis->ngg ];
1144-
1145- #ifdef _OPENMP
1146- #pragma omp parallel
1147- {
1148- int num_threads = omp_get_num_threads ();
1149- int thread_id = omp_get_thread_num ();
1150- #else
1151- int num_threads = 1 ;
1152- int thread_id = 0 ;
1153- #endif
1154-
1155- // thread local work space
1156- double *aux = new double [ndm];
1157- ModuleBase::GlobalFunc::ZEROS (aux, ndm);
1158-
1159- int ig_beg, ig_length, ig_end;
1160- ModuleBase::TASK_DIST_1D (num_threads, thread_id, rho_basis->ngg , ig_beg, ig_length);
1161- ModuleBase::GlobalFunc::ZEROS (rhocgnt + ig_beg, ig_length);
1162- ig_end = ig_beg + ig_length;
1163-
11641146 double fact = 2.0 ;
1165- int last_it = -1 ;
1166-
1167- for (int iat = 0 ; iat < GlobalC::ucell.nat ; ++iat)
1147+ for (int nt = 0 ; nt < GlobalC::ucell.ntype ; nt++)
11681148 {
1169- int it = GlobalC::ucell.iat2it [iat];
1170- int ia = GlobalC::ucell.iat2ia [iat];
1171-
1172- // initialize rhocgnt when `it` is changed
1173- if (it != last_it)
1149+ // Here we compute the G.ne.0 term
1150+ const int mesh = GlobalC::ucell.atoms [nt].ncpp .msh ;
1151+ #ifdef _OPENMP
1152+ #pragma omp parallel for
1153+ #endif
1154+ for (int ig = igg0; ig < rho_basis->ngg ; ++ig)
11741155 {
1175- // Here we compute the G.ne.0 term
1176- const int mesh = GlobalC::ucell.atoms [it].ncpp .msh ;
1177-
1178- for (int ig = std::max (ig_beg, igg0); ig < ig_end; ++ig)
1156+ double * aux = new double [mesh];
1157+ const double gx = sqrt (rho_basis->gg_uniq [ig]) * GlobalC::ucell.tpiba ;
1158+ for (int ir = 0 ; ir < mesh; ir++)
11791159 {
1180- const double gx = sqrt (rho_basis->gg_uniq [ig]) * GlobalC::ucell.tpiba ;
1181- for (int ir = 0 ; ir < mesh; ir++)
1160+ if (GlobalC::ucell.atoms [nt].ncpp .r [ir] < 1.0e-8 )
11821161 {
1183- if (GlobalC::ucell.atoms [it].ncpp .r [ir] < 1.0e-8 )
1184- {
1185- aux[ir] = GlobalC::ucell.atoms [it].ncpp .rho_at [ir];
1186- }
1187- else
1188- {
1189- const double gxx = gx * GlobalC::ucell.atoms [it].ncpp .r [ir];
1190- aux[ir] = GlobalC::ucell.atoms [it].ncpp .rho_at [ir] * ModuleBase::libm::sin (gxx) / gxx;
1191- }
1162+ aux[ir] = GlobalC::ucell.atoms [nt].ncpp .rho_at [ir];
1163+ }
1164+ else
1165+ {
1166+ const double gxx = gx * GlobalC::ucell.atoms [nt].ncpp .r [ir];
1167+ aux[ir] = GlobalC::ucell.atoms [nt].ncpp .rho_at [ir] * ModuleBase::libm::sin (gxx) / gxx;
11921168 }
1193- ModuleBase::Integral::Simpson_Integral (mesh, aux, GlobalC::ucell.atoms [it].ncpp .rab , rhocgnt[ig]);
11941169 }
1195-
1196- // record it
1197- last_it = it;
1198-
1199- // wait for rhocgnt update
1200- #ifdef _OPENMP
1201- #pragma omp barrier
1202- #endif
1170+ ModuleBase::Integral::Simpson_Integral (mesh, aux, GlobalC::ucell.atoms [nt].ncpp .rab , rhocgnt[ig]);
1171+ delete[] aux; // mohan fix bug 2012-03-22
12031172 }
12041173
1205- const ModuleBase::Vector3<double > pos = GlobalC::ucell.atoms [it].tau [ia];
1206-
1207- double force[3 ] = {0 , 0 , 0 };
1208-
1209- const auto ig_loop = [&](int start, int stop)
1174+ int iat = 0 ;
1175+ for (int it = 0 ; it < GlobalC::ucell.ntype ; it++)
12101176 {
1211- for (int ig = start; ig < stop; ++ig )
1177+ for (int ia = 0 ; ia < GlobalC::ucell. atoms [it]. na ; ia++ )
12121178 {
1213- const ModuleBase::Vector3<double > gv = rho_basis->gcar [ig];
1214- const double rhocgntigg = rhocgnt[GlobalC::rhopw->ig2igg [ig]];
1215- const double arg = ModuleBase::TWO_PI * (gv * pos);
1216- double sinp, cosp;
1217- ModuleBase::libm::sincos (arg, &sinp, &cosp);
1218- const std::complex <double > cpm = std::complex <double >(sinp, cosp) * conj (psic[ig]);
1219-
1220- force[0 ] += fact * rhocgntigg * GlobalC::ucell.tpiba * gv.x * cpm.real ();
1221- force[1 ] += fact * rhocgntigg * GlobalC::ucell.tpiba * gv.y * cpm.real ();
1222- force[2 ] += fact * rhocgntigg * GlobalC::ucell.tpiba * gv.z * cpm.real ();
1223- }
1224- };
1225-
1226- ig_loop (ig_beg, std::min (ig_gap, ig_end));
1227- ig_loop (ig_gap + 1 , ig_end);
1228-
1179+ if (nt == it)
1180+ {
1181+ const ModuleBase::Vector3<double > pos = GlobalC::ucell.atoms [it].tau [ia];
1182+ double &force0 = forcescc (iat, 0 ), &force1 = forcescc (iat, 1 ), &force2 = forcescc (iat, 2 );
12291183#ifdef _OPENMP
1230- if (num_threads > 1 )
1231- {
1232- #pragma omp atomic
1233- forcescc (iat, 0 ) += force[0 ];
1234- #pragma omp atomic
1235- forcescc (iat, 1 ) += force[1 ];
1236- #pragma omp atomic
1237- forcescc (iat, 2 ) += force[2 ];
1238- }
1239- else
1184+ #pragma omp parallel for reduction(+:force0) reduction(+:force1) reduction(+:force2)
12401185#endif
1241- {
1242- forcescc (iat, 0 ) += force[0 ];
1243- forcescc (iat, 1 ) += force[1 ];
1244- forcescc (iat, 2 ) += force[2 ];
1186+ for (int ig = 0 ; ig < rho_basis->npw ; ++ig)
1187+ {
1188+ if (ig == ig0)
1189+ continue ;
1190+ const ModuleBase::Vector3<double > gv = rho_basis->gcar [ig];
1191+ const double rhocgntigg = rhocgnt[GlobalC::rhopw->ig2igg [ig]];
1192+ const double arg = ModuleBase::TWO_PI * (gv * pos);
1193+ double sinp, cosp;
1194+ ModuleBase::libm::sincos (arg, &sinp, &cosp);
1195+ const std::complex <double > cpm = std::complex <double >(sinp, cosp) * conj (psic[ig]);
1196+
1197+ force0 += fact * rhocgntigg * GlobalC::ucell.tpiba * gv.x * cpm.real ();
1198+ force1 += fact * rhocgntigg * GlobalC::ucell.tpiba * gv.y * cpm.real ();
1199+ force2 += fact * rhocgntigg * GlobalC::ucell.tpiba * gv.z * cpm.real ();
1200+ }
1201+ // std::cout << " forcescc = " << forcescc(iat,0) << " " << forcescc(iat,1) << " " <<
1202+ // forcescc(iat,2) << std::endl;
1203+ }
1204+ iat++;
1205+ }
12451206 }
1246-
1247- // std::cout << " forcescc = " << forcescc(iat,0) << " " << forcescc(iat,1) << " " <<
1248- // forcescc(iat,2) << std::endl;
12491207 }
12501208
1251- delete[] aux;
1252-
1253- #ifdef _OPENMP
1254- }
1255- #endif
1256-
1257- delete[] rhocgnt;
1258-
12591209 Parallel_Reduce::reduce_double_pool (forcescc.c , forcescc.nr * forcescc.nc );
12601210
12611211 delete[] psic; // mohan fix bug 2012-03-22
1212+ delete[] rhocgnt; // mohan fix bug 2012-03-22
12621213
12631214 ModuleBase::timer::tick (" Forces" , " cal_force_scc" );
12641215 return ;
0 commit comments