abacusmodeling
diff --git a/source/module_elecstate/module_charge/charge.cpp b/source/module_elecstate/module_charge/charge.cpp
Lines changed: 57 additions & 16 deletions
diff --git a/source/module_elecstate/module_charge/charge_broyden.cpp b/source/module_elecstate/module_charge/charge_broyden.cpp
Lines changed: 17 additions & 1 deletion
diff --git a/source/module_elecstate/module_charge/charge_extra.cpp b/source/module_elecstate/module_charge/charge_extra.cpp
Lines changed: 29 additions & 6 deletions
@@ -29,6 +29,7 @@
 #include <vector>
 #include "module_base/timer.h"
 #include "module_base/tool_threading.h"
+#include "module_base/libm/libm.h"
 #include "module_io/rho_io.h"
 
 Charge::Charge()
@@ -368,13 +369,13 @@ void Charge::atomic_rho(const int spin_number_need, double** rho_in, ModulePW::P
 					}();
 
 					assert(GlobalC::ucell.meshx>0);
-					std::vector<double> rho1d(GlobalC::ucell.meshx);
 					//----------------------------------------------------------
 					// Here we compute the G=0 term
 					//----------------------------------------------------------
 					int gstart = 0;
 					if(rho_basis->gg_uniq[0] < 1e-8)
 					{
+						std::vector<double> rho1d(GlobalC::ucell.meshx);
 						for (int ir = 0;ir < mesh;ir++)
 						{
 			//              rho1d [ir] = atom->rho_at[ir];
@@ -390,6 +391,15 @@ void Charge::atomic_rho(const int spin_number_need, double** rho_in, ModulePW::P
 					// G=0 term only belong to 1 cpu.
 					// Other processors start from '0'
 					//----------------------------------------------------------
+#ifdef _OPENMP
+#pragma omp parallel
+{
+#endif
+					std::vector<double> rho1d(GlobalC::ucell.meshx);
+
+#ifdef _OPENMP
+#pragma omp for
+#endif
 					for (int igg = gstart; igg < rho_basis->ngg ;++igg)
 					{
 						const double gx = sqrt(rho_basis->gg_uniq[igg]) * GlobalC::ucell.tpiba;
@@ -403,27 +413,37 @@ void Charge::atomic_rho(const int spin_number_need, double** rho_in, ModulePW::P
 							else
 							{
 								const double gxx = gx * atom->ncpp.r[ir];
-								rho1d[ir] = rhoatm[ir] * sin(gxx) / gxx;
-								rho1d[ir] = rhoatm[ir] * sin(gxx) / gxx;
+								rho1d[ir] = rhoatm[ir] * ModuleBase::libm::sin(gxx) / gxx;
 							}
 						}
 						ModuleBase::Integral::Simpson_Integral(mesh, rho1d.data(), atom->ncpp.rab, rho_lgl[igg]);
 					}
-					
-					if (GlobalV::test_charge>0) std::cout<<" |G|>0 term done." <<std::endl;
+#ifdef _OPENMP
+#pragma omp single
+#endif
+					{ if (GlobalV::test_charge>0) std::cout<<" |G|>0 term done." <<std::endl; }
 					//----------------------------------------------------------
 					// EXPLAIN : Complete the transfer of rho from real space to
 					// reciprocal space
 					//----------------------------------------------------------
+#ifdef _OPENMP
+#pragma omp for
+#endif
 					for (int igg=0; igg< rho_basis->ngg ; igg++)
 						rho_lgl[igg] /= GlobalC::ucell.omega;
+#ifdef _OPENMP
+}
+#endif
 					return rho_lgl;
 				}();
 				//----------------------------------------------------------
 				// EXPLAIN : compute the 3D atomic charge in reciprocal space
 				//----------------------------------------------------------
 				if(spin_number_need==1)
 				{
+#ifdef _OPENMP
+#pragma omp parallel for
+#endif
 					for (int ig=0; ig< rho_basis->npw ;ig++)
 					{
 						rho_g3d(0, ig) += GlobalC::sf.strucFac(it, ig) * rho_lgl[ rho_basis->ig2igg[ig] ];
@@ -434,6 +454,9 @@ void Charge::atomic_rho(const int spin_number_need, double** rho_in, ModulePW::P
 				{
 					if(startmag_type==1)
 					{
+#ifdef _OPENMP
+#pragma omp parallel for
+#endif
 						for (int ig = 0; ig < rho_basis->npw ; ig++)
 						{
 							const std::complex<double> swap = GlobalC::sf.strucFac(it, ig)* rho_lgl[rho_basis->ig2igg[ig]];
@@ -446,7 +469,6 @@ void Charge::atomic_rho(const int spin_number_need, double** rho_in, ModulePW::P
 					// mohan add 2011-06-14
 					else if(startmag_type==2)
 					{
-						std::complex<double> swap = ModuleBase::ZERO;
 						std::complex<double> ci_tpi = ModuleBase::NEG_IMAG_UNIT * ModuleBase::TWO_PI;
 						for (int ia = 0; ia < atom->na; ia++)
 						{
@@ -455,15 +477,17 @@ void Charge::atomic_rho(const int spin_number_need, double** rho_in, ModulePW::P
 							const double up = 0.5 * ( 1 + atom->mag[ia] / atom->ncpp.zv );
 							const double dw = 0.5 * ( 1 - atom->mag[ia] / atom->ncpp.zv );
 							//std::cout << " atom " << ia << " up=" << up << " dw=" << dw << std::endl;
-
+#ifdef _OPENMP
+#pragma omp parallel for
+#endif
 							for (int ig = 0; ig < rho_basis->npw ; ig++)
 							{
 								const double Gtau =
 									rho_basis->gcar[ig][0] * atom->tau[ia].x + 
 									rho_basis->gcar[ig][1] * atom->tau[ia].y + 
 									rho_basis->gcar[ig][2] * atom->tau[ia].z;
 
-								swap = exp(ci_tpi * Gtau) * rho_lgl[rho_basis->ig2igg[ig]];
+								std::complex<double> swap = ModuleBase::libm::exp(ci_tpi * Gtau) * rho_lgl[rho_basis->ig2igg[ig]];
 
 								rho_g3d(0, ig) += swap * up;
 								rho_g3d(1, ig) += swap * dw;
@@ -476,18 +500,27 @@ void Charge::atomic_rho(const int spin_number_need, double** rho_in, ModulePW::P
 					//noncolinear case
 					if(startmag_type == 1)
 					{
+						double sin_a1, sin_a2, cos_a1, cos_a2;
+						if(GlobalV::DOMAG)
+						{
+							ModuleBase::libm::sincos(atom->angle1[0], &sin_a1, &cos_a1);
+							ModuleBase::libm::sincos(atom->angle2[0], &sin_a2, &cos_a2);
+						}
+#ifdef _OPENMP
+#pragma omp parallel for
+#endif
 						for (int ig = 0; ig < rho_basis->npw ; ig++)
 						{
 							const std::complex<double> swap = GlobalC::sf.strucFac(it, ig)* rho_lgl[rho_basis->ig2igg[ig]];
 							rho_g3d(0, ig) += swap ;
 							if(GlobalV::DOMAG)
 							{
 								rho_g3d(1, ig) += swap * (GlobalC::ucell.magnet.start_magnetization[it] / atom->ncpp.zv) 
-								* sin(atom->angle1[0]) * cos(atom->angle2[0]);
+								* sin_a1 * cos_a2;
 								rho_g3d(2, ig) += swap * (GlobalC::ucell.magnet.start_magnetization[it] / atom->ncpp.zv) 
-								* sin(atom->angle1[0]) * sin(atom->angle2[0]);
+								* sin_a1 * sin_a2;
 								rho_g3d(3, ig) += swap * (GlobalC::ucell.magnet.start_magnetization[it] / atom->ncpp.zv) 
-								* cos(atom->angle1[0]);
+								* cos_a1;
 							}
 							else if(GlobalV::DOMAG_Z)
 							{
@@ -498,28 +531,36 @@ void Charge::atomic_rho(const int spin_number_need, double** rho_in, ModulePW::P
 					}
 					else if(startmag_type == 2)
 					{//zdy-warning-not-available
-						std::complex<double> swap = ModuleBase::ZERO;
 						std::complex<double> ci_tpi = ModuleBase::NEG_IMAG_UNIT * ModuleBase::TWO_PI;
 						for(int ia = 0;ia<atom->na;ia++)
 						{
+							double sin_a1, sin_a2, cos_a1, cos_a2;
+							if(GlobalV::DOMAG)
+							{
+								ModuleBase::libm::sincos(atom->angle1[ia], &sin_a1, &cos_a1);
+								ModuleBase::libm::sincos(atom->angle2[ia], &sin_a2, &cos_a2);
+							}
+#ifdef _OPENMP
+#pragma omp parallel for
+#endif
 							for (int ig = 0; ig < rho_basis->npw ; ig++)
 							{
 								const double Gtau =
 									rho_basis->gcar[ig][0] * atom->tau[ia].x + 
 									rho_basis->gcar[ig][1] * atom->tau[ia].y + 
 									rho_basis->gcar[ig][2] * atom->tau[ia].z;
 
-								swap = exp(ci_tpi * Gtau) * rho_lgl[rho_basis->ig2igg[ig]];
+								std::complex<double> swap = exp(ci_tpi * Gtau) * rho_lgl[rho_basis->ig2igg[ig]];
 
 								rho_g3d(0, ig) += swap;
 								if(GlobalV::DOMAG)
 								{
 									rho_g3d(1, ig) += swap * (atom->mag[ia] / atom->ncpp.zv) 
-										* sin(atom->angle1[ia]) * cos(atom->angle2[ia]);
+										* sin_a1 * cos_a2;
 									rho_g3d(2, ig) += swap * (atom->mag[ia] / atom->ncpp.zv) 
-										* sin(atom->angle1[ia]) * sin(atom->angle2[ia]);
+										* sin_a1 * sin_a2;
 									rho_g3d(3, ig) += swap * (atom->mag[ia] / atom->ncpp.zv) 
-										* cos(atom->angle1[ia]);
+										* cos_a1;
 								}
 								else if(GlobalV::DOMAG_Z)
 								{
 
@@ -11,6 +11,7 @@ void Charge_Mixing::Simplified_Broyden_mixing(const int &iter,
 	Charge* chr)
 {
 	ModuleBase::TITLE("Charge_Mixing","Simplified_Broyden_mixing");
+	ModuleBase::timer::tick("Charge", "Broyden_mixing");
 	//It is a simplified modified broyden_mixing method.
 	//Ref: D.D. Johnson PRB 38, 12807 (1988)
 	//Here the weight w0 of the error of the inverse Jacobian is set to 0 and the weight wn of
@@ -23,6 +24,9 @@ void Charge_Mixing::Simplified_Broyden_mixing(const int &iter,
 	int ipos = iter-2 - int((iter-2)/mixing_ndim) * mixing_ndim;
 	if(iter > 1)
 	{
+#ifdef _OPENMP
+#pragma omp parallel for collapse(2) schedule(static, 128)
+#endif
 		for(int is=0; is<GlobalV::NSPIN; is++)
 		{
 			for(int ig = 0 ; ig < GlobalC::rhopw->npw; ++ig)
@@ -32,6 +36,9 @@ void Charge_Mixing::Simplified_Broyden_mixing(const int &iter,
 			}
 		}
 	}
+#ifdef _OPENMP
+#pragma omp parallel for collapse(2) schedule(static, 128)
+#endif
 	for(int is=0; is<GlobalV::NSPIN; is++)
 	{
 		for(int ig = 0 ; ig < GlobalC::rhopw->npw; ++ig)
@@ -81,6 +88,9 @@ void Charge_Mixing::Simplified_Broyden_mixing(const int &iter,
 			{
 				gamma0 += beta(i,j) * work[j];
 			}
+#ifdef _OPENMP
+#pragma omp parallel for collapse(2) schedule(static, 512)
+#endif
 			for(int is=0; is<GlobalV::NSPIN; is++)
 			{
 				for(int ig = 0 ; ig < GlobalC::rhopw->npw; ++ig)
@@ -96,6 +106,9 @@ void Charge_Mixing::Simplified_Broyden_mixing(const int &iter,
 	}
 	int inext = iter-1 - int((iter-1)/mixing_ndim) * mixing_ndim;
 
+#ifdef _OPENMP
+#pragma omp parallel for collapse(2) schedule(static, 128)
+#endif
 	for(int is=0; is<GlobalV::NSPIN; is++)
 	{
 		for(int ig = 0 ; ig < GlobalC::rhopw->npw; ++ig)
@@ -108,13 +121,16 @@ void Charge_Mixing::Simplified_Broyden_mixing(const int &iter,
 
 	for(int is=0; is<GlobalV::NSPIN; is++)
 	{
+#ifdef _OPENMP
+#pragma omp parallel for schedule(static, 256)
+#endif
 		for(int ig = 0 ; ig < GlobalC::rhopw->npw; ++ig)
 		{
 			chr->rhog_save[is][ig] += mixing_beta * chr->rhog[is][ig];
 		}
 		GlobalC::rhopw->recip2real( chr->rhog_save[is], chr->rho[is]);
 	}
-
+	ModuleBase::timer::tick("Charge", "Broyden_mixing");
 	return;
 }
 
 
@@ -2,6 +2,7 @@
 #include "module_base/global_function.h"
 #include "module_base/global_variable.h"
 #include "module_hamilt_pw/hamilt_pwdft/global.h"
+#include "module_base/tool_threading.h"
 
 Charge_Extra::Charge_Extra()
 {
@@ -116,6 +117,9 @@ void Charge_Extra::extrapolate_charge(Charge* chr)
         ModuleBase::GlobalFunc::ZEROS(rho_atom[is], GlobalC::rhopw->nrxx);
     }
     chr->atomic_rho(GlobalV::NSPIN, rho_atom, GlobalC::rhopw);
+#ifdef _OPENMP
+#pragma omp parallel for collapse(2) schedule(static, 512)
+#endif
     for(int is=0; is<GlobalV::NSPIN; is++)
     {
         for(int ir=0; ir<GlobalC::rhopw->nrxx; ir++)
@@ -130,6 +134,9 @@ void Charge_Extra::extrapolate_charge(Charge* chr)
 
         if(pot_order > 1)
         {
+#ifdef _OPENMP
+#pragma omp parallel for collapse(2) schedule(static, 512)
+#endif
             for(int is=0; is<GlobalV::NSPIN; is++)
             {
                 for(int ir=0; ir<GlobalC::rhopw->nrxx; ir++)
@@ -143,7 +150,9 @@ void Charge_Extra::extrapolate_charge(Charge* chr)
     else if(rho_extr ==2)
     {
         GlobalV::ofs_running << " first order charge density extrapolation !" << std::endl;
-
+#ifdef _OPENMP
+#pragma omp parallel for collapse(2) schedule(static, 128)
+#endif
         for(int is=0; is<GlobalV::NSPIN; is++)
         {
             for(int ir=0; ir<GlobalC::rhopw->nrxx; ir++)
@@ -166,7 +175,9 @@ void Charge_Extra::extrapolate_charge(Charge* chr)
         {
             delta_rho3[is] = new double[GlobalC::rhopw->nrxx];
         }
-
+#ifdef _OPENMP
+#pragma omp parallel for collapse(2) schedule(static, 64)
+#endif
         for(int is=0; is<GlobalV::NSPIN; is++)
         {
             for(int ir=0; ir<GlobalC::rhopw->nrxx; ir++)
@@ -187,11 +198,19 @@ void Charge_Extra::extrapolate_charge(Charge* chr)
     }
 
     GlobalC::sf.setup_structure_factor(&GlobalC::ucell, GlobalC::rhopw);
-    for(int is=0; is<GlobalV::NSPIN; is++)
+    ModuleBase::OMP_PARALLEL([&](int num_threads, int thread_id)
     {
-        ModuleBase::GlobalFunc::ZEROS(rho_atom[is], GlobalC::rhopw->nrxx);
-    }
+        int irbeg, irlen;
+        ModuleBase::BLOCK_TASK_DIST_1D(num_threads, thread_id, GlobalC::rhopw->nrxx, 512, irbeg, irlen);
+        for(int is=0; is<GlobalV::NSPIN; is++)
+        {
+            ModuleBase::GlobalFunc::ZEROS(rho_atom[is] + irbeg, irlen);
+        }
+    });
     chr->atomic_rho(GlobalV::NSPIN, rho_atom, GlobalC::rhopw);
+#ifdef _OPENMP
+#pragma omp parallel for collapse(2) schedule(static, 512)
+#endif
     for(int is=0; is<GlobalV::NSPIN; is++)
     {
         for(int ir=0; ir<GlobalC::rhopw->nrxx; ir++)
@@ -222,7 +241,11 @@ void Charge_Extra::find_alpha_and_beta(void)
     double b2  = 0.0;
     double c   = 0.0;
     double det = 0.0;
-
+#ifdef _OPENMP
+#pragma omp parallel for schedule(static, 16) \
+    reduction(+:a11) reduction(+:a12) reduction(+:a22) \
+    reduction(+:b1) reduction(+:b2) reduction(+:c)
+#endif
     for(int i=0; i<natom; ++i)
     {
         a11 += (pos_now[i] - pos_old1[i]).norm2();
Original file line number	Diff line number	Diff line change
`@@ -11,6 +11,7 @@ void Charge_Mixing::Simplified_Broyden_mixing(const int &iter,`
`11`	`11`	`Charge* chr)`
`12`	`12`	`{`
`13`	`13`	`ModuleBase::TITLE("Charge_Mixing","Simplified_Broyden_mixing");`
	`14`	`+ ModuleBase::timer::tick("Charge", "Broyden_mixing");`
`14`	`15`	`//It is a simplified modified broyden_mixing method.`
`15`	`16`	`//Ref: D.D. Johnson PRB 38, 12807 (1988)`
`16`	`17`	`//Here the weight w0 of the error of the inverse Jacobian is set to 0 and the weight wn of`
`@@ -23,6 +24,9 @@ void Charge_Mixing::Simplified_Broyden_mixing(const int &iter,`
`23`	`24`	`int ipos = iter-2 - int((iter-2)/mixing_ndim) * mixing_ndim;`
`24`	`25`	`if(iter > 1)`
`25`	`26`	`{`
	`27`	`+#ifdef _OPENMP`
	`28`	`+#pragma omp parallel for collapse(2) schedule(static, 128)`
	`29`	`+#endif`
`26`	`30`	`for(int is=0; is<GlobalV::NSPIN; is++)`
`27`	`31`	`{`
`28`	`32`	`for(int ig = 0 ; ig < GlobalC::rhopw->npw; ++ig)`
`@@ -32,6 +36,9 @@ void Charge_Mixing::Simplified_Broyden_mixing(const int &iter,`
`32`	`36`	`}`
`33`	`37`	`}`
`34`	`38`	`}`
	`39`	`+#ifdef _OPENMP`
	`40`	`+#pragma omp parallel for collapse(2) schedule(static, 128)`
	`41`	`+#endif`
`35`	`42`	`for(int is=0; is<GlobalV::NSPIN; is++)`
`36`	`43`	`{`
`37`	`44`	`for(int ig = 0 ; ig < GlobalC::rhopw->npw; ++ig)`
`@@ -81,6 +88,9 @@ void Charge_Mixing::Simplified_Broyden_mixing(const int &iter,`
`81`	`88`	`{`
`82`	`89`	`gamma0 += beta(i,j) * work[j];`
`83`	`90`	`}`
	`91`	`+#ifdef _OPENMP`
	`92`	`+#pragma omp parallel for collapse(2) schedule(static, 512)`
	`93`	`+#endif`
`84`	`94`	`for(int is=0; is<GlobalV::NSPIN; is++)`
`85`	`95`	`{`
`86`	`96`	`for(int ig = 0 ; ig < GlobalC::rhopw->npw; ++ig)`
`@@ -96,6 +106,9 @@ void Charge_Mixing::Simplified_Broyden_mixing(const int &iter,`
`96`	`106`	`}`
`97`	`107`	`int inext = iter-1 - int((iter-1)/mixing_ndim) * mixing_ndim;`
`98`	`108`
	`109`	`+#ifdef _OPENMP`
	`110`	`+#pragma omp parallel for collapse(2) schedule(static, 128)`
	`111`	`+#endif`
`99`	`112`	`for(int is=0; is<GlobalV::NSPIN; is++)`
`100`	`113`	`{`
`101`	`114`	`for(int ig = 0 ; ig < GlobalC::rhopw->npw; ++ig)`
`@@ -108,13 +121,16 @@ void Charge_Mixing::Simplified_Broyden_mixing(const int &iter,`
`108`	`121`
`109`	`122`	`for(int is=0; is<GlobalV::NSPIN; is++)`
`110`	`123`	`{`
	`124`	`+#ifdef _OPENMP`
	`125`	`+#pragma omp parallel for schedule(static, 256)`
	`126`	`+#endif`
`111`	`127`	`for(int ig = 0 ; ig < GlobalC::rhopw->npw; ++ig)`
`112`	`128`	`{`
`113`	`129`	`chr->rhog_save[is][ig] += mixing_beta * chr->rhog[is][ig];`
`114`	`130`	`}`
`115`	`131`	`GlobalC::rhopw->recip2real( chr->rhog_save[is], chr->rho[is]);`
`116`	`132`	`}`
`117`		`-`
	`133`	`+ ModuleBase::timer::tick("Charge", "Broyden_mixing");`
`118`	`134`	`return;`
`119`	`135`	`}`
`120`	`136`