Skip to content

Commit 50faf36

Browse files
authored
Feature: OpenMP for rho init and Charge_Mixing (#1847)
* Feature: OpenMP for pseudopot_cell_vl::vloc_of_g
* Feature: OpenMP for Charge::atomic_rho
* Feature: OpenMP for H_Hartree_pw::v_hartree
* Feature: Full OpenMP for setup_structure_factor
* OpenMP for Charge_Mixing::Pulay_mixing
* OpenMP for Charge_Mixing::Simplified_Broyden_mixing
* OpenMP for the remainder of Charge_Mixing
* Fix: fix Intel compiler build
1 parent 42beb67 commit 50faf36

File tree

9 files changed

+404
-181
lines changed

9 files changed

+404
-181
lines changed

source/module_elecstate/module_charge/charge.cpp

Lines changed: 57 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
#include <vector>
3030
#include "module_base/timer.h"
3131
#include "module_base/tool_threading.h"
32+
#include "module_base/libm/libm.h"
3233
#include "module_io/rho_io.h"
3334

3435
Charge::Charge()
@@ -368,13 +369,13 @@ void Charge::atomic_rho(const int spin_number_need, double** rho_in, ModulePW::P
368369
}();
369370

370371
assert(GlobalC::ucell.meshx>0);
371-
std::vector<double> rho1d(GlobalC::ucell.meshx);
372372
//----------------------------------------------------------
373373
// Here we compute the G=0 term
374374
//----------------------------------------------------------
375375
int gstart = 0;
376376
if(rho_basis->gg_uniq[0] < 1e-8)
377377
{
378+
std::vector<double> rho1d(GlobalC::ucell.meshx);
378379
for (int ir = 0;ir < mesh;ir++)
379380
{
380381
// rho1d [ir] = atom->rho_at[ir];
@@ -390,6 +391,15 @@ void Charge::atomic_rho(const int spin_number_need, double** rho_in, ModulePW::P
390391
// G=0 term only belong to 1 cpu.
391392
// Other processors start from '0'
392393
//----------------------------------------------------------
394+
#ifdef _OPENMP
395+
#pragma omp parallel
396+
{
397+
#endif
398+
std::vector<double> rho1d(GlobalC::ucell.meshx);
399+
400+
#ifdef _OPENMP
401+
#pragma omp for
402+
#endif
393403
for (int igg = gstart; igg < rho_basis->ngg ;++igg)
394404
{
395405
const double gx = sqrt(rho_basis->gg_uniq[igg]) * GlobalC::ucell.tpiba;
@@ -403,27 +413,37 @@ void Charge::atomic_rho(const int spin_number_need, double** rho_in, ModulePW::P
403413
else
404414
{
405415
const double gxx = gx * atom->ncpp.r[ir];
406-
rho1d[ir] = rhoatm[ir] * sin(gxx) / gxx;
407-
rho1d[ir] = rhoatm[ir] * sin(gxx) / gxx;
416+
rho1d[ir] = rhoatm[ir] * ModuleBase::libm::sin(gxx) / gxx;
408417
}
409418
}
410419
ModuleBase::Integral::Simpson_Integral(mesh, rho1d.data(), atom->ncpp.rab, rho_lgl[igg]);
411420
}
412-
413-
if (GlobalV::test_charge>0) std::cout<<" |G|>0 term done." <<std::endl;
421+
#ifdef _OPENMP
422+
#pragma omp single
423+
#endif
424+
{ if (GlobalV::test_charge>0) std::cout<<" |G|>0 term done." <<std::endl; }
414425
//----------------------------------------------------------
415426
// EXPLAIN : Complete the transfer of rho from real space to
416427
// reciprocal space
417428
//----------------------------------------------------------
429+
#ifdef _OPENMP
430+
#pragma omp for
431+
#endif
418432
for (int igg=0; igg< rho_basis->ngg ; igg++)
419433
rho_lgl[igg] /= GlobalC::ucell.omega;
434+
#ifdef _OPENMP
435+
}
436+
#endif
420437
return rho_lgl;
421438
}();
422439
//----------------------------------------------------------
423440
// EXPLAIN : compute the 3D atomic charge in reciprocal space
424441
//----------------------------------------------------------
425442
if(spin_number_need==1)
426443
{
444+
#ifdef _OPENMP
445+
#pragma omp parallel for
446+
#endif
427447
for (int ig=0; ig< rho_basis->npw ;ig++)
428448
{
429449
rho_g3d(0, ig) += GlobalC::sf.strucFac(it, ig) * rho_lgl[ rho_basis->ig2igg[ig] ];
@@ -434,6 +454,9 @@ void Charge::atomic_rho(const int spin_number_need, double** rho_in, ModulePW::P
434454
{
435455
if(startmag_type==1)
436456
{
457+
#ifdef _OPENMP
458+
#pragma omp parallel for
459+
#endif
437460
for (int ig = 0; ig < rho_basis->npw ; ig++)
438461
{
439462
const std::complex<double> swap = GlobalC::sf.strucFac(it, ig)* rho_lgl[rho_basis->ig2igg[ig]];
@@ -446,7 +469,6 @@ void Charge::atomic_rho(const int spin_number_need, double** rho_in, ModulePW::P
446469
// mohan add 2011-06-14
447470
else if(startmag_type==2)
448471
{
449-
std::complex<double> swap = ModuleBase::ZERO;
450472
std::complex<double> ci_tpi = ModuleBase::NEG_IMAG_UNIT * ModuleBase::TWO_PI;
451473
for (int ia = 0; ia < atom->na; ia++)
452474
{
@@ -455,15 +477,17 @@ void Charge::atomic_rho(const int spin_number_need, double** rho_in, ModulePW::P
455477
const double up = 0.5 * ( 1 + atom->mag[ia] / atom->ncpp.zv );
456478
const double dw = 0.5 * ( 1 - atom->mag[ia] / atom->ncpp.zv );
457479
//std::cout << " atom " << ia << " up=" << up << " dw=" << dw << std::endl;
458-
480+
#ifdef _OPENMP
481+
#pragma omp parallel for
482+
#endif
459483
for (int ig = 0; ig < rho_basis->npw ; ig++)
460484
{
461485
const double Gtau =
462486
rho_basis->gcar[ig][0] * atom->tau[ia].x +
463487
rho_basis->gcar[ig][1] * atom->tau[ia].y +
464488
rho_basis->gcar[ig][2] * atom->tau[ia].z;
465489

466-
swap = exp(ci_tpi * Gtau) * rho_lgl[rho_basis->ig2igg[ig]];
490+
std::complex<double> swap = ModuleBase::libm::exp(ci_tpi * Gtau) * rho_lgl[rho_basis->ig2igg[ig]];
467491

468492
rho_g3d(0, ig) += swap * up;
469493
rho_g3d(1, ig) += swap * dw;
@@ -476,18 +500,27 @@ void Charge::atomic_rho(const int spin_number_need, double** rho_in, ModulePW::P
476500
//noncolinear case
477501
if(startmag_type == 1)
478502
{
503+
double sin_a1, sin_a2, cos_a1, cos_a2;
504+
if(GlobalV::DOMAG)
505+
{
506+
ModuleBase::libm::sincos(atom->angle1[0], &sin_a1, &cos_a1);
507+
ModuleBase::libm::sincos(atom->angle2[0], &sin_a2, &cos_a2);
508+
}
509+
#ifdef _OPENMP
510+
#pragma omp parallel for
511+
#endif
479512
for (int ig = 0; ig < rho_basis->npw ; ig++)
480513
{
481514
const std::complex<double> swap = GlobalC::sf.strucFac(it, ig)* rho_lgl[rho_basis->ig2igg[ig]];
482515
rho_g3d(0, ig) += swap ;
483516
if(GlobalV::DOMAG)
484517
{
485518
rho_g3d(1, ig) += swap * (GlobalC::ucell.magnet.start_magnetization[it] / atom->ncpp.zv)
486-
* sin(atom->angle1[0]) * cos(atom->angle2[0]);
519+
* sin_a1 * cos_a2;
487520
rho_g3d(2, ig) += swap * (GlobalC::ucell.magnet.start_magnetization[it] / atom->ncpp.zv)
488-
* sin(atom->angle1[0]) * sin(atom->angle2[0]);
521+
* sin_a1 * sin_a2;
489522
rho_g3d(3, ig) += swap * (GlobalC::ucell.magnet.start_magnetization[it] / atom->ncpp.zv)
490-
* cos(atom->angle1[0]);
523+
* cos_a1;
491524
}
492525
else if(GlobalV::DOMAG_Z)
493526
{
@@ -498,28 +531,36 @@ void Charge::atomic_rho(const int spin_number_need, double** rho_in, ModulePW::P
498531
}
499532
else if(startmag_type == 2)
500533
{//zdy-warning-not-available
501-
std::complex<double> swap = ModuleBase::ZERO;
502534
std::complex<double> ci_tpi = ModuleBase::NEG_IMAG_UNIT * ModuleBase::TWO_PI;
503535
for(int ia = 0;ia<atom->na;ia++)
504536
{
537+
double sin_a1, sin_a2, cos_a1, cos_a2;
538+
if(GlobalV::DOMAG)
539+
{
540+
ModuleBase::libm::sincos(atom->angle1[ia], &sin_a1, &cos_a1);
541+
ModuleBase::libm::sincos(atom->angle2[ia], &sin_a2, &cos_a2);
542+
}
543+
#ifdef _OPENMP
544+
#pragma omp parallel for
545+
#endif
505546
for (int ig = 0; ig < rho_basis->npw ; ig++)
506547
{
507548
const double Gtau =
508549
rho_basis->gcar[ig][0] * atom->tau[ia].x +
509550
rho_basis->gcar[ig][1] * atom->tau[ia].y +
510551
rho_basis->gcar[ig][2] * atom->tau[ia].z;
511552

512-
swap = exp(ci_tpi * Gtau) * rho_lgl[rho_basis->ig2igg[ig]];
553+
std::complex<double> swap = exp(ci_tpi * Gtau) * rho_lgl[rho_basis->ig2igg[ig]];
513554

514555
rho_g3d(0, ig) += swap;
515556
if(GlobalV::DOMAG)
516557
{
517558
rho_g3d(1, ig) += swap * (atom->mag[ia] / atom->ncpp.zv)
518-
* sin(atom->angle1[ia]) * cos(atom->angle2[ia]);
559+
* sin_a1 * cos_a2;
519560
rho_g3d(2, ig) += swap * (atom->mag[ia] / atom->ncpp.zv)
520-
* sin(atom->angle1[ia]) * sin(atom->angle2[ia]);
561+
* sin_a1 * sin_a2;
521562
rho_g3d(3, ig) += swap * (atom->mag[ia] / atom->ncpp.zv)
522-
* cos(atom->angle1[ia]);
563+
* cos_a1;
523564
}
524565
else if(GlobalV::DOMAG_Z)
525566
{

source/module_elecstate/module_charge/charge_broyden.cpp

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ void Charge_Mixing::Simplified_Broyden_mixing(const int &iter,
1111
Charge* chr)
1212
{
1313
ModuleBase::TITLE("Charge_Mixing","Simplified_Broyden_mixing");
14+
ModuleBase::timer::tick("Charge", "Broyden_mixing");
1415
//It is a simplified modified broyden_mixing method.
1516
//Ref: D.D. Johnson PRB 38, 12807 (1988)
1617
//Here the weight w0 of the error of the inverse Jacobian is set to 0 and the weight wn of
@@ -23,6 +24,9 @@ void Charge_Mixing::Simplified_Broyden_mixing(const int &iter,
2324
int ipos = iter-2 - int((iter-2)/mixing_ndim) * mixing_ndim;
2425
if(iter > 1)
2526
{
27+
#ifdef _OPENMP
28+
#pragma omp parallel for collapse(2) schedule(static, 128)
29+
#endif
2630
for(int is=0; is<GlobalV::NSPIN; is++)
2731
{
2832
for(int ig = 0 ; ig < GlobalC::rhopw->npw; ++ig)
@@ -32,6 +36,9 @@ void Charge_Mixing::Simplified_Broyden_mixing(const int &iter,
3236
}
3337
}
3438
}
39+
#ifdef _OPENMP
40+
#pragma omp parallel for collapse(2) schedule(static, 128)
41+
#endif
3542
for(int is=0; is<GlobalV::NSPIN; is++)
3643
{
3744
for(int ig = 0 ; ig < GlobalC::rhopw->npw; ++ig)
@@ -81,6 +88,9 @@ void Charge_Mixing::Simplified_Broyden_mixing(const int &iter,
8188
{
8289
gamma0 += beta(i,j) * work[j];
8390
}
91+
#ifdef _OPENMP
92+
#pragma omp parallel for collapse(2) schedule(static, 512)
93+
#endif
8494
for(int is=0; is<GlobalV::NSPIN; is++)
8595
{
8696
for(int ig = 0 ; ig < GlobalC::rhopw->npw; ++ig)
@@ -96,6 +106,9 @@ void Charge_Mixing::Simplified_Broyden_mixing(const int &iter,
96106
}
97107
int inext = iter-1 - int((iter-1)/mixing_ndim) * mixing_ndim;
98108

109+
#ifdef _OPENMP
110+
#pragma omp parallel for collapse(2) schedule(static, 128)
111+
#endif
99112
for(int is=0; is<GlobalV::NSPIN; is++)
100113
{
101114
for(int ig = 0 ; ig < GlobalC::rhopw->npw; ++ig)
@@ -108,13 +121,16 @@ void Charge_Mixing::Simplified_Broyden_mixing(const int &iter,
108121

109122
for(int is=0; is<GlobalV::NSPIN; is++)
110123
{
124+
#ifdef _OPENMP
125+
#pragma omp parallel for schedule(static, 256)
126+
#endif
111127
for(int ig = 0 ; ig < GlobalC::rhopw->npw; ++ig)
112128
{
113129
chr->rhog_save[is][ig] += mixing_beta * chr->rhog[is][ig];
114130
}
115131
GlobalC::rhopw->recip2real( chr->rhog_save[is], chr->rho[is]);
116132
}
117-
133+
ModuleBase::timer::tick("Charge", "Broyden_mixing");
118134
return;
119135
}
120136

source/module_elecstate/module_charge/charge_extra.cpp

Lines changed: 29 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
#include "module_base/global_function.h"
33
#include "module_base/global_variable.h"
44
#include "module_hamilt_pw/hamilt_pwdft/global.h"
5+
#include "module_base/tool_threading.h"
56

67
Charge_Extra::Charge_Extra()
78
{
@@ -116,6 +117,9 @@ void Charge_Extra::extrapolate_charge(Charge* chr)
116117
ModuleBase::GlobalFunc::ZEROS(rho_atom[is], GlobalC::rhopw->nrxx);
117118
}
118119
chr->atomic_rho(GlobalV::NSPIN, rho_atom, GlobalC::rhopw);
120+
#ifdef _OPENMP
121+
#pragma omp parallel for collapse(2) schedule(static, 512)
122+
#endif
119123
for(int is=0; is<GlobalV::NSPIN; is++)
120124
{
121125
for(int ir=0; ir<GlobalC::rhopw->nrxx; ir++)
@@ -130,6 +134,9 @@ void Charge_Extra::extrapolate_charge(Charge* chr)
130134

131135
if(pot_order > 1)
132136
{
137+
#ifdef _OPENMP
138+
#pragma omp parallel for collapse(2) schedule(static, 512)
139+
#endif
133140
for(int is=0; is<GlobalV::NSPIN; is++)
134141
{
135142
for(int ir=0; ir<GlobalC::rhopw->nrxx; ir++)
@@ -143,7 +150,9 @@ void Charge_Extra::extrapolate_charge(Charge* chr)
143150
else if(rho_extr ==2)
144151
{
145152
GlobalV::ofs_running << " first order charge density extrapolation !" << std::endl;
146-
153+
#ifdef _OPENMP
154+
#pragma omp parallel for collapse(2) schedule(static, 128)
155+
#endif
147156
for(int is=0; is<GlobalV::NSPIN; is++)
148157
{
149158
for(int ir=0; ir<GlobalC::rhopw->nrxx; ir++)
@@ -166,7 +175,9 @@ void Charge_Extra::extrapolate_charge(Charge* chr)
166175
{
167176
delta_rho3[is] = new double[GlobalC::rhopw->nrxx];
168177
}
169-
178+
#ifdef _OPENMP
179+
#pragma omp parallel for collapse(2) schedule(static, 64)
180+
#endif
170181
for(int is=0; is<GlobalV::NSPIN; is++)
171182
{
172183
for(int ir=0; ir<GlobalC::rhopw->nrxx; ir++)
@@ -187,11 +198,19 @@ void Charge_Extra::extrapolate_charge(Charge* chr)
187198
}
188199

189200
GlobalC::sf.setup_structure_factor(&GlobalC::ucell, GlobalC::rhopw);
190-
for(int is=0; is<GlobalV::NSPIN; is++)
201+
ModuleBase::OMP_PARALLEL([&](int num_threads, int thread_id)
191202
{
192-
ModuleBase::GlobalFunc::ZEROS(rho_atom[is], GlobalC::rhopw->nrxx);
193-
}
203+
int irbeg, irlen;
204+
ModuleBase::BLOCK_TASK_DIST_1D(num_threads, thread_id, GlobalC::rhopw->nrxx, 512, irbeg, irlen);
205+
for(int is=0; is<GlobalV::NSPIN; is++)
206+
{
207+
ModuleBase::GlobalFunc::ZEROS(rho_atom[is] + irbeg, irlen);
208+
}
209+
});
194210
chr->atomic_rho(GlobalV::NSPIN, rho_atom, GlobalC::rhopw);
211+
#ifdef _OPENMP
212+
#pragma omp parallel for collapse(2) schedule(static, 512)
213+
#endif
195214
for(int is=0; is<GlobalV::NSPIN; is++)
196215
{
197216
for(int ir=0; ir<GlobalC::rhopw->nrxx; ir++)
@@ -222,7 +241,11 @@ void Charge_Extra::find_alpha_and_beta(void)
222241
double b2 = 0.0;
223242
double c = 0.0;
224243
double det = 0.0;
225-
244+
#ifdef _OPENMP
245+
#pragma omp parallel for schedule(static, 16) \
246+
reduction(+:a11) reduction(+:a12) reduction(+:a22) \
247+
reduction(+:b1) reduction(+:b2) reduction(+:c)
248+
#endif
226249
for(int i=0; i<natom; ++i)
227250
{
228251
a11 += (pos_now[i] - pos_old1[i]).norm2();

0 commit comments

Comments
 (0)