Skip to content

Commit 2a098b2

Browse files
authored
Add some timers related to FFT (#6537)
* update small places in charge_mixing_residual.cpp
* update charge_mixing_preconditioner
* add timers and remove some PARAM.inp.nspin
* fix problems
* add timers
* small fix
* fix a potential memory leak
* fix bug
1 parent 860ce0f commit 2a098b2

File tree

8 files changed

+207
-120
lines changed

8 files changed

+207
-120
lines changed

source/source_basis/module_pw/test_gpu/pw_basis_C2C.cpp

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -44,9 +44,6 @@ class PW_BASIS_K_GPU_TEST : public ::testing::Test
4444
int distribution_type = 1;
4545
bool xprime = false;
4646
const int nks = 1;
47-
ModuleBase::Vector3<double>* kvec_d;
48-
kvec_d = new ModuleBase::Vector3<double>[nks];
49-
kvec_d[0].set(0, 0, 0);
5047
// init
5148
const int mypool = 0;
5249
const int key = 1;

source/source_cell/module_symmetry/symm_rho.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,10 @@ void Symmetry::rho_symmetry( double *rho,
99
{
1010
ModuleBase::timer::tick("Symmetry","rho_symmetry");
1111

12+
assert(nr1>0);
13+
assert(nr2>0);
14+
assert(nr3>0);
15+
1216
// allocate flag for each FFT grid.
1317
bool* symflag = new bool[nr1 * nr2 * nr3];
1418
for (int i=0; i<nr1*nr2*nr3; i++)

source/source_esolver/esolver_of_tool.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -220,7 +220,7 @@ double ESolver_OF::cal_mu(double* pphi, double* pdEdphi, double nelec)
220220
* @brief Rotate and renormalize the direction |d>,
221221
* make it orthogonal to phi (<d|phi> = 0), and <d|d> = nelec
222222
*/
223-
void ESolver_OF::adjust_direction()
223+
void ESolver_OF::adjust_direction(void)
224224
{
225225
// filter the high frequency term in direction if of_full_pw = false
226226
if (!PARAM.inp.of_full_pw)

source/source_estate/module_charge/charge_mixing_preconditioner.cpp

Lines changed: 62 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -6,21 +6,32 @@
66

77
void Charge_Mixing::Kerker_screen_recip(std::complex<double>* drhog)
88
{
9-
if (this->mixing_gg0 <= 0.0 || this->mixing_beta <= 0.1) {
10-
return;
11-
}
9+
ModuleBase::TITLE("Charge_Mixing", "Kerker_screen_recip");
10+
11+
if (this->mixing_gg0 <= 0.0 || this->mixing_beta <= 0.1)
12+
{
13+
return;
14+
}
15+
16+
ModuleBase::timer::tick("Charge_Mixing", "Kerker_screen_recip");
17+
18+
const int nspin = PARAM.inp.nspin;
19+
1220
double fac = 0.0;
1321
double gg0 = 0.0;
1422
double amin = 0.0;
1523

1624
/// consider a resize for mixing_angle
17-
int resize_tmp = 1;
18-
if (PARAM.inp.nspin == 4 && this->mixing_angle > 0) { resize_tmp = 2;
19-
}
25+
int resize_tmp = 1;
26+
if (nspin == 4 && this->mixing_angle > 0)
27+
{
28+
resize_tmp = 2;
29+
}
2030

2131
/// implement Kerker for density and magnetization separately
22-
for (int is = 0; is < PARAM.inp.nspin / resize_tmp; ++is)
32+
for (int is = 0; is < nspin / resize_tmp; ++is)
2333
{
34+
const int is_idx = is * this->rhopw->npw;
2435
/// new mixing method only support nspin=2 not nspin=4
2536
if (is >= 1)
2637
{
@@ -29,10 +40,10 @@ void Charge_Mixing::Kerker_screen_recip(std::complex<double>* drhog)
2940
#ifdef __DEBUG
3041
assert(is == 1); // make sure break works
3142
#endif
32-
double is_mag = PARAM.inp.nspin - 1;
43+
double is_mag = nspin - 1;
3344
//for (int ig = 0; ig < this->rhopw->npw * is_mag; ig++)
3445
//{
35-
// drhog[is * this->rhopw->npw + ig] *= 1;
46+
// drhog[is_idx + ig] *= 1;
3647
//}
3748
break;
3849
}
@@ -46,32 +57,49 @@ void Charge_Mixing::Kerker_screen_recip(std::complex<double>* drhog)
4657
}
4758

4859
gg0 = std::pow(fac * 0.529177 / *this->tpiba, 2);
60+
61+
const double gg0_amin = this->mixing_gg0_min / amin;
62+
4963
#ifdef _OPENMP
5064
#pragma omp parallel for schedule(static, 512)
5165
#endif
5266
for (int ig = 0; ig < this->rhopw->npw; ++ig)
5367
{
5468
double gg = this->rhopw->gg[ig];
55-
double filter_g = std::max(gg / (gg + gg0), this->mixing_gg0_min / amin);
56-
drhog[is * this->rhopw->npw + ig] *= filter_g;
69+
double filter_g = std::max(gg / (gg + gg0), gg0_amin);
70+
drhog[is_idx + ig] *= filter_g;
5771
}
5872
}
73+
74+
ModuleBase::timer::tick("Charge_Mixing", "Kerker_screen_recip");
5975
return;
6076
}
6177

6278
void Charge_Mixing::Kerker_screen_real(double* drhor)
6379
{
64-
if (this->mixing_gg0 <= 0.0001 || this->mixing_beta <= 0.1) {
65-
return;
66-
}
67-
/// consider a resize for mixing_angle
80+
ModuleBase::TITLE("Charge_Mixing", "Kerker_screen_real");
81+
82+
if (this->mixing_gg0 <= 0.0001 || this->mixing_beta <= 0.1)
83+
{
84+
return;
85+
}
86+
87+
ModuleBase::timer::tick("Charge_Mixing", "Kerker_screen_real");
88+
89+
const int nspin = PARAM.inp.nspin;
90+
assert(nspin==1 || nspin==2 || nspin==4);
91+
92+
/// consider a resize for mixing_angle
6893
int resize_tmp = 1;
69-
if (PARAM.inp.nspin == 4 && this->mixing_angle > 0) { resize_tmp = 2;
70-
}
94+
if (nspin == 4 && this->mixing_angle > 0)
95+
{
96+
resize_tmp = 2;
97+
}
7198

72-
std::vector<std::complex<double>> drhog(this->rhopw->npw * PARAM.inp.nspin / resize_tmp);
73-
std::vector<double> drhor_filter(this->rhopw->nrxx * PARAM.inp.nspin / resize_tmp);
74-
for (int is = 0; is < PARAM.inp.nspin / resize_tmp; ++is)
99+
std::vector<std::complex<double>> drhog(this->rhopw->npw * nspin / resize_tmp);
100+
std::vector<double> drhor_filter(this->rhopw->nrxx * nspin / resize_tmp);
101+
102+
for (int is = 0; is < nspin / resize_tmp; ++is)
75103
{
76104
// Note after this process some G which is higher than Gmax will be filtered.
77105
// Thus we cannot use Kerker_screen_recip(drhog.data()) directly after it.
@@ -82,7 +110,7 @@ void Charge_Mixing::Kerker_screen_real(double* drhor)
82110
double gg0 = 0.0;
83111
double amin = 0.0;
84112

85-
for (int is = 0; is < PARAM.inp.nspin / resize_tmp; is++)
113+
for (int is = 0; is < nspin / resize_tmp; is++)
86114
{
87115

88116
if (is >= 1)
@@ -92,8 +120,8 @@ void Charge_Mixing::Kerker_screen_real(double* drhor)
92120
#ifdef __DEBUG
93121
assert(is == 1); /// make sure break works
94122
#endif
95-
double is_mag = PARAM.inp.nspin - 1;
96-
if (PARAM.inp.nspin == 4 && this->mixing_angle > 0) { is_mag = 1;
123+
double is_mag = nspin - 1;
124+
if (nspin == 4 && this->mixing_angle > 0) { is_mag = 1;
97125
}
98126
for (int ig = 0; ig < this->rhopw->npw * is_mag; ig++)
99127
{
@@ -111,6 +139,9 @@ void Charge_Mixing::Kerker_screen_real(double* drhor)
111139
}
112140

113141
gg0 = std::pow(fac * 0.529177 / *this->tpiba, 2);
142+
143+
const int is_idx = is * this->rhopw->npw;
144+
const double gg0_amin = this->mixing_gg0_min / amin;
114145
#ifdef _OPENMP
115146
#pragma omp parallel for schedule(static, 512)
116147
#endif
@@ -120,24 +151,27 @@ void Charge_Mixing::Kerker_screen_real(double* drhor)
120151
// I have not decided how to handle gg=0 part, will be changed in future
121152
//if (gg == 0)
122153
//{
123-
// drhog[is * this->rhopw->npw + ig] *= 0;
154+
// drhog[is_idx + ig] *= 0;
124155
// continue;
125156
//}
126-
double filter_g = std::max(gg / (gg + gg0), this->mixing_gg0_min / amin);
127-
drhog[is * this->rhopw->npw + ig] *= (1 - filter_g);
157+
double filter_g = std::max(gg / (gg + gg0), gg0_amin);
158+
drhog[is_idx + ig] *= (1 - filter_g);
128159
}
129160
}
130161
/// inverse FT
131-
for (int is = 0; is < PARAM.inp.nspin / resize_tmp; ++is)
162+
for (int is = 0; is < nspin / resize_tmp; ++is)
132163
{
133164
this->rhopw->recip2real(drhog.data() + is * this->rhopw->npw, drhor_filter.data() + is * this->rhopw->nrxx);
134165
}
135166

136167
#ifdef _OPENMP
137168
#pragma omp parallel for schedule(static, 512)
138169
#endif
139-
for (int ir = 0; ir < this->rhopw->nrxx * PARAM.inp.nspin / resize_tmp; ir++)
170+
for (int ir = 0; ir < this->rhopw->nrxx * nspin / resize_tmp; ir++)
140171
{
141172
drhor[ir] -= drhor_filter[ir];
142173
}
174+
175+
ModuleBase::timer::tick("Charge_Mixing", "Kerker_screen_real");
176+
return;
143177
}

source/source_estate/module_charge/charge_mixing_residual.cpp

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,13 @@ double Charge_Mixing::get_drho(Charge* chr, const double nelec)
99
{
1010
ModuleBase::TITLE("Charge_Mixing", "get_drho");
1111
ModuleBase::timer::tick("Charge_Mixing", "get_drho");
12+
const int nspin = PARAM.inp.nspin;
13+
assert(nspin==1 || nspin==2 || nspin==4);
1214
double drho = 0.0;
1315

1416
if (PARAM.inp.scf_thr_type == 1)
1517
{
16-
for (int is = 0; is < PARAM.inp.nspin; ++is)
18+
for (int is = 0; is < nspin; ++is)
1719
{
1820
ModuleBase::GlobalFunc::NOTE("Perform FFT on rho(r) to obtain rho(G).");
1921
chr->rhopw->real2recip(chr->rho[is], chr->rhog[is]);
@@ -23,15 +25,15 @@ double Charge_Mixing::get_drho(Charge* chr, const double nelec)
2325
}
2426

2527
ModuleBase::GlobalFunc::NOTE("Calculate the charge difference between rho(G) and rho_save(G)");
26-
std::vector<std::complex<double>> drhog(PARAM.inp.nspin * this->rhopw->npw);
28+
std::vector<std::complex<double>> drhog(nspin * this->rhopw->npw);
2729
#ifdef _OPENMP
2830
#pragma omp parallel for collapse(2) schedule(static, 512)
2931
#endif
30-
for (int is = 0; is < PARAM.inp.nspin; ++is)
32+
for (int is = 0; is < nspin; ++is)
3133
{
3234
for (int ig = 0; ig < this->rhopw->npw; ig++)
3335
{
34-
drhog[is * rhopw->npw + ig] = chr->rhog[is][ig] - chr->rhog_save[is][ig];
36+
drhog[is * this->rhopw->npw + ig] = chr->rhog[is][ig] - chr->rhog_save[is][ig];
3537
}
3638
}
3739

@@ -42,7 +44,7 @@ double Charge_Mixing::get_drho(Charge* chr, const double nelec)
4244
{
4345
// Note: Maybe it is wrong.
4446
// The inner_product_real function (L1-norm) is different from that (L2-norm) in mixing.
45-
for (int is = 0; is < PARAM.inp.nspin; is++)
47+
for (int is = 0; is < nspin; is++)
4648
{
4749
if (is != 0 && is != 3 && PARAM.globalv.domag_z)
4850
{

0 commit comments

Comments
 (0)