Skip to content

Commit a8bef4a

Browse files
authored
Merge branch 'develop' into fix_testcase_312_NO_GO_wfc_get_wf
2 parents 7487359 + f1508aa commit a8bef4a

21 files changed

+209
-246
lines changed

docs/community/faq.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,7 @@ write(cs_stru, cs_atoms, format='abacus', pp=pp, basis=basis)
109109

110110
ABACUS applies the density difference between two SCF steps (labeled as `DRHO` in the screen output) as the convergence criterion, which is considered as a more robust choice compared with the energy difference. `DRHO` is calculated via `DRHO = |rho(G)-rho_previous(G)|^2`. Note that the energy difference between two SCF steps (labed as `EDIFF`) is also printed out in the screen output.
111111

112-
**4. Why EDIFF is much slower than DRHO?
112+
**4. Why EDIFF is much slower than DRHO?**
113113

114114
For metaGGA calculations, it is normal because in addition to charge density, kinetic density also needs to be considered in metaGGA calculations. In this case, you can try set `mixing_tau = true`. If you find EDIFF is much slower than DRHO for non-metaGGA calculations, please start a new issue to us.
115115

docs/quick_start/easy_install.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -192,6 +192,8 @@ OMP_NUM_THREADS=4 mpirun -n 4 abacus
192192

193193
In this case, the total thread count is 16.
194194

195+
> Notice: If the MPI library you are using is OpenMPI, which is commonly the case, when you set the number of processes to 1 or 2, OpenMPI will default to `--bind-to core`. This means that no matter how many threads you set, these threads will be restricted to run on 1 or 2 CPU cores. Therefore, setting a higher number of OpenMP threads might result in slower program execution. Hence, when using `mpirun -n` set to 1 or 2, it is recommended to set `--bind-to none` to avoid performance degradation. For example:`OMP_NUM_THREADS=6 mpirun --bind-to none -n 1 abacus`. The detailed binding strategy of OpenMPI can be referred to at https://docs.open-mpi.org/en/v5.0.x/man-openmpi/man1/mpirun.1.html#quick-summary.
196+
195197
ABACUS will try to determine the number of threads used by each process if `OMP_NUM_THREADS` is not set. However, it is **required** to set `OMP_NUM_THREADS` before running `mpirun` to avoid potential performance issues.
196198

197199
Please refer to [hands-on guide](./hands_on.md) for more instructions.

source/module_base/complexarray.cpp

Lines changed: 0 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -262,18 +262,4 @@ void point_mult(ComplexArray &in1, ComplexArray &in2, ComplexArray &out){
262262
in1.ptr[i].real() * in2.ptr[i].imag() +
263263
in1.ptr[i].imag() * in2.ptr[i].real());}
264264
}
265-
const std::complex <double> &ComplexArray::operator()(const int ind1, const int ind2, const int ind3, const int ind4) const{
266-
assert(ind1>=0); assert(ind1<bound1);
267-
assert(ind2>=0); assert(ind2<bound2);
268-
assert(ind3>=0); assert(ind3<bound3);
269-
assert(ind4>=0); assert(ind4<bound4);
270-
const int ind = ((ind1 * bound2 + ind2) * bound3 + ind3) * bound4 + ind4;
271-
return ptr[ind];}
272-
std::complex<double>& ComplexArray::operator()(const int ind1,const int ind2,const int ind3,const int ind4){
273-
assert(ind1>=0); assert(ind1<bound1);
274-
assert(ind2>=0); assert(ind2<bound2);
275-
assert(ind3>=0); assert(ind3<bound3);
276-
assert(ind4>=0); assert(ind4<bound4);
277-
const int ind = ((ind1 * bound2 + ind2) * bound3 + ind3) * bound4 + ind4;
278-
return ptr[ind];}
279265
}

source/module_base/complexarray.h

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
#include <iostream>
66
#include <fstream>
77
#include <iomanip>
8+
#include <cassert>
89

910
namespace ModuleBase
1011
{
@@ -58,11 +59,27 @@ class ComplexArray
5859

5960
/// overloaded subscript operator for non-const std::complex Array const reference return creates an lvakue
6061
std::complex <double> &operator()
61-
(const int ind1=0, const int ind2=0, const int ind3=0, const int ind4=0);
62+
(const int ind1=0, const int ind2=0, const int ind3=0, const int ind4=0)
63+
{
64+
assert(ind1>=0); assert(ind1<bound1);
65+
assert(ind2>=0); assert(ind2<bound2);
66+
assert(ind3>=0); assert(ind3<bound3);
67+
assert(ind4>=0); assert(ind4<bound4);
68+
const int ind = ((ind1 * bound2 + ind2) * bound3 + ind3) * bound4 + ind4;
69+
return ptr[ind];
70+
};
6271
// std::complex < double> &operator()(int, int, int, int, int);
6372
/// overloaded subscript operator for const std::complex Array const reference return creates an cvakue
6473
const std::complex <double> &operator()
65-
(const int ind1=0, const int ind2=0, const int ind3=0, const int ind4=0)const;
74+
(const int ind1=0, const int ind2=0, const int ind3=0, const int ind4=0) const
75+
{
76+
assert(ind1>=0); assert(ind1<bound1);
77+
assert(ind2>=0); assert(ind2<bound2);
78+
assert(ind3>=0); assert(ind3<bound3);
79+
assert(ind4>=0); assert(ind4<bound4);
80+
const int ind = ((ind1 * bound2 + ind2) * bound3 + ind3) * bound4 + ind4;
81+
return ptr[ind];
82+
};
6683
// const std::complex < double> &operator()(int, int, int, int, int)const;
6784

6885
/****************************************************

source/module_esolver/esolver_fp.cpp

Lines changed: 26 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -23,15 +23,16 @@ namespace ModuleESolver
2323

2424
ESolver_FP::ESolver_FP()
2525
{
26-
// pw_rho = new ModuleBase::PW_Basis();
27-
// LCAO basis doesn't support GPU acceleration on FFT currently
2826
std::string fft_device = PARAM.inp.device;
27+
28+
// LCAO basis doesn't support GPU acceleration on FFT currently
2929
if(PARAM.inp.basis_type == "lcao")
3030
{
3131
fft_device = "cpu";
3232
}
33+
3334
pw_rho = new ModulePW::PW_Basis_Big(fft_device, PARAM.inp.precision);
34-
if ( PARAM.globalv.double_grid)
35+
if (PARAM.globalv.double_grid)
3536
{
3637
pw_rhod = new ModulePW::PW_Basis_Big(fft_device, PARAM.inp.precision);
3738
}
@@ -44,6 +45,7 @@ ESolver_FP::ESolver_FP()
4445
pw_big = static_cast<ModulePW::PW_Basis_Big*>(pw_rhod);
4546
pw_big->setbxyz(PARAM.inp.bx, PARAM.inp.by, PARAM.inp.bz);
4647
sf.set(pw_rhod, PARAM.inp.nbspline);
48+
4749
}
4850

4951
ESolver_FP::~ESolver_FP()
@@ -140,7 +142,7 @@ void ESolver_FP::after_scf(UnitCell& ucell, const int istep, const bool conv_eso
140142
// 2) write fermi energy
141143
ModuleIO::output_efermi(conv_esolver, this->pelec->eferm.ef);
142144

143-
// 3) update delta rho for charge extrapolation
145+
// 3) update delta_rho for charge extrapolation
144146
CE.update_delta_rho(ucell, &(this->chr), &(this->sf));
145147

146148
if (istep % PARAM.inp.out_interval == 0)
@@ -153,13 +155,13 @@ void ESolver_FP::after_scf(UnitCell& ucell, const int istep, const bool conv_eso
153155
double* data = nullptr;
154156
if (PARAM.inp.dm_to_rho)
155157
{
156-
data = this->pelec->charge->rho[is];
157-
this->pw_rhod->real2recip(this->pelec->charge->rho[is], this->pelec->charge->rhog[is]);
158+
data = this->chr.rho[is];
159+
this->pw_rhod->real2recip(this->chr.rho[is], this->chr.rhog[is]);
158160
}
159161
else
160162
{
161-
data = this->pelec->charge->rho_save[is];
162-
this->pw_rhod->real2recip(this->pelec->charge->rho_save[is], this->pelec->charge->rhog_save[is]);
163+
data = this->chr.rho_save[is];
164+
this->pw_rhod->real2recip(this->chr.rho_save[is], this->chr.rhog_save[is]);
163165
}
164166
std::string fn =PARAM.globalv.global_out_dir + "/SPIN" + std::to_string(is + 1) + "_CHG.cube";
165167
ModuleIO::write_vdata_palgrid(Pgrid,
@@ -176,7 +178,7 @@ void ESolver_FP::after_scf(UnitCell& ucell, const int istep, const bool conv_eso
176178
{
177179
fn =PARAM.globalv.global_out_dir + "/SPIN" + std::to_string(is + 1) + "_TAU.cube";
178180
ModuleIO::write_vdata_palgrid(Pgrid,
179-
this->pelec->charge->kin_r_save[is],
181+
this->chr.kin_r_save[is],
180182
is,
181183
PARAM.inp.nspin,
182184
istep,
@@ -217,7 +219,7 @@ void ESolver_FP::after_scf(UnitCell& ucell, const int istep, const bool conv_eso
217219
fn,
218220
istep,
219221
this->pw_rhod,
220-
this->pelec->charge,
222+
&this->chr,
221223
&(ucell),
222224
this->pelec->pot->get_fixed_v(),
223225
this->solvent);
@@ -226,11 +228,11 @@ void ESolver_FP::after_scf(UnitCell& ucell, const int istep, const bool conv_eso
226228
// 6) write ELF
227229
if (PARAM.inp.out_elf[0] > 0)
228230
{
229-
this->pelec->charge->cal_elf = true;
231+
this->chr.cal_elf = true;
230232
Symmetry_rho srho;
231233
for (int is = 0; is < PARAM.inp.nspin; is++)
232234
{
233-
srho.begin(is, *(this->pelec->charge), this->pw_rhod, ucell.symm);
235+
srho.begin(is, this->chr, this->pw_rhod, ucell.symm);
234236
}
235237

236238
std::string out_dir =PARAM.globalv.global_out_dir;
@@ -242,8 +244,8 @@ void ESolver_FP::after_scf(UnitCell& ucell, const int istep, const bool conv_eso
242244
out_dir,
243245
istep,
244246
PARAM.inp.nspin,
245-
this->pelec->charge->rho,
246-
this->pelec->charge->kin_r,
247+
this->chr.rho,
248+
this->chr.kin_r,
247249
this->pw_rhod,
248250
this->Pgrid,
249251
&(ucell),
@@ -260,9 +262,9 @@ void ESolver_FP::before_scf(UnitCell& ucell, const int istep)
260262
{
261263
// only G-vector and K-vector are changed due to the change of lattice
262264
// vector FFT grids do not change!!
263-
pw_rho->initgrids(ucell.lat0, ucell.latvec, pw_rho->nx, pw_rho->ny, pw_rho->nz);
264-
pw_rho->collect_local_pw();
265-
pw_rho->collect_uniqgg();
265+
this->pw_rho->initgrids(ucell.lat0, ucell.latvec, pw_rho->nx, pw_rho->ny, pw_rho->nz);
266+
this->pw_rho->collect_local_pw();
267+
this->pw_rho->collect_uniqgg();
266268

267269
if (PARAM.globalv.double_grid)
268270
{
@@ -292,7 +294,7 @@ void ESolver_FP::before_scf(UnitCell& ucell, const int istep)
292294
this->CE.update_all_dis(ucell);
293295
this->CE.extrapolate_charge(&(this->Pgrid),
294296
ucell,
295-
this->pelec->charge,
297+
&this->chr,
296298
&(this->sf),
297299
GlobalV::ofs_running,
298300
GlobalV::ofs_warning);
@@ -327,7 +329,7 @@ void ESolver_FP::before_scf(UnitCell& ucell, const int istep)
327329
std::stringstream ss;
328330
ss << PARAM.globalv.global_out_dir << "SPIN" << is + 1 << "_CHG_INI.cube";
329331
ModuleIO::write_vdata_palgrid(this->Pgrid,
330-
this->pelec->charge->rho[is],
332+
this->chr.rho[is],
331333
is,
332334
PARAM.inp.nspin,
333335
istep,
@@ -368,8 +370,8 @@ void ESolver_FP::iter_finish(UnitCell& ucell, const int istep, int& iter, bool&
368370
if (iter % PARAM.inp.out_freq_elec == 0 || iter == PARAM.inp.scf_nmax || conv_esolver)
369371
{
370372
std::complex<double>** rhog_tot
371-
= (PARAM.inp.dm_to_rho) ? this->pelec->charge->rhog : this->pelec->charge->rhog_save;
372-
double** rhor_tot = (PARAM.inp.dm_to_rho) ? this->pelec->charge->rho : this->pelec->charge->rho_save;
373+
= (PARAM.inp.dm_to_rho) ? this->chr.rhog : this->chr.rhog_save;
374+
double** rhor_tot = (PARAM.inp.dm_to_rho) ? this->chr.rho : this->chr.rho_save;
373375
for (int is = 0; is < PARAM.inp.nspin; is++)
374376
{
375377
this->pw_rhod->real2recip(rhor_tot[is], rhog_tot[is]);
@@ -386,12 +388,12 @@ void ESolver_FP::iter_finish(UnitCell& ucell, const int istep, int& iter, bool&
386388

387389
if (XC_Functional::get_ked_flag())
388390
{
389-
std::vector<std::complex<double>> kin_g_space(PARAM.inp.nspin * this->pelec->charge->ngmc, {0.0, 0.0});
391+
std::vector<std::complex<double>> kin_g_space(PARAM.inp.nspin * this->chr.ngmc, {0.0, 0.0});
390392
std::vector<std::complex<double>*> kin_g;
391393
for (int is = 0; is < PARAM.inp.nspin; is++)
392394
{
393-
kin_g.push_back(kin_g_space.data() + is * this->pelec->charge->ngmc);
394-
this->pw_rhod->real2recip(this->pelec->charge->kin_r_save[is], kin_g[is]);
395+
kin_g.push_back(kin_g_space.data() + is * this->chr.ngmc);
396+
this->pw_rhod->real2recip(this->chr.kin_r_save[is], kin_g[is]);
395397
}
396398
ModuleIO::write_rhog(PARAM.globalv.global_out_dir + PARAM.inp.suffix + "-TAU-DENSITY.restart",
397399
PARAM.globalv.gamma_only_pw || PARAM.globalv.gamma_only_local,

source/module_esolver/esolver_fp.h

Lines changed: 31 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -2,16 +2,31 @@
22
#define ESOLVER_FP_H
33

44
#include "esolver.h"
5+
6+
//! plane wave basis
57
#include "module_basis/module_pw/pw_basis.h"
8+
9+
//! symmetry analysis
610
#include "module_cell/module_symmetry/symmetry.h"
11+
12+
//! electronic states
713
#include "module_elecstate/elecstate.h"
14+
15+
//! charge extrapolation
816
#include "module_elecstate/module_charge/charge_extra.h"
17+
18+
//! solvation model
919
#include "module_hamilt_general/module_surchem/surchem.h"
20+
21+
//! local pseudopotential
1022
#include "module_hamilt_pw/hamilt_pwdft/VL_in_pw.h"
23+
24+
//! structure factor related to plane wave basis
1125
#include "module_hamilt_pw/hamilt_pwdft/structure_factor.h"
1226

1327
#include <fstream>
1428

29+
1530
//! The First-Principles (FP) Energy Solver Class
1631
/**
1732
* This class represents components that needed in
@@ -22,7 +37,7 @@
2237

2338
namespace ModuleESolver
2439
{
25-
class ESolver_FP : public ESolver
40+
class ESolver_FP: public ESolver
2641
{
2742
public:
2843
//! Constructor
@@ -49,39 +64,34 @@ class ESolver_FP : public ESolver
4964
//! ------------------------------------------------------------------------------
5065
elecstate::ElecState* pelec = nullptr; ///< Electronic states
5166

52-
//! ------------------------------------------------------------------------------
67+
//! K points in Brillouin zone
68+
K_Vectors kv;
5369

5470
//! Electorn charge density
5571
Charge chr;
5672

57-
//! Structure factors that used with plane-wave basis set
58-
Structure_Factor sf;
59-
60-
//! K points in Brillouin zone
61-
K_Vectors kv;
62-
63-
//! Plane-wave basis set for charge density
73+
//! pw_rho: Plane-wave basis set for charge density
74+
//! pw_rhod: same as pw_rho for NCPP. Here 'd' stands for 'dense',
75+
//! dense grid for for uspp, used for ultrasoft augmented charge density.
76+
//! charge density and potential are defined on dense grids,
77+
//! but effective potential needs to be interpolated on smooth grids in order to compute Veff|psi>
6478
ModulePW::PW_Basis* pw_rho;
79+
ModulePW::PW_Basis* pw_rhod; //! dense grid for USPP
80+
ModulePW::PW_Basis_Big* pw_big; ///< [temp] pw_basis_big class
6581

6682
//! parallel for rho grid
6783
Parallel_Grid Pgrid;
6884

69-
//! pointer to local pseudopotential
70-
pseudopot_cell_vl locpp;
85+
//! Structure factors that used with plane-wave basis set
86+
Structure_Factor sf;
7187

72-
/**
73-
* @brief same as pw_rho for ncpp. Here 'd' stands for 'dense'
74-
* dense grid for for uspp, used for ultrasoft augmented charge density.
75-
* charge density and potential are defined on dense grids,
76-
* but effective potential needs to be interpolated on smooth grids in order to compute Veff|psi>
77-
*/
78-
ModulePW::PW_Basis* pw_rhod;
79-
ModulePW::PW_Basis_Big* pw_big; ///< [temp] pw_basis_big class
88+
//! local pseudopotentials
89+
pseudopot_cell_vl locpp;
8090

81-
//! Charge extrapolation
91+
//! charge extrapolation method
8292
Charge_Extra CE;
8393

84-
// solvent model
94+
//! solvent model
8595
surchem solvent;
8696
};
8797
} // namespace ModuleESolver

0 commit comments

Comments
 (0)