Skip to content

Commit d8e205d

Browse files
committed
Merge branch 'develop' of github.com:deepmodeling/abacus-develop into develop
2 parents c1394c4 + c0f9734 commit d8e205d

File tree

39 files changed

+452
-227
lines changed

39 files changed

+452
-227
lines changed

docs/input-main.md

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -597,9 +597,9 @@ This part of variables are used to control the parameters of stochastic DFT (SDF
597597
- **Description**:
598598
- Different method to do SDFT.
599599
- 1: SDFT calculates $T_n(\hat{h})\ket{\chi}$ twice, where $T_n(x)$ is the n-th order Chebyshev polynomial and $\hat{h}=\frac{\hat{H}-\bar{E}}{\Delta E}$ owning eigen-value $\in(-1,1)$. This method cost less memory but slow.
600-
- 2: SDFT calculates $T_n(\hat{h})\ket{\chi}$ once but need much more memory. This method is fast but when memory is not enough, only method 1 can be used.
601-
- other: use 1
602-
- **Default**: 1
600+
- 2: SDFT calculates $T_n(\hat{h})\ket{\chi}$ once but need much more memory. This method is much faster. Besides, it calculate $N_e$ with $\bra{\chi}\sqrt{\hat f}\sqrt{\hat f}\ket{\chi}$, which needs smaller [nche_sto](#nche_sto). However, when memory is not enough, only method 1 can be used.
601+
- other: use 2
602+
- **Default**: 2
603603
604604
#### nbands_sto
605605
@@ -642,6 +642,12 @@ This part of variables are used to control the parameters of stochastic DFT (SDF
642642
- **Description**: Frequency (once each initsto_freq steps) to generate new stochastic orbitals when running md.
643643
- **Default**:1000
644644
645+
#### npart_sto
646+
647+
- **Type**: Integer
648+
- **Description**: Make memory cost to 1/npart_sto times of previous one when running post process of SDFT like DOS with method_sto = 2.
649+
- **Default**:1
650+
645651
### Geometry relaxation
646652
647653
This part of variables are used to control the geometry relaxation.

source/input.cpp

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -144,7 +144,8 @@ void Input::Default(void)
144144
bndpar = 1;
145145
kpar = 1;
146146
initsto_freq = 1000;
147-
method_sto = 1;
147+
method_sto = 2;
148+
npart_sto = 1;
148149
cal_cond = false;
149150
dos_nche = 100;
150151
cond_nche = 20;
@@ -596,6 +597,10 @@ bool Input::Read(const std::string &fn)
596597
{
597598
read_value(ifs, method_sto);
598599
}
600+
else if (strcmp("npart_sto", word) == 0)
601+
{
602+
read_value(ifs, npart_sto);
603+
}
599604
else if (strcmp("cal_cond", word) == 0)
600605
{
601606
read_value(ifs, cal_cond);
@@ -1945,6 +1950,10 @@ void Input::Default_2(void) // jiyy add 2019-08-04
19451950
}
19461951
if(calculation.substr(0,3) != "sto") bndpar = 1;
19471952
if(bndpar > GlobalV::NPROC) bndpar = GlobalV::NPROC;
1953+
if(method_sto != 1 && method_sto != 2)
1954+
{
1955+
method_sto = 2;
1956+
}
19481957
}
19491958
#ifdef __MPI
19501959
void Input::Bcast()
@@ -1979,6 +1988,7 @@ void Input::Bcast()
19791988
Parallel_Common::bcast_double(emin_sto);
19801989
Parallel_Common::bcast_int(initsto_freq);
19811990
Parallel_Common::bcast_int(method_sto);
1991+
Parallel_Common::bcast_int(npart_sto);
19821992
Parallel_Common::bcast_bool(cal_cond);
19831993
Parallel_Common::bcast_int(cond_nche);
19841994
Parallel_Common::bcast_double(cond_dw);

source/input.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@ class Input
7272
int bndpar; //parallel for stochastic/deterministic bands
7373
int initsto_freq; //frequency to init stochastic orbitals when running md
7474
int method_sto; //different methods for sdft, 1: slow, less memory 2: fast, more memory
75+
int npart_sto; //for method_sto = 2, reduce memory
7576
bool cal_cond; //calculate electronic conductivities
7677
int cond_nche; //orders of Chebyshev expansions for conductivities
7778
double cond_dw; //d\omega for conductivities

source/module_base/math_chebyshev_def.h

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -349,15 +349,19 @@ void Chebyshev<REAL>::calpolyvec_complex(T *ptr,
349349
{
350350

351351
assert(N>=0 && LDA >= N);
352-
int ndmxt;
353-
if(m == 1) ndmxt = N * m;
354-
else ndmxt = LDA * m;
352+
const int ndmxt = LDA * m;
355353

356354
std::complex<REAL> *arraynp1 = polywaveout + 2 * ndmxt;
357355
std::complex<REAL> *arrayn = polywaveout + ndmxt;
358356
std::complex<REAL> *arrayn_1 = polywaveout;
359-
360-
ModuleBase::GlobalFunc::DCOPY(wavein, arrayn_1, ndmxt);
357+
358+
std::complex<REAL> *tmpin = wavein, *tmpout = arrayn_1;
359+
for(int i = 0 ; i < m ; ++i)
360+
{
361+
ModuleBase::GlobalFunc::DCOPY(tmpin, tmpout, N);
362+
tmpin += LDA;
363+
tmpout += LDA;
364+
}
361365

362366
//1-st order
363367
(ptr->*funA)(arrayn_1, arrayn, m);

source/module_cell/read_atoms.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -567,6 +567,7 @@ bool UnitCell_pseudo::read_atom_positions(std::ifstream &ifpos, std::ofstream &o
567567
if (na > 0)
568568
{
569569
delete[] atoms[it].tau;
570+
delete[] atoms[it].tau_original;
570571
delete[] atoms[it].taud;
571572
delete[] atoms[it].vel;
572573
delete[] atoms[it].mbl;

source/module_esolver/esolver_ks_pw_tool.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -158,7 +158,7 @@ void ESolver_KS_PW::calcondw(const int nt,const double dt,const double fwhmin,co
158158
double * cw11 = new double [nw];
159159
double * cw12 = new double [nw];
160160
double * cw22 = new double [nw];
161-
double * kappa = new double [(int)ceil(wcut/dw_in)];
161+
double * kappa = new double [nw];
162162
ModuleBase::GlobalFunc::ZEROS(cw11,nw);
163163
ModuleBase::GlobalFunc::ZEROS(cw12,nw);
164164
ModuleBase::GlobalFunc::ZEROS(cw22,nw);

source/module_esolver/esolver_sdft_pw.cpp

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -144,7 +144,6 @@ void ESolver_SDFT_PW::postprocess()
144144
{
145145
GlobalC::en.print_occ();
146146

147-
((hsolver::HSolverPW_SDFT*)phsol)->stoiter.cleanchiallorder();//release lots of memories
148147
if(this->maxniter == 0)
149148
{
150149
int iter = 1;
@@ -153,6 +152,7 @@ void ESolver_SDFT_PW::postprocess()
153152
hsolver::DiagoIterAssist::PW_DIAG_THR = std::max(std::min(1e-5, 0.1 * GlobalV::SCF_THR / std::max(1.0, GlobalC::CHR.nelec)),1e-12);
154153
hsolver::DiagoIterAssist::need_subspace = false;
155154
this->phsol->solve(this->phami, this->psi[0], this->pelec,this->stowf,istep, iter, GlobalV::KS_SOLVER, true);
155+
((hsolver::HSolverPW_SDFT*)phsol)->stoiter.cleanchiallorder();//release lots of memories
156156
}
157157
int nche_test = 0;
158158
if(INPUT.cal_cond) nche_test = std::max(nche_test, INPUT.cond_nche);
@@ -166,17 +166,21 @@ void ESolver_SDFT_PW::postprocess()
166166
if(INPUT.out_dos)
167167
{
168168
double emax, emin;
169-
if(INPUT.dos_setemax) emax = INPUT.dos_emax_ev;
170-
if(INPUT.dos_setemin) emin = INPUT.dos_emin_ev;
169+
if(INPUT.dos_setemax)
170+
emax = INPUT.dos_emax_ev;
171+
else
172+
emax = ((hsolver::HSolverPW_SDFT*)phsol)->stoiter.stohchi.Emax*ModuleBase::Ry_to_eV;
173+
if(INPUT.dos_setemin)
174+
emin = INPUT.dos_emin_ev;
175+
else
176+
emin = ((hsolver::HSolverPW_SDFT*)phsol)->stoiter.stohchi.Emin*ModuleBase::Ry_to_eV;
171177
if(!INPUT.dos_setemax && !INPUT.dos_setemin)
172178
{
173-
emax = ((hsolver::HSolverPW_SDFT*)phsol)->stoiter.stohchi.Emax;
174-
emin = ((hsolver::HSolverPW_SDFT*)phsol)->stoiter.stohchi.Emin;
175179
double delta=(emax-emin)*INPUT.dos_scale;
176180
emax=emax+delta/2.0;
177181
emin=emin-delta/2.0;
178182
}
179-
this->caldos(INPUT.dos_nche, INPUT.b_coef, emin, emax, INPUT.dos_edelta_ev );
183+
this->caldos(INPUT.dos_nche, INPUT.b_coef, emin, emax, INPUT.dos_edelta_ev, INPUT.npart_sto );
180184
}
181185
}
182186

source/module_esolver/esolver_sdft_pw.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ class ESolver_SDFT_PW: public ESolver_KS_PW
3333
const double dw_in, const int times);
3434
//calculate DOS
3535
void caldos(const int nche_dos, const double sigmain,
36-
const double emin, const double emax, const double de);
36+
const double emin, const double emax, const double de, const int npart);
3737

3838
private:
3939
int nche_sto; //norder of Chebyshev

source/module_esolver/esolver_sdft_pw_tool.cpp

Lines changed: 103 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ void ESolver_SDFT_PW::check_che(const int nche_in)
5656
while(1)
5757
{
5858
bool converge;
59-
converge= chetest.checkconverge(&stohchi, &Stochastic_hchi::hchi_reciprocal,
59+
converge= chetest.checkconverge(&stohchi, &Stochastic_hchi::hchi_norm,
6060
pchi, npw, stohchi.Emax, stohchi.Emin, 5.0);
6161

6262
if(!converge)
@@ -263,10 +263,10 @@ void ESolver_SDFT_PW::sKG(const int nche_KG, const double fwhmin, const double w
263263
ModuleBase::GlobalFunc::COPYARRAY(hj1sfpsi_out, j2sfpsi.get_pointer(), ndim*totbands_per*npwx);
264264

265265
/*
266-
// stohchi.hchi_reciprocal(psi0.get_pointer(), hpsi0.get_pointer(), totbands_per);
267-
// stohchi.hchi_reciprocal(sfpsi0.get_pointer(), hsfpsi0.get_pointer(), totbands_per);
268-
// stohchi.hchi_reciprocal(j1psi.get_pointer(), j2psi.get_pointer(), ndim*totbands_per);
269-
// stohchi.hchi_reciprocal(j1sfpsi.get_pointer(), j2sfpsi.get_pointer(), ndim*totbands_per);
266+
// stohchi.hchi_norm(psi0.get_pointer(), hpsi0.get_pointer(), totbands_per);
267+
// stohchi.hchi_norm(sfpsi0.get_pointer(), hsfpsi0.get_pointer(), totbands_per);
268+
// stohchi.hchi_norm(j1psi.get_pointer(), j2psi.get_pointer(), ndim*totbands_per);
269+
// stohchi.hchi_norm(j1sfpsi.get_pointer(), j2sfpsi.get_pointer(), ndim*totbands_per);
270270
// double Ebar = (stohchi.Emin + stohchi.Emax)/2;
271271
// double DeltaE = (stohchi.Emax - stohchi.Emin)/2;
272272
// for(int ib = 0 ; ib < totbands_per ; ++ib)
@@ -307,8 +307,8 @@ void ESolver_SDFT_PW::sKG(const int nche_KG, const double fwhmin, const double w
307307

308308
//(1-f)
309309
che.calcoef_real(&stoiter.stofunc,&Sto_Func<double>::n_fd);
310-
che.calfinalvec_real(&stohchi, &Stochastic_hchi::hchi_reciprocal, j1sfpsi.get_pointer(), j1sfpsi.get_pointer(), npw, npwx, totbands_per*ndim);
311-
che.calfinalvec_real(&stohchi, &Stochastic_hchi::hchi_reciprocal, j2sfpsi.get_pointer(), j2sfpsi.get_pointer(), npw, npwx, totbands_per*ndim);
310+
che.calfinalvec_real(&stohchi, &Stochastic_hchi::hchi_norm, j1sfpsi.get_pointer(), j1sfpsi.get_pointer(), npw, npwx, totbands_per*ndim);
311+
che.calfinalvec_real(&stohchi, &Stochastic_hchi::hchi_norm, j2sfpsi.get_pointer(), j2sfpsi.get_pointer(), npw, npwx, totbands_per*ndim);
312312

313313
psi::Psi<std::complex<double>> *p_j1psi = &j1psi;
314314
psi::Psi<std::complex<double>> *p_j2psi = &j2psi;
@@ -361,9 +361,9 @@ void ESolver_SDFT_PW::sKG(const int nche_KG, const double fwhmin, const double w
361361
}
362362

363363
//exp(iHdt)|chi>
364-
chet.calfinalvec_complex(&stohchi, &Stochastic_hchi::hchi_reciprocal, &exppsi(ksbandper,0), &exppsi(ksbandper,0), npw, npwx, nchip);
364+
chet.calfinalvec_complex(&stohchi, &Stochastic_hchi::hchi_norm, &exppsi(ksbandper,0), &exppsi(ksbandper,0), npw, npwx, nchip);
365365
//exp(-iHdt)|shchi>
366-
chet2.calfinalvec_complex(&stohchi, &Stochastic_hchi::hchi_reciprocal, &expsfpsi(ksbandper,0), &expsfpsi(ksbandper,0), npw, npwx, nchip);
366+
chet2.calfinalvec_complex(&stohchi, &Stochastic_hchi::hchi_norm, &expsfpsi(ksbandper,0), &expsfpsi(ksbandper,0), npw, npwx, nchip);
367367
psi::Psi<std::complex<double>> *p_exppsi = &exppsi;
368368
#ifdef __MPI
369369
psi::Psi<std::complex<double>> exppsi_tot;
@@ -454,53 +454,111 @@ void ESolver_SDFT_PW::sKG(const int nche_KG, const double fwhmin, const double w
454454
ModuleBase::timer::tick(this->classname,"sKG");
455455
}
456456

457-
void ESolver_SDFT_PW:: caldos( const int nche_dos, const double sigmain, const double emin, const double emax, const double de)
457+
void ESolver_SDFT_PW:: caldos( const int nche_dos, const double sigmain, const double emin, const double emax, const double de, const int npart)
458458
{
459-
cout<<"Calculating Dos...."<<endl;
459+
cout<<"========================="<<endl;
460+
cout<<"###Calculating Dos....###"<<endl;
461+
cout<<"========================="<<endl;
460462
ModuleBase::Chebyshev<double> che(nche_dos);
461463
const int nk = GlobalC::kv.nks;
462464
Stochastic_Iter& stoiter = ((hsolver::HSolverPW_SDFT*)phsol)->stoiter;
463465
Stochastic_hchi& stohchi = stoiter.stohchi;
464466
const int npwx = GlobalC::wf.npwx;
465467

466-
double * spolyv = new double [nche_dos];
467-
ModuleBase::GlobalFunc::ZEROS(spolyv, nche_dos);
468+
double * spolyv = nullptr;
469+
std::complex<double> *allorderchi = nullptr;
470+
if(stoiter.method == 1)
471+
{
472+
spolyv = new double [nche_dos];
473+
ModuleBase::GlobalFunc::ZEROS(spolyv, nche_dos);
474+
}
475+
else
476+
{
477+
spolyv = new double [nche_dos*nche_dos];
478+
ModuleBase::GlobalFunc::ZEROS(spolyv, nche_dos*nche_dos);
479+
int nchip_new = ceil((double)this->stowf.nchip_max / npart);
480+
allorderchi = new std::complex<double> [nchip_new * npwx * nche_dos];
481+
}
482+
cout<<"1. TracepolyA:"<<endl;
468483
for (int ik = 0;ik < nk;ik++)
469484
{
485+
cout<<"ik: "<<ik+1<<endl;
470486
if(nk > 1)
471487
{
472488
this->phami->updateHk(ik);
473489
}
474490
stohchi.current_ik = ik;
475491
const int npw = GlobalC::kv.ngk[ik];
476-
const int nchip = this->stowf.nchip[ik];
492+
const int nchipk = this->stowf.nchip[ik];
477493

478-
complex<double> * pchi;
494+
std::complex<double> * pchi;
479495
if(GlobalV::NBANDS > 0)
480496
pchi = stowf.chiortho[ik].c;
481497
else
482498
pchi = stowf.chi0[ik].c;
483-
che.tracepolyA(&stohchi, &Stochastic_hchi::hchi_reciprocal, pchi, npw, npwx, nchip);
484-
for(int i = 0 ; i < nche_dos ; ++i)
499+
if(stoiter.method == 1)
485500
{
486-
spolyv[i] += che.polytrace[i] * GlobalC::kv.wk[ik] / 2 ;
501+
che.tracepolyA(&stohchi, &Stochastic_hchi::hchi_norm, pchi, npw, npwx, nchipk);
502+
for(int i = 0 ; i < nche_dos ; ++i)
503+
{
504+
spolyv[i] += che.polytrace[i] * GlobalC::kv.wk[ik] / 2 ;
505+
}
506+
}
507+
else
508+
{
509+
int N = nche_dos;
510+
double kweight = GlobalC::kv.wk[ik] / 2;
511+
char trans = 'T';
512+
char normal = 'N';
513+
double one = 1;
514+
for(int ipart = 0 ; ipart < npart ; ++ipart)
515+
{
516+
int nchipk_new = nchipk / npart;
517+
int start_nchipk = ipart * nchipk_new + nchipk % npart;
518+
if(ipart < nchipk % npart)
519+
{
520+
nchipk_new++;
521+
start_nchipk = ipart * nchipk_new;
522+
}
523+
ModuleBase::GlobalFunc::ZEROS(allorderchi, nchipk_new * npwx * nche_dos);
524+
std::complex<double> *tmpchi = pchi + start_nchipk * npwx;
525+
che.calpolyvec_complex(&stohchi, &Stochastic_hchi::hchi_norm, tmpchi, allorderchi, npw, npwx, nchipk_new);
526+
double* vec_all= (double *) allorderchi;
527+
int LDA = npwx * nchipk_new * 2;
528+
int M = npwx * nchipk_new * 2;
529+
dgemm_(&trans,&normal, &N,&N,&M,&kweight,vec_all,&LDA,vec_all,&LDA,&one,spolyv,&N);
530+
}
487531
}
488532
}
533+
if(stoiter.method == 2) delete[] allorderchi;
534+
489535
string dosfile = GlobalV::global_out_dir+"DOS1_smearing.dat";
490536
ofstream ofsdos(dosfile.c_str());
491537
int ndos = int((emax-emin) / de)+1;
492538
double *dos = new double [ndos];
493539
ModuleBase::GlobalFunc::ZEROS(dos,ndos);
494540
stoiter.stofunc.sigma = sigmain / ModuleBase::Ry_to_eV;
495541
double sum = 0;
496-
double error = 0;
542+
double maxerror = 0;
497543
ofsdos<<setw(8)<<"## E(eV) "<<setw(20)<<"dos(eV^-1)"<<setw(20)<<"sum"<<setw(20)<<"Error(eV^-1)"<<endl;
544+
cout<<"2. Dos:"<<endl;
545+
int n10 = ndos/10;
546+
int percent = 10;
498547
for(int ie = 0; ie < ndos; ++ie)
499548
{
500-
stoiter.stofunc.targ_e = (emin + ie * de) / ModuleBase::Ry_to_eV;
501-
che.calcoef_real(&stoiter.stofunc, &Sto_Func<double>::ngauss);
502549
double KS_dos = 0;
503-
double sto_dos = BlasConnector::dot(nche_dos,che.coef_real,1,spolyv,1);
550+
double sto_dos = 0;
551+
stoiter.stofunc.targ_e = (emin + ie * de) / ModuleBase::Ry_to_eV;
552+
if(stoiter.method == 1)
553+
{
554+
che.calcoef_real(&stoiter.stofunc, &Sto_Func<double>::ngauss);
555+
sto_dos = BlasConnector::dot(nche_dos,che.coef_real,1,spolyv,1);
556+
}
557+
else
558+
{
559+
che.calcoef_real(&stoiter.stofunc, &Sto_Func<double>::nroot_gauss);
560+
sto_dos = stoiter.vTMv(che.coef_real,spolyv,nche_dos);
561+
}
504562
if(GlobalV::NBANDS > 0)
505563
{
506564
for(int ik = 0; ik < nk; ++ik)
@@ -517,16 +575,35 @@ void ESolver_SDFT_PW:: caldos( const int nche_dos, const double sigmain, const d
517575
MPI_Allreduce(MPI_IN_PLACE, &KS_dos, 1, MPI_DOUBLE, MPI_SUM , STO_WORLD);
518576
MPI_Allreduce(MPI_IN_PLACE, &sto_dos, 1, MPI_DOUBLE, MPI_SUM , MPI_COMM_WORLD);
519577
#endif
520-
double tmpre = che.coef_real[nche_dos-1] * spolyv[nche_dos-1];
578+
double tmpre = 0;
579+
if(stoiter.method == 1)
580+
{
581+
tmpre = che.coef_real[nche_dos-1] * spolyv[nche_dos-1];
582+
}
583+
else
584+
{
585+
const int norder = nche_dos;
586+
double last_coef = che.coef_real[norder-1];
587+
double last_spolyv = spolyv[norder*norder - 1];
588+
tmpre = last_coef *(BlasConnector::dot(norder,che.coef_real,1,spolyv+norder*(norder-1),1)
589+
+ BlasConnector::dot(norder,che.coef_real,1,spolyv+norder-1,norder)-last_coef*last_spolyv);
590+
}
521591
#ifdef __MPI
522592
MPI_Allreduce(MPI_IN_PLACE, &tmpre, 1, MPI_DOUBLE, MPI_SUM , MPI_COMM_WORLD);
523593
#endif
524-
if(error < tmpre) error = tmpre;
594+
if(maxerror < tmpre) maxerror = tmpre;
525595
dos[ie] = (KS_dos + sto_dos) / ModuleBase::Ry_to_eV;
526596
sum += dos[ie];
527-
ofsdos <<setw(8)<< emin + ie * de <<setw(20)<<dos[ie]<<setw(20)<<sum * de <<setw(20) <<error <<endl;
597+
ofsdos <<setw(8)<< emin + ie * de <<setw(20)<<dos[ie]<<setw(20)<<sum * de <<setw(20) <<tmpre <<endl;
598+
if(ie%n10 == n10 -1)
599+
{
600+
cout<<percent<<"%"<<" ";
601+
percent+=10;
602+
}
528603
}
529-
cout<<scientific<<"DOS max Chebyshev Error: "<<error<<endl;
604+
cout<<endl;
605+
cout<<"Finish DOS"<<endl;
606+
cout<<scientific<<"DOS max Chebyshev Error: "<<maxerror<<endl;
530607
delete[] dos;
531608
delete[] spolyv;
532609
return;

source/module_hsolver/hsolver_pw_sdft.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,7 @@ namespace hsolver
100100
}
101101
}
102102
// calculate stochastic rho
103-
stoiter.sum_stoband(stowf,pes);
103+
stoiter.sum_stoband(stowf,pes,pHamilt);
104104

105105

106106
//(6) calculate the delta_harris energy

0 commit comments

Comments
 (0)