diff --git a/docs/advanced/input_files/input-main.md b/docs/advanced/input_files/input-main.md index d91640bdb4..a524290c37 100644 --- a/docs/advanced/input_files/input-main.md +++ b/docs/advanced/input_files/input-main.md @@ -599,7 +599,7 @@ These variables are used to control general system parameters. - nao: from numerical atomic orbitals. If they are not enough, other wave functions are initialized with random numbers. - nao+random: add small random numbers on numerical atomic orbitals - > Only the `file` option is useful for the lcao basis set, which is mostly used when [calculation](#calculation) is set to `set_wf` and `get_pchg`. See more details in [out_wfc_lcao](#out_wfc_lcao). + > Only the `file` option is useful for the lcao basis set, which is mostly used when [calculation](#calculation) is set to `get_wf` and `get_pchg`. See more details in [out_wfc_lcao](#out_wfc_lcao). - **Default**: atomic ### init_chg @@ -1921,7 +1921,7 @@ The band (KS orbital) energy for each (k-point, spin, band) will be printed in t ### if_separate_k - **Type**: Boolean -- **Availability**: Only for LCAO, used only when `calculation = get_pchg` and `gamma_only` is turned off. +- **Availability**: For both PW and LCAO. When `basis_type = pw`, used if `out_pchg` is set. When `basis_type = lcao`, used only when `calculation = get_pchg` and `gamma_only` is turned off. - **Description**: Specifies whether to write the partial charge densities for all k-points to individual files or merge them. **Warning**: Enabling symmetry may produce incorrect results due to incorrect k-point weights. Therefore, when calculating partial charge densities, it is strongly recommended to set `symmetry = -1`. - **Default**: false diff --git a/source/module_elecstate/module_charge/symmetry_rho.cpp b/source/module_elecstate/module_charge/symmetry_rho.cpp index 9bf0c6b3d5..ae5d942976 100644 --- a/source/module_elecstate/module_charge/symmetry_rho.cpp +++ b/source/module_elecstate/module_charge/symmetry_rho.cpp @@ -97,7 +97,7 @@ void Symmetry_rho::psymm(double* rho_part, rhotot.resize(rho_basis->nxyz); ModuleBase::GlobalFunc::ZEROS(rhotot.data(), rho_basis->nxyz); } - Pgrid.reduce(rhotot.data(), rho_part); + Pgrid.reduce(rhotot.data(), rho_part, false); // (2) if (GlobalV::MY_RANK == 0) diff --git a/source/module_esolver/esolver_ks_pw.cpp b/source/module_esolver/esolver_ks_pw.cpp index ea76287bba..5e5177d1a9 100644 --- a/source/module_esolver/esolver_ks_pw.cpp +++ b/source/module_esolver/esolver_ks_pw.cpp @@ -645,14 +645,24 @@ void ESolver_KS_PW::iter_finish(UnitCell& ucell, const int istep, int //---------------------------------------------------------- // 3) Print out electronic wavefunctions in pw basis //---------------------------------------------------------- - if (iter % PARAM.inp.out_freq_elec == 0 || iter == PARAM.inp.scf_nmax || conv_esolver) - { - ModuleIO::write_wfc_pw(GlobalV::KPAR, GlobalV::MY_POOL, GlobalV::MY_RANK, - PARAM.inp.nbands, PARAM.inp.nspin, PARAM.globalv.npol, - GlobalV::RANK_IN_POOL, GlobalV::NPROC_IN_POOL, - PARAM.inp.out_wfc_pw, PARAM.inp.ecutwfc, PARAM.globalv.global_out_dir, - this->psi[0], this->kv, this->pw_wfc, GlobalV::ofs_running); - } + if (iter % PARAM.inp.out_freq_elec == 0 || iter == PARAM.inp.scf_nmax || conv_esolver) + { + ModuleIO::write_wfc_pw(GlobalV::KPAR, + GlobalV::MY_POOL, + GlobalV::MY_RANK, + PARAM.inp.nbands, + PARAM.inp.nspin, + PARAM.globalv.npol, + GlobalV::RANK_IN_POOL, + GlobalV::NPROC_IN_POOL, + PARAM.inp.out_wfc_pw, + PARAM.inp.ecutwfc, + PARAM.globalv.global_out_dir, + this->psi[0], + this->kv, + this->pw_wfc, + GlobalV::ofs_running); + } //---------------------------------------------------------- // 4) check if oscillate for delta_spin method @@ -718,11 +728,6 @@ void ESolver_KS_PW::after_scf(UnitCell& ucell, const int istep, const this->pw_rhod->ny, this->pw_rhod->nz, this->pw_rhod->nxyz, - this->kv.get_nks(), - this->kv.isk, - this->kv.wk, - this->pw_big->bz, - this->pw_big->nbz, this->chr.ngmc, &ucell, this->psi, @@ -731,16 +736,29 @@ void ESolver_KS_PW::after_scf(UnitCell& ucell, const int istep, const this->ctx, this->Pgrid, PARAM.globalv.global_out_dir, - PARAM.inp.if_separate_k); + PARAM.inp.if_separate_k, + this->kv, + GlobalV::KPAR, + GlobalV::MY_POOL, + &this->chr); } - - // tmp 2025-05-17, mohan note - ModuleIO::write_wfc_pw(GlobalV::KPAR, GlobalV::MY_POOL, GlobalV::MY_RANK, - PARAM.inp.nbands, PARAM.inp.nspin, PARAM.globalv.npol, - GlobalV::RANK_IN_POOL, GlobalV::NPROC_IN_POOL, - PARAM.inp.out_wfc_pw, PARAM.inp.ecutwfc, PARAM.globalv.global_out_dir, - this->psi[0], this->kv, this->pw_wfc, GlobalV::ofs_running); + // tmp 2025-05-17, mohan note + ModuleIO::write_wfc_pw(GlobalV::KPAR, + GlobalV::MY_POOL, + GlobalV::MY_RANK, + PARAM.inp.nbands, + PARAM.inp.nspin, + PARAM.globalv.npol, + GlobalV::RANK_IN_POOL, + GlobalV::NPROC_IN_POOL, + PARAM.inp.out_wfc_pw, + PARAM.inp.ecutwfc, + PARAM.globalv.global_out_dir, + this->psi[0], + this->kv, + this->pw_wfc, + GlobalV::ofs_running); //------------------------------------------------------------------ //! 5) calculate Wannier functions in pw basis @@ -931,7 +949,7 @@ void ESolver_KS_PW::after_all_runners(UnitCell& ucell) //---------------------------------------------------------- //---------------------------------------------------------- - //! The write_psi_r_1 interface will be removed in the very + //! The write_psi_r_1 interface will be removed in the very //! near future. Don't use it! //---------------------------------------------------------- // if (PARAM.inp.out_wfc_r == 1) // Peize Lin add 2021.11.21 @@ -951,19 +969,15 @@ void ESolver_KS_PW::after_all_runners(UnitCell& ucell) this->pw_rhod->ny, this->pw_rhod->nz, this->pw_rhod->nxyz, - this->kv.get_nks(), - this->kv.isk, - this->kv.wk, - this->pw_big->bz, - this->pw_big->nbz, - this->chr.ngmc, &ucell, this->psi, - this->pw_rhod, this->pw_wfc, this->ctx, this->Pgrid, - PARAM.globalv.global_out_dir); + PARAM.globalv.global_out_dir, + this->kv, + GlobalV::KPAR, + GlobalV::MY_POOL); } //---------------------------------------------------------- diff --git a/source/module_hamilt_pw/hamilt_pwdft/parallel_grid.cpp b/source/module_hamilt_pw/hamilt_pwdft/parallel_grid.cpp index 9f65265978..6d0eac2c59 100644 --- a/source/module_hamilt_pw/hamilt_pwdft/parallel_grid.cpp +++ b/source/module_hamilt_pw/hamilt_pwdft/parallel_grid.cpp @@ -1,61 +1,62 @@ #include "parallel_grid.h" + #include "module_base/parallel_global.h" #include "module_parameter/parameter.h" Parallel_Grid::Parallel_Grid() { this->allocate = false; - this->allocate_final_scf = false; //LiuXh add 20180619 + this->allocate_final_scf = false; // LiuXh add 20180619 } Parallel_Grid::~Parallel_Grid() { - if(this->allocate || this->allocate_final_scf) //LiuXh add 20180619 - { - for(int ip=0; ipallocate || this->allocate_final_scf) // LiuXh add 20180619 + { + for (int ip = 0; ip < GlobalV::KPAR; ip++) + { + delete[] numz[ip]; + delete[] startz[ip]; + delete[] whichpro[ip]; + delete[] whichpro_loc[ip]; + } + delete[] numz; + delete[] startz; + delete[] whichpro; + delete[] whichpro_loc; delete[] nproc_in_pool; - } + } } - -void Parallel_Grid::init( - const int &ncx_in, - const int &ncy_in, - const int &ncz_in, - const int &nczp_in, - const int &nrxx_in, - const int &nbz_in, - const int &bz_in) +void Parallel_Grid::init(const int& ncx_in, + const int& ncy_in, + const int& ncz_in, + const int& nczp_in, + const int& nrxx_in, + const int& nbz_in, + const int& bz_in) { - ModuleBase::TITLE("Parallel_Grid","init"); - - this->ncx = ncx_in; - this->ncy = ncy_in; - this->ncz = ncz_in; - this->nczp = nczp_in; - this->nrxx = nrxx_in; - this->nbz = nbz_in; + ModuleBase::TITLE("Parallel_Grid", "init"); + + this->ncx = ncx_in; + this->ncy = ncy_in; + this->ncz = ncz_in; + this->nczp = nczp_in; + this->nrxx = nrxx_in; + this->nbz = nbz_in; this->bz = bz_in; - if(nczp<0) - { - GlobalV::ofs_warning << " nczp = " << nczp << std::endl; - ModuleBase::WARNING_QUIT("Parallel_Grid::init","nczp<0"); - } + if (nczp < 0) + { + GlobalV::ofs_warning << " nczp = " << nczp << std::endl; + ModuleBase::WARNING_QUIT("Parallel_Grid::init", "nczp<0"); + } - assert(ncx > 0); - assert(ncy > 0); - assert(ncz > 0); + assert(ncx > 0); + assert(ncy > 0); + assert(ncz > 0); - this->ncxy = ncx * ncy; + this->ncxy = ncx * ncy; this->ncxyz = ncxy * ncz; #ifndef __MPI @@ -63,129 +64,144 @@ void Parallel_Grid::init( #endif // enable to call this function again liuyu 2023-03-10 - if(this->allocate) + if (this->allocate) { - for(int ip=0; ipallocate = false; } - // (2) - assert(allocate==false); - assert(GlobalV::KPAR > 0); + // (2) + assert(allocate == false); + assert(GlobalV::KPAR > 0); - this->nproc_in_pool = new int[GlobalV::KPAR]; - int nprocgroup; - if(PARAM.inp.esolver_type == "sdft") { nprocgroup = GlobalV::NPROC_IN_BNDGROUP; - } else { nprocgroup = GlobalV::NPROC; -} + this->nproc_in_pool = new int[GlobalV::KPAR]; + int nprocgroup; + if (PARAM.inp.esolver_type == "sdft") + { + nprocgroup = GlobalV::NPROC_IN_BNDGROUP; + } + else + { + nprocgroup = GlobalV::NPROC; + } - const int remain_pro = nprocgroup%GlobalV::KPAR; - for(int i=0; inproc_in_pool[i]++; -} - } - - this->numz = new int*[GlobalV::KPAR]; - this->startz = new int*[GlobalV::KPAR]; - this->whichpro = new int*[GlobalV::KPAR]; - - for(int ip=0; ipnumz[ip] = new int[nproc]; - this->startz[ip] = new int[nproc]; - this->whichpro[ip] = new int[this->ncz]; - ModuleBase::GlobalFunc::ZEROS(this->numz[ip], nproc); - ModuleBase::GlobalFunc::ZEROS(this->startz[ip], nproc); - ModuleBase::GlobalFunc::ZEROS(this->whichpro[ip], this->ncz); - } - - this->allocate = true; - this->z_distribution(); - - return; + const int remain_pro = nprocgroup % GlobalV::KPAR; + for (int i = 0; i < GlobalV::KPAR; i++) + { + nproc_in_pool[i] = nprocgroup / GlobalV::KPAR; + if (i < remain_pro) + { + this->nproc_in_pool[i]++; + } + } + + this->numz = new int*[GlobalV::KPAR]; + this->startz = new int*[GlobalV::KPAR]; + this->whichpro = new int*[GlobalV::KPAR]; + this->whichpro_loc = new int*[GlobalV::KPAR]; + + for (int ip = 0; ip < GlobalV::KPAR; ip++) + { + const int nproc = nproc_in_pool[ip]; + this->numz[ip] = new int[nproc]; + this->startz[ip] = new int[nproc]; + this->whichpro[ip] = new int[this->ncz]; + this->whichpro_loc[ip] = new int[this->ncz]; + ModuleBase::GlobalFunc::ZEROS(this->numz[ip], nproc); + ModuleBase::GlobalFunc::ZEROS(this->startz[ip], nproc); + ModuleBase::GlobalFunc::ZEROS(this->whichpro[ip], this->ncz); + ModuleBase::GlobalFunc::ZEROS(this->whichpro_loc[ip], this->ncz); + } + + this->allocate = true; + this->z_distribution(); + + return; } void Parallel_Grid::z_distribution() { - assert(allocate); - - int* startp = new int[GlobalV::KPAR]; - startp[0] = 0; - for(int ip=0; ipzpiece_to_stogroup(zpiece,iz,rho); - return; - } - assert(allocate); - //ModuleBase::TITLE("Parallel_Grid","zpiece_to_all"); - MPI_Status ierror; - - const int znow = iz - this->startz[GlobalV::MY_POOL][GlobalV::RANK_IN_POOL]; - const int proc = this->whichpro[GlobalV::MY_POOL][iz]; - - if(GlobalV::MY_POOL==0) - { - // case 1: the first part of rho in processor 0. - // and send zpeice to to other pools. - if(proc == 0 && GlobalV::MY_RANK ==0) - { - for(int ir=0; irwhichpro[ipool][iz], iz, MPI_COMM_WORLD); - } - } - - // case 2: processor n (n!=0) receive rho from processor 0. - // and the receive tag is iz. - else if(proc == GlobalV::RANK_IN_POOL ) - { - MPI_Recv(zpiece, ncxy, MPI_DOUBLE, 0, iz, MPI_COMM_WORLD,&ierror); - for(int ir=0; ir first part rho: processor 0 send the rho - // to all pools. The tag is iz, because processor may - // send more than once, and the only tag to distinguish - // them is iz. - else if(GlobalV::RANK_IN_POOL==0) - { - for(int ipool=0; ipool < GlobalV::KPAR; ipool++) - { - MPI_Send(zpiece, ncxy, MPI_DOUBLE, this->whichpro[ipool][iz], iz, MPI_COMM_WORLD); - } - } - }// GlobalV::MY_POOL == 0 - else - { - //GlobalV::ofs_running << "\n Receive charge density iz=" << iz << std::endl; - // the processors in other pools always receive rho from - // processor 0. the tag is 'iz' - if(proc == GlobalV::MY_RANK ) - { - MPI_Recv(zpiece, ncxy, MPI_DOUBLE, 0, iz, MPI_COMM_WORLD,&ierror); - for(int ir=0; irzpiece_to_stogroup(zpiece, iz, rho); + return; + } + assert(allocate); + // ModuleBase::TITLE("Parallel_Grid","zpiece_to_all"); + MPI_Status ierror; + + const int znow = iz - this->startz[GlobalV::MY_POOL][GlobalV::RANK_IN_POOL]; + const int proc = this->whichpro[GlobalV::MY_POOL][iz]; + + if (GlobalV::MY_POOL == 0) + { + // case 1: the first part of rho in processor 0. + // and send zpeice to to other pools. + if (proc == 0 && GlobalV::MY_RANK == 0) + { + for (int ir = 0; ir < ncxy; ir++) + { + rho[ir * nczp + znow] = zpiece[ir]; + } + for (int ipool = 1; ipool < GlobalV::KPAR; ipool++) + { + MPI_Send(zpiece, ncxy, MPI_DOUBLE, this->whichpro[ipool][iz], iz, MPI_COMM_WORLD); + } + } + + // case 2: processor n (n!=0) receive rho from processor 0. + // and the receive tag is iz. + else if (proc == GlobalV::RANK_IN_POOL) + { + MPI_Recv(zpiece, ncxy, MPI_DOUBLE, 0, iz, MPI_COMM_WORLD, &ierror); + for (int ir = 0; ir < ncxy; ir++) + { + rho[ir * nczp + znow] = zpiece[ir]; + } + } + + // case 2: > first part rho: processor 0 send the rho + // to all pools. The tag is iz, because processor may + // send more than once, and the only tag to distinguish + // them is iz. + else if (GlobalV::RANK_IN_POOL == 0) + { + for (int ipool = 0; ipool < GlobalV::KPAR; ipool++) + { + MPI_Send(zpiece, ncxy, MPI_DOUBLE, this->whichpro[ipool][iz], iz, MPI_COMM_WORLD); + } + } + } // GlobalV::MY_POOL == 0 + else + { + // GlobalV::ofs_running << "\n Receive charge density iz=" << iz << std::endl; + // the processors in other pools always receive rho from + // processor 0. the tag is 'iz' + if (proc == GlobalV::MY_RANK) + { + MPI_Recv(zpiece, ncxy, MPI_DOUBLE, 0, iz, MPI_COMM_WORLD, &ierror); + for (int ir = 0; ir < ncxy; ir++) + { + rho[ir * nczp + znow] = zpiece[ir]; + } + } + } + + // GlobalV::ofs_running << "\n iz = " << iz << " Done."; + return; } #endif #ifdef __MPI -void Parallel_Grid::zpiece_to_stogroup(double *zpiece, const int &iz, double *rho) const +void Parallel_Grid::zpiece_to_stogroup(double* zpiece, const int& iz, double* rho) const { - assert(allocate); - //TITLE("Parallel_Grid","zpiece_to_all"); - MPI_Status ierror; - - const int znow = iz - this->startz[GlobalV::MY_POOL][GlobalV::RANK_IN_POOL]; - const int proc = this->whichpro[GlobalV::MY_POOL][iz]; - - if(GlobalV::MY_POOL==0) - { - // case 1: the first part of rho in processor 0. - // and send zpeice to to other pools. - if(proc == 0 && GlobalV::RANK_IN_BPGROUP ==0) - { - for(int ir=0; irwhichpro[ipool][iz], iz, INT_BGROUP); - } - } - - // case 2: processor n (n!=0) receive rho from processor 0. - // and the receive tag is iz. - else if(proc == GlobalV::RANK_IN_POOL ) - { - MPI_Recv(zpiece, ncxy, MPI_DOUBLE, 0, iz, INT_BGROUP,&ierror); - for(int ir=0; ir first part rho: processor 0 send the rho - // to all pools. The tag is iz, because processor may - // send more than once, and the only tag to distinguish - // them is iz. - else if(GlobalV::RANK_IN_POOL==0) - { - for(int ipool=0; ipool < GlobalV::KPAR; ipool++) - { - MPI_Send(zpiece, ncxy, MPI_DOUBLE, this->whichpro[ipool][iz], iz, INT_BGROUP); - } - } - }// MY_POOL == 0 - else - { - //ofs_running << "\n Receive charge density iz=" << iz << endl; - // the processors in other pools always receive rho from - // processor 0. the tag is 'iz' - if(proc == GlobalV::RANK_IN_BPGROUP ) - { - MPI_Recv(zpiece, ncxy, MPI_DOUBLE, 0, iz, INT_BGROUP,&ierror); - for(int ir=0; irstartz[GlobalV::MY_POOL][GlobalV::RANK_IN_POOL]; + const int proc = this->whichpro[GlobalV::MY_POOL][iz]; + + if (GlobalV::MY_POOL == 0) + { + // case 1: the first part of rho in processor 0. + // and send zpeice to to other pools. + if (proc == 0 && GlobalV::RANK_IN_BPGROUP == 0) + { + for (int ir = 0; ir < ncxy; ir++) + { + rho[ir * nczp + znow] = zpiece[ir]; + } + for (int ipool = 1; ipool < GlobalV::KPAR; ipool++) + { + MPI_Send(zpiece, ncxy, MPI_DOUBLE, this->whichpro[ipool][iz], iz, INT_BGROUP); + } + } + + // case 2: processor n (n!=0) receive rho from processor 0. + // and the receive tag is iz. + else if (proc == GlobalV::RANK_IN_POOL) + { + MPI_Recv(zpiece, ncxy, MPI_DOUBLE, 0, iz, INT_BGROUP, &ierror); + for (int ir = 0; ir < ncxy; ir++) + { + rho[ir * nczp + znow] = zpiece[ir]; + } + } + + // case 2: > first part rho: processor 0 send the rho + // to all pools. The tag is iz, because processor may + // send more than once, and the only tag to distinguish + // them is iz. + else if (GlobalV::RANK_IN_POOL == 0) + { + for (int ipool = 0; ipool < GlobalV::KPAR; ipool++) + { + MPI_Send(zpiece, ncxy, MPI_DOUBLE, this->whichpro[ipool][iz], iz, INT_BGROUP); + } + } + } // MY_POOL == 0 + else + { + // ofs_running << "\n Receive charge density iz=" << iz << endl; + // the processors in other pools always receive rho from + // processor 0. the tag is 'iz' + if (proc == GlobalV::RANK_IN_BPGROUP) + { + MPI_Recv(zpiece, ncxy, MPI_DOUBLE, 0, iz, INT_BGROUP, &ierror); + for (int ir = 0; ir < ncxy; ir++) + { + rho[ir * nczp + znow] = zpiece[ir]; + } + } + } + // ofs_running << "\n iz = " << iz << " Done."; + return; } -void Parallel_Grid::reduce(double* rhotot, const double* const rhoin)const +void Parallel_Grid::reduce(double* rhotot, const double* const rhoin, const bool reduce_all_pool) const { - //ModuleBase::TITLE("Parallel_Grid","reduce"); - - // if not the first pool, wait here until processpr 0 - // send the Barrier command. - if(GlobalV::MY_POOL!=0) - { - return; - } - - double* zpiece = new double[this->ncxy]; - - for(int iz=0; izncz; iz++) - { - const int znow = iz - this->startz[GlobalV::MY_POOL][GlobalV::RANK_IN_POOL]; - const int proc = this->whichpro[GlobalV::MY_POOL][iz]; - ModuleBase::GlobalFunc::ZEROS(zpiece, this->ncxy); - int tag = iz; - MPI_Status ierror; - - // case 1: the first part of rho in processor 0. - if(proc == 0 && GlobalV::RANK_IN_POOL ==0) - { - for(int ir=0; irnczp + znow]; - } - } - - // case 2: > first part rho: send the rho to - // processor 0. - else if(proc == GlobalV::RANK_IN_POOL ) - { - for(int ir=0; irnczp + znow]; - } - MPI_Send(zpiece, ncxy, MPI_DOUBLE, 0, tag, POOL_WORLD); - } - - // case 2: > first part rho: processor 0 receive the rho - // from other processors - else if(GlobalV::RANK_IN_POOL==0) - { - MPI_Recv(zpiece, ncxy, MPI_DOUBLE, proc, tag, POOL_WORLD, &ierror); - } - - if(GlobalV::MY_RANK==0) - { - for (int ixy = 0; ixy < this->ncxy;++ixy) + // ModuleBase::TITLE("Parallel_Grid","reduce"); + + // if not the first pool, wait here until processpr 0 + // send the Barrier command. + if (!reduce_all_pool && GlobalV::MY_POOL != 0) + { + return; + } + + double* zpiece = new double[this->ncxy]; + + for (int iz = 0; iz < this->ncz; iz++) + { + const int znow = iz - this->startz[GlobalV::MY_POOL][GlobalV::RANK_IN_POOL]; + const int proc = this->whichpro[GlobalV::MY_POOL][iz]; + const int proc_loc = this->whichpro_loc[GlobalV::MY_POOL][iz]; // Obtain the local processor index in the pool + ModuleBase::GlobalFunc::ZEROS(zpiece, this->ncxy); + int tag = iz; + MPI_Status ierror; + + // Local processor 0 collects data from all other processors in the pool + // proc = proc_loc if GlobalV::MY_POOL == 0 + if (proc_loc == GlobalV::RANK_IN_POOL) + { + for (int ir = 0; ir < ncxy; ir++) + { + zpiece[ir] = rhoin[ir * this->nczp + znow]; + } + // Send data to the root of the pool + if (GlobalV::RANK_IN_POOL != 0) + { + MPI_Send(zpiece, ncxy, MPI_DOUBLE, 0, tag, POOL_WORLD); + } + } + + // The root of the pool receives data from other processors + if (GlobalV::RANK_IN_POOL == 0 && proc_loc != GlobalV::RANK_IN_POOL) + { + MPI_Recv(zpiece, ncxy, MPI_DOUBLE, proc_loc, tag, POOL_WORLD, &ierror); + } + + if (GlobalV::RANK_IN_POOL == 0) + { + for (int ixy = 0; ixy < this->ncxy; ++ixy) { rhotot[ixy * ncz + iz] = zpiece[ixy]; - } - } - } + } + } + } - delete[] zpiece; + delete[] zpiece; - return; + return; } #endif -void Parallel_Grid::init_final_scf(const int &ncx_in, const int &ncy_in, const int &ncz_in, const int &nczp_in, -const int &nrxx_in, const int &nbz_in, const int &bz_in) +void Parallel_Grid::init_final_scf(const int& ncx_in, + const int& ncy_in, + const int& ncz_in, + const int& nczp_in, + const int& nrxx_in, + const int& nbz_in, + const int& bz_in) { - ModuleBase::TITLE("Parallel_Grid","init"); - - this->ncx = ncx_in; - this->ncy = ncy_in; - this->ncz = ncz_in; - this->nczp = nczp_in; - this->nrxx = nrxx_in; - this->nbz = nbz_in; + ModuleBase::TITLE("Parallel_Grid", "init"); + + this->ncx = ncx_in; + this->ncy = ncy_in; + this->ncz = ncz_in; + this->nczp = nczp_in; + this->nrxx = nrxx_in; + this->nbz = nbz_in; this->bz = bz_in; - if(nczp<0) - { - GlobalV::ofs_warning << " nczp = " << nczp << std::endl; - ModuleBase::WARNING_QUIT("Parallel_Grid::init","nczp<0"); - } + if (nczp < 0) + { + GlobalV::ofs_warning << " nczp = " << nczp << std::endl; + ModuleBase::WARNING_QUIT("Parallel_Grid::init", "nczp<0"); + } - assert(ncx > 0); - assert(ncy > 0); - assert(ncz > 0); + assert(ncx > 0); + assert(ncy > 0); + assert(ncz > 0); - this->ncxy = ncx * ncy; + this->ncxy = ncx * ncy; this->ncxyz = ncxy * ncz; #ifndef __MPI return; #endif - // (2) - assert(allocate_final_scf==false); - assert(GlobalV::KPAR > 0); + // (2) + assert(allocate_final_scf == false); + assert(GlobalV::KPAR > 0); - this->nproc_in_pool = new int[GlobalV::KPAR]; - const int remain_pro = GlobalV::NPROC%GlobalV::KPAR; - for(int i=0; inproc_in_pool[i]++; -} - } - - this->numz = new int*[GlobalV::KPAR]; - this->startz = new int*[GlobalV::KPAR]; - this->whichpro = new int*[GlobalV::KPAR]; - - for(int ip=0; ipnumz[ip] = new int[nproc]; - this->startz[ip] = new int[nproc]; - this->whichpro[ip] = new int[this->ncz]; - ModuleBase::GlobalFunc::ZEROS(this->numz[ip], nproc); - ModuleBase::GlobalFunc::ZEROS(this->startz[ip], nproc); - ModuleBase::GlobalFunc::ZEROS(this->whichpro[ip], this->ncz); - } - - this->allocate_final_scf = true; - this->z_distribution(); - - return; + this->nproc_in_pool = new int[GlobalV::KPAR]; + const int remain_pro = GlobalV::NPROC % GlobalV::KPAR; + for (int i = 0; i < GlobalV::KPAR; i++) + { + nproc_in_pool[i] = GlobalV::NPROC / GlobalV::KPAR; + if (i < remain_pro) + { + this->nproc_in_pool[i]++; + } + } + + this->numz = new int*[GlobalV::KPAR]; + this->startz = new int*[GlobalV::KPAR]; + this->whichpro = new int*[GlobalV::KPAR]; + + for (int ip = 0; ip < GlobalV::KPAR; ip++) + { + const int nproc = nproc_in_pool[ip]; + this->numz[ip] = new int[nproc]; + this->startz[ip] = new int[nproc]; + this->whichpro[ip] = new int[this->ncz]; + ModuleBase::GlobalFunc::ZEROS(this->numz[ip], nproc); + ModuleBase::GlobalFunc::ZEROS(this->startz[ip], nproc); + ModuleBase::GlobalFunc::ZEROS(this->whichpro[ip], this->ncz); + } + + this->allocate_final_scf = true; + this->z_distribution(); + + return; } diff --git a/source/module_hamilt_pw/hamilt_pwdft/parallel_grid.h b/source/module_hamilt_pw/hamilt_pwdft/parallel_grid.h index 1da46fa394..748a612cd0 100644 --- a/source/module_hamilt_pw/hamilt_pwdft/parallel_grid.h +++ b/source/module_hamilt_pw/hamilt_pwdft/parallel_grid.h @@ -31,9 +31,9 @@ class Parallel_Grid void zpiece_to_stogroup(double* zpiece, const int& iz, double* rho) const; //qainrui add for sto-dft 2021-7-21 /// @brief Broadcast data from root to all processors. The index order is [x][y][z]. - void bcast(const double* const data_global, double* data_local, const int& rank)const; + void bcast(const double* const data_global, double* data_local, const int& rank) const; /// @brief Reduce data from all processors to root. The index order is [x][y][z]. - void reduce(double* rhotot, const double* constrhoin)const; + void reduce(double* rhotot, const double* constrhoin, const bool reduce_all_pool) const; #endif const int& nx = this->ncx; @@ -48,6 +48,7 @@ class Parallel_Grid int **numz = nullptr; int **startz = nullptr; int **whichpro = nullptr; + int **whichpro_loc = nullptr; int ncx=0; int ncy=0; diff --git a/source/module_io/cal_ldos.cpp b/source/module_io/cal_ldos.cpp index 2b6d112d02..49b58f120a 100644 --- a/source/module_io/cal_ldos.cpp +++ b/source/module_io/cal_ldos.cpp @@ -323,7 +323,7 @@ void trilinear_interpolate(const std::vector>& points, #ifdef __MPI if (GlobalV::MY_POOL == 0 && GlobalV::MY_BNDGROUP == 0) { - pgrid.reduce(data_full.data(), data.data()); + pgrid.reduce(data_full.data(), data.data(), false); } MPI_Barrier(MPI_COMM_WORLD); #else diff --git a/source/module_io/cube_io.h b/source/module_io/cube_io.h index d7e6b706a6..a5725438f9 100644 --- a/source/module_io/cube_io.h +++ b/source/module_io/cube_io.h @@ -1,105 +1,105 @@ #ifndef CUBE_IO_H #define CUBE_IO_H -#include #include "module_cell/unitcell.h" + +#include class Parallel_Grid; namespace ModuleIO { - /// read volumetric data from .cube file into the parallel distributed grid. - bool read_vdata_palgrid( - const Parallel_Grid& pgrid, - const int my_rank, - std::ofstream& ofs_running, - const std::string& fn, - double* const data, - const int nat); +/// read volumetric data from .cube file into the parallel distributed grid. +bool read_vdata_palgrid(const Parallel_Grid& pgrid, + const int my_rank, + std::ofstream& ofs_running, + const std::string& fn, + double* const data, + const int nat); - /// write volumetric data on the parallized grid into a .cube file - void write_vdata_palgrid( - const Parallel_Grid& pgrid, - const double* const data, - const int is, - const int nspin, - const int iter, - const std::string& fn, - const double ef, - const UnitCell*const ucell, - const int precision = 11, - const int out_fermi = 1); // mohan add 2007-10-17 +/// write volumetric data on the parallized grid into a .cube file +void write_vdata_palgrid(const Parallel_Grid& pgrid, + const double* const data, + const int is, + const int nspin, + const int iter, + const std::string& fn, + const double ef, + const UnitCell* const ucell, + const int precision = 11, + const int out_fermi = 1, + const bool reduce_all_pool = false); // only reduce in the main pool as default - /// read the full data from a cube file - bool read_cube(const std::string& file, - std::vector& comment, - int& natom, - std::vector& origin, - int& nx, - int& ny, - int& nz, - std::vector& dx, - std::vector& dy, - std::vector& dz, - std::vector& atom_type, - std::vector& atom_charge, - std::vector>& atom_pos, - std::vector& data); +/// read the full data from a cube file +bool read_cube(const std::string& file, + std::vector& comment, + int& natom, + std::vector& origin, + int& nx, + int& ny, + int& nz, + std::vector& dx, + std::vector& dy, + std::vector& dz, + std::vector& atom_type, + std::vector& atom_charge, + std::vector>& atom_pos, + std::vector& data); - /// write a cube file - void write_cube(const std::string& file, - const std::vector& comment, - const int& natom, - const std::vector& origin, - const int& nx, - const int& ny, - const int& nz, - const std::vector& dx, - const std::vector& dy, - const std::vector& dz, - const std::vector& atom_type, - const std::vector& atom_charge, - const std::vector>& atom_pos, - const std::vector& data, - const int precision, - const int ndata_line = 6); +/// write a cube file +void write_cube(const std::string& file, + const std::vector& comment, + const int& natom, + const std::vector& origin, + const int& nx, + const int& ny, + const int& nz, + const std::vector& dx, + const std::vector& dy, + const std::vector& dz, + const std::vector& atom_type, + const std::vector& atom_charge, + const std::vector>& atom_pos, + const std::vector& data, + const int precision, + const int ndata_line = 6); - /** - * @brief The trilinear interpolation method - * - * Trilinear interpolation is a method for interpolating grid data in 3D space. - * It estimates the value at a given position by interpolating the data along the three adjacent points. - * - * Specifically, for 3D grid data, trilinear interpolation requires determining the eight data points that are - * closest to the point where the estimation is required. These data points form a cube, with vertices at - * (x0,y0,z0), (x0+1,y0,z0), (x0,y0+1,z0), (x0+1,y0+1,z0), (x0,y0,z0+1), (x0+1,y0,z0+1), (x0,y0+1,z0+1) and - * (x0+1,y0+1,z0+1). Here, (x0,y0,z0) is the data point closest to the estimation point and has coordinate - * values less than those of the estimation point. - * - * For the estimation location (x,y,z), its estimated value in the grid can be calculated using the following - * formula: f(x,y,z) = f000(1-dx)(1-dy)(1-dz) + f100dx(1-dy)(1-dz) + f010(1-dx)dy(1-dz) + - * f001(1-dx)(1-dy)dz + f101dx(1-dy)dz + f011(1-dx)dydz + - * f110dxdy(1-dz) + f111dxdydz - * where fijk represents the data value at vertex i,j,k of the cube, and dx = x - x0, dy = y - y0, dz = z - z0 - * represent the distance between the estimation point and the closest data point in each of the three - * directions, divided by the grid spacing. Here, it is assumed that the grid spacing is equal and can be - * omitted during computation. - * - * @param data_in the input data of size nxyz_read - * @param nx_read nx read from file - * @param ny_read ny read from file - * @param nz_read nz read from file - * @param nx the dimension of grids along x - * @param ny the dimension of grids along y - * @param nz the dimension of grids along z - * @param data_out the interpolated results of size nxyz - */ +/** + * @brief The trilinear interpolation method + * + * Trilinear interpolation is a method for interpolating grid data in 3D space. + * It estimates the value at a given position by interpolating the data along the three adjacent points. + * + * Specifically, for 3D grid data, trilinear interpolation requires determining the eight data points that are + * closest to the point where the estimation is required. These data points form a cube, with vertices at + * (x0,y0,z0), (x0+1,y0,z0), (x0,y0+1,z0), (x0+1,y0+1,z0), (x0,y0,z0+1), (x0+1,y0,z0+1), (x0,y0+1,z0+1) and + * (x0+1,y0+1,z0+1). Here, (x0,y0,z0) is the data point closest to the estimation point and has coordinate + * values less than those of the estimation point. + * + * For the estimation location (x,y,z), its estimated value in the grid can be calculated using the following + * formula: f(x,y,z) = f000(1-dx)(1-dy)(1-dz) + f100dx(1-dy)(1-dz) + f010(1-dx)dy(1-dz) + + * f001(1-dx)(1-dy)dz + f101dx(1-dy)dz + f011(1-dx)dydz + + * f110dxdy(1-dz) + f111dxdydz + * where fijk represents the data value at vertex i,j,k of the cube, and dx = x - x0, dy = y - y0, dz = z - z0 + * represent the distance between the estimation point and the closest data point in each of the three + * directions, divided by the grid spacing. Here, it is assumed that the grid spacing is equal and can be + * omitted during computation. + * + * @param data_in the input data of size nxyz_read + * @param nx_read nx read from file + * @param ny_read ny read from file + * @param nz_read nz read from file + * @param nx the dimension of grids along x + * @param ny the dimension of grids along y + * @param nz the dimension of grids along z + * @param data_out the interpolated results of size nxyz + */ void trilinear_interpolate(const double* const data_in, - const int& nx_read, - const int& ny_read, - const int& nz_read, - const int& nx, - const int& ny, - const int& nz, - double* data_out); -} + const int& nx_read, + const int& ny_read, + const int& nz_read, + const int& nx, + const int& ny, + const int& nz, + double* data_out); +} // namespace ModuleIO #endif diff --git a/source/module_io/get_pchg_pw.h b/source/module_io/get_pchg_pw.h index 98dfb72e9c..4e601b0d6e 100644 --- a/source/module_io/get_pchg_pw.h +++ b/source/module_io/get_pchg_pw.h @@ -8,6 +8,7 @@ #include "module_basis/module_pw/pw_basis_k.h" #include "module_cell/unitcell.h" #include "module_elecstate/elecstate.h" +#include "module_elecstate/module_charge/charge.h" #include "module_elecstate/module_charge/symmetry_rho.h" #include "module_hamilt_pw/hamilt_pwdft/parallel_grid.h" #include "module_psi/psi.h" @@ -25,11 +26,6 @@ void get_pchg_pw(const std::vector& out_pchg, const int ny, const int nz, const int nxyz, - const int nks, - const std::vector& isk, - const std::vector& wk, - const int pw_big_bz, - const int pw_big_nbz, const int ngmc, UnitCell* ucell, const psi::Psi>* psi, @@ -38,168 +34,188 @@ void get_pchg_pw(const std::vector& out_pchg, const Device* ctx, const Parallel_Grid& pgrid, const std::string& global_out_dir, - const bool if_separate_k) + const bool if_separate_k, + const K_Vectors& kv, + const int kpar, + const int my_pool, + const Charge* chg) // Charge class is needed for the charge density reduce { - // bands_picked is a vector of 0s and 1s, where 1 means the band is picked to output - std::vector bands_picked(nbands, 0); + // Get necessary parameters from kv + const int nks = kv.get_nks(); // current process pool k-point count + const int nkstot = kv.get_nkstot(); // total k-point count - // Check if length of out_pchg is valid - if (static_cast(out_pchg.size()) > nbands) + // Loop over k-parallelism + for (int ip = 0; ip < kpar; ++ip) { - ModuleBase::WARNING_QUIT("ModuleIO::get_pchg_pw", - "The number of bands specified by `out_pchg` in the " - "INPUT file exceeds `nbands`!"); - } - - // Check if all elements in bands_picked are 0 or 1 - for (int value: out_pchg) - { - if (value != 0 && value != 1) + if (my_pool != ip) { - ModuleBase::WARNING_QUIT("ModuleIO::get_pchg_pw", - "The elements of `out_pchg` must be either 0 or 1. " - "Invalid values found!"); + continue; } - } - - // Fill bands_picked with values from out_pchg - // Remaining bands are already set to 0 - int length = std::min(static_cast(out_pchg.size()), nbands); - for (int i = 0; i < length; ++i) - { - // out_pchg rely on function parse_expression - bands_picked[i] = static_cast(out_pchg[i]); - } - std::vector> wfcr(nxyz); - std::vector> rho_band(nspin, std::vector(nxyz)); + // bands_picked is a vector of 0s and 1s, where 1 means the band is picked to output + std::vector bands_picked(nbands, 0); - for (int ib = 0; ib < nbands; ++ib) - { - // Skip the loop iteration if bands_picked[ib] is 0 - if (!bands_picked[ib]) + // Check if length of out_pchg is valid + if (static_cast(out_pchg.size()) > nbands) { - continue; + ModuleBase::WARNING_QUIT("ModuleIO::get_pchg_pw", + "The number of bands specified by `out_pchg` in the " + "INPUT file exceeds `nbands`!"); } - for (int is = 0; is < nspin; ++is) + // Check if all elements in bands_picked are 0 or 1 + for (int value: out_pchg) { - std::fill(rho_band[is].begin(), rho_band[is].end(), 0.0); + if (value != 0 && value != 1) + { + ModuleBase::WARNING_QUIT("ModuleIO::get_pchg_pw", + "The elements of `out_pchg` must be either 0 or 1. " + "Invalid values found!"); + } } - if (if_separate_k) + // Fill bands_picked with values from out_pchg + // Remaining bands are already set to 0 + int length = std::min(static_cast(out_pchg.size()), nbands); + for (int i = 0; i < length; ++i) { - for (int ik = 0; ik < nks; ++ik) - { - const int spin_index = isk[ik]; - std::cout << " Calculating band-decomposed charge density for band " << ib + 1 << ", k-point " - << ik % (nks / nspin) + 1 << ", spin " << spin_index + 1 << std::endl; + // out_pchg rely on function parse_expression + bands_picked[i] = static_cast(out_pchg[i]); + } - psi->fix_k(ik); - pw_wfc->recip_to_real(ctx, &psi[0](ib, 0), wfcr.data(), ik); + std::vector> wfcr(nxyz); + std::vector> rho_band(nspin, std::vector(nxyz)); - // To ensure the normalization of charge density in multi-k calculation (if if_separate_k is true) - double wg_sum_k = 0; - for (int ik_tmp = 0; ik_tmp < nks / nspin; ++ik_tmp) - { - wg_sum_k += wk[ik_tmp]; - } + for (int ib = 0; ib < nbands; ++ib) + { + // Skip the loop iteration if bands_picked[ib] is 0 + if (!bands_picked[ib]) + { + continue; + } - double w1 = static_cast(wg_sum_k / ucell->omega); + for (int is = 0; is < nspin; ++is) + { + std::fill(rho_band[is].begin(), rho_band[is].end(), 0.0); + } - for (int i = 0; i < nxyz; ++i) + if (if_separate_k) + { + for (int ik = 0; ik < nks; ++ik) { - rho_band[spin_index][i] = std::norm(wfcr[i]) * w1; + const int ikstot = kv.ik2iktot[ik]; // global k-point index + const int spin_index = kv.isk[ik]; // spin index + const int k_number = ikstot % (nkstot / nspin) + 1; // k-point number, starting from 1 + + psi->fix_k(ik); + pw_wfc->recip_to_real(ctx, &psi[0](ib, 0), wfcr.data(), ik); + + // To ensure the normalization of charge density in multi-k calculation (if if_separate_k is true) + double wg_sum_k = 0.0; + if (nspin == 1) + { + wg_sum_k = 2.0; + } + else if (nspin == 2) + { + wg_sum_k = 1.0; + } + else + { + ModuleBase::WARNING_QUIT("ModuleIO::get_pchg_pw", + "Real space partial charge output currently do not support " + "noncollinear polarized calculation (nspin = 4)!"); + } + + double w1 = static_cast(wg_sum_k / ucell->omega); + + for (int i = 0; i < nxyz; ++i) + { + rho_band[spin_index][i] = std::norm(wfcr[i]) * w1; + } + + std::stringstream ssc; + ssc << global_out_dir << "BAND" << ib + 1 << "_K" << k_number << "_SPIN" << spin_index + 1 + << "_CHG.cube"; + + ModuleIO::write_vdata_palgrid(pgrid, + rho_band[spin_index].data(), + spin_index, + nspin, + 0, + ssc.str(), + 0.0, + ucell, + 11, + 1, + true); // reduce_all_pool is true } - - std::cout << " Writing cube files..."; - - std::stringstream ssc; - ssc << global_out_dir << "BAND" << ib + 1 << "_K" << ik % (nks / nspin) + 1 << "_SPIN" << spin_index + 1 - << "_CHG.cube"; - - ModuleIO::write_vdata_palgrid(pgrid, - rho_band[spin_index].data(), - spin_index, - nspin, - 0, - ssc.str(), - 0.0, - ucell); - - std::cout << " Complete!" << std::endl; } - } - else - { - for (int ik = 0; ik < nks; ++ik) + else { - const int spin_index = isk[ik]; - std::cout << " Calculating band-decomposed charge density for band " << ib + 1 << ", k-point " - << ik % (nks / nspin) + 1 << ", spin " << spin_index + 1 << std::endl; + for (int ik = 0; ik < nks; ++ik) + { + const int ikstot = kv.ik2iktot[ik]; // global k-point index + const int spin_index = kv.isk[ik]; // spin index + const int k_number = ikstot % (nkstot / nspin) + 1; // k-point number, starting from 1 - psi->fix_k(ik); - pw_wfc->recip_to_real(ctx, &psi[0](ib, 0), wfcr.data(), ik); + psi->fix_k(ik); + pw_wfc->recip_to_real(ctx, &psi[0](ib, 0), wfcr.data(), ik); - double w1 = static_cast(wk[ik] / ucell->omega); + double w1 = static_cast(kv.wk[ik] / ucell->omega); - for (int i = 0; i < nxyz; ++i) - { - rho_band[spin_index][i] += std::norm(wfcr[i]) * w1; + for (int i = 0; i < nxyz; ++i) + { + rho_band[spin_index][i] += std::norm(wfcr[i]) * w1; + } } - } - // Symmetrize the charge density, otherwise the results are incorrect if the symmetry is on - std::cout << " Symmetrizing band-decomposed charge density..." << std::endl; - Symmetry_rho srho; - for (int is = 0; is < nspin; ++is) - { - // Use vector instead of raw pointers - std::vector rho_save_pointers(nspin); - for (int s = 0; s < nspin; ++s) +#ifdef __MPI + // Reduce the charge density across all pools if kpar > 1 + if (kpar > 1 && chg != nullptr) { - rho_save_pointers[s] = rho_band[s].data(); + for (int is = 0; is < nspin; ++is) + { + chg->reduce_diff_pools(rho_band[is].data()); + } } +#endif - std::vector>> rhog(nspin, std::vector>(ngmc)); - - // Convert vector of vectors to vector of pointers - std::vector*> rhog_pointers(nspin); - for (int s = 0; s < nspin; ++s) + // Symmetrize the charge density, otherwise the results are incorrect if the symmetry is on + std::cout << " Symmetrizing band-decomposed charge density..." << std::endl; + Symmetry_rho srho; + for (int is = 0; is < nspin; ++is) { - rhog_pointers[s] = rhog[s].data(); + // Use vector instead of raw pointers + std::vector rho_save_pointers(nspin); + for (int s = 0; s < nspin; ++s) + { + rho_save_pointers[s] = rho_band[s].data(); + } + + std::vector>> rhog(nspin, std::vector>(ngmc)); + + // Convert vector of vectors to vector of pointers + std::vector*> rhog_pointers(nspin); + for (int s = 0; s < nspin; ++s) + { + rhog_pointers[s] = rhog[s].data(); + } + + srho.begin(is, rho_save_pointers.data(), rhog_pointers.data(), ngmc, nullptr, pw_rhod, ucell->symm); } - srho.begin(is, - rho_save_pointers.data(), - rhog_pointers.data(), - ngmc, - nullptr, - pw_rhod, - ucell->symm); - } - - std::cout << " Writing cube files..."; - - for (int is = 0; is < nspin; ++is) - { - std::stringstream ssc; - ssc << global_out_dir << "BAND" << ib + 1 << "_SPIN" << is + 1 << "_CHG.cube"; - - ModuleIO::write_vdata_palgrid(pgrid, - rho_band[is].data(), - is, - nspin, - 0, - ssc.str(), - 0.0, - ucell); - } + for (int is = 0; is < nspin; ++is) + { + std::stringstream ssc; + ssc << global_out_dir << "BAND" << ib + 1 << "_SPIN" << is + 1 << "_CHG.cube"; - std::cout << " Complete!" << std::endl; - } - } -} + ModuleIO::write_vdata_palgrid(pgrid, rho_band[is].data(), is, nspin, 0, ssc.str(), 0.0, ucell); + } + } // else if_separate_k is false + } // end of ib loop over nbands + } // end of ip loop over kpar +} // get_pchg_pw } // namespace ModuleIO #endif // GET_PCHG_PW_H diff --git a/source/module_io/get_wf_pw.h b/source/module_io/get_wf_pw.h index 29b831f786..edeb78fccf 100644 --- a/source/module_io/get_wf_pw.h +++ b/source/module_io/get_wf_pw.h @@ -26,193 +26,220 @@ void get_wf_pw(const std::vector& out_wfc_norm, const int ny, const int nz, const int nxyz, - const int nks, - const std::vector& isk, - const std::vector& wk, - const int pw_big_bz, - const int pw_big_nbz, - const int ngmc, UnitCell* ucell, const psi::Psi>* psi, - const ModulePW::PW_Basis* pw_rhod, const ModulePW::PW_Basis_K* pw_wfc, const Device* ctx, const Parallel_Grid& pgrid, - const std::string& global_out_dir) + const std::string& global_out_dir, + const K_Vectors& kv, + const int kpar, + const int my_pool) { - // bands_picked is a vector of 0s and 1s, where 1 means the band is picked to output - std::vector bands_picked_norm(nbands, 0); - std::vector bands_picked_re_im(nbands, 0); + // Get necessary parameters from kv + const int nks = kv.get_nks(); // current process pool k-point count + const int nkstot = kv.get_nkstot(); // total k-point count - // Check if length of out_wfc_norm and out_wfc_re_im is valid - if (static_cast(out_wfc_norm.size()) > nbands || static_cast(out_wfc_re_im.size()) > nbands) + // Loop over k-parallelism + for (int ip = 0; ip < kpar; ++ip) { - ModuleBase::WARNING_QUIT("ModuleIO::get_wf_pw", - "The number of bands specified by `out_wfc_norm` or `out_wfc_re_im` in the " - "INPUT file exceeds `nbands`!"); - } - - // Check if all elements in bands_picked are 0 or 1 - for (int value: out_wfc_norm) - { - if (value != 0 && value != 1) + if (my_pool != ip) { - ModuleBase::WARNING_QUIT("ModuleIO::get_wf_pw", - "The elements of `out_wfc_norm` must be either 0 or 1. " - "Invalid values found!"); + continue; } - } - for (int value: out_wfc_re_im) - { - if (value != 0 && value != 1) + + // bands_picked is a vector of 0s and 1s, where 1 means the band is picked to output + std::vector bands_picked_norm(nbands, 0); + std::vector bands_picked_re_im(nbands, 0); + + // Check if length of out_wfc_norm and out_wfc_re_im is valid + if (static_cast(out_wfc_norm.size()) > nbands || static_cast(out_wfc_re_im.size()) > nbands) { ModuleBase::WARNING_QUIT("ModuleIO::get_wf_pw", - "The elements of `out_wfc_re_im` must be either 0 or 1. " - "Invalid values found!"); + "The number of bands specified by `out_wfc_norm` or `out_wfc_re_im` in the " + "INPUT file exceeds `nbands`!"); } - } - - // Fill bands_picked with values from out_wfc_norm - // Remaining bands are already set to 0 - int length = std::min(static_cast(out_wfc_norm.size()), nbands); - for (int i = 0; i < length; ++i) - { - // out_wfc_norm rely on function parse_expression - bands_picked_norm[i] = static_cast(out_wfc_norm[i]); - } - length = std::min(static_cast(out_wfc_re_im.size()), nbands); - for (int i = 0; i < length; ++i) - { - bands_picked_re_im[i] = static_cast(out_wfc_re_im[i]); - } - std::vector> wfcr_norm(nxyz); - std::vector> rho_band_norm(nspin, std::vector(nxyz)); - - for (int ib = 0; ib < nbands; ++ib) - { - // Skip the loop iteration if bands_picked[ib] is 0 - if (!bands_picked_norm[ib]) + // Check if all elements in bands_picked are 0 or 1 + for (int value: out_wfc_norm) { - continue; + if (value != 0 && value != 1) + { + ModuleBase::WARNING_QUIT("ModuleIO::get_wf_pw", + "The elements of `out_wfc_norm` must be either 0 or 1. " + "Invalid values found!"); + } + } + for (int value: out_wfc_re_im) + { + if (value != 0 && value != 1) + { + ModuleBase::WARNING_QUIT("ModuleIO::get_wf_pw", + "The elements of `out_wfc_re_im` must be either 0 or 1. " + "Invalid values found!"); + } } - for (int is = 0; is < nspin; ++is) + // Fill bands_picked with values from out_wfc_norm + // Remaining bands are already set to 0 + int length = std::min(static_cast(out_wfc_norm.size()), nbands); + for (int i = 0; i < length; ++i) { - std::fill(rho_band_norm[is].begin(), rho_band_norm[is].end(), 0.0); + // out_wfc_norm rely on function parse_expression + bands_picked_norm[i] = static_cast(out_wfc_norm[i]); } - for (int ik = 0; ik < nks; ++ik) + length = std::min(static_cast(out_wfc_re_im.size()), nbands); + for (int i = 0; i < length; ++i) { - const int spin_index = isk[ik]; - std::cout << " Calculating wave function norm for band " << ib + 1 << ", k-point " << ik % (nks / nspin) + 1 - << ", spin " << spin_index + 1 << std::endl; + bands_picked_re_im[i] = static_cast(out_wfc_re_im[i]); + } - psi->fix_k(ik); - pw_wfc->recip_to_real(ctx, &psi[0](ib, 0), wfcr_norm.data(), ik); + std::vector> wfcr_norm(nxyz); + std::vector> rho_band_norm(nspin, std::vector(nxyz)); - // To ensure the normalization of charge density in multi-k calculation - double wg_sum_k = 0; - for (int ik_tmp = 0; ik_tmp < nks / nspin; ++ik_tmp) + for (int ib = 0; ib < nbands; ++ib) + { + // Skip the loop iteration if bands_picked[ib] is 0 + if (!bands_picked_norm[ib]) { - wg_sum_k += wk[ik_tmp]; + continue; } - double w1 = static_cast(wg_sum_k / ucell->omega); - - for (int i = 0; i < nxyz; ++i) + for (int is = 0; is < nspin; ++is) { - rho_band_norm[spin_index][i] = std::abs(wfcr_norm[i]) * std::sqrt(w1); + std::fill(rho_band_norm[is].begin(), rho_band_norm[is].end(), 0.0); + } + for (int ik = 0; ik < nks; ++ik) + { + const int ikstot = kv.ik2iktot[ik]; // global k-point index + const int spin_index = kv.isk[ik]; // spin index + const int k_number = ikstot % (nkstot / nspin) + 1; // k-point number, starting from 1 + + psi->fix_k(ik); + pw_wfc->recip_to_real(ctx, &psi[0](ib, 0), wfcr_norm.data(), ik); + + // To ensure the normalization of charge density in multi-k calculation + double wg_sum_k = 0.0; + if (nspin == 1) + { + wg_sum_k = 2.0; + } + else if (nspin == 2) + { + wg_sum_k = 1.0; + } + else + { + ModuleBase::WARNING_QUIT("ModuleIO::get_wf_pw", + "Real space wavefunction output currently do not support noncollinear " + "polarized calculation (nspin = 4)!"); + } + + double w1 = static_cast(wg_sum_k / ucell->omega); + + for (int i = 0; i < nxyz; ++i) + { + rho_band_norm[spin_index][i] = std::abs(wfcr_norm[i]) * std::sqrt(w1); + } + + std::stringstream ss_file; + ss_file << global_out_dir << "wf" << ib + 1 << "s" << spin_index + 1 << "k" << k_number << ".cube"; + + ModuleIO::write_vdata_palgrid(pgrid, + rho_band_norm[spin_index].data(), + spin_index, + nspin, + 0, + ss_file.str(), + 0.0, + ucell, + 11, + 1, + true); // reduce_all_pool is true } - - std::cout << " Writing cube files..."; - - std::stringstream ss_file; - ss_file << global_out_dir << "wf" << ib + 1 << "s" << spin_index + 1 << "k" << ik % (nks / nspin) + 1 - << ".cube"; - - ModuleIO::write_vdata_palgrid(pgrid, - rho_band_norm[spin_index].data(), - spin_index, - nspin, - 0, - ss_file.str(), - 0.0, - ucell); - - std::cout << " Complete!" << std::endl; } - } - std::vector> wfc_re_im(nxyz); - std::vector> rho_band_re(nspin, std::vector(nxyz)); - std::vector> rho_band_im(nspin, std::vector(nxyz)); + std::vector> wfc_re_im(nxyz); + std::vector> rho_band_re(nspin, std::vector(nxyz)); + std::vector> rho_band_im(nspin, std::vector(nxyz)); - for (int ib = 0; ib < nbands; ++ib) - { - // Skip the loop iteration if bands_picked[ib] is 0 - if (!bands_picked_re_im[ib]) + for (int ib = 0; ib < nbands; ++ib) { - continue; - } - - for (int is = 0; is < nspin; ++is) - { - std::fill(rho_band_re[is].begin(), rho_band_re[is].end(), 0.0); - std::fill(rho_band_im[is].begin(), rho_band_im[is].end(), 0.0); - } - for (int ik = 0; ik < nks; ++ik) - { - const int spin_index = isk[ik]; - std::cout << " Calculating wave function real and imaginary part for band " << ib + 1 << ", k-point " - << ik % (nks / nspin) + 1 << ", spin " << spin_index + 1 << std::endl; - - psi->fix_k(ik); - pw_wfc->recip_to_real(ctx, &psi[0](ib, 0), wfc_re_im.data(), ik); - - // To ensure the normalization of charge density in multi-k calculation - double wg_sum_k = 0; - for (int ik_tmp = 0; ik_tmp < nks / nspin; ++ik_tmp) + // Skip the loop iteration if bands_picked[ib] is 0 + if (!bands_picked_re_im[ib]) { - wg_sum_k += wk[ik_tmp]; + continue; } - double w1 = static_cast(wg_sum_k / ucell->omega); - - for (int i = 0; i < nxyz; ++i) + for (int is = 0; is < nspin; ++is) { - rho_band_re[spin_index][i] = std::real(wfc_re_im[i]) * std::sqrt(w1); - rho_band_im[spin_index][i] = std::imag(wfc_re_im[i]) * std::sqrt(w1); + std::fill(rho_band_re[is].begin(), rho_band_re[is].end(), 0.0); + std::fill(rho_band_im[is].begin(), rho_band_im[is].end(), 0.0); + } + for (int ik = 0; ik < nks; ++ik) + { + const int ikstot = kv.ik2iktot[ik]; // global k-point index + const int spin_index = kv.isk[ik]; // spin index + const int k_number = ikstot % (nkstot / nspin) + 1; // k-point number, starting from 1 + + psi->fix_k(ik); + pw_wfc->recip_to_real(ctx, &psi[0](ib, 0), wfc_re_im.data(), ik); + + // To ensure the normalization of charge density in multi-k calculation + double wg_sum_k = 0.0; + if (nspin == 1) + { + wg_sum_k = 2.0; + } + else if (nspin == 2) + { + wg_sum_k = 1.0; + } + else + { + ModuleBase::WARNING_QUIT("ModuleIO::get_wf_pw", + "Real space wavefunction output currently do not support noncollinear " + "polarized calculation (nspin = 4)!"); + } + + double w1 = static_cast(wg_sum_k / ucell->omega); + + for (int i = 0; i < nxyz; ++i) + { + rho_band_re[spin_index][i] = std::real(wfc_re_im[i]) * std::sqrt(w1); + rho_band_im[spin_index][i] = std::imag(wfc_re_im[i]) * std::sqrt(w1); + } + + std::stringstream ss_real; + ss_real << global_out_dir << "wf" << ib + 1 << "s" << spin_index + 1 << "k" << k_number << "real.cube"; + + ModuleIO::write_vdata_palgrid(pgrid, + rho_band_re[spin_index].data(), + spin_index, + nspin, + 0, + ss_real.str(), + 0.0, + ucell, + 11, + 1, + true); // reduce_all_pool is true + + std::stringstream ss_imag; + ss_imag << global_out_dir << "wf" << ib + 1 << "s" << spin_index + 1 << "k" << k_number << "imag.cube"; + + ModuleIO::write_vdata_palgrid(pgrid, + rho_band_im[spin_index].data(), + spin_index, + nspin, + 0, + ss_imag.str(), + 0.0, + ucell, + 11, + 1, + true); // reduce_all_pool is true } - - std::cout << " Writing cube files..."; - - std::stringstream ss_real; - ss_real << global_out_dir << "wf" << ib + 1 << "s" << spin_index + 1 << "k" << ik % (nks / nspin) + 1 - << "real.cube"; - - ModuleIO::write_vdata_palgrid(pgrid, - rho_band_re[spin_index].data(), - spin_index, - nspin, - 0, - ss_real.str(), - 0.0, - ucell); - - std::stringstream ss_imag; - ss_imag << global_out_dir << "wf" << ib + 1 << "s" << spin_index + 1 << "k" << ik % (nks / nspin) + 1 - << "imag.cube"; - - ModuleIO::write_vdata_palgrid(pgrid, - rho_band_im[spin_index].data(), - spin_index, - nspin, - 0, - ss_imag.str(), - 0.0, - ucell); - - std::cout << " Complete!" << std::endl; } } } diff --git a/source/module_io/write_cube.cpp b/source/module_io/write_cube.cpp index b6c0fb8f9a..acff92eac1 100644 --- a/source/module_io/write_cube.cpp +++ b/source/module_io/write_cube.cpp @@ -1,31 +1,34 @@ #include "module_base/element_name.h" +#include "module_base/parallel_comm.h" +#include "module_hamilt_pw/hamilt_pwdft/parallel_grid.h" #include "module_io/cube_io.h" #include "module_parameter/parameter.h" -#include -#include "module_hamilt_pw/hamilt_pwdft/parallel_grid.h" + +#include #ifdef __MPI #include #endif -#include - -void ModuleIO::write_vdata_palgrid( - const Parallel_Grid& pgrid, - const double* const data, - const int is, - const int nspin, - const int iter, - const std::string& fn, - const double ef, - const UnitCell*const ucell, - const int precision, - const int out_fermi) +#include + +void ModuleIO::write_vdata_palgrid(const Parallel_Grid& pgrid, + const double* const data, + const int is, + const int nspin, + const int iter, + const std::string& fn, + const double ef, + const UnitCell* const ucell, + const int precision, + const int out_fermi, + const bool reduce_all_pool) { ModuleBase::TITLE("ModuleIO", "write_vdata_palgrid"); const int my_rank = GlobalV::MY_RANK; const int my_pool = GlobalV::MY_POOL; + const int rank_in_pool = GlobalV::RANK_IN_POOL; time_t start; time_t end; @@ -39,19 +42,26 @@ void ModuleIO::write_vdata_palgrid( start = time(nullptr); // reduce - std::vector data_xyz_full(nxyz); // data to be written -#ifdef __MPI // reduce to rank 0 - if (my_pool == 0 && GlobalV::MY_BNDGROUP == 0) + std::vector data_xyz_full(nxyz); // data to be written +#ifdef __MPI // reduce to rank 0 + if (GlobalV::MY_BNDGROUP == 0) + { + pgrid.reduce(data_xyz_full.data(), data, reduce_all_pool); + } + if (!reduce_all_pool) { - pgrid.reduce(data_xyz_full.data(), data); + MPI_Barrier(MPI_COMM_WORLD); + } + else + { + MPI_Barrier(POOL_WORLD); } - MPI_Barrier(MPI_COMM_WORLD); #else std::memcpy(data_xyz_full.data(), data, nxyz * sizeof(double)); #endif // build the info structure - if (my_rank == 0) + if ((!reduce_all_pool && my_rank == 0) || (reduce_all_pool && rank_in_pool == 0)) { /// output header for cube file ss << "STEP: " << iter << " Cubefile created from ABACUS. Inner loop is z, followed by y and x" << std::endl; @@ -83,23 +93,23 @@ void ModuleIO::write_vdata_palgrid( } std::vector comment(2); - for (int i = 0;i < 2;++i) - { - std::getline(ss, comment[i]); - } + for (int i = 0; i < 2; ++i) + { + std::getline(ss, comment[i]); + } - double fac = ucell->lat0; - std::vector dx = { fac * ucell->latvec.e11 / double(nx), - fac * ucell->latvec.e12 / double(nx), - fac * ucell->latvec.e13 / double(nx) }; + double fac = ucell->lat0; + std::vector dx = {fac * ucell->latvec.e11 / double(nx), + fac * ucell->latvec.e12 / double(nx), + fac * ucell->latvec.e13 / double(nx)}; - std::vector dy = { fac * ucell->latvec.e21 / double(ny), - fac * ucell->latvec.e22 / double(ny), - fac * ucell->latvec.e23 / double(ny) }; + std::vector dy = {fac * ucell->latvec.e21 / double(ny), + fac * ucell->latvec.e22 / double(ny), + fac * ucell->latvec.e23 / double(ny)}; - std::vector dz = { fac * ucell->latvec.e31 / double(nz), - fac * ucell->latvec.e32 / double(nz), - fac * ucell->latvec.e33 / double(nz) }; + std::vector dz = {fac * ucell->latvec.e31 / double(nz), + fac * ucell->latvec.e32 / double(nz), + fac * ucell->latvec.e33 / double(nz)}; std::string element = ""; std::vector atom_type; @@ -135,17 +145,27 @@ void ModuleIO::write_vdata_palgrid( } } atom_type.push_back(z); - atom_charge.push_back(ucell->atoms[it].ncpp.zv); - atom_pos.push_back({ fac * ucell->atoms[it].tau[ia].x, - fac * ucell->atoms[it].tau[ia].y, - fac * ucell->atoms[it].tau[ia].z }); + atom_charge.push_back(ucell->atoms[it].ncpp.zv); + atom_pos.push_back({fac * ucell->atoms[it].tau[ia].x, + fac * ucell->atoms[it].tau[ia].y, + fac * ucell->atoms[it].tau[ia].z}); } } - write_cube(fn, comment, ucell->nat, {0.0, 0.0, 0.0}, - nx, ny, nz, - dx, dy, dz, - atom_type, atom_charge, atom_pos, - data_xyz_full, precision); + write_cube(fn, + comment, + ucell->nat, + {0.0, 0.0, 0.0}, + nx, + ny, + nz, + dx, + dy, + dz, + atom_type, + atom_charge, + atom_pos, + data_xyz_full, + precision); end = time(nullptr); ModuleBase::GlobalFunc::OUT_TIME("write_vdata_palgrid", start, end); @@ -155,27 +175,27 @@ void ModuleIO::write_vdata_palgrid( } void ModuleIO::write_cube(const std::string& file, - const std::vector& comment, - const int& natom, - const std::vector& origin, - const int& nx, - const int& ny, - const int& nz, - const std::vector& dx, - const std::vector& dy, - const std::vector& dz, - const std::vector& atom_type, - const std::vector& atom_charge, - const std::vector>& atom_pos, - const std::vector& data, - const int precision, - const int ndata_line) + const std::vector& comment, + const int& natom, + const std::vector& origin, + const int& nx, + const int& ny, + const int& nz, + const std::vector& dx, + const std::vector& dy, + const std::vector& dz, + const std::vector& atom_type, + const std::vector& atom_charge, + const std::vector>& atom_pos, + const std::vector& data, + const int precision, + const int ndata_line) { assert(comment.size() >= 2); - for (int i = 0;i < 2;++i) - { - assert(comment[i].find("\n") == std::string::npos); - } + for (int i = 0; i < 2; ++i) + { + assert(comment[i].find("\n") == std::string::npos); + } assert(origin.size() >= 3); assert(dx.size() >= 3); @@ -185,35 +205,34 @@ void ModuleIO::write_cube(const std::string& file, assert(atom_charge.size() >= natom); assert(atom_pos.size() >= natom); - for (int i = 0;i < natom;++i) - { - assert(atom_pos[i].size() >= 3); - } + for (int i = 0; i < natom; ++i) + { + assert(atom_pos[i].size() >= 3); + } assert(data.size() >= nx * ny * nz); std::ofstream ofs(file); - for (int i = 0;i < 2;++i) - { - ofs << comment[i] << "\n"; - } + for (int i = 0; i < 2; ++i) + { + ofs << comment[i] << "\n"; + } ofs << std::fixed; - ofs << std::setprecision(1); // as before + ofs << std::setprecision(1); // as before ofs << natom << " " << origin[0] << " " << origin[1] << " " << origin[2] << " \n"; - ofs << std::setprecision(6); //as before + ofs << std::setprecision(6); // as before ofs << nx << " " << dx[0] << " " << dx[1] << " " << dx[2] << "\n"; ofs << ny << " " << dy[0] << " " << dy[1] << " " << dy[2] << "\n"; ofs << nz << " " << dz[0] << " " << dz[1] << " " << dz[2] << "\n"; - for (int i = 0;i < natom;++i) + for (int i = 0; i < natom; ++i) { - ofs << " " << atom_type[i] << " " << atom_charge[i] - << " " << atom_pos[i][0] << " " << atom_pos[i][1] - << " " << atom_pos[i][2] << "\n"; + ofs << " " << atom_type[i] << " " << atom_charge[i] << " " << atom_pos[i][0] << " " << atom_pos[i][1] << " " + << atom_pos[i][2] << "\n"; } ofs.unsetf(std::ofstream::fixed); @@ -222,10 +241,13 @@ void ModuleIO::write_cube(const std::string& file, const int nxy = nx * ny; for (int ixy = 0; ixy < nxy; ++ixy) { - for (int iz = 0;iz < nz;++iz) + for (int iz = 0; iz < nz; ++iz) { ofs << " " << data[ixy * nz + iz]; - if ((iz + 1) % ndata_line == 0 && iz != nz - 1) { ofs << "\n"; } + if ((iz + 1) % ndata_line == 0 && iz != nz - 1) + { + ofs << "\n"; + } } ofs << "\n"; } diff --git a/tests/01_PW/113_PW_get_pchg_kpar/INPUT b/tests/01_PW/113_PW_get_pchg_kpar/INPUT new file mode 100644 index 0000000000..1a7c8d2f35 --- /dev/null +++ b/tests/01_PW/113_PW_get_pchg_kpar/INPUT @@ -0,0 +1,22 @@ +INPUT_PARAMETERS +#Parameters (1.General) +suffix autotest +calculation scf + +nbands 6 +symmetry 1 +pseudo_dir ../../PP_ORB + +#Parameters (2.Iteration) +ecutwfc 20 +scf_thr 1e-9 +scf_nmax 100 + +#Parameters (3.Basis) +basis_type pw + +out_pchg 4*1 + +kpar 2 + +pw_seed 1 diff --git a/tests/01_PW/113_PW_get_pchg_kpar/KPT b/tests/01_PW/113_PW_get_pchg_kpar/KPT new file mode 100644 index 0000000000..f5f7f4ec34 --- /dev/null +++ b/tests/01_PW/113_PW_get_pchg_kpar/KPT @@ -0,0 +1,4 @@ +K_POINTS +0 +Gamma +2 2 2 0 0 0 diff --git a/tests/01_PW/113_PW_get_pchg_kpar/README b/tests/01_PW/113_PW_get_pchg_kpar/README new file mode 100644 index 0000000000..81b2c5e4ca --- /dev/null +++ b/tests/01_PW/113_PW_get_pchg_kpar/README @@ -0,0 +1 @@ +test the output of out_pchg when kpar > 1 diff --git a/tests/01_PW/113_PW_get_pchg_kpar/STRU b/tests/01_PW/113_PW_get_pchg_kpar/STRU new file mode 100644 index 0000000000..0041740d12 --- /dev/null +++ b/tests/01_PW/113_PW_get_pchg_kpar/STRU @@ -0,0 +1,19 @@ +ATOMIC_SPECIES +Si 14 Si_ONCV_PBE-1.0.upf upf201 + +LATTICE_CONSTANT +10.2 // add lattice constant + +LATTICE_VECTORS +0.0 0.5 0.5 +0.5 0.0 0.5 +0.5 0.5 0.0 + +ATOMIC_POSITIONS +Direct + +Si // Element type +0.0 // magnetism +2 +0.00 0.00 0.00 1 1 1 +0.25 0.25 0.25 1 1 1 diff --git a/tests/01_PW/113_PW_get_pchg_kpar/result.ref b/tests/01_PW/113_PW_get_pchg_kpar/result.ref new file mode 100644 index 0000000000..66703fb395 --- /dev/null +++ b/tests/01_PW/113_PW_get_pchg_kpar/result.ref @@ -0,0 +1,10 @@ +etotref -211.8032873347874 +etotperatomref -105.9016436674 +BAND1_SPIN1_CHG.cube 2 +BAND2_SPIN1_CHG.cube 2 +BAND3_SPIN1_CHG.cube 2 +BAND4_SPIN1_CHG.cube 2 +pointgroupref T_d +spacegroupref O_h +nksibzref 3 +totaltimeref 1.35 \ No newline at end of file diff --git a/tests/01_PW/114_PW_get_pchg_sepk/INPUT b/tests/01_PW/114_PW_get_pchg_sepk/INPUT new file mode 100644 index 0000000000..8bb4019fb5 --- /dev/null +++ b/tests/01_PW/114_PW_get_pchg_sepk/INPUT @@ -0,0 +1,24 @@ +INPUT_PARAMETERS +#Parameters (1.General) +suffix autotest +calculation scf + +nbands 6 +symmetry 1 +pseudo_dir ../../PP_ORB + +#Parameters (2.Iteration) +ecutwfc 20 +scf_thr 1e-9 +scf_nmax 100 + +#Parameters (3.Basis) +basis_type pw + +out_pchg 4*1 + +kpar 2 + +if_separate_k 1 + +pw_seed 1 diff --git a/tests/01_PW/114_PW_get_pchg_sepk/KPT b/tests/01_PW/114_PW_get_pchg_sepk/KPT new file mode 100644 index 0000000000..f5f7f4ec34 --- /dev/null +++ b/tests/01_PW/114_PW_get_pchg_sepk/KPT @@ -0,0 +1,4 @@ +K_POINTS +0 +Gamma +2 2 2 0 0 0 diff --git a/tests/01_PW/114_PW_get_pchg_sepk/README b/tests/01_PW/114_PW_get_pchg_sepk/README new file mode 100644 index 0000000000..1d9ca8bf74 --- /dev/null +++ b/tests/01_PW/114_PW_get_pchg_sepk/README @@ -0,0 +1 @@ +test the output of out_pchg when if_separate_k is true diff --git a/tests/01_PW/114_PW_get_pchg_sepk/STRU b/tests/01_PW/114_PW_get_pchg_sepk/STRU new file mode 100644 index 0000000000..0041740d12 --- /dev/null +++ b/tests/01_PW/114_PW_get_pchg_sepk/STRU @@ -0,0 +1,19 @@ +ATOMIC_SPECIES +Si 14 Si_ONCV_PBE-1.0.upf upf201 + +LATTICE_CONSTANT +10.2 // add lattice constant + +LATTICE_VECTORS +0.0 0.5 0.5 +0.5 0.0 0.5 +0.5 0.5 0.0 + +ATOMIC_POSITIONS +Direct + +Si // Element type +0.0 // magnetism +2 +0.00 0.00 0.00 1 1 1 +0.25 0.25 0.25 1 1 1 diff --git a/tests/01_PW/114_PW_get_pchg_sepk/result.ref b/tests/01_PW/114_PW_get_pchg_sepk/result.ref new file mode 100644 index 0000000000..03ea1ebc47 --- /dev/null +++ b/tests/01_PW/114_PW_get_pchg_sepk/result.ref @@ -0,0 +1,18 @@ +etotref -211.8032873348292 +etotperatomref -105.9016436674 +BAND1_K1_SPIN1_CHG.cube 2 +BAND1_K2_SPIN1_CHG.cube 2 +BAND1_K3_SPIN1_CHG.cube 2 +BAND2_K1_SPIN1_CHG.cube 2 +BAND2_K2_SPIN1_CHG.cube 2 +BAND2_K3_SPIN1_CHG.cube 2 +BAND3_K1_SPIN1_CHG.cube 2 +BAND3_K2_SPIN1_CHG.cube 2 +BAND3_K3_SPIN1_CHG.cube 2 +BAND4_K1_SPIN1_CHG.cube 2 +BAND4_K2_SPIN1_CHG.cube 2 +BAND4_K3_SPIN1_CHG.cube 2 +pointgroupref T_d +spacegroupref O_h +nksibzref 3 +totaltimeref 1.23 diff --git a/tests/01_PW/115_PW_get_wf_kpar/INPUT b/tests/01_PW/115_PW_get_wf_kpar/INPUT new file mode 100644 index 0000000000..ee62db77cd --- /dev/null +++ b/tests/01_PW/115_PW_get_wf_kpar/INPUT @@ -0,0 +1,22 @@ +INPUT_PARAMETERS +#Parameters (1.General) +suffix autotest +calculation scf + +nbands 6 +symmetry 1 +pseudo_dir ../../PP_ORB + +#Parameters (2.Iteration) +ecutwfc 100 +scf_thr 1e-9 +scf_nmax 100 + +#Parameters (3.Basis) +basis_type pw + +out_wfc_norm 1 3*0 + +kpar 3 + +pw_seed 1 diff --git a/tests/01_PW/115_PW_get_wf_kpar/KPT b/tests/01_PW/115_PW_get_wf_kpar/KPT new file mode 100644 index 0000000000..f5f7f4ec34 --- /dev/null +++ b/tests/01_PW/115_PW_get_wf_kpar/KPT @@ -0,0 +1,4 @@ +K_POINTS +0 +Gamma +2 2 2 0 0 0 diff --git a/tests/01_PW/115_PW_get_wf_kpar/README b/tests/01_PW/115_PW_get_wf_kpar/README new file mode 100644 index 0000000000..66fbd6257e --- /dev/null +++ b/tests/01_PW/115_PW_get_wf_kpar/README @@ -0,0 +1 @@ +test the output of out_wfc_norm and out_wfc_re_im when kpar > 1 diff --git a/tests/01_PW/115_PW_get_wf_kpar/STRU b/tests/01_PW/115_PW_get_wf_kpar/STRU new file mode 100644 index 0000000000..0041740d12 --- /dev/null +++ b/tests/01_PW/115_PW_get_wf_kpar/STRU @@ -0,0 +1,19 @@ +ATOMIC_SPECIES +Si 14 Si_ONCV_PBE-1.0.upf upf201 + +LATTICE_CONSTANT +10.2 // add lattice constant + +LATTICE_VECTORS +0.0 0.5 0.5 +0.5 0.0 0.5 +0.5 0.5 0.0 + +ATOMIC_POSITIONS +Direct + +Si // Element type +0.0 // magnetism +2 +0.00 0.00 0.00 1 1 1 +0.25 0.25 0.25 1 1 1 diff --git a/tests/01_PW/115_PW_get_wf_kpar/result.ref b/tests/01_PW/115_PW_get_wf_kpar/result.ref new file mode 100644 index 0000000000..2085b97b8e --- /dev/null +++ b/tests/01_PW/115_PW_get_wf_kpar/result.ref @@ -0,0 +1,9 @@ +etotref -211.8789253813293 +etotperatomref -105.9394626907 +wf1s1k1.cube 21.94720511 +wf1s1k2.cube 19.65846447 +wf1s1k3.cube 19.34110139 +pointgroupref T_d +spacegroupref O_h +nksibzref 3 +totaltimeref 1.95 \ No newline at end of file diff --git a/tests/01_PW/CASES_CPU.txt b/tests/01_PW/CASES_CPU.txt index f9eaa40556..66bb461e4e 100644 --- a/tests/01_PW/CASES_CPU.txt +++ b/tests/01_PW/CASES_CPU.txt @@ -99,6 +99,9 @@ 109_PW_PBE0 111_PW_get_pchg 112_PW_get_wf +113_PW_get_pchg_kpar +114_PW_get_pchg_sepk +115_PW_get_wf_kpar 201_PW_UPF201_Ce_f 202_PW_ONCV_Libxc 203_PW_OK