diff --git a/source/Makefile.Objects b/source/Makefile.Objects index 7a0ff0aadf..0fb198712d 100644 --- a/source/Makefile.Objects +++ b/source/Makefile.Objects @@ -189,8 +189,7 @@ OBJS_CELL=atom_pseudo.o\ check_atomic_stru.o\ OBJS_DEEPKS=LCAO_deepks.o\ - deepks_fgamma.o\ - deepks_fk.o\ + deepks_force.o\ LCAO_deepks_odelta.o\ LCAO_deepks_io.o\ LCAO_deepks_mpi.o\ diff --git a/source/module_esolver/lcao_before_scf.cpp b/source/module_esolver/lcao_before_scf.cpp index d51731bbde..e1c2fa4652 100644 --- a/source/module_esolver/lcao_before_scf.cpp +++ b/source/module_esolver/lcao_before_scf.cpp @@ -210,6 +210,8 @@ void ESolver_KS_LCAO::before_scf(UnitCell& ucell, const int istep) if (PARAM.globalv.deepks_setorb) { const Parallel_Orbitals* pv = &this->pv; + // allocate , psialpha is different every ion step, so it is allocated here + GlobalC::ld.allocate_psialpha(PARAM.inp.cal_force, ucell, orb_, this->gd); // build and save at beginning GlobalC::ld.build_psialpha(PARAM.inp.cal_force, ucell, orb_, this->gd, *(two_center_bundle_.overlap_orb_alpha)); diff --git a/source/module_esolver/lcao_others.cpp b/source/module_esolver/lcao_others.cpp index fe27014192..0907237682 100644 --- a/source/module_esolver/lcao_others.cpp +++ b/source/module_esolver/lcao_others.cpp @@ -216,6 +216,8 @@ void ESolver_KS_LCAO::others(UnitCell& ucell, const int istep) if (PARAM.globalv.deepks_setorb) { const Parallel_Orbitals* pv = &this->pv; + // allocate , psialpha is different every ion step, so it is allocated here + GlobalC::ld.allocate_psialpha(PARAM.inp.cal_force, ucell, orb_, this->gd); // build and save at beginning GlobalC::ld.build_psialpha(PARAM.inp.cal_force, ucell, orb_, this->gd, *(two_center_bundle_.overlap_orb_alpha)); diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_STRESS.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_STRESS.cpp index 988999c777..194da269e7 100644 --- a/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_STRESS.cpp +++ b/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_STRESS.cpp @@ -511,7 +511,14 @@ void Force_Stress_LCAO::getForceStress(UnitCell& ucell, { const std::vector>& dm_gamma = dynamic_cast*>(pelec)->get_DM()->get_DMK_vector(); - GlobalC::ld.cal_gdmx(dm_gamma, ucell, orb, gd, kv.get_nks(), kv.kvec_d, isstress); + GlobalC::ld.cal_gdmx(dm_gamma, + ucell, + orb, + gd, + kv.get_nks(), + kv.kvec_d, + GlobalC::ld.psialpha, + isstress); } else { @@ -520,7 +527,8 @@ void Force_Stress_LCAO::getForceStress(UnitCell& ucell, ->get_DM() ->get_DMK_vector(); - GlobalC::ld.cal_gdmx(dm_k, ucell, orb, gd, kv.get_nks(), kv.kvec_d, isstress); + GlobalC::ld + .cal_gdmx(dm_k, ucell, orb, gd, kv.get_nks(), kv.kvec_d, GlobalC::ld.psialpha, isstress); } if (PARAM.inp.deepks_out_unittest) { diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_gamma.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_gamma.cpp index 41b26e7e9f..59d71d2a4f 100644 --- a/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_gamma.cpp +++ b/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_gamma.cpp @@ -1,10 +1,10 @@ #include "FORCE.h" #include "module_base/memory.h" -#include "module_parameter/parameter.h" #include "module_base/parallel_reduce.h" #include "module_base/timer.h" #include "module_cell/module_neighbor/sltk_grid_driver.h" #include "module_hamilt_pw/hamilt_pwdft/global.h" +#include "module_parameter/parameter.h" #ifdef __DEEPKS #include "module_hamilt_lcao/module_deepks/LCAO_deepks.h" //caoyu add for deepks on 20210813 #include "module_hamilt_lcao/module_deepks/LCAO_deepks_io.h" @@ -12,8 +12,8 @@ #include "module_cell/module_neighbor/sltk_grid_driver.h" //GridD #include "module_elecstate/elecstate_lcao.h" #include "module_hamilt_lcao/hamilt_lcaodft/LCAO_domain.h" -#include "module_io/write_HS.h" #include "module_hamilt_lcao/hamilt_lcaodft/pulay_force_stress.h" +#include "module_io/write_HS.h" template <> void Force_LCAO::allocate(const UnitCell& ucell, @@ -147,28 +147,28 @@ void Force_LCAO::finish_ftable(ForceStressArrays& fsr) return; } -//template <> -//void Force_LCAO::test(Parallel_Orbitals& pv, double* mm, const std::string& name) +// template <> +// void Force_LCAO::test(Parallel_Orbitals& pv, double* mm, const std::string& name) //{ -// std::cout << "\n PRINT " << name << std::endl; -// std::cout << std::setprecision(6) << std::endl; -// for (int i = 0; i < PARAM.globalv.nlocal; i++) -// { -// for (int j = 0; j < PARAM.globalv.nlocal; j++) -// { -// if (std::abs(mm[i * PARAM.globalv.nlocal + j]) > 1.0e-5) -// { -// std::cout << std::setw(12) << mm[i * PARAM.globalv.nlocal + j]; -// } -// else -// { -// std::cout << std::setw(12) << "0"; -// } -// } -// std::cout << std::endl; -// } -// return; -//} +// std::cout << "\n PRINT " << name << std::endl; +// std::cout << std::setprecision(6) << std::endl; +// for (int i = 0; i < PARAM.globalv.nlocal; i++) +// { +// for (int j = 0; j < PARAM.globalv.nlocal; j++) +// { +// if (std::abs(mm[i * PARAM.globalv.nlocal + j]) > 1.0e-5) +// { +// std::cout << std::setw(12) << mm[i * PARAM.globalv.nlocal + j]; +// } +// else +// { +// std::cout << std::setw(12) << "0"; +// } +// } +// std::cout << std::endl; +// } +// return; +// } // be called in force_lo.cpp template <> @@ -210,20 +210,40 @@ void Force_LCAO::ftable(const bool isforce, // allocate DHloc_fixed_x, DHloc_fixed_y, DHloc_fixed_z this->allocate(ucell, gd, pv, fsr, two_center_bundle, orb); - const double* dSx[3] = { fsr.DSloc_x, fsr.DSloc_y, fsr.DSloc_z }; - const double* dSxy[6] = { fsr.DSloc_11, fsr.DSloc_12, fsr.DSloc_13, fsr.DSloc_22, fsr.DSloc_23, fsr.DSloc_33 }; + const double* dSx[3] = {fsr.DSloc_x, fsr.DSloc_y, fsr.DSloc_z}; + const double* dSxy[6] = {fsr.DSloc_11, fsr.DSloc_12, fsr.DSloc_13, fsr.DSloc_22, fsr.DSloc_23, fsr.DSloc_33}; // calculate the force related to 'energy density matrix'. - PulayForceStress::cal_pulay_fs(foverlap, soverlap, + PulayForceStress::cal_pulay_fs( + foverlap, + soverlap, this->cal_edm(pelec, *psi, *dm, *kv, pv, PARAM.inp.nspin, PARAM.inp.nbands, ucell, *ra), - ucell, pv, dSx, dSxy, isforce, isstress); - - const double* dHx[3] = { fsr.DHloc_fixed_x, fsr.DHloc_fixed_y, fsr.DHloc_fixed_z }; - const double* dHxy[6] = { fsr.DHloc_fixed_11, fsr.DHloc_fixed_12, fsr.DHloc_fixed_13, fsr.DHloc_fixed_22, fsr.DHloc_fixed_23, fsr.DHloc_fixed_33 }; - //tvnl_dphi + ucell, + pv, + dSx, + dSxy, + isforce, + isstress); + + const double* dHx[3] = {fsr.DHloc_fixed_x, fsr.DHloc_fixed_y, fsr.DHloc_fixed_z}; + const double* dHxy[6] = {fsr.DHloc_fixed_11, + fsr.DHloc_fixed_12, + fsr.DHloc_fixed_13, + fsr.DHloc_fixed_22, + fsr.DHloc_fixed_23, + fsr.DHloc_fixed_33}; + // tvnl_dphi PulayForceStress::cal_pulay_fs(ftvnl_dphi, stvnl_dphi, *dm, ucell, pv, dHx, dHxy, isforce, isstress); // vl_dphi - PulayForceStress::cal_pulay_fs(fvl_dphi, svl_dphi, *dm, ucell, pelec->pot, gint, isforce, isstress, false/*reset dm to gint*/); + PulayForceStress::cal_pulay_fs(fvl_dphi, + svl_dphi, + *dm, + ucell, + pelec->pot, + gint, + isforce, + isstress, + false /*reset dm to gint*/); #ifdef __DEEPKS if (PARAM.inp.deepks_scf) @@ -237,21 +257,21 @@ void Force_LCAO::ftable(const bool isforce, GlobalC::ld.cal_gedm(ucell.nat); - const int nks=1; - DeePKS_domain::cal_f_delta_gamma(dm_gamma, - ucell, - orb, - gd, - *this->ParaV, - GlobalC::ld.lmaxd, - nks, - kv->kvec_d, - GlobalC::ld.nlm_save, - GlobalC::ld.gedm, - GlobalC::ld.inl_index, - GlobalC::ld.F_delta, - isstress, - svnl_dalpha); + const int nks = 1; + DeePKS_domain::cal_f_delta(dm_gamma, + ucell, + orb, + gd, + *this->ParaV, + GlobalC::ld.lmaxd, + nks, + kv->kvec_d, + GlobalC::ld.psialpha, + GlobalC::ld.gedm, + GlobalC::ld.inl_index, + GlobalC::ld.F_delta, + isstress, + svnl_dalpha); #ifdef __MPI Parallel_Reduce::reduce_all(GlobalC::ld.F_delta.c, GlobalC::ld.F_delta.nr * GlobalC::ld.F_delta.nc); @@ -265,7 +285,7 @@ void Force_LCAO::ftable(const bool isforce, if (PARAM.inp.deepks_out_unittest) { const int nks = 1; // 1 for gamma-only - LCAO_deepks_io::print_dm(nks, PARAM.globalv.nlocal, this->ParaV->nrow, dm_gamma); + LCAO_deepks_io::print_dm(nks, PARAM.globalv.nlocal, this->ParaV->nrow, dm_gamma); GlobalC::ld.check_projected_dm(); @@ -273,7 +293,7 @@ void Force_LCAO::ftable(const bool isforce, GlobalC::ld.check_gedm(); - GlobalC::ld.cal_e_delta_band(dm_gamma,nks); + GlobalC::ld.cal_e_delta_band(dm_gamma, nks); std::ofstream ofs("E_delta_bands.dat"); ofs << std::setprecision(10) << GlobalC::ld.e_delta_band; diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_k.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_k.cpp index e9203d6352..8aa1c68409 100644 --- a/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_k.cpp +++ b/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_k.cpp @@ -1,6 +1,5 @@ #include "FORCE.h" #include "module_base/memory.h" -#include "module_parameter/parameter.h" #include "module_base/parallel_reduce.h" #include "module_base/timer.h" #include "module_base/tool_threading.h" @@ -10,9 +9,10 @@ #include "module_elecstate/elecstate_lcao.h" #include "module_elecstate/module_dm/cal_dm_psi.h" #include "module_hamilt_lcao/hamilt_lcaodft/LCAO_domain.h" +#include "module_hamilt_lcao/hamilt_lcaodft/pulay_force_stress.h" #include "module_hamilt_pw/hamilt_pwdft/global.h" #include "module_io/write_HS.h" -#include "module_hamilt_lcao/hamilt_lcaodft/pulay_force_stress.h" +#include "module_parameter/parameter.h" #include #include @@ -138,8 +138,7 @@ void Force_LCAO>::allocate(const UnitCell& ucell, { cal_deri = false; - ModuleBase::WARNING_QUIT("cal_syns", - "This function has been broken and will be fixed later."); + ModuleBase::WARNING_QUIT("cal_syns", "This function has been broken and will be fixed later."); LCAO_domain::build_ST_new(fsr, 'S', @@ -156,7 +155,7 @@ void Force_LCAO>::allocate(const UnitCell& ucell, for (int ik = 0; ik < nks; ik++) { - + bool bit = false; // LiuXh, 2017-03-21 } } @@ -188,83 +187,82 @@ void Force_LCAO>::finish_ftable(ForceStressArrays& fsr) return; } -//template <> -//void Force_LCAO>::test(Parallel_Orbitals& pv, double* mmm, const std::string& name) +// template <> +// void Force_LCAO>::test(Parallel_Orbitals& pv, double* mmm, const std::string& name) //{ -// // mohan remove 'const' for pv, 2024-03-31 -// if (GlobalV::NPROC != 1) -// { -// return; -// } +// // mohan remove 'const' for pv, 2024-03-31 +// if (GlobalV::NPROC != 1) +// { +// return; +// } // -// std::cout << "test!" << std::endl; +// std::cout << "test!" << std::endl; // -// int irr = 0; -// int ca = 0; +// int irr = 0; +// int ca = 0; // -// GlobalV::ofs_running << " Calculate the test in Force_LCAO_k" << std::endl; -// Record_adj RA; +// GlobalV::ofs_running << " Calculate the test in Force_LCAO_k" << std::endl; +// Record_adj RA; // -// // mohan update 2024-03-31 -// RA.for_2d(pv, GlobalV::GAMMA_ONLY_LOCAL, GlobalC::ORB.cutoffs()); +// // mohan update 2024-03-31 +// RA.for_2d(pv, GlobalV::GAMMA_ONLY_LOCAL, GlobalC::ORB.cutoffs()); // -// double* test; -// test = new double[PARAM.globalv.nlocal * PARAM.globalv.nlocal]; -// ModuleBase::GlobalFunc::ZEROS(test, PARAM.globalv.nlocal * PARAM.globalv.nlocal); +// double* test; +// test = new double[PARAM.globalv.nlocal * PARAM.globalv.nlocal]; +// ModuleBase::GlobalFunc::ZEROS(test, PARAM.globalv.nlocal * PARAM.globalv.nlocal); // -// for (int T1 = 0; T1 < ucell.ntype; T1++) -// { -// Atom* atom1 = &ucell.atoms[T1]; -// for (int I1 = 0; I1 < atom1->na; I1++) -// { -// // const int iat = ucell.itia2iat(T1,I1); -// const int start1 = ucell.itiaiw2iwt(T1, I1, 0); -// for (int cb = 0; cb < RA.na_each[ca]; cb++) -// { -// const int T2 = RA.info[ca][cb][3]; -// const int I2 = RA.info[ca][cb][4]; -// Atom* atom2 = &ucell.atoms[T2]; -// const int start2 = ucell.itiaiw2iwt(T2, I2, 0); +// for (int T1 = 0; T1 < ucell.ntype; T1++) +// { +// Atom* atom1 = &ucell.atoms[T1]; +// for (int I1 = 0; I1 < atom1->na; I1++) +// { +// // const int iat = ucell.itia2iat(T1,I1); +// const int start1 = ucell.itiaiw2iwt(T1, I1, 0); +// for (int cb = 0; cb < RA.na_each[ca]; cb++) +// { +// const int T2 = RA.info[ca][cb][3]; +// const int I2 = RA.info[ca][cb][4]; +// Atom* atom2 = &ucell.atoms[T2]; +// const int start2 = ucell.itiaiw2iwt(T2, I2, 0); // -// for (int jj = 0; jj < atom1->nw; jj++) -// { -// const int iw1_all = start1 + jj; -// for (int kk = 0; kk < atom2->nw; kk++) -// { -// const int iw2_all = start2 + kk; -// assert(irr < pv.nnr); -// test[iw1_all * PARAM.globalv.nlocal + iw2_all] += mmm[irr]; -// ++irr; -// } -// } -// } -// ++ca; -// } -// } +// for (int jj = 0; jj < atom1->nw; jj++) +// { +// const int iw1_all = start1 + jj; +// for (int kk = 0; kk < atom2->nw; kk++) +// { +// const int iw2_all = start2 + kk; +// assert(irr < pv.nnr); +// test[iw1_all * PARAM.globalv.nlocal + iw2_all] += mmm[irr]; +// ++irr; +// } +// } +// } +// ++ca; +// } +// } // -// std::cout << "\n " << name << std::endl; -// std::cout << std::setprecision(4); -// for (int i = 0; i < PARAM.globalv.nlocal; i++) -// { -// for (int j = 0; j < PARAM.globalv.nlocal; j++) -// { -// if (std::abs(test[i * PARAM.globalv.nlocal + j]) > 1.0e-5) -// { -// std::cout << std::setw(12) << test[i * PARAM.globalv.nlocal + j]; -// } -// else -// { -// std::cout << std::setw(12) << "0"; -// } -// } -// std::cout << std::endl; -// } -// delete[] test; +// std::cout << "\n " << name << std::endl; +// std::cout << std::setprecision(4); +// for (int i = 0; i < PARAM.globalv.nlocal; i++) +// { +// for (int j = 0; j < PARAM.globalv.nlocal; j++) +// { +// if (std::abs(test[i * PARAM.globalv.nlocal + j]) > 1.0e-5) +// { +// std::cout << std::setw(12) << test[i * PARAM.globalv.nlocal + j]; +// } +// else +// { +// std::cout << std::setw(12) << "0"; +// } +// } +// std::cout << std::endl; +// } +// delete[] test; // -// RA.delete_grid(); // xiaohui add 2015-02-04 -// return; -//} - +// RA.delete_grid(); // xiaohui add 2015-02-04 +// return; +// } // be called in Force_LCAO::start_force_calculation template <> @@ -308,21 +306,39 @@ void Force_LCAO>::ftable(const bool isforce, kv->get_nks(), kv->kvec_d); - const double* dSx[3] = { fsr.DSloc_Rx, fsr.DSloc_Ry, fsr.DSloc_Rz }; + const double* dSx[3] = {fsr.DSloc_Rx, fsr.DSloc_Ry, fsr.DSloc_Rz}; // calculate the energy density matrix // and the force related to overlap matrix and energy density matrix. - PulayForceStress::cal_pulay_fs(foverlap, soverlap, + PulayForceStress::cal_pulay_fs( + foverlap, + soverlap, this->cal_edm(pelec, *psi, *dm, *kv, pv, PARAM.inp.nspin, PARAM.inp.nbands, ucell, *ra), - ucell, pv, dSx, fsr.DH_r, isforce, isstress, ra, -1.0, 1.0); - - const double* dHx[3] = { fsr.DHloc_fixedR_x, fsr.DHloc_fixedR_y, fsr.DHloc_fixedR_z }; // T+Vnl - const double* dHxy[6] = { fsr.stvnl11, fsr.stvnl12, fsr.stvnl13, fsr.stvnl22, fsr.stvnl23, fsr.stvnl33 }; //T + ucell, + pv, + dSx, + fsr.DH_r, + isforce, + isstress, + ra, + -1.0, + 1.0); + + const double* dHx[3] = {fsr.DHloc_fixedR_x, fsr.DHloc_fixedR_y, fsr.DHloc_fixedR_z}; // T+Vnl + const double* dHxy[6] = {fsr.stvnl11, fsr.stvnl12, fsr.stvnl13, fsr.stvnl22, fsr.stvnl23, fsr.stvnl33}; // T // tvnl_dphi PulayForceStress::cal_pulay_fs(ftvnl_dphi, stvnl_dphi, *dm, ucell, pv, dHx, dHxy, isforce, isstress, ra, 1.0, -1.0); // doing on the real space grid. // vl_dphi - PulayForceStress::cal_pulay_fs(fvl_dphi, svl_dphi, *dm, ucell, pelec->pot, gint, isforce, isstress, false/*reset dm to gint*/); + PulayForceStress::cal_pulay_fs(fvl_dphi, + svl_dphi, + *dm, + ucell, + pelec->pot, + gint, + isforce, + isstress, + false /*reset dm to gint*/); #ifdef __DEEPKS if (PARAM.inp.deepks_scf) @@ -330,26 +346,26 @@ void Force_LCAO>::ftable(const bool isforce, const std::vector>>& dm_k = dm->get_DMK_vector(); // when deepks_scf is on, the init pdm should be same as the out pdm, so we should not recalculate the pdm - // GlobalC::ld.cal_projected_DM_k(dm, ucell, orb, gd); + // GlobalC::ld.cal_projected_DM(dm, ucell, orb, gd); GlobalC::ld.cal_descriptor(ucell.nat); GlobalC::ld.cal_gedm(ucell.nat); - DeePKS_domain::cal_f_delta_k(dm_k, - ucell, - orb, - gd, - pv, - GlobalC::ld.lmaxd, - kv->get_nks(), - kv->kvec_d, - GlobalC::ld.nlm_save_k, - GlobalC::ld.gedm, - GlobalC::ld.inl_index, - GlobalC::ld.F_delta, - isstress, - svnl_dalpha); + DeePKS_domain::cal_f_delta>(dm_k, + ucell, + orb, + gd, + pv, + GlobalC::ld.lmaxd, + kv->get_nks(), + kv->kvec_d, + GlobalC::ld.psialpha, + GlobalC::ld.gedm, + GlobalC::ld.inl_index, + GlobalC::ld.F_delta, + isstress, + svnl_dalpha); #ifdef __MPI Parallel_Reduce::reduce_all(GlobalC::ld.F_delta.c, GlobalC::ld.F_delta.nr * GlobalC::ld.F_delta.nc); diff --git a/source/module_hamilt_lcao/module_deepks/CMakeLists.txt b/source/module_hamilt_lcao/module_deepks/CMakeLists.txt index f32151bafe..9d0049ee47 100644 --- a/source/module_hamilt_lcao/module_deepks/CMakeLists.txt +++ b/source/module_hamilt_lcao/module_deepks/CMakeLists.txt @@ -1,8 +1,7 @@ if(ENABLE_DEEPKS) list(APPEND objects LCAO_deepks.cpp - deepks_fgamma.cpp - deepks_fk.cpp + deepks_force.cpp LCAO_deepks_odelta.cpp LCAO_deepks_io.cpp LCAO_deepks_mpi.cpp diff --git a/source/module_hamilt_lcao/module_deepks/LCAO_deepks.cpp b/source/module_hamilt_lcao/module_deepks/LCAO_deepks.cpp index 2e34264ab7..9f5163cdf3 100644 --- a/source/module_hamilt_lcao/module_deepks/LCAO_deepks.cpp +++ b/source/module_hamilt_lcao/module_deepks/LCAO_deepks.cpp @@ -1,22 +1,21 @@ -//wenfei 2022-1-5 -//This file contains constructor and destructor of the class LCAO_deepks, +// wenfei 2022-1-5 +// This file contains constructor and destructor of the class LCAO_deepks, #include "module_parameter/parameter.h" -//as well as subroutines for initializing and releasing relevant data structures - -//Other than the constructor and the destructor, it contains 3 types of subroutines: -//1. subroutines that are related to calculating descriptors: -// - init : allocates some arrays -// - init_index : records the index (inl) -// - allocate_nlm : allocates data structures (nlm_save) which is used to store -//2. subroutines that are related to calculating force label: -// - init_gdmx : allocates gdmx; it is a private subroutine -// - del_gdmx : releases gdmx -//3. subroutines that are related to calculating force label: -// - init_gdmepsl : allocates gdm_epsl; it is a private subroutine -// - del_gdmepsl : releases gdm_epsl -//4. subroutines that are related to V_delta: -// - allocate_V_delta : allocates H_V_delta; if calculating force, it also calls -// init_gdmx, as well as allocating F_delta +// as well as subroutines for initializing and releasing relevant data structures + +// Other than the constructor and the destructor, it contains 3 types of subroutines: +// 1. subroutines that are related to calculating descriptors: +// - init : allocates some arrays +// - init_index : records the index (inl) +// 2. subroutines that are related to calculating force label: +// - init_gdmx : allocates gdmx; it is a private subroutine +// - del_gdmx : releases gdmx +// 3. subroutines that are related to calculating force label: +// - init_gdmepsl : allocates gdm_epsl; it is a private subroutine +// - del_gdmepsl : releases gdm_epsl +// 4. subroutines that are related to V_delta: +// - allocate_V_delta : allocates H_V_delta; if calculating force, it also calls +// init_gdmx, as well as allocating F_delta #ifdef __DEEPKS @@ -25,19 +24,20 @@ namespace GlobalC { - LCAO_Deepks ld; +LCAO_Deepks ld; } -//Constructor of the class +// Constructor of the class LCAO_Deepks::LCAO_Deepks() { alpha_index = new ModuleBase::IntArray[1]; inl_index = new ModuleBase::IntArray[1]; inl_l = nullptr; gedm = nullptr; + this->psialpha.resize(1); } -//Desctructor of the class +// Desctructor of the class LCAO_Deepks::~LCAO_Deepks() { delete[] alpha_index; @@ -45,18 +45,18 @@ LCAO_Deepks::~LCAO_Deepks() delete[] inl_l; //=======1. to use deepks, pdm is required========== - //delete pdm** - for (int inl = 0;inl < this->inlmax;inl++) + // delete pdm** + for (int inl = 0; inl < this->inlmax; inl++) { delete[] pdm[inl]; } delete[] pdm; //=======2. "deepks_scf" part========== - //if (PARAM.inp.deepks_scf) + // if (PARAM.inp.deepks_scf) if (gedm) { - //delete gedm** - for (int inl = 0;inl < this->inlmax;inl++) + // delete gedm** + for (int inl = 0; inl < this->inlmax; inl++) { delete[] gedm[inl]; } @@ -64,16 +64,14 @@ LCAO_Deepks::~LCAO_Deepks() } del_gdmx(); - } -void LCAO_Deepks::init( - const LCAO_Orbitals& orb, - const int nat, - const int ntype, - const int nks, - const Parallel_Orbitals& pv_in, - std::vector na) +void LCAO_Deepks::init(const LCAO_Orbitals& orb, + const int nat, + const int ntype, + const int nks, + const Parallel_Orbitals& pv_in, + std::vector na) { ModuleBase::TITLE("LCAO_Deepks", "init"); @@ -86,51 +84,51 @@ void LCAO_Deepks::init( assert(lm >= 0); assert(nm >= 0); assert(tot_inl_per_atom >= 0); - + int tot_inl = tot_inl_per_atom * nat; - if(PARAM.inp.deepks_equiv) + if (PARAM.inp.deepks_equiv) { tot_inl = nat; } this->lmaxd = lm; this->nmaxd = nm; - + GlobalV::ofs_running << " lmax of descriptor = " << this->lmaxd << std::endl; GlobalV::ofs_running << " nmax of descriptor = " << nmaxd << std::endl; int pdm_size = 0; this->inlmax = tot_inl; - if(!PARAM.inp.deepks_equiv) + if (!PARAM.inp.deepks_equiv) { GlobalV::ofs_running << " total basis (all atoms) for descriptor = " << std::endl; - //init pdm** + // init pdm** pdm_size = (this->lmaxd * 2 + 1) * (this->lmaxd * 2 + 1); } else { - for(int il = 0; il < this->lmaxd + 1; il++) + for (int il = 0; il < this->lmaxd + 1; il++) { pdm_size += (2 * il + 1) * orb.Alpha[0].getNchi(il); } pdm_size = pdm_size * pdm_size; - this->des_per_atom=pdm_size; + this->des_per_atom = pdm_size; GlobalV::ofs_running << " Equivariant version, size of pdm matrices : " << pdm_size << std::endl; } - this->pdm = new double* [this->inlmax]; - for (int inl = 0;inl < this->inlmax;inl++) + this->pdm = new double*[this->inlmax]; + for (int inl = 0; inl < this->inlmax; inl++) { this->pdm[inl] = new double[pdm_size]; ModuleBase::GlobalFunc::ZEROS(this->pdm[inl], pdm_size); } // cal n(descriptor) per atom , related to Lmax, nchi(L) and m. (not total_nchi!) - if(!PARAM.inp.deepks_equiv) + if (!PARAM.inp.deepks_equiv) { - this->des_per_atom=0; // mohan add 2021-04-21 + this->des_per_atom = 0; // mohan add 2021-04-21 for (int l = 0; l <= this->lmaxd; l++) { this->des_per_atom += orb.Alpha[0].getNchi(l) * (2 * l + 1); @@ -139,14 +137,17 @@ void LCAO_Deepks::init( this->init_index(ntype, nat, na, tot_inl, orb); } - this->allocate_nlm(nat); this->pv = &pv_in; return; } -void LCAO_Deepks::init_index(const int ntype, const int nat, std::vector na, const int Total_nchi, const LCAO_Orbitals &orb) +void LCAO_Deepks::init_index(const int ntype, + const int nat, + std::vector na, + const int Total_nchi, + const LCAO_Orbitals& orb) { delete[] this->alpha_index; this->alpha_index = new ModuleBase::IntArray[ntype]; @@ -160,23 +161,18 @@ void LCAO_Deepks::init_index(const int ntype, const int nat, std::vector na int alpha = 0; for (int it = 0; it < ntype; it++) { - this->alpha_index[it].create( - na[it], - this->lmaxd + 1, // l starts from 0 - this->nmaxd, - 2 * this->lmaxd + 1); // m ==> 2*l+1 + this->alpha_index[it].create(na[it], + this->lmaxd + 1, // l starts from 0 + this->nmaxd, + 2 * this->lmaxd + 1); // m ==> 2*l+1 - this->inl_index[it].create( - na[it], - this->lmaxd + 1, - this->nmaxd); + this->inl_index[it].create(na[it], this->lmaxd + 1, this->nmaxd); - GlobalV::ofs_running << " Type " << it + 1 - << " number_of_atoms " << na[it] << std::endl; + GlobalV::ofs_running << " Type " << it + 1 << " number_of_atoms " << na[it] << std::endl; for (int ia = 0; ia < na[it]; ia++) { - //alpha + // alpha for (int l = 0; l < this->lmaxd + 1; l++) { for (int n = 0; n < orb.Alpha[0].getNchi(l); n++) @@ -191,51 +187,39 @@ void LCAO_Deepks::init_index(const int ntype, const int nat, std::vector na inl++; } } - }//end ia - }//end it + } // end ia + } // end it assert(this->n_descriptor == alpha); assert(Total_nchi == inl); GlobalV::ofs_running << " descriptors_per_atom " << this->des_per_atom << std::endl; GlobalV::ofs_running << " total_descriptors " << this->n_descriptor << std::endl; - return; -} - -void LCAO_Deepks::allocate_nlm(const int nat) -{ - if(PARAM.globalv.gamma_only_local) - { - this->nlm_save.resize(nat); - } - else - { - this->nlm_save_k.resize(nat); - } + return; } void LCAO_Deepks::init_gdmx(const int nat) { - this->gdmx = new double** [nat]; - this->gdmy = new double** [nat]; - this->gdmz = new double** [nat]; + this->gdmx = new double**[nat]; + this->gdmy = new double**[nat]; + this->gdmz = new double**[nat]; int pdm_size = 0; - if(!PARAM.inp.deepks_equiv) + if (!PARAM.inp.deepks_equiv) { pdm_size = (this->lmaxd * 2 + 1) * (this->lmaxd * 2 + 1); } else { - pdm_size = this -> des_per_atom; + pdm_size = this->des_per_atom; } - - for (int iat = 0;iat < nat;iat++) + + for (int iat = 0; iat < nat; iat++) { - this->gdmx[iat] = new double* [inlmax]; - this->gdmy[iat] = new double* [inlmax]; - this->gdmz[iat] = new double* [inlmax]; - for (int inl = 0;inl < inlmax;inl++) + this->gdmx[iat] = new double*[inlmax]; + this->gdmy[iat] = new double*[inlmax]; + this->gdmz[iat] = new double*[inlmax]; + for (int inl = 0; inl < inlmax; inl++) { - this->gdmx[iat][inl] = new double [pdm_size]; - this->gdmy[iat][inl] = new double [pdm_size]; + this->gdmx[iat][inl] = new double[pdm_size]; + this->gdmy[iat][inl] = new double[pdm_size]; this->gdmz[iat][inl] = new double[pdm_size]; ModuleBase::GlobalFunc::ZEROS(gdmx[iat][inl], pdm_size); ModuleBase::GlobalFunc::ZEROS(gdmy[iat][inl], pdm_size); @@ -246,12 +230,12 @@ void LCAO_Deepks::init_gdmx(const int nat) return; } -//void LCAO_Deepks::del_gdmx(const int nat) +// void LCAO_Deepks::del_gdmx(const int nat) void LCAO_Deepks::del_gdmx() { - for (int iat = 0;iat < nat_gdm;iat++) + for (int iat = 0; iat < nat_gdm; iat++) { - for (int inl = 0;inl < inlmax;inl++) + for (int inl = 0; inl < inlmax; inl++) { delete[] this->gdmx[iat][inl]; delete[] this->gdmy[iat][inl]; @@ -269,24 +253,24 @@ void LCAO_Deepks::del_gdmx() void LCAO_Deepks::init_gdmepsl() { - this->gdm_epsl = new double** [6]; - + this->gdm_epsl = new double**[6]; + int pdm_size = 0; - if(!PARAM.inp.deepks_equiv) + if (!PARAM.inp.deepks_equiv) { pdm_size = (this->lmaxd * 2 + 1) * (this->lmaxd * 2 + 1); } else { - pdm_size = this -> des_per_atom; - } + pdm_size = this->des_per_atom; + } - for (int ipol = 0;ipol < 6;ipol++) + for (int ipol = 0; ipol < 6; ipol++) { - this->gdm_epsl[ipol] = new double* [inlmax]; - for (int inl = 0;inl < inlmax;inl++) + this->gdm_epsl[ipol] = new double*[inlmax]; + for (int inl = 0; inl < inlmax; inl++) { - this->gdm_epsl[ipol][inl] = new double [pdm_size]; + this->gdm_epsl[ipol][inl] = new double[pdm_size]; ModuleBase::GlobalFunc::ZEROS(gdm_epsl[ipol][inl], pdm_size); } } @@ -295,9 +279,9 @@ void LCAO_Deepks::init_gdmepsl() void LCAO_Deepks::del_gdmepsl() { - for (int ipol = 0;ipol < 6;ipol++) + for (int ipol = 0; ipol < 6; ipol++) { - for (int inl = 0;inl < inlmax;inl++) + for (int inl = 0; inl < inlmax; inl++) { delete[] this->gdm_epsl[ipol][inl]; } @@ -307,14 +291,13 @@ void LCAO_Deepks::del_gdmepsl() return; } - void LCAO_Deepks::allocate_V_delta(const int nat, const int nks) { ModuleBase::TITLE("LCAO_Deepks", "allocate_V_delta"); nks_V_delta = nks; - //initialize the H matrix H_V_delta - if(PARAM.globalv.gamma_only_local) + // initialize the H matrix H_V_delta + if (PARAM.globalv.gamma_only_local) { H_V_delta.resize(1); // the first dimension is for the consistence with H_V_delta_k this->H_V_delta[0].resize(pv->nloc); @@ -323,47 +306,46 @@ void LCAO_Deepks::allocate_V_delta(const int nat, const int nks) else { H_V_delta_k.resize(nks); - for(int ik=0;ikH_V_delta_k[ik].resize(pv->nloc); ModuleBase::GlobalFunc::ZEROS(this->H_V_delta_k[ik].data(), pv->nloc); } } - //init gedm** + // init gedm** int pdm_size = 0; - if(!PARAM.inp.deepks_equiv) + if (!PARAM.inp.deepks_equiv) { pdm_size = (this->lmaxd * 2 + 1) * (this->lmaxd * 2 + 1); } else { - pdm_size = this -> des_per_atom; + pdm_size = this->des_per_atom; } - this->gedm = new double* [this->inlmax]; - for (int inl = 0;inl < this->inlmax;inl++) + this->gedm = new double*[this->inlmax]; + for (int inl = 0; inl < this->inlmax; inl++) { this->gedm[inl] = new double[pdm_size]; ModuleBase::GlobalFunc::ZEROS(this->gedm[inl], pdm_size); } if (PARAM.inp.cal_force) { - //init F_delta + // init F_delta F_delta.create(nat, 3); - if(PARAM.inp.deepks_out_labels) - { + if (PARAM.inp.deepks_out_labels) + { this->init_gdmx(nat); this->init_gdmepsl(); } - //gdmx is used only in calculating gvx + // gdmx is used only in calculating gvx } if (PARAM.inp.deepks_bandgap) { - //init o_delta + // init o_delta o_delta.create(nks, 1); - } return; @@ -371,20 +353,21 @@ void LCAO_Deepks::allocate_V_delta(const int nat, const int nks) void LCAO_Deepks::init_orbital_pdm_shell(const int nks) { - - this->orbital_pdm_shell = new double*** [nks]; - for (int iks=0; iksorbital_pdm_shell = new double***[nks]; + + for (int iks = 0; iks < nks; iks++) { - this->orbital_pdm_shell[iks] = new double** [1]; - for (int hl=0; hl < 1; hl++) + this->orbital_pdm_shell[iks] = new double**[1]; + for (int hl = 0; hl < 1; hl++) { - this->orbital_pdm_shell[iks][hl] = new double* [this->inlmax]; + this->orbital_pdm_shell[iks][hl] = new double*[this->inlmax]; - for(int inl = 0; inl < this->inlmax; inl++) + for (int inl = 0; inl < this->inlmax; inl++) { - this->orbital_pdm_shell[iks][hl][inl] = new double [(2 * this->lmaxd + 1) * (2 * this->lmaxd + 1)]; - ModuleBase::GlobalFunc::ZEROS(orbital_pdm_shell[iks][hl][inl], (2 * this->lmaxd + 1) * (2 * this->lmaxd + 1)); + this->orbital_pdm_shell[iks][hl][inl] = new double[(2 * this->lmaxd + 1) * (2 * this->lmaxd + 1)]; + ModuleBase::GlobalFunc::ZEROS(orbital_pdm_shell[iks][hl][inl], + (2 * this->lmaxd + 1) * (2 * this->lmaxd + 1)); } } } @@ -392,14 +375,13 @@ void LCAO_Deepks::init_orbital_pdm_shell(const int nks) return; } - void LCAO_Deepks::del_orbital_pdm_shell(const int nks) { - for (int iks=0; iksinlmax; inl++) + for (int inl = 0; inl < this->inlmax; inl++) { delete[] this->orbital_pdm_shell[iks][hl][inl]; } @@ -407,57 +389,58 @@ void LCAO_Deepks::del_orbital_pdm_shell(const int nks) } delete[] this->orbital_pdm_shell[iks]; } - delete[] this->orbital_pdm_shell; + delete[] this->orbital_pdm_shell; return; } -void LCAO_Deepks::init_v_delta_pdm_shell(const int nks,const int nlocal) +void LCAO_Deepks::init_v_delta_pdm_shell(const int nks, const int nlocal) { - const int mn_size=(2 * this->lmaxd + 1) * (2 * this->lmaxd + 1); - if (nks==1){ - this->v_delta_pdm_shell = new double**** [nks]; - for (int iks=0; ikslmaxd + 1) * (2 * this->lmaxd + 1); + if (nks == 1) + { + this->v_delta_pdm_shell = new double****[nks]; + for (int iks = 0; iks < nks; iks++) { - this->v_delta_pdm_shell[iks] = new double*** [nlocal]; + this->v_delta_pdm_shell[iks] = new double***[nlocal]; - for (int mu=0; muv_delta_pdm_shell[iks][mu] = new double** [nlocal]; + this->v_delta_pdm_shell[iks][mu] = new double**[nlocal]; - for (int nu=0; nuv_delta_pdm_shell[iks][mu][nu] = new double* [this->inlmax]; + this->v_delta_pdm_shell[iks][mu][nu] = new double*[this->inlmax]; - for(int inl = 0; inl < this->inlmax; inl++) + for (int inl = 0; inl < this->inlmax; inl++) { - this->v_delta_pdm_shell[iks][mu][nu][inl] = new double [mn_size]; + this->v_delta_pdm_shell[iks][mu][nu][inl] = new double[mn_size]; ModuleBase::GlobalFunc::ZEROS(v_delta_pdm_shell[iks][mu][nu][inl], mn_size); - } + } } } } } else { - this->v_delta_pdm_shell_complex = new std::complex**** [nks]; - for (int iks=0; iksv_delta_pdm_shell_complex = new std::complex****[nks]; + for (int iks = 0; iks < nks; iks++) { - this->v_delta_pdm_shell_complex[iks] = new std::complex*** [nlocal]; + this->v_delta_pdm_shell_complex[iks] = new std::complex***[nlocal]; - for (int mu=0; muv_delta_pdm_shell_complex[iks][mu] = new std::complex** [nlocal]; + this->v_delta_pdm_shell_complex[iks][mu] = new std::complex**[nlocal]; - for (int nu=0; nuv_delta_pdm_shell_complex[iks][mu][nu] = new std::complex* [this->inlmax]; + this->v_delta_pdm_shell_complex[iks][mu][nu] = new std::complex*[this->inlmax]; - for(int inl = 0; inl < this->inlmax; inl++) + for (int inl = 0; inl < this->inlmax; inl++) { - this->v_delta_pdm_shell_complex[iks][mu][nu][inl] = new std::complex [mn_size]; + this->v_delta_pdm_shell_complex[iks][mu][nu][inl] = new std::complex[mn_size]; ModuleBase::GlobalFunc::ZEROS(v_delta_pdm_shell_complex[iks][mu][nu][inl], mn_size); - } + } } } } @@ -466,47 +449,47 @@ void LCAO_Deepks::init_v_delta_pdm_shell(const int nks,const int nlocal) return; } -void LCAO_Deepks::del_v_delta_pdm_shell(const int nks,const int nlocal) +void LCAO_Deepks::del_v_delta_pdm_shell(const int nks, const int nlocal) { - if (nks==1) + if (nks == 1) { - for (int iks=0; iksinlmax; inl++) + for (int inl = 0; inl < this->inlmax; inl++) { delete[] this->v_delta_pdm_shell[iks][mu][nu][inl]; } - delete[] this->v_delta_pdm_shell[iks][mu][nu]; + delete[] this->v_delta_pdm_shell[iks][mu][nu]; } - delete[] this->v_delta_pdm_shell[iks][mu]; + delete[] this->v_delta_pdm_shell[iks][mu]; } - delete[] this->v_delta_pdm_shell[iks]; + delete[] this->v_delta_pdm_shell[iks]; } - delete[] this->v_delta_pdm_shell; + delete[] this->v_delta_pdm_shell; } else { - for (int iks=0; iksinlmax; inl++) + for (int inl = 0; inl < this->inlmax; inl++) { delete[] this->v_delta_pdm_shell_complex[iks][mu][nu][inl]; } - delete[] this->v_delta_pdm_shell_complex[iks][mu][nu]; + delete[] this->v_delta_pdm_shell_complex[iks][mu][nu]; } - delete[] this->v_delta_pdm_shell_complex[iks][mu]; + delete[] this->v_delta_pdm_shell_complex[iks][mu]; } - delete[] this->v_delta_pdm_shell_complex[iks]; + delete[] this->v_delta_pdm_shell_complex[iks]; } - delete[] this->v_delta_pdm_shell_complex; + delete[] this->v_delta_pdm_shell_complex; } return; @@ -519,6 +502,8 @@ void LCAO_Deepks::dpks_cal_e_delta_band(const std::vector>& dm, } template void LCAO_Deepks::dpks_cal_e_delta_band(const std::vector>& dm, const int nks); -template void LCAO_Deepks::dpks_cal_e_delta_band>(const std::vector>>& dm, const int nks); +template void LCAO_Deepks::dpks_cal_e_delta_band>( + const std::vector>>& dm, + const int nks); #endif diff --git a/source/module_hamilt_lcao/module_deepks/LCAO_deepks.h b/source/module_hamilt_lcao/module_deepks/LCAO_deepks.h index 89276b7835..e0deda6aa4 100644 --- a/source/module_hamilt_lcao/module_deepks/LCAO_deepks.h +++ b/source/module_hamilt_lcao/module_deepks/LCAO_deepks.h @@ -1,8 +1,10 @@ -#ifndef LCAO_DEEPKS_H -#define LCAO_DEEPKS_H +#ifndef LCAO_DEEPKS_H +#define LCAO_DEEPKS_H #ifdef __DEEPKS +#include "deepks_force.h" +#include "deepks_hmat.h" #include "module_base/complexmatrix.h" #include "module_base/intarray.h" #include "module_base/matrix.h" @@ -11,15 +13,13 @@ #include "module_basis/module_nao/two_center_integrator.h" #include "module_cell/module_neighbor/sltk_grid_driver.h" #include "module_elecstate/module_dm/density_matrix.h" +#include "module_hamilt_lcao/module_hcontainer/hcontainer.h" #include "module_io/winput.h" #include #include #include -#include "deepks_force.h" -#include "deepks_hmat.h" - /// /// The LCAO_Deepks contains subroutines for implementation of the DeePKS method in atomic basis. /// In essential, it is a machine-learned correction term to the XC potential @@ -56,7 +56,7 @@ class LCAO_Deepks /// Correction term to the Hamiltonian matrix: \f$\langle\psi|V_\delta|\psi\rangle\f$ (for gamma only) /// The size of first dimension is 1, which is used for the consitence with H_V_delta_k - std::vector> H_V_delta; + std::vector> H_V_delta; /// Correction term to Hamiltonian, for multi-k std::vector>> H_V_delta_k; @@ -97,8 +97,8 @@ class LCAO_Deepks //------------------- // private variables //------------------- -// private: - public: // change to public to reconstuct the code, 2024-07-22 by mohan + // private: + public: // change to public to reconstuct the code, 2024-07-22 by mohan int lmaxd = 0; // max l of descirptors int nmaxd = 0; //#. descriptors per l int inlmax = 0; // tot. number {i,n,l} - atom, n, l @@ -111,12 +111,9 @@ class LCAO_Deepks // related derivatives. torch::jit::script::Module module; - // saves , for gamma only - std::vector>>>> nlm_save; - - // saves , for multi_k - typedef std::tuple key_tuple; - std::vector>>>> nlm_save_k; + // saves and its derivatives + // index 0 for itself and index 1-3 for derivatives over x,y,z + std::vector*> psialpha; // projected density matrix double** pdm; //[tot_Inl][2l+1][2l+1] caoyu modified 2021-05-07; if equivariant version: [nat][nlm*nlm] @@ -164,7 +161,7 @@ class LCAO_Deepks std::complex***** v_delta_pdm_shell_complex; // for multi-k // v_delta_precalc[nks,nlocal,nlocal,NAt,NDscrpt] = gvdm * v_delta_pdm_shell; torch::Tensor v_delta_precalc_tensor; - //for v_delta==2 , new v_delta_precalc storage method + // for v_delta==2 , new v_delta_precalc storage method torch::Tensor psialpha_tensor; torch::Tensor gevdm_tensor; @@ -198,7 +195,6 @@ class LCAO_Deepks // 1. subroutines that are related to calculating descriptors: // - init : allocates some arrays // - init_index : records the index (inl) - // - allocate_nlm : allocates data structures (nlm_save) which is used to store // 2. subroutines that are related to calculating force label: // - init_gdmx : allocates gdmx; it is a private subroutine // - del_gdmx : releases gdmx @@ -240,25 +236,31 @@ class LCAO_Deepks // for bandgap label calculation; QO added on 2022-1-7 void init_orbital_pdm_shell(const int nks); void del_orbital_pdm_shell(const int nks); - - //for v_delta label calculation; xinyuan added on 2023-2-22 - void init_v_delta_pdm_shell(const int nks,const int nlocal); - void del_v_delta_pdm_shell(const int nks,const int nlocal); + + // for v_delta label calculation; xinyuan added on 2023-2-22 + void init_v_delta_pdm_shell(const int nks, const int nlocal); + void del_v_delta_pdm_shell(const int nks, const int nlocal); //------------------- // LCAO_deepks_psialpha.cpp //------------------- - // wenfei 2022-1-5 - // This file contains 2 subroutines: - // 1. build_psialpha, which calculates the overlap + // E.Wu 2024-12-24 + // This file contains 3 subroutines: + // 1. allocate_psialpha, which allocates memory for psialpha + // 2. build_psialpha, which calculates the overlap // between atomic basis and projector alpha : // which will be used in calculating pdm, gdmx, H_V_delta, F_delta; - // 2. check_psialpha, which prints the results into .dat files + // 3. check_psialpha, which prints the results into .dat files // for checking public: // calculates + void allocate_psialpha(const bool& cal_deri, + const UnitCell& ucell, + const LCAO_Orbitals& orb, + const Grid_Driver& GridD); + void build_psialpha(const bool& cal_deri /**< [in] 0 for 2-center intergration, 1 for its derivation*/, const UnitCell& ucell, const LCAO_Orbitals& orb, @@ -282,21 +284,22 @@ class LCAO_Deepks // It also contains subroutines for printing pdm and gdmx // for checking purpose - // There are 6 subroutines in this file: - // 1. cal_projected_DM, which is used for calculating pdm for gamma point calculation + // There are 4 subroutines in this file: + // 1. cal_projected_DM, which is used for calculating pdm // 2. check_projected_dm, which prints pdm to descriptor.dat - // 3. cal_gdmx, calculating gdmx (and optionally gdm_epsl for stress) for gamma point + // 3. cal_gdmx, calculating gdmx (and optionally gdm_epsl for stress) // 4. check_gdmx, which prints gdmx to a series of .dat files public: - /** + /** * @brief calculate projected density matrix: * pdm = sum_i,occ * 3 cases to skip calculation of pdm: * 1. NSCF calculation of DeePKS, init_chg = file and pdm has been read * 2. SCF calculation of DeePKS with init_chg = file and pdm has been read for restarting SCF - * 3. Relax/Cell-Relax/MD calculation, non-first step will use the convergence pdm from the last step as initial pdm + * 3. Relax/Cell-Relax/MD calculation, non-first step will use the convergence pdm from the last step as initial + * pdm */ template void cal_projected_DM(const elecstate::DensityMatrix* dm, @@ -316,11 +319,12 @@ class LCAO_Deepks const Grid_Driver& GridD, const int nks, const std::vector>& kvec_d, + std::vector*> psialpha, const bool isstress); void check_gdmx(const int nat); - /** + /** * @brief set init_pdm to skip the calculation of pdm in SCF iteration */ void set_init_pdm(bool ipdm) @@ -354,7 +358,6 @@ class LCAO_Deepks //! a temporary interface for cal_e_delta_band and cal_e_delta_band_k template void dpks_cal_e_delta_band(const std::vector>& dm, const int nks); - //------------------- // LCAO_deepks_odelta.cpp @@ -365,9 +368,9 @@ class LCAO_Deepks public: template - void cal_o_delta(const std::vector>& - dm_hl /**<[in] modified density matrix that contains HOMO and LUMO only*/, - const int nks); + void cal_o_delta( + const std::vector>& dm_hl /**<[in] modified density matrix that contains HOMO and LUMO only*/, + const int nks); //------------------- // LCAO_deepks_torch.cpp @@ -411,7 +414,7 @@ class LCAO_Deepks /// which are eigenvalues of pdm in blocks of I_n_l void cal_descriptor(const int nat); /// print descriptors based on LCAO basis - void check_descriptor(const UnitCell& ucell, const std::string &out_dir); + void check_descriptor(const UnitCell& ucell, const std::string& out_dir); void cal_descriptor_equiv(const int nat); @@ -448,7 +451,7 @@ class LCAO_Deepks const LCAO_Orbitals& orb, const Grid_Driver& GridD); - //calculates v_delta_precalc + // calculates v_delta_precalc template void cal_v_delta_precalc(const int nlocal, const int nat, @@ -473,11 +476,9 @@ class LCAO_Deepks template void check_vdp_psialpha(const int nat, const int nks, const int nlocal); - + // prepare gevdm for outputting npy file - void prepare_gevdm( - const int nat, - const LCAO_Orbitals &orb); + void prepare_gevdm(const int nat, const LCAO_Orbitals& orb); void check_vdp_gevdm(const int nat); private: @@ -492,15 +493,12 @@ class LCAO_Deepks int ndim, // second dimension double** mat); // the array being reduced #endif - - }; namespace GlobalC { - extern LCAO_Deepks ld; +extern LCAO_Deepks ld; } - #endif #endif diff --git a/source/module_hamilt_lcao/module_deepks/LCAO_deepks_pdm.cpp b/source/module_hamilt_lcao/module_deepks/LCAO_deepks_pdm.cpp index 62dfc76aef..7e7365c069 100644 --- a/source/module_hamilt_lcao/module_deepks/LCAO_deepks_pdm.cpp +++ b/source/module_hamilt_lcao/module_deepks/LCAO_deepks_pdm.cpp @@ -1,40 +1,40 @@ -//wenfei 2022-1-11 -//This file contains subroutines for calculating pdm, +// wenfei 2022-1-11 +// This file contains subroutines for calculating pdm, #include "module_parameter/parameter.h" -//which is defind as sum_mu,nu rho_mu,nu (); -//as well as gdmx, which is the gradient of pdm, defined as -//sum_mu,nu rho_mu,nu d/dX() +// which is defind as sum_mu,nu rho_mu,nu (); +// as well as gdmx, which is the gradient of pdm, defined as +// sum_mu,nu rho_mu,nu d/dX() -//It also contains subroutines for printing pdm and gdmx -//for checking purpose +// It also contains subroutines for printing pdm and gdmx +// for checking purpose -//There are 3 subroutines in this file: -//1. read_projected_DM, which reads pdm from file -//2. cal_projected_DM, which is used for calculating pdm -//3. check_projected_dm, which prints pdm to descriptor.dat +// There are 3 subroutines in this file: +// 1. read_projected_DM, which reads pdm from file +// 2. cal_projected_DM, which is used for calculating pdm +// 3. check_projected_dm, which prints pdm to descriptor.dat #ifdef __DEEPKS #include "LCAO_deepks.h" -#include "module_base/vector3.h" -#include "module_base/timer.h" #include "module_base/constants.h" -#include "module_hamilt_lcao/module_hcontainer/atom_pair.h" #include "module_base/libm/libm.h" +#include "module_base/timer.h" +#include "module_base/vector3.h" +#include "module_hamilt_lcao/module_hcontainer/atom_pair.h" void LCAO_Deepks::read_projected_DM(bool read_pdm_file, bool is_equiv, const Numerical_Orbital& alpha) { - if (read_pdm_file && !this->init_pdm) //for DeePKS NSCF calculation + if (read_pdm_file && !this->init_pdm) // for DeePKS NSCF calculation { int pdm_size = 0; - if(!is_equiv) + if (!is_equiv) { pdm_size = (this->lmaxd * 2 + 1) * (this->lmaxd * 2 + 1); } else { int nproj = 0; - for(int il = 0; il < this->lmaxd + 1; il++) + for (int il = 0; il < this->lmaxd + 1; il++) { nproj += (2 * il + 1) * alpha.getNchi(il); } @@ -48,12 +48,12 @@ void LCAO_Deepks::read_projected_DM(bool read_pdm_file, bool is_equiv, const Num { ModuleBase::WARNING_QUIT("LCAO_Deepks::read_projected_DM", "Cannot find the file deepks_projdm.dat"); } - for(int inl=0;inlinlmax;inl++) + for (int inl = 0; inl < this->inlmax; inl++) { - for(int ind=0;ind> c; + ifs >> c; pdm[inl][ind] = c; } } @@ -61,95 +61,94 @@ void LCAO_Deepks::read_projected_DM(bool read_pdm_file, bool is_equiv, const Num } } -//this subroutine performs the calculation of projected density matrices -//pdm_m,m'=\sum_{mu,nu} rho_{mu,nu} +// this subroutine performs the calculation of projected density matrices +// pdm_m,m'=\sum_{mu,nu} rho_{mu,nu} template -void LCAO_Deepks::cal_projected_DM( - const elecstate::DensityMatrix* dm, - const UnitCell &ucell, - const LCAO_Orbitals &orb, - const Grid_Driver& GridD) +void LCAO_Deepks::cal_projected_DM(const elecstate::DensityMatrix* dm, + const UnitCell& ucell, + const LCAO_Orbitals& orb, + const Grid_Driver& GridD) { ModuleBase::TITLE("LCAO_Deepks", "cal_projected_DM"); // if pdm has been initialized, skip the calculation - if(this->init_pdm) + if (this->init_pdm) { this->init_pdm = false; return; } int pdm_size = 0; - if(!PARAM.inp.deepks_equiv) + if (!PARAM.inp.deepks_equiv) { pdm_size = (this->lmaxd * 2 + 1) * (this->lmaxd * 2 + 1); } else { int nproj = 0; - for(int il = 0; il < this->lmaxd + 1; il++) + for (int il = 0; il < this->lmaxd + 1; il++) { nproj += (2 * il + 1) * orb.Alpha[0].getNchi(il); } pdm_size = nproj * nproj; } - //if(dm.size() == 0 || dm[0].size() == 0) + // if(dm.size() == 0 || dm[0].size() == 0) //{ - // return; - //} - ModuleBase::timer::tick("LCAO_Deepks","cal_projected_DM"); + // return; + // } + ModuleBase::timer::tick("LCAO_Deepks", "cal_projected_DM"); - for(int inl=0;inlna; I0++) + Atom* atom0 = &ucell.atoms[T0]; + for (int I0 = 0; I0 < atom0->na; I0++) { - const int iat = ucell.itia2iat(T0,I0); + const int iat = ucell.itia2iat(T0, I0); const ModuleBase::Vector3 tau0 = atom0->tau[I0]; - GridD.Find_atom(ucell, atom0->tau[I0] ,T0, I0); + GridD.Find_atom(ucell, atom0->tau[I0], T0, I0); - //trace alpha orbital + // trace alpha orbital std::vector trace_alpha_row; std::vector trace_alpha_col; - if(!PARAM.inp.deepks_equiv) + if (!PARAM.inp.deepks_equiv) { - int ib=0; - for (int L0 = 0; L0 <= orb.Alpha[0].getLmax();++L0) + int ib = 0; + for (int L0 = 0; L0 <= orb.Alpha[0].getLmax(); ++L0) { - for (int N0 = 0;N0 < orb.Alpha[0].getNchi(L0);++N0) + for (int N0 = 0; N0 < orb.Alpha[0].getNchi(L0); ++N0) { const int inl = this->inl_index[T0](I0, L0, N0); - const int nm = 2*L0+1; - - for (int m1=0; m1lmaxd + 1; il++) + for (int il = 0; il < this->lmaxd + 1; il++) { nproj += (2 * il + 1) * orb.Alpha[0].getNchi(il); } - for(int iproj = 0; iproj < nproj; iproj ++) + for (int iproj = 0; iproj < nproj; iproj++) { - for(int jproj = 0; jproj < nproj; jproj ++) + for (int jproj = 0; jproj < nproj; jproj++) { trace_alpha_row.push_back(iproj); trace_alpha_col.push_back(jproj); @@ -158,115 +157,97 @@ void LCAO_Deepks::cal_projected_DM( } const int trace_alpha_size = trace_alpha_row.size(); - for (int ad1=0; ad1 tau1 = GridD.getAdjacentTau(ad1); - const Atom* atom1 = &ucell.atoms[T1]; - const int nw1_tot = atom1->nw*PARAM.globalv.npol; - const double Rcut_AO1 = orb.Phi[T1].getRcut(); - const double dist1 = (tau1-tau0).norm() * ucell.lat0; + const Atom* atom1 = &ucell.atoms[T1]; + const int nw1_tot = atom1->nw * PARAM.globalv.npol; + const double Rcut_AO1 = orb.Phi[T1].getRcut(); + const double dist1 = (tau1 - tau0).norm() * ucell.lat0; if (dist1 >= Rcut_Alpha + Rcut_AO1) { continue; } - ModuleBase::Vector3 dR1(GridD.getBox(ad1).x, GridD.getBox(ad1).y, GridD.getBox(ad1).z); - key_tuple key_1(ibt1,dR1.x,dR1.y,dR1.z); + ModuleBase::Vector3 dR1(GridD.getBox(ad1).x, GridD.getBox(ad1).y, GridD.getBox(ad1).z); if constexpr (std::is_same>::value) { - if(this->nlm_save_k[iat].find(key_1) == this->nlm_save_k[iat].end()) - { + if (this->psialpha[0]->find_matrix(iat, ibt1, dR1.x, dR1.y, dR1.z) == nullptr) + { continue; } } auto row_indexes = pv->get_indexes_row(ibt1); const int row_size = row_indexes.size(); - if(row_size == 0) - { + if (row_size == 0) + { continue; } // no possible to unexist key std::vector s_1t(trace_alpha_size * row_size); std::vector g_1dmt(trace_alpha_size * row_size, 0.0); - for(int irow=0;irow::value) - { - row_ptr = this->nlm_save[iat][ad1][row_indexes[irow]][0].data(); - } - else - { - row_ptr = this->nlm_save_k[iat][key_1][row_indexes[irow]][0].data(); - } + hamilt::BaseMatrix* row_ptr = this->psialpha[0]->find_matrix(iat, ibt1, dR1); - for(int i=0;iget_value(row_indexes[irow], trace_alpha_row[i]); } } - for (int ad2=0; ad2 < GridD.getAdjacentNum()+1 ; ad2++) - { - const int T2 = GridD.getType(ad2); - const int I2 = GridD.getNatom(ad2); - const int ibt2 = ucell.itia2iat(T2,I2); - const ModuleBase::Vector3 tau2 = GridD.getAdjacentTau(ad2); - const Atom* atom2 = &ucell.atoms[T2]; - const int nw2_tot = atom2->nw*PARAM.globalv.npol; - - ModuleBase::Vector3 dR2(GridD.getBox(ad2).x, GridD.getBox(ad2).y, GridD.getBox(ad2).z); - key_tuple key_2(ibt2,dR2.x,dR2.y,dR2.z); + for (int ad2 = 0; ad2 < GridD.getAdjacentNum() + 1; ad2++) + { + const int T2 = GridD.getType(ad2); + const int I2 = GridD.getNatom(ad2); + const int ibt2 = ucell.itia2iat(T2, I2); + const ModuleBase::Vector3 tau2 = GridD.getAdjacentTau(ad2); + const Atom* atom2 = &ucell.atoms[T2]; + const int nw2_tot = atom2->nw * PARAM.globalv.npol; + + ModuleBase::Vector3 dR2(GridD.getBox(ad2).x, GridD.getBox(ad2).y, GridD.getBox(ad2).z); if constexpr (std::is_same>::value) { - if (this->nlm_save_k[iat].find(key_2) == this->nlm_save_k[iat].end()) - { + if (this->psialpha[0]->find_matrix(iat, ibt2, dR2.x, dR2.y, dR2.z) == nullptr) + { continue; } } - const double Rcut_AO2 = orb.Phi[T2].getRcut(); - const double dist2 = (tau2-tau0).norm() * ucell.lat0; + const double Rcut_AO2 = orb.Phi[T2].getRcut(); + const double dist2 = (tau2 - tau0).norm() * ucell.lat0; - if (dist2 >= Rcut_Alpha + Rcut_AO2) - { - continue; - } + if (dist2 >= Rcut_Alpha + Rcut_AO2) + { + continue; + } auto col_indexes = pv->get_indexes_col(ibt2); const int col_size = col_indexes.size(); - if(col_size == 0) - { + if (col_size == 0) + { continue; } std::vector s_2t(trace_alpha_size * col_size); // no possible to unexist key - for(int icol=0;icol::value) - { - col_ptr = this->nlm_save[iat][ad2][col_indexes[icol]][0].data(); - } - else - { - col_ptr = this->nlm_save_k[iat][key_2][col_indexes[icol]][0].data(); - } - for(int i=0;i* col_ptr = this->psialpha[0]->find_matrix(iat, ibt2, dR2); + for (int i = 0; i < trace_alpha_size; i++) { - s_2t[i * col_size + icol] = col_ptr[trace_alpha_col[i]]; + s_2t[i * col_size + icol] = col_ptr->get_value(col_indexes[icol], trace_alpha_col[i]); } } // prepare DM_gamma from DMR - std::vector dm_array(row_size*col_size, 0.0); + std::vector dm_array(row_size * col_size, 0.0); const double* dm_current = nullptr; - for(int is=0;isget_DMR_vector().size();is++) + for (int is = 0; is < dm->get_DMR_vector().size(); is++) { int dRx, dRy, dRz; if constexpr (std::is_same::value) @@ -282,92 +263,99 @@ void LCAO_Deepks::cal_projected_DM( dRz = dR2.z - dR1.z; } auto* tmp = dm->get_DMR_vector()[is]->find_matrix(ibt1, ibt2, dRx, dRy, dRz); - if(tmp == nullptr) + if (tmp == nullptr) { - // in case of no deepks_scf but out_deepks_label, size of DMR would mismatch with deepks-orbitals - dm_current = nullptr; + // in case of no deepks_scf but out_deepks_label, size of DMR would mismatch with + // deepks-orbitals + dm_current = nullptr; break; } dm_current = tmp->get_pointer(); - for(int idm=0;idminl_index[T0](I0, L0, N0); - const int nm = 2*L0+1; - - for (int m1=0; m1lmaxd + 1; il++) + for (int il = 0; il < this->lmaxd + 1; il++) { nproj += (2 * il + 1) * orb.Alpha[0].getNchi(il); } - for(int iproj = 0; iproj < nproj; iproj ++) + for (int iproj = 0; iproj < nproj; iproj++) { - for(int jproj = 0; jproj < nproj; jproj ++) + for (int jproj = 0; jproj < nproj; jproj++) { - pdm[iat][iproj * nproj + jproj] += - ddot_(&row_size, g_1dmt.data()+index*row_size, &inc, s_1t.data()+index*row_size, &inc); - index ++; + pdm[iat][iproj * nproj + jproj] += ddot_(&row_size, + g_1dmt.data() + index * row_size, + &inc, + s_1t.data() + index * row_size, + &inc); + index++; } } } - }//ad1 - }//I0 - }//T0 + } // ad1 + } // I0 + } // T0 #ifdef __MPI - allsum_deepks(this->inlmax,pdm_size,this->pdm); + allsum_deepks(this->inlmax, pdm_size, this->pdm); #endif - ModuleBase::timer::tick("LCAO_Deepks","cal_projected_DM"); + ModuleBase::timer::tick("LCAO_Deepks", "cal_projected_DM"); return; } @@ -377,10 +365,10 @@ void LCAO_Deepks::check_projected_dm() std::ofstream ofs(file_projdm.c_str()); const int pdm_size = (this->lmaxd * 2 + 1) * (this->lmaxd * 2 + 1); - ofs<( - const elecstate::DensityMatrix* dm, - const UnitCell& ucell, - const LCAO_Orbitals& orb, - const Grid_Driver& GridD); +template void LCAO_Deepks::cal_projected_DM(const elecstate::DensityMatrix* dm, + const UnitCell& ucell, + const LCAO_Orbitals& orb, + const Grid_Driver& GridD); template void LCAO_Deepks::cal_projected_DM>( - const elecstate::DensityMatrix, double>* dm, - const UnitCell& ucell, - const LCAO_Orbitals& orb, + const elecstate::DensityMatrix, double>* dm, + const UnitCell& ucell, + const LCAO_Orbitals& orb, const Grid_Driver& GridD); #endif diff --git a/source/module_hamilt_lcao/module_deepks/LCAO_deepks_psialpha.cpp b/source/module_hamilt_lcao/module_deepks/LCAO_deepks_psialpha.cpp index dcd8440eea..71704eba3a 100644 --- a/source/module_hamilt_lcao/module_deepks/LCAO_deepks_psialpha.cpp +++ b/source/module_hamilt_lcao/module_deepks/LCAO_deepks_psialpha.cpp @@ -13,7 +13,74 @@ #include "module_base/vector3.h" #include "module_parameter/parameter.h" -void LCAO_Deepks::build_psialpha(const bool& calc_deri, +void LCAO_Deepks::allocate_psialpha(const bool& cal_deri, + const UnitCell& ucell, + const LCAO_Orbitals& orb, + const Grid_Driver& GridD) +{ + ModuleBase::TITLE("LCAO_Deepks", "allocate_psialpha"); + + this->psialpha.resize(cal_deri ? 4 : 1); + + this->psialpha[0] = new hamilt::HContainer(pv); // psialpha is always real + // Do not use fix_gamma, since it may find wrong matrix for gamma-only case in DeePKS + + // cutoff for alpha is same for all types of atoms + const double Rcut_Alpha = orb.Alpha[0].getRcut(); + + // Total number of atomic basis of projected orbitals + int nw_alpha = 0; + for (int l = 0; l <= orb.Alpha[0].getLmax(); l++) + { + nw_alpha += orb.Alpha[0].getNchi(l) * (2 * l + 1); + } + + for (int T0 = 0; T0 < ucell.ntype; T0++) + { + Atom* atom0 = &ucell.atoms[T0]; + for (int I0 = 0; I0 < atom0->na; I0++) + { + const int iat = ucell.itia2iat(T0, I0); + auto tau_a = atom0->tau[I0]; + GridD.Find_atom(ucell, tau_a, T0, I0); + for (int ad = 0; ad < GridD.getAdjacentNum() + 1; ++ad) + { + const int T1 = GridD.getType(ad); + const int I1 = GridD.getNatom(ad); + const int ibt = ucell.itia2iat(T1, I1); + auto tau_b = GridD.getAdjacentTau(ad); + const int iw1_start = ucell.itiaiw2iwt(T1, I1, 0); + const Atom* atom1 = &ucell.atoms[T1]; + auto R_index = GridD.getBox(ad); + + const double Rcut_AO1 = orb.Phi[T1].getRcut(); + const double dist = (tau_b - tau_a).norm() * ucell.lat0; + if (dist > Rcut_Alpha + Rcut_AO1) + { + continue; + } + + hamilt::AtomPair pair(iat, ibt, R_index, pv); + // Notice: in AtomPair, the usage is set_size(ncol, nrow) + pair.set_size(nw_alpha, atom1->nw * PARAM.globalv.npol); + this->psialpha[0]->insert_pair(pair); + } + } + } + + this->psialpha[0]->allocate(nullptr, true); + // whether to calculate the derivative of psialpha + if (cal_deri) + { + for (int i = 1; i < 4; ++i) + { + this->psialpha[i] = new hamilt::HContainer(*this->psialpha[0], nullptr); // copy constructor + } + } + return; +} + +void LCAO_Deepks::build_psialpha(const bool& cal_deri, const UnitCell& ucell, const LCAO_Orbitals& orb, const Grid_Driver& GridD, @@ -25,14 +92,12 @@ void LCAO_Deepks::build_psialpha(const bool& calc_deri, // cutoff for alpha is same for all types of atoms const double Rcut_Alpha = orb.Alpha[0].getRcut(); - int job; - if (!calc_deri) + // Total number of atomic basis of projected orbitals + // nw_alpha will be used frequently, better to add a function in Numerical Orbital to get it + int nw_alpha = 0; + for (int l = 0; l <= orb.Alpha[0].getLmax(); l++) { - job = 0; - } - else - { - job = 1; + nw_alpha += orb.Alpha[0].getNchi(l) * (2 * l + 1); } for (int T0 = 0; T0 < ucell.ntype; T0++) @@ -40,97 +105,98 @@ void LCAO_Deepks::build_psialpha(const bool& calc_deri, Atom* atom0 = &ucell.atoms[T0]; for (int I0 = 0; I0 < atom0->na; I0++) { - // iat: atom index on which |alpha> is located const int iat = ucell.itia2iat(T0, I0); - const ModuleBase::Vector3 tau0 = atom0->tau[I0]; - GridD.Find_atom(ucell, atom0->tau[I0], T0, I0); - - if (PARAM.globalv.gamma_only_local) - { - this->nlm_save[iat].resize(GridD.getAdjacentNum() + 1); - } - // outermost loop : find all adjacent atoms + auto tau_a = atom0->tau[I0]; + GridD.Find_atom(ucell, tau_a, T0, I0); for (int ad = 0; ad < GridD.getAdjacentNum() + 1; ++ad) { const int T1 = GridD.getType(ad); const int I1 = GridD.getNatom(ad); const int ibt = ucell.itia2iat(T1, I1); - const double Rcut_AO1 = orb.Phi[T1].getRcut(); - - const ModuleBase::Vector3 tau1 = GridD.getAdjacentTau(ad); + auto tau_b = GridD.getAdjacentTau(ad); + const int iw1_start = ucell.itiaiw2iwt(T1, I1, 0); const Atom* atom1 = &ucell.atoms[T1]; + auto R_index = GridD.getBox(ad); + int R[3] = {R_index.x, R_index.y, R_index.z}; - std::unordered_map>> nlm_cur; - if (PARAM.globalv.gamma_only_local) - { - this->nlm_save[iat][ad].clear(); - } - else + const double Rcut_AO1 = orb.Phi[T1].getRcut(); + const double dist = (tau_b - tau_a).norm() * ucell.lat0; + if (dist > Rcut_Alpha + Rcut_AO1) { - nlm_cur.clear(); + continue; } - const double dist1 = (tau1 - tau0).norm() * ucell.lat0; - - if (dist1 > Rcut_Alpha + Rcut_AO1) + double* data_pointer = this->psialpha[0]->data(iat, ibt, R); + std::vector grad_pointer(3); + if (cal_deri) { - continue; + for (int i = 0; i < 3; ++i) + { + grad_pointer[i] = this->psialpha[i + 1]->data(iat, ibt, R); + } } + // Calculate psialpha + // Get all indexes of atomic basis on the neighbour atom in this core + // Notice that atom pair (a,b) and (b,a) are different when the former is changed to projected orbitals + // So we need both row and col indexes for complete calculation auto all_indexes = pv->get_indexes_row(ibt); auto col_indexes = pv->get_indexes_col(ibt); - // insert col_indexes into all_indexes to get universal set with no repeat elements all_indexes.insert(all_indexes.end(), col_indexes.begin(), col_indexes.end()); std::sort(all_indexes.begin(), all_indexes.end()); - all_indexes.erase(std::unique(all_indexes.begin(), all_indexes.end()), all_indexes.end()); + all_indexes.erase(std::unique(all_indexes.begin(), all_indexes.end()), all_indexes.end()); // for unique - // middle loop : all atomic basis on the adjacent atom ad + // inner loop : all atomic basis on the adjacent atom ad for (int iw1l = 0; iw1l < all_indexes.size(); iw1l += PARAM.globalv.npol) { const int iw1 = all_indexes[iw1l] / PARAM.globalv.npol; + std::vector> nlm; - // 2D, dim 0 contains the overlap + // 2D, dim 0 contains the overlap // dim 1-3 contains the gradient of overlap - int L1 = atom1->iw2l[iw1]; - int N1 = atom1->iw2n[iw1]; - int m1 = atom1->iw2m[iw1]; + const int L1 = atom1->iw2l[iw1]; + const int N1 = atom1->iw2n[iw1]; + const int m1 = atom1->iw2m[iw1]; // convert m (0,1,...2l) to M (-l, -l+1, ..., l-1, l) - int M1 = (m1 % 2 == 0) ? -m1 / 2 : (m1 + 1) / 2; + const int M1 = (m1 % 2 == 0) ? -m1 / 2 : (m1 + 1) / 2; - ModuleBase::Vector3 dtau = ucell.atoms[T0].tau[I0] - tau1; - overlap_orb_alpha.snap(T1, L1, N1, M1, 0, dtau * ucell.lat0, calc_deri, nlm); + const ModuleBase::Vector3 dtau = tau_a - tau_b; + const int T0_fixed = 0; // All the projected orbitals are the same, use 0 here + overlap_orb_alpha.snap(T1, L1, N1, M1, T0_fixed, dtau * ucell.lat0, cal_deri, nlm); - if (PARAM.globalv.gamma_only_local) - { - this->nlm_save[iat][ad].insert({all_indexes[iw1l], nlm}); - } - else + const int index_begin = all_indexes[iw1l] * nw_alpha * PARAM.globalv.npol; + for (int iw0 = 0; iw0 < nw_alpha; iw0++) { - nlm_cur.insert({all_indexes[iw1l], nlm}); + data_pointer[index_begin + iw0] = nlm[0][iw0]; + if (cal_deri) + { + grad_pointer[0][index_begin + iw0] = nlm[1][iw0]; + grad_pointer[1][index_begin + iw0] = nlm[2][iw0]; + grad_pointer[2][index_begin + iw0] = nlm[3][iw0]; + } if (PARAM.globalv.npol == 2) - nlm_cur.insert({all_indexes[iw1l + 1], nlm}); + { + data_pointer[index_begin + iw0 + nw_alpha] = nlm[0][iw0]; + if (cal_deri) + { + grad_pointer[0][index_begin + iw0 + nw_alpha] = nlm[1][iw0]; + grad_pointer[1][index_begin + iw0 + nw_alpha] = nlm[2][iw0]; + grad_pointer[2][index_begin + iw0 + nw_alpha] = nlm[3][iw0]; + } + } } } // end iw - - if (!PARAM.globalv.gamma_only_local) - { - const int rx = GridD.getBox(ad).x; - const int ry = GridD.getBox(ad).y; - const int rz = GridD.getBox(ad).z; - key_tuple key_1(ibt, rx, ry, rz); - this->nlm_save_k[iat][key_1] = nlm_cur; - } - } // end ad - } // end I0 - } // end T0 + } + } + } ModuleBase::timer::tick("LCAO_Deepks", "build_psialpha"); return; } -void LCAO_Deepks::check_psialpha(const bool& calc_deri, +void LCAO_Deepks::check_psialpha(const bool& cal_deri, const UnitCell& ucell, const LCAO_Orbitals& orb, const Grid_Driver& GridD) @@ -140,14 +206,10 @@ void LCAO_Deepks::check_psialpha(const bool& calc_deri, const double Rcut_Alpha = orb.Alpha[0].getRcut(); // same for all types of atoms - int job; - if (!calc_deri) + int nw_alpha = 0; + for (int l = 0; l <= orb.Alpha[0].getLmax(); l++) { - job = 0; - } - else - { - job = 1; + nw_alpha += orb.Alpha[0].getNchi(l) * (2 * l + 1); } std::ofstream ofs("psialpha.dat"); @@ -198,24 +260,33 @@ void LCAO_Deepks::check_psialpha(const bool& calc_deri, continue; } - int ibt, rx, ry, rz; - if (PARAM.globalv.gamma_only_local) + int ibt = ucell.itia2iat(T1, I1); + int R[3]; + + ofs << "ad : " << ad << " " << dist1 << std::endl; + ofs_x << "ad : " << ad << " " << dist1 << std::endl; + ofs_y << "ad : " << ad << " " << dist1 << std::endl; + ofs_z << "ad : " << ad << " " << dist1 << std::endl; + + R[0] = GridD.getBox(ad).x; + R[1] = GridD.getBox(ad).y; + R[2] = GridD.getBox(ad).z; + + if (!PARAM.globalv.gamma_only_local) { - ofs << "ad : " << ad << " " << dist1 << std::endl; - ofs_x << "ad : " << ad << " " << dist1 << std::endl; - ofs_y << "ad : " << ad << " " << dist1 << std::endl; - ofs_z << "ad : " << ad << " " << dist1 << std::endl; + ofs << "R : " << R[0] << " " << R[1] << " " << R[2] << std::endl; + ofs_x << "R : " << R[0] << " " << R[1] << " " << R[2] << std::endl; + ofs_y << "R : " << R[0] << " " << R[1] << " " << R[2] << std::endl; + ofs_z << "R : " << R[0] << " " << R[1] << " " << R[2] << std::endl; } - else + + const double* data_pointer = this->psialpha[0]->data(iat, ibt, R); + std::vector grad_pointer(3, nullptr); + if (cal_deri) { - ibt = ucell.itia2iat(T1, I1); - rx = GridD.getBox(ad).x; - ry = GridD.getBox(ad).y; - rz = GridD.getBox(ad).z; - ofs << "key : " << ibt << " " << rx << " " << ry << " " << rz << std::endl; - ofs_x << "key : " << ibt << " " << rx << " " << ry << " " << rz << std::endl; - ofs_y << "key : " << ibt << " " << rx << " " << ry << " " << rz << std::endl; - ofs_z << "key : " << ibt << " " << rx << " " << ry << " " << rz << std::endl; + grad_pointer[0] = this->psialpha[1]->data(iat, ibt, R); + grad_pointer[1] = this->psialpha[2]->data(iat, ibt, R); + grad_pointer[2] = this->psialpha[3]->data(iat, ibt, R); } for (int iw1 = 0; iw1 < nw1_tot; ++iw1) @@ -229,40 +300,30 @@ void LCAO_Deepks::check_psialpha(const bool& calc_deri, const int iw2_local = pv->global2local_col(iw1_all); if (iw1_local < 0 && iw2_local < 0) continue; - const int iw1_0 = iw1 / PARAM.globalv.npol; - - std::vector> nlm; - - if (PARAM.globalv.gamma_only_local) - { - nlm = this->nlm_save[iat][ad][iw1]; - } - else - { - key_tuple key_1(ibt, rx, ry, rz); - nlm = this->nlm_save_k[iat][key_1][iw1]; - } - for (int ind = 0; ind < nlm[0].size(); ind++) + for (int ind = 0; ind < nw_alpha; ind++) { - ofs << nlm[0][ind] << " "; + ofs << data_pointer[iw1 * nw_alpha + ind] << " "; + if (cal_deri) + { + ofs_x << grad_pointer[0][iw1 * nw_alpha + ind] << " "; + ofs_y << grad_pointer[1][iw1 * nw_alpha + ind] << " "; + ofs_z << grad_pointer[2][iw1 * nw_alpha + ind] << " "; + } + // 6 numbers per line if (ind % 6 == 5) - ofs << "\n"; - if (calc_deri) { - ofs_x << nlm[1][ind] << " "; - if (ind % 6 == 5) + ofs << "\n"; + if (cal_deri) + { ofs_x << "\n"; - ofs_y << nlm[2][ind] << " "; - if (ind % 6 == 5) ofs_y << "\n"; - ofs_z << nlm[3][ind] << " "; - if (ind % 6 == 5) ofs_z << "\n"; + } } } ofs << std::endl; - if (calc_deri) + if (cal_deri) { ofs_x << std::endl; ofs_y << std::endl; @@ -276,4 +337,5 @@ void LCAO_Deepks::check_psialpha(const bool& calc_deri, ModuleBase::timer::tick("LCAO_Deepks", "check_psialpha"); return; } + #endif diff --git a/source/module_hamilt_lcao/module_deepks/LCAO_deepks_torch.cpp b/source/module_hamilt_lcao/module_deepks/LCAO_deepks_torch.cpp index 5dbec1d76d..2e55216091 100644 --- a/source/module_hamilt_lcao/module_deepks/LCAO_deepks_torch.cpp +++ b/source/module_hamilt_lcao/module_deepks/LCAO_deepks_torch.cpp @@ -1,17 +1,17 @@ -//This file contains interfaces with libtorch, -//including loading of model and calculating gradients -//as well as subroutines that prints the results for checking - -//The file contains 8 subroutines: -// cal_gvepsl : gvepsl is used for training with stress label, which is derivative of -// descriptors wrt strain tensor, calculated by -// d(des)/d\epsilon_{ab} = d(pdm)/d\epsilon_{ab} * d(des)/d(pdm) = gdm_epsl * gvdm -// using einsum -// cal_gvdm : d(des)/d(pdm) -// calculated using torch::autograd::grad -// load_model : loads model for applying V_delta -// prepare_psialpha : prepare psialpha for outputting npy file -// prepare_gevdm : prepare gevdm for outputting npy file +// This file contains interfaces with libtorch, +// including loading of model and calculating gradients +// as well as subroutines that prints the results for checking + +// The file contains 8 subroutines: +// cal_gvepsl : gvepsl is used for training with stress label, which is derivative of +// descriptors wrt strain tensor, calculated by +// d(des)/d\epsilon_{ab} = d(pdm)/d\epsilon_{ab} * d(des)/d(pdm) = gdm_epsl * gvdm +// using einsum +// cal_gvdm : d(des)/d(pdm) +// calculated using torch::autograd::grad +// load_model : loads model for applying V_delta +// prepare_psialpha : prepare psialpha for outputting npy file +// prepare_gevdm : prepare gevdm for outputting npy file #ifdef __DEEPKS @@ -25,48 +25,52 @@ #include "module_parameter/parameter.h" // calculates stress of descriptors from gradient of projected density matrices -void LCAO_Deepks::cal_gvepsl(const int nat) { +void LCAO_Deepks::cal_gvepsl(const int nat) +{ ModuleBase::TITLE("LCAO_Deepks", "cal_gvepsl"); // preconditions this->cal_gvdm(nat); - if (!gdmepsl_vector.empty()) { + if (!gdmepsl_vector.empty()) + { gdmepsl_vector.erase(gdmepsl_vector.begin(), gdmepsl_vector.end()); } // gdmr_vector : nat(derivative) * 3 * inl(projector) * nm * nm - if (GlobalV::MY_RANK == 0) { + if (GlobalV::MY_RANK == 0) + { // make gdmx as tensor int nlmax = this->inlmax / nat; - for (int nl = 0; nl < nlmax; ++nl) { + for (int nl = 0; nl < nlmax; ++nl) + { std::vector bmmv; // for (int ipol=0;ipol<6;++ipol) //{ // std::vector xmmv; - for (int i = 0; i < 6; ++i) { + for (int i = 0; i < 6; ++i) + { std::vector ammv; - for (int iat = 0; iat < nat; ++iat) { + for (int iat = 0; iat < nat; ++iat) + { int inl = iat * nlmax + nl; int nm = 2 * this->inl_l[inl] + 1; std::vector mmv; - for (int m1 = 0; m1 < nm; ++m1) { - for (int m2 = 0; m2 < nm; ++m2) { + for (int m1 = 0; m1 < nm; ++m1) + { + for (int m2 = 0; m2 < nm; ++m2) + { mmv.push_back(this->gdm_epsl[i][inl][m1 * nm + m2]); } } // nm^2 torch::Tensor mm - = torch::tensor( - mmv, - torch::TensorOptions().dtype(torch::kFloat64)) - .reshape({nm, nm}); // nm*nm + = torch::tensor(mmv, torch::TensorOptions().dtype(torch::kFloat64)).reshape({nm, nm}); // nm*nm ammv.push_back(mm); - } - torch::Tensor bmm = torch::stack(ammv, 0); // nat*nm*nm - bmmv.push_back(bmm); - } + } + torch::Tensor bmm = torch::stack(ammv, 0); // nat*nm*nm + bmmv.push_back(bmm); + } // torch::Tensor bmm = torch::stack(xmmv, 0); //3*nat*nm*nm // bmmv.push_back(bmm); //} - this->gdmepsl_vector.push_back( - torch::stack(bmmv, 0)); // nbt*3*nat*nm*nm + this->gdmepsl_vector.push_back(torch::stack(bmmv, 0)); // nbt*3*nat*nm*nm } assert(this->gdmepsl_vector.size() == nlmax); @@ -75,46 +79,45 @@ void LCAO_Deepks::cal_gvepsl(const int nat) { // gevdm_vector : a:inl * v:nm (descriptor) * m:nm (pdm, dim1) * n:nm // (pdm, dim2) gvepsl_vector : b:npol * a:inl(projector) * // m:nm(descriptor) - std::vector gvepsl_vector; - for (int nl = 0; nl < nlmax; ++nl) { - gvepsl_vector.push_back( - at::einsum("bamn, avmn->bav", - {this->gdmepsl_vector[nl], this->gevdm_vector[nl]})); - } - - // cat nv-> \sum_nl(nv) = \sum_nl(nm_nl)=des_per_atom - // concatenate index a(inl) and m(nm) - this->gvepsl_tensor = torch::cat(gvepsl_vector, -1); - assert(this->gvepsl_tensor.size(0) == 6); - assert(this->gvepsl_tensor.size(1) == nat); - assert(this->gvepsl_tensor.size(2) == this->des_per_atom); - } - - return; -} + std::vector gvepsl_vector; + for (int nl = 0; nl < nlmax; ++nl) + { + gvepsl_vector.push_back(at::einsum("bamn, avmn->bav", {this->gdmepsl_vector[nl], this->gevdm_vector[nl]})); + } + // cat nv-> \sum_nl(nv) = \sum_nl(nm_nl)=des_per_atom + // concatenate index a(inl) and m(nm) + this->gvepsl_tensor = torch::cat(gvepsl_vector, -1); + assert(this->gvepsl_tensor.size(0) == 6); + assert(this->gvepsl_tensor.size(1) == nat); + assert(this->gvepsl_tensor.size(2) == this->des_per_atom); + } + + return; +} // dDescriptor / dprojected density matrix -void LCAO_Deepks::cal_gvdm(const int nat) { +void LCAO_Deepks::cal_gvdm(const int nat) +{ ModuleBase::TITLE("LCAO_Deepks", "cal_gvdm"); - if (!gevdm_vector.empty()) { + if (!gevdm_vector.empty()) + { gevdm_vector.erase(gevdm_vector.begin(), gevdm_vector.end()); } // cal gevdm(d(EigenValue(D))/dD) int nlmax = inlmax / nat; - for (int nl = 0; nl < nlmax; ++nl) { + for (int nl = 0; nl < nlmax; ++nl) + { std::vector avmmv; - for (int iat = 0; iat < nat; ++iat) { + for (int iat = 0; iat < nat; ++iat) + { int inl = iat * nlmax + nl; int nm = 2 * this->inl_l[inl] + 1; // repeat each block for nm times in an additional dimension - torch::Tensor tmp_x - = this->pdm_tensor[inl].reshape({nm, nm}).unsqueeze(0).repeat( - {nm, 1, 1}); + torch::Tensor tmp_x = this->pdm_tensor[inl].reshape({nm, nm}).unsqueeze(0).repeat({nm, 1, 1}); // torch::Tensor tmp_y = std::get<0>(torch::symeig(tmp_x, true)); torch::Tensor tmp_y = std::get<0>(torch::linalg::eigh(tmp_x, "U")); - torch::Tensor tmp_yshell - = torch::eye(nm, torch::TensorOptions().dtype(torch::kFloat64)); + torch::Tensor tmp_yshell = torch::eye(nm, torch::TensorOptions().dtype(torch::kFloat64)); std::vector tmp_rpt; // repeated-pdm-tensor (x) std::vector tmp_rdt; // repeated-d-tensor (y) std::vector tmp_gst; // gvx-shell @@ -122,13 +125,12 @@ void LCAO_Deepks::cal_gvdm(const int nat) { tmp_rdt.push_back(tmp_y); tmp_gst.push_back(tmp_yshell); std::vector tmp_res; - tmp_res - = torch::autograd::grad(tmp_rdt, - tmp_rpt, - tmp_gst, - false, - false, - /*allow_unused*/ true); // nm(v)**nm*nm + tmp_res = torch::autograd::grad(tmp_rdt, + tmp_rpt, + tmp_gst, + false, + false, + /*allow_unused*/ true); // nm(v)**nm*nm avmmv.push_back(tmp_res[0]); } torch::Tensor avmm = torch::stack(avmmv, 0); // nat*nv**nm*nm @@ -138,12 +140,15 @@ void LCAO_Deepks::cal_gvdm(const int nat) { return; } -void LCAO_Deepks::load_model(const std::string& deepks_model) { +void LCAO_Deepks::load_model(const std::string& deepks_model) +{ ModuleBase::TITLE("LCAO_Deepks", "load_model"); - try { + try + { this->module = torch::jit::load(deepks_model); - } catch (const c10::Error& e) + } + catch (const c10::Error& e) { std::cerr << "error loading the model" << std::endl; @@ -163,136 +168,127 @@ void LCAO_Deepks::prepare_psialpha(const int nlocal, const Grid_Driver& GridD) { ModuleBase::TITLE("LCAO_Deepks", "prepare_psialpha"); - int nlmax = this->inlmax/nat; - int mmax = 2*this->lmaxd+1; + int nlmax = this->inlmax / nat; + int mmax = 2 * this->lmaxd + 1; if constexpr (std::is_same::value) { - this->psialpha_tensor = torch::zeros({ nat, nlmax, nks, nlocal, mmax }, torch::kFloat64); // support gamma-only + this->psialpha_tensor = torch::zeros({nat, nlmax, nks, nlocal, mmax}, torch::kFloat64); // support gamma-only } else { - this->psialpha_tensor = torch::zeros({ nat, nlmax, nks, nlocal, mmax }, torch::kComplexDouble); // support multi-k + this->psialpha_tensor = torch::zeros({nat, nlmax, nks, nlocal, mmax}, torch::kComplexDouble); // support multi-k } - //cutoff for alpha is same for all types of atoms + // cutoff for alpha is same for all types of atoms const double Rcut_Alpha = orb.Alpha[0].getRcut(); for (int T0 = 0; T0 < ucell.ntype; T0++) { - Atom* atom0 = &ucell.atoms[T0]; - for (int I0 =0; I0< atom0->na; I0++) + Atom* atom0 = &ucell.atoms[T0]; + for (int I0 = 0; I0 < atom0->na; I0++) { - //iat: atom index on which |alpha> is located - const int iat = ucell.itia2iat(T0,I0); - const ModuleBase::Vector3 tau0 = atom0->tau[I0]; - GridD.Find_atom(ucell, atom0->tau[I0] ,T0, I0); + // iat: atom index on which |alpha> is located + const int iat = ucell.itia2iat(T0, I0); + const ModuleBase::Vector3 tau0 = atom0->tau[I0]; + GridD.Find_atom(ucell, atom0->tau[I0], T0, I0); - //outermost loop : find all adjacent atoms - for (int ad=0; ad tau1 = GridD.getAdjacentTau(ad); - const Atom* atom1 = &ucell.atoms[T1]; - const int nw1_tot = atom1->nw*PARAM.globalv.npol; + const Atom* atom1 = &ucell.atoms[T1]; + const int nw1_tot = atom1->nw * PARAM.globalv.npol; - const double dist1 = (tau1-tau0).norm() * ucell.lat0; + const double dist1 = (tau1 - tau0).norm() * ucell.lat0; - if (dist1 > Rcut_Alpha + Rcut_AO1) - { - continue; - } - - ModuleBase::Vector3 dR(GridD.getBox(ad).x, - GridD.getBox(ad).y, - GridD.getBox(ad).z); + if (dist1 > Rcut_Alpha + Rcut_AO1) + { + continue; + } - key_tuple key(ibt, dR.x, dR.y, dR.z); + ModuleBase::Vector3 dR(GridD.getBox(ad).x, GridD.getBox(ad).y, GridD.getBox(ad).z); if constexpr (std::is_same>::value) { - if (this->nlm_save_k[iat].find(key) - == this->nlm_save_k[iat].end()) + if (this->psialpha[0]->find_matrix(iat, ibt, dR.x, dR.y, dR.z) == nullptr) { continue; } } - //middle loop : all atomic basis on the adjacent atom ad - for (int iw1=0; iw1global2local_row(iw1_all); - const int iw2_local = pv->global2local_col(iw1_all); - if(iw1_local < 0 || iw2_local < 0) {continue;} - const int iw1_0 = iw1/PARAM.globalv.npol; - std::vector nlm; - if constexpr (std::is_same::value) - { - nlm = this->nlm_save[iat][ad][iw1][0]; - } - else + // middle loop : all atomic basis on the adjacent atom ad + for (int iw1 = 0; iw1 < nw1_tot; ++iw1) + { + const int iw1_all = start1 + iw1; + const int iw1_local = pv->global2local_row(iw1_all); + const int iw2_local = pv->global2local_col(iw1_all); + if (iw1_local < 0 || iw2_local < 0) { - nlm = this->nlm_save_k[iat][key][iw1][0]; + continue; } + hamilt::BaseMatrix* overlap = psialpha[0]->find_matrix(iat, ibt, dR); - for (int ik = 0; ik kphase = std::complex(1.0, 0.0); if constexpr (std::is_same>::value) { - const double arg = - (kvec_d[ik] * dR) * ModuleBase::TWO_PI; + const double arg = -(kvec_d[ik] * ModuleBase::Vector3(dR)) * ModuleBase::TWO_PI; double sinp, cosp; ModuleBase::libm::sincos(arg, &sinp, &cosp); kphase = std::complex(cosp, sinp); } - int ib=0; - int nl=0; - for (int L0 = 0; L0 <= orb.Alpha[0].getLmax();++L0) + int ib = 0; + int nl = 0; + for (int L0 = 0; L0 <= orb.Alpha[0].getLmax(); ++L0) { - for (int N0 = 0;N0 < orb.Alpha[0].getNchi(L0);++N0) + for (int N0 = 0; N0 < orb.Alpha[0].getNchi(L0); ++N0) { - const int nm = 2*L0+1; - - for (int m1=0; m1::value) { - this->psialpha_tensor[iat][nl][ik][iw1_all][m1] = nlm[ib+m1]; + this->psialpha_tensor[iat][nl][ik][iw1_all][m1] + = overlap->get_value(iw1, ib + m1); } else { - std::complex nlm_phase = nlm[ib + m1] * kphase; - torch::Tensor nlm_tensor = torch::tensor({nlm_phase.real(), nlm_phase.imag()}, torch::kDouble); + std::complex nlm_phase = overlap->get_value(iw1, ib + m1) * kphase; + torch::Tensor nlm_tensor + = torch::tensor({nlm_phase.real(), nlm_phase.imag()}, torch::kDouble); torch::Tensor complex_tensor = torch::complex(nlm_tensor[0], nlm_tensor[1]); this->psialpha_tensor[iat][nl][ik][iw1_all][m1] = complex_tensor; } } - ib+=nm; + ib += nm; nl++; } - } - }//end ik - }//end iw - }//end ad - }//end I0 - }//end T0 + } + } // end ik + } // end iw + } // end ad + } // end I0 + } // end T0 #ifdef __MPI TK msg[mmax]; - for(int iat=0; iat< nat ; iat++) + for (int iat = 0; iat < nat; iat++) { - for(int nl = 0; nl < nlmax; nl++) + for (int nl = 0; nl < nlmax; nl++) { - for(int ik = 0; ik < nks; ik++) + for (int ik = 0; ik < nks; ik++) { - for(int mu = 0; mu < nlocal ; mu++) + for (int mu = 0; mu < nlocal; mu++) { - for(int m=0;m::value) { @@ -301,11 +297,12 @@ void LCAO_Deepks::prepare_psialpha(const int nlocal, else { auto tensor_value = this->psialpha_tensor.index({iat, nl, ik, mu, m}); - msg[m] = std::complex(torch::real(tensor_value).item(), torch::imag(tensor_value).item()); + msg[m] = std::complex(torch::real(tensor_value).item(), + torch::imag(tensor_value).item()); } } - Parallel_Reduce::reduce_all(msg,mmax); - for(int m=0;m::value) { @@ -323,7 +320,7 @@ void LCAO_Deepks::prepare_psialpha(const int nlocal, } } -#endif +#endif } template @@ -331,30 +328,32 @@ void LCAO_Deepks::check_vdp_psialpha(const int nat, const int nks, const int nlo { std::ofstream ofs("vdp_psialpha.dat"); ofs << std::setprecision(10); - - int nlmax = this->inlmax/nat; - int mmax = 2*this->lmaxd+1; - for(int iat=0; iat< nat ; iat++) + + int nlmax = this->inlmax / nat; + int mmax = 2 * this->lmaxd + 1; + for (int iat = 0; iat < nat; iat++) { - for(int nl = 0; nl < nlmax; nl++) + for (int nl = 0; nl < nlmax; nl++) { - for (int iks = 0; iks < nks ; iks++) + for (int iks = 0; iks < nks; iks++) { - for(int mu = 0; mu < nlocal ; mu++) + for (int mu = 0; mu < nlocal; mu++) { - for(int m=0; m< mmax; m++) + for (int m = 0; m < mmax; m++) { if constexpr (std::is_same::value) { - ofs << this->psialpha_tensor.index({ iat, nl, iks, mu, m }).item().toDouble() << " "; + ofs << this->psialpha_tensor.index({iat, nl, iks, mu, m}).item().toDouble() << " "; } else { auto tensor_value = this->psialpha_tensor.index({iat, nl, iks, mu, m}); - ofs << std::complex(torch::real(tensor_value).item(), torch::imag(tensor_value).item()) << " "; + ofs << std::complex(torch::real(tensor_value).item(), + torch::imag(tensor_value).item()) + << " "; } } - } + } } ofs << std::endl; } @@ -362,29 +361,27 @@ void LCAO_Deepks::check_vdp_psialpha(const int nat, const int nks, const int nlo ofs.close(); } -void LCAO_Deepks::prepare_gevdm( - const int nat, - const LCAO_Orbitals &orb) +void LCAO_Deepks::prepare_gevdm(const int nat, const LCAO_Orbitals& orb) { - int nlmax = this->inlmax/nat; - int mmax = 2*this->lmaxd+1; - this->gevdm_tensor = torch::zeros({ nat, nlmax, mmax, mmax, mmax}, torch::TensorOptions().dtype(torch::kFloat64)); + int nlmax = this->inlmax / nat; + int mmax = 2 * this->lmaxd + 1; + this->gevdm_tensor = torch::zeros({nat, nlmax, mmax, mmax, mmax}, torch::TensorOptions().dtype(torch::kFloat64)); this->cal_gvdm(nat); - int nl=0; - for (int L0 = 0; L0 <= orb.Alpha[0].getLmax();++L0) + int nl = 0; + for (int L0 = 0; L0 <= orb.Alpha[0].getLmax(); ++L0) { - for (int N0 = 0;N0 < orb.Alpha[0].getNchi(L0);++N0) + for (int N0 = 0; N0 < orb.Alpha[0].getNchi(L0); ++N0) { for (int iat = 0; iat < nat; iat++) { - const int nm = 2*L0+1; - for (int v=0; vgevdm_tensor[iat][nl][v][m][n] = this->gevdm_vector[nl][iat][v][m][n]; } @@ -402,19 +399,19 @@ void LCAO_Deepks::check_vdp_gevdm(const int nat) std::ofstream ofs("vdp_gevdm.dat"); ofs << std::setprecision(10); - int nlmax = this->inlmax/nat; - int mmax = 2*this->lmaxd+1; - for(int iat=0; iat< nat ; iat++) + int nlmax = this->inlmax / nat; + int mmax = 2 * this->lmaxd + 1; + for (int iat = 0; iat < nat; iat++) { - for(int nl = 0; nl < nlmax; nl++) + for (int nl = 0; nl < nlmax; nl++) { - for(int v=0; vgevdm_tensor.index({ iat,nl, v, m, n }).item().toDouble() << " "; + ofs << this->gevdm_tensor.index({iat, nl, v, m, n}).item().toDouble() << " "; } } } diff --git a/source/module_hamilt_lcao/module_deepks/cal_gdmx.cpp b/source/module_hamilt_lcao/module_deepks/cal_gdmx.cpp index b83e40f2ec..ba5f7528cf 100644 --- a/source/module_hamilt_lcao/module_deepks/cal_gdmx.cpp +++ b/source/module_hamilt_lcao/module_deepks/cal_gdmx.cpp @@ -1,23 +1,23 @@ #ifdef __DEEPKS -#include "module_parameter/parameter.h" #include "LCAO_deepks.h" -#include "module_base/vector3.h" -#include "module_base/timer.h" #include "module_base/constants.h" -#include "module_hamilt_lcao/module_hcontainer/atom_pair.h" #include "module_base/libm/libm.h" +#include "module_base/timer.h" +#include "module_base/vector3.h" +#include "module_hamilt_lcao/module_hcontainer/atom_pair.h" +#include "module_parameter/parameter.h" /// this subroutine calculates the gradient of projected density matrices /// gdmx_m,m = d/dX sum_{mu,nu} rho_{mu,nu} -/// if stress label is enabled, the gradient of PDM wrt strain tensor will -/// be calculated: -/// gdm_epsl = d/d\epsilon_{ab} * +/// if stress label is enabled, the gradient of PDM wrt strain tensor will +/// be calculated: +/// gdm_epsl = d/d\epsilon_{ab} * /// sum_{mu,nu} rho_{mu,nu} -//There are 2 subroutines in this file: -//1. cal_gdmx, calculating gdmx (and optionally gdm_epsl for stress) for gamma point -//2. check_gdmx, which prints gdmx to a series of .dat files +// There are 2 subroutines in this file: +// 1. cal_gdmx, calculating gdmx (and optionally gdm_epsl for stress) for gamma point +// 2. check_gdmx, which prints gdmx to a series of .dat files template void LCAO_Deepks::cal_gdmx(const std::vector>& dm, @@ -26,17 +26,18 @@ void LCAO_Deepks::cal_gdmx(const std::vector>& dm, const Grid_Driver& GridD, const int nks, const std::vector>& kvec_d, + std::vector*> psialpha, const bool isstress) { ModuleBase::TITLE("LCAO_Deepks", "cal_gdmx"); - ModuleBase::timer::tick("LCAO_Deepks","cal_gdmx"); - //get DS_alpha_mu and S_nu_beta + ModuleBase::timer::tick("LCAO_Deepks", "cal_gdmx"); + // get DS_alpha_mu and S_nu_beta int size = (2 * lmaxd + 1) * (2 * lmaxd + 1); int nrow = this->pv->nrow; - for (int iat = 0;iat < ucell.nat;iat++) + for (int iat = 0; iat < ucell.nat; iat++) { - for (int inl = 0;inl < inlmax;inl++) + for (int inl = 0; inl < inlmax; inl++) { ModuleBase::GlobalFunc::ZEROS(gdmx[iat][inl], size); ModuleBase::GlobalFunc::ZEROS(gdmy[iat][inl], size); @@ -46,9 +47,9 @@ void LCAO_Deepks::cal_gdmx(const std::vector>& dm, if (isstress) { - for (int ipol = 0;ipol < 6;ipol++) + for (int ipol = 0; ipol < 6; ipol++) { - for (int inl = 0;inl < inlmax;inl++) + for (int inl = 0; inl < inlmax; inl++) { ModuleBase::GlobalFunc::ZEROS(gdm_epsl[ipol][inl], size); } @@ -59,65 +60,69 @@ void LCAO_Deepks::cal_gdmx(const std::vector>& dm, for (int T0 = 0; T0 < ucell.ntype; T0++) { - Atom* atom0 = &ucell.atoms[T0]; - for (int I0 =0; I0< atom0->na; I0++) + Atom* atom0 = &ucell.atoms[T0]; + for (int I0 = 0; I0 < atom0->na; I0++) { - const int iat = ucell.itia2iat(T0,I0);//on which alpha is located + const int iat = ucell.itia2iat(T0, I0); // on which alpha is located const ModuleBase::Vector3 tau0 = atom0->tau[I0]; - GridD.Find_atom(ucell, atom0->tau[I0] ,T0, I0); + GridD.Find_atom(ucell, atom0->tau[I0], T0, I0); - for (int ad1=0; ad1 tau1 = GridD.getAdjacentTau(ad1); - const Atom* atom1 = &ucell.atoms[T1]; - const int nw1_tot = atom1->nw*PARAM.globalv.npol; - const double Rcut_AO1 = orb.Phi[T1].getRcut(); - - ModuleBase::Vector3 dR1(GridD.getBox(ad1).x, GridD.getBox(ad1).y, GridD.getBox(ad1).z); - - for (int ad2=0; ad2 < GridD.getAdjacentNum()+1 ; ad2++) - { - const int T2 = GridD.getType(ad2); - const int I2 = GridD.getNatom(ad2); - const int start2 = ucell.itiaiw2iwt(T2, I2, 0); - const int ibt2 = ucell.itia2iat(T2,I2); - const ModuleBase::Vector3 tau2 = GridD.getAdjacentTau(ad2); - const Atom* atom2 = &ucell.atoms[T2]; - const int nw2_tot = atom2->nw*PARAM.globalv.npol; - ModuleBase::Vector3 dR2(GridD.getBox(ad2).x, GridD.getBox(ad2).y, GridD.getBox(ad2).z); - - const double Rcut_AO2 = orb.Phi[T2].getRcut(); - const double dist1 = (tau1-tau0).norm() * ucell.lat0; - const double dist2 = (tau2-tau0).norm() * ucell.lat0; - - if (dist1 > Rcut_Alpha + Rcut_AO1 - || dist2 > Rcut_Alpha + Rcut_AO2) - { - continue; - } + const Atom* atom1 = &ucell.atoms[T1]; + const int nw1_tot = atom1->nw * PARAM.globalv.npol; + const double Rcut_AO1 = orb.Phi[T1].getRcut(); + + ModuleBase::Vector3 dR1(GridD.getBox(ad1).x, GridD.getBox(ad1).y, GridD.getBox(ad1).z); + + for (int ad2 = 0; ad2 < GridD.getAdjacentNum() + 1; ad2++) + { + const int T2 = GridD.getType(ad2); + const int I2 = GridD.getNatom(ad2); + const int start2 = ucell.itiaiw2iwt(T2, I2, 0); + const int ibt2 = ucell.itia2iat(T2, I2); + const ModuleBase::Vector3 tau2 = GridD.getAdjacentTau(ad2); + const Atom* atom2 = &ucell.atoms[T2]; + const int nw2_tot = atom2->nw * PARAM.globalv.npol; + ModuleBase::Vector3 dR2(GridD.getBox(ad2).x, GridD.getBox(ad2).y, GridD.getBox(ad2).z); + + const double Rcut_AO2 = orb.Phi[T2].getRcut(); + const double dist1 = (tau1 - tau0).norm() * ucell.lat0; + const double dist2 = (tau2 - tau0).norm() * ucell.lat0; + + if (dist1 > Rcut_Alpha + Rcut_AO1 || dist2 > Rcut_Alpha + Rcut_AO2) + { + continue; + } double r0[3]; double r1[3]; - if(isstress) + if (isstress) { - r1[0] = ( tau1.x - tau0.x) ; - r1[1] = ( tau1.y - tau0.y) ; - r1[2] = ( tau1.z - tau0.z) ; - r0[0] = ( tau2.x - tau0.x) ; - r0[1] = ( tau2.y - tau0.y) ; - r0[2] = ( tau2.z - tau0.z) ; + r1[0] = (tau1.x - tau0.x); + r1[1] = (tau1.y - tau0.y); + r1[2] = (tau1.z - tau0.z); + r0[0] = (tau2.x - tau0.x); + r0[1] = (tau2.y - tau0.y); + r0[2] = (tau2.z - tau0.z); } auto row_indexes = pv->get_indexes_row(ibt1); auto col_indexes = pv->get_indexes_col(ibt2); - if(row_indexes.size() * col_indexes.size() == 0) continue; - + if (row_indexes.size() * col_indexes.size() == 0) + { + continue; + } + double* dm_current; - int dRx, dRy, dRz; + int dRx; + int dRy; + int dRz; if constexpr (std::is_same::value) { dRx = 0; @@ -126,13 +131,15 @@ void LCAO_Deepks::cal_gdmx(const std::vector>& dm, } else { - dRx = (dR2-dR1).x; - dRy = (dR2-dR1).y; - dRz = (dR2-dR1).z; + dRx = (dR2 - dR1).x; + dRy = (dR2 - dR1).y; + dRz = (dR2 - dR1).z; } + ModuleBase::Vector3 dR(dRx, dRy, dRz); + hamilt::AtomPair dm_pair(ibt1, ibt2, dRx, dRy, dRz, pv); dm_pair.allocate(nullptr, 1); - for(int ik=0;ik::value) @@ -141,12 +148,12 @@ void LCAO_Deepks::cal_gdmx(const std::vector>& dm, } else { - const double arg = - (kvec_d[ik] * (dR2-dR1) ) * ModuleBase::TWO_PI; + const double arg = -(kvec_d[ik] * dR) * ModuleBase::TWO_PI; double sinp, cosp; ModuleBase::libm::sincos(arg, &sinp, &cosp); kphase = TK(cosp, sinp); } - if(ModuleBase::GlobalFunc::IS_COLUMN_MAJOR_KS_SOLVER(PARAM.inp.ks_solver)) + if (ModuleBase::GlobalFunc::IS_COLUMN_MAJOR_KS_SOLVER(PARAM.inp.ks_solver)) { dm_pair.add_from_matrix(dm[ik].data(), pv->get_row_size(), kphase, 1); } @@ -155,146 +162,163 @@ void LCAO_Deepks::cal_gdmx(const std::vector>& dm, dm_pair.add_from_matrix(dm[ik].data(), pv->get_col_size(), kphase, 0); } } - + dm_current = dm_pair.get_pointer(); - key_tuple key_1(ibt1,dR1.x,dR1.y,dR1.z); - key_tuple key_2(ibt2,dR2.x,dR2.y,dR2.z); - for (int iw1=0; iw1 nlm1; - std::vector> nlm2; + hamilt::BaseMatrix* overlap_1 = psialpha[0]->find_matrix(iat, ibt1, dR1); + hamilt::BaseMatrix* overlap_2 = psialpha[0]->find_matrix(iat, ibt2, dR2); + std::vector*> grad_overlap_1(3); + std::vector*> grad_overlap_2(3); - if constexpr (std::is_same::value) - { - nlm1 = this->nlm_save[iat][ad1][row_indexes[iw1]][0]; - nlm2 = this->nlm_save[iat][ad2][col_indexes[iw2]]; - } - else + assert(overlap_1->get_col_size() == overlap_2->get_col_size()); + + for (int i = 0; i < 3; ++i) { - nlm1 = this->nlm_save_k[iat][key_1][row_indexes[iw1]][0]; - nlm2 = this->nlm_save_k[iat][key_2][col_indexes[iw2]]; + grad_overlap_1[i] = psialpha[i + 1]->find_matrix(iat, ibt1, dR1); + grad_overlap_2[i] = psialpha[i + 1]->find_matrix(iat, ibt2, dR2); } - assert(nlm1.size()==nlm2[0].size()); - - int ib=0; - for (int L0 = 0; L0 <= orb.Alpha[0].getLmax();++L0) + int ib = 0; + for (int L0 = 0; L0 <= orb.Alpha[0].getLmax(); ++L0) { - for (int N0 = 0;N0 < orb.Alpha[0].getNchi(L0);++N0) + for (int N0 = 0; N0 < orb.Alpha[0].getNchi(L0); ++N0) { const int inl = this->inl_index[T0](I0, L0, N0); - const int nm = 2*L0+1; - for (int m1 = 0;m1 < nm;++m1) + const int nm = 2 * L0 + 1; + for (int m1 = 0; m1 < nm; ++m1) { - for (int m2 = 0; m2 ) - gdmx[iat][inl][m1*nm+m2] += nlm2[1][ib+m2] * nlm1[ib+m1] * *dm_current; - gdmy[iat][inl][m1*nm+m2] += nlm2[2][ib+m2] * nlm1[ib+m1] * *dm_current; - gdmz[iat][inl][m1*nm+m2] += nlm2[3][ib+m2] * nlm1[ib+m1] * *dm_current; + gdmx[iat][inl][m1 * nm + m2] + += grad_overlap_2[0]->get_value(col_indexes[iw2], ib + m2) + * overlap_1->get_value(row_indexes[iw1], ib + m1) * *dm_current; + gdmy[iat][inl][m1 * nm + m2] + += grad_overlap_2[1]->get_value(col_indexes[iw2], ib + m2) + * overlap_1->get_value(row_indexes[iw1], ib + m1) * *dm_current; + gdmz[iat][inl][m1 * nm + m2] + += grad_overlap_2[2]->get_value(col_indexes[iw2], ib + m2) + * overlap_1->get_value(row_indexes[iw1], ib + m1) * *dm_current; //() - gdmx[iat][inl][m2*nm+m1] += nlm2[1][ib+m2] * nlm1[ib+m1] * *dm_current; - gdmy[iat][inl][m2*nm+m1] += nlm2[2][ib+m2] * nlm1[ib+m1] * *dm_current; - gdmz[iat][inl][m2*nm+m1] += nlm2[3][ib+m2] * nlm1[ib+m1] * *dm_current; + gdmx[iat][inl][m2 * nm + m1] + += grad_overlap_2[0]->get_value(col_indexes[iw2], ib + m2) + * overlap_1->get_value(row_indexes[iw1], ib + m1) * *dm_current; + gdmy[iat][inl][m2 * nm + m1] + += grad_overlap_2[1]->get_value(col_indexes[iw2], ib + m2) + * overlap_1->get_value(row_indexes[iw1], ib + m1) * *dm_current; + gdmz[iat][inl][m2 * nm + m1] + += grad_overlap_2[2]->get_value(col_indexes[iw2], ib + m2) + * overlap_1->get_value(row_indexes[iw1], ib + m1) * *dm_current; - //() = -() - gdmx[ibt2][inl][m1*nm+m2] -= nlm2[1][ib+m2] * nlm1[ib+m1] * *dm_current; - gdmy[ibt2][inl][m1*nm+m2] -= nlm2[2][ib+m2] * nlm1[ib+m1] * *dm_current; - gdmz[ibt2][inl][m1*nm+m2] -= nlm2[3][ib+m2] * nlm1[ib+m1] * *dm_current; + //() = -() + gdmx[ibt2][inl][m1 * nm + m2] + -= grad_overlap_2[0]->get_value(col_indexes[iw2], ib + m2) + * overlap_1->get_value(row_indexes[iw1], ib + m1) * *dm_current; + gdmy[ibt2][inl][m1 * nm + m2] + -= grad_overlap_2[1]->get_value(col_indexes[iw2], ib + m2) + * overlap_1->get_value(row_indexes[iw1], ib + m1) * *dm_current; + gdmz[ibt2][inl][m1 * nm + m2] + -= grad_overlap_2[2]->get_value(col_indexes[iw2], ib + m2) + * overlap_1->get_value(row_indexes[iw1], ib + m1) * *dm_current; - //() = -() - gdmx[ibt2][inl][m2*nm+m1] -= nlm2[1][ib+m2] * nlm1[ib+m1] * *dm_current; - gdmy[ibt2][inl][m2*nm+m1] -= nlm2[2][ib+m2] * nlm1[ib+m1] * *dm_current; - gdmz[ibt2][inl][m2*nm+m1] -= nlm2[3][ib+m2] * nlm1[ib+m1] * *dm_current; + //() = -() + gdmx[ibt2][inl][m2 * nm + m1] + -= grad_overlap_2[0]->get_value(col_indexes[iw2], ib + m2) + * overlap_1->get_value(row_indexes[iw1], ib + m1) * *dm_current; + gdmy[ibt2][inl][m2 * nm + m1] + -= grad_overlap_2[1]->get_value(col_indexes[iw2], ib + m2) + * overlap_1->get_value(row_indexes[iw1], ib + m1) * *dm_current; + gdmz[ibt2][inl][m2 * nm + m1] + -= grad_overlap_2[2]->get_value(col_indexes[iw2], ib + m2) + * overlap_1->get_value(row_indexes[iw1], ib + m1) * *dm_current; if (isstress) { int mm = 0; - for(int ipol=0;ipol<3;ipol++) + for (int ipol = 0; ipol < 3; ipol++) { - for(int jpol=ipol;jpol<3;jpol++) + for (int jpol = ipol; jpol < 3; jpol++) { - gdm_epsl[mm][inl][m2*nm+m1] += ucell.lat0 * - *dm_current * (nlm2[jpol+1][ib+m2] * nlm1[ib+m1] * r0[ipol]); + gdm_epsl[mm][inl][m2 * nm + m1] + += ucell.lat0 * *dm_current + * (grad_overlap_2[jpol]->get_value(col_indexes[iw2], + ib + m2) + * overlap_1->get_value(row_indexes[iw1], ib + m1) + * r0[ipol]); mm++; } } } } } - ib+=nm; + ib += nm; } } - assert(ib==nlm1.size()); - if (isstress) + assert(ib == overlap_1->get_col_size()); + if (isstress) { - if constexpr (std::is_same::value) - { - nlm1 = this->nlm_save[iat][ad2][col_indexes[iw2]][0]; - nlm2 = this->nlm_save[iat][ad1][row_indexes[iw1]]; - } - else - { - nlm1 = this->nlm_save_k[iat][key_2][col_indexes[iw2]][0]; - nlm2 = this->nlm_save_k[iat][key_1][row_indexes[iw1]]; - } - - assert(nlm1.size()==nlm2[0].size()); - int ib=0; - for (int L0 = 0; L0 <= orb.Alpha[0].getLmax();++L0) + int ib = 0; + for (int L0 = 0; L0 <= orb.Alpha[0].getLmax(); ++L0) { - for (int N0 = 0;N0 < orb.Alpha[0].getNchi(L0);++N0) + for (int N0 = 0; N0 < orb.Alpha[0].getNchi(L0); ++N0) { const int inl = this->inl_index[T0](I0, L0, N0); - const int nm = 2*L0+1; - for (int m1 = 0;m1 < nm; ++m1) + const int nm = 2 * L0 + 1; + for (int m1 = 0; m1 < nm; ++m1) { for (int m2 = 0; m2 < nm; ++m2) { int mm = 0; - for(int ipol=0;ipol<3;ipol++) + for (int ipol = 0; ipol < 3; ipol++) { - for(int jpol=ipol;jpol<3;jpol++) + for (int jpol = ipol; jpol < 3; jpol++) { - gdm_epsl[mm][inl][m2*nm+m1] += ucell.lat0 * - *dm_current * (nlm1[ib+m1] * nlm2[jpol+1][ib+m2] * r1[ipol]); + gdm_epsl[mm][inl][m2 * nm + m1] + += ucell.lat0 * *dm_current + * (overlap_2->get_value(col_indexes[iw2], ib + m1) + * grad_overlap_1[jpol]->get_value(row_indexes[iw1], + ib + m2) + * r1[ipol]); mm++; } } } } - ib+=nm; + ib += nm; } } } dm_current++; - }//iw2 - }//iw1 - }//ad2 - }//ad1 - }//I0 - }//T0 + } // iw2 + } // iw1 + } // ad2 + } // ad1 + } // I0 + } // T0 #ifdef __MPI - for(int iat=0;iatinlmax,size,this->gdmx[iat]); - allsum_deepks(this->inlmax,size,this->gdmy[iat]); - allsum_deepks(this->inlmax,size,this->gdmz[iat]); + allsum_deepks(this->inlmax, size, this->gdmx[iat]); + allsum_deepks(this->inlmax, size, this->gdmy[iat]); + allsum_deepks(this->inlmax, size, this->gdmz[iat]); } if (isstress) { - for(int ipol=0;ipol<6;ipol++) + for (int ipol = 0; ipol < 6; ipol++) { - allsum_deepks(this->inlmax,size,this->gdm_epsl[ipol]); + allsum_deepks(this->inlmax, size, this->gdm_epsl[ipol]); } } #endif - ModuleBase::timer::tick("LCAO_Deepks","cal_gdmx"); + ModuleBase::timer::tick("LCAO_Deepks", "cal_gdmx"); return; } @@ -305,26 +329,26 @@ void LCAO_Deepks::check_gdmx(const int nat) std::ofstream ofs_y; std::ofstream ofs_z; - ofs_x<lmaxd * 2 + 1) * (this->lmaxd * 2 + 1); - for(int ia=0;ia(const std::vector>& kvec_d, + std::vector*> psialpha, const bool isstress); template void LCAO_Deepks::cal_gdmx>(const std::vector>>& dm, @@ -354,6 +379,7 @@ template void LCAO_Deepks::cal_gdmx>(const std::vector>& kvec_d, + std::vector*> psialpha, const bool isstress); #endif diff --git a/source/module_hamilt_lcao/module_deepks/deepks_fgamma.cpp b/source/module_hamilt_lcao/module_deepks/deepks_fgamma.cpp deleted file mode 100644 index 60077ebdac..0000000000 --- a/source/module_hamilt_lcao/module_deepks/deepks_fgamma.cpp +++ /dev/null @@ -1,381 +0,0 @@ -//wenfei 2022-1-11 -//This file contains subroutines for calculating F_delta, -#include "module_parameter/parameter.h" -//which is defind as sum_mu,nu rho_mu,nu d/dX (V(D)) - -//There are 2 subroutines in this file: -//1. cal_f_delta_gamma, which is used for gamma point calculation -//2. check_f_delta, which prints F_delta into F_delta.dat for checking - -#ifdef __DEEPKS - -#include "deepks_force.h" -#include "module_base/vector3.h" -#include "module_base/timer.h" -#include "module_base/constants.h" -#include "module_hamilt_lcao/module_hcontainer/atom_pair.h" -#include "module_base/libm/libm.h" - -typedef std::tuple key_tuple; - -//force for gamma only calculations -//Pulay and HF terms are calculated together -void DeePKS_domain::cal_f_delta_gamma( - const std::vector>& dm, - const UnitCell& ucell, - const LCAO_Orbitals& orb, - const Grid_Driver& GridD, - const Parallel_Orbitals& pv, - const int lmaxd, - const int nks, - const std::vector> &kvec_d, - std::vector>>>>& nlm_save, - double** gedm, - ModuleBase::IntArray* inl_index, - ModuleBase::matrix& f_delta, - const bool isstress, - ModuleBase::matrix& svnl_dalpha) -{ - ModuleBase::TITLE("DeePKS_domain", "cal_f_delta_gamma"); - - using TK = double; // temporary used, will be replaced by the template parameter - - f_delta.zero_out(); - - const double Rcut_Alpha = orb.Alpha[0].getRcut(); - - const int nrow = pv.nrow; - - for (int T0 = 0; T0 < ucell.ntype; T0++) - { - Atom* atom0 = &ucell.atoms[T0]; - for (int I0 =0; I0< atom0->na; I0++) - { - const int iat = ucell.itia2iat(T0,I0); - const ModuleBase::Vector3 tau0 = atom0->tau[I0]; - GridD.Find_atom(ucell, atom0->tau[I0] ,T0, I0); - - for (int ad1=0; ad1 tau1 = GridD.getAdjacentTau(ad1); - const Atom* atom1 = &ucell.atoms[T1]; - const int nw1_tot = atom1->nw*PARAM.globalv.npol; - const double Rcut_AO1 = orb.Phi[T1].getRcut(); - - ModuleBase::Vector3 dR1(GridD.getBox(ad1).x, GridD.getBox(ad1).y, GridD.getBox(ad1).z); - - for (int ad2=0; ad2 < GridD.getAdjacentNum()+1 ; ad2++) - { - const int T2 = GridD.getType(ad2); - const int I2 = GridD.getNatom(ad2); - const int ibt2 = ucell.itia2iat(T2,I2); - const ModuleBase::Vector3 tau2 = GridD.getAdjacentTau(ad2); - const Atom* atom2 = &ucell.atoms[T2]; - const int nw2_tot = atom2->nw*PARAM.globalv.npol; - ModuleBase::Vector3 dR2(GridD.getBox(ad2).x, GridD.getBox(ad2).y, GridD.getBox(ad2).z); - - const double Rcut_AO2 = orb.Phi[T2].getRcut(); - const double dist1 = (tau1-tau0).norm() * ucell.lat0; - const double dist2 = (tau2-tau0).norm() * ucell.lat0; - - if (dist1 > Rcut_Alpha + Rcut_AO1 - || dist2 > Rcut_Alpha + Rcut_AO2) - { - continue; - } - - double r0[3]; - double r1[3]; - if(isstress) - { - r1[0] = ( tau1.x - tau0.x) ; - r1[1] = ( tau1.y - tau0.y) ; - r1[2] = ( tau1.z - tau0.z) ; - r0[0] = ( tau2.x - tau0.x) ; - r0[1] = ( tau2.y - tau0.y) ; - r0[2] = ( tau2.z - tau0.z) ; - } - - auto row_indexes = pv.get_indexes_row(ibt1); - auto col_indexes = pv.get_indexes_col(ibt2); - - if(row_indexes.size() * col_indexes.size() == 0) - { - continue; - } - - int dRx; - int dRy; - int dRz; - if constexpr (std::is_same::value) - { - dRx = 0; - dRy = 0; - dRz = 0; - } - else - { - dRx = dR2.x - dR1.x; - dRy = dR2.y - dR1.y; - dRz = dR2.z - dR1.z; - } - - hamilt::AtomPair dm_pair(ibt1, ibt2, dRx, dRy, dRz, &pv); - - dm_pair.allocate(nullptr, 1); - - if constexpr (std::is_same::value) - { - for(int is=0;is kphase = std::complex(cosp, sinp); - // if(ModuleBase::GlobalFunc::IS_COLUMN_MAJOR_KS_SOLVER(PARAM.inp.ks_solver)) - // { - // dm_pair.add_from_matrix(dm[ik].data(), pv.get_row_size(), kphase, 1); - // } - // else - // { - // dm_pair.add_from_matrix(dm[ik].data(), pv.get_col_size(), kphase, 0); - // } - // } - } - - const double* dm_current = dm_pair.get_pointer(); - - for (int iw1=0; iw1 nlm1; - std::vector> nlm2; - nlm2.resize(3); - - if constexpr (std::is_same::value) - { - nlm1 = nlm_save[iat][ad1][row_indexes[iw1]][0]; - for(int dim=0;dim<3;dim++) - { - nlm2[dim] = nlm_save[iat][ad2][col_indexes[iw2]][dim+1]; - } - } - else - { - // nlm1 = nlm_save_k[iat][key_1][row_indexes[iw1]][0]; - // for(int dim=0;dim<3;dim++) - // { - // nlm2[dim] = nlm_save_k[iat][key_2][col_indexes[iw2]][dim+1]; - // } - } - - assert(nlm1.size()==nlm2[0].size()); - - if(!PARAM.inp.deepks_equiv) - { - int ib=0; - for (int L0 = 0; L0 <= orb.Alpha[0].getLmax();++L0) - { - for (int N0 = 0;N0 < orb.Alpha[0].getNchi(L0);++N0) - { - const int inl = inl_index[T0](I0, L0, N0); - const int nm = 2*L0+1; - for (int m1 = 0;m1 < nm; ++m1) - { - for (int m2 = 0; m2 < nm; ++m2) - { - for(int dim=0;dim<3;dim++) - { - nlm[dim] += gedm[inl][m1*nm+m2]*nlm1[ib+m1]*nlm2[dim][ib+m2]; - } - } - } - ib+=nm; - } - } - assert(ib==nlm1.size()); - } - else - { - int nproj = 0; - for(int il = 0; il < lmaxd + 1; il++) - { - nproj += (2 * il + 1) * orb.Alpha[0].getNchi(il); - } - for(int iproj = 0; iproj < nproj; iproj ++) - { - for(int jproj = 0; jproj < nproj; jproj ++) - { - for(int dim=0;dim<3;dim++) - { - nlm[dim] += gedm[iat][iproj*nproj+jproj] * nlm1[iproj] * nlm2[dim][jproj]; - } - } - } - } - - // HF term is minus, only one projector for each atom force. - f_delta(iat, 0) -= 2 * *dm_current * nlm[0]; - f_delta(iat, 1) -= 2 * *dm_current * nlm[1]; - f_delta(iat, 2) -= 2 * *dm_current * nlm[2]; - - // Pulay term is plus, only one projector for each atom force. - f_delta(ibt2, 0) += 2 * *dm_current * nlm[0]; - f_delta(ibt2, 1) += 2 * *dm_current * nlm[1]; - f_delta(ibt2, 2) += 2 * *dm_current * nlm[2]; - - if(isstress) - { - if constexpr (std::is_same::value) - { - nlm1 = nlm_save[iat][ad2][col_indexes[iw2]][0]; - for(int i=0;i<3;i++) - { - nlm2[i] = nlm_save[iat][ad1][row_indexes[iw1]][i+1]; - } - } - else - { - // nlm1 = nlm_save_k[iat][key_2][col_indexes[iw2]][0]; - // for(int i=0;i<3;i++) - // { - // nlm2[i] = nlm_save_k[iat][key_1][row_indexes[iw1]][i+1]; - // } - } - - assert(nlm1.size()==nlm2[0].size()); - - if(!PARAM.inp.deepks_equiv) - { - int ib=0; - for (int L0 = 0; L0 <= orb.Alpha[0].getLmax();++L0) - { - for (int N0 = 0;N0 < orb.Alpha[0].getNchi(L0);++N0) - { - const int inl = inl_index[T0](I0, L0, N0); - const int nm = 2*L0+1; - - for (int m1 = 0;m1 < nm; ++m1) - { - for (int m2 = 0; m2 < nm; ++m2) - { - for(int dim=0;dim<3;++dim) - { - nlm_t[dim] += gedm[inl][m1*nm+m2]*nlm1[ib+m1]*nlm2[dim][ib+m2]; - } - } - } - ib+=nm; - } - } - assert(ib==nlm1.size()); - } - else - { - int nproj = 0; - for(int il = 0; il < lmaxd + 1; il++) - { - nproj += (2 * il + 1) * orb.Alpha[0].getNchi(il); - } - for(int iproj = 0; iproj < nproj; iproj ++) - { - for(int jproj = 0; jproj < nproj; jproj ++) - { - for(int dim=0;dim<3;dim++) - { - nlm_t[dim] += gedm[iat][iproj*nproj+jproj] * nlm1[iproj] * nlm2[dim][jproj]; - } - } - } - } - - for(int ipol=0;ipol<3;ipol++) - { - for(int jpol=ipol;jpol<3;jpol++) - { - svnl_dalpha(ipol, jpol) += *dm_current * - (nlm[jpol] * r0[ipol] + nlm_t[jpol] * r1[ipol]); - } - } - } - dm_current++; - }//iw2 - }//iw1 - }//ad2 - }//ad1 - }//end I0 - }//end T0 - - if(isstress) - { - assert(ucell.omega>0.0); - const double weight = ucell.lat0 / ucell.omega ; - for(int i=0;i<3;++i) - { - for(int j=0;j<3;++j) - { - if(j>i) - { - svnl_dalpha(j,i) = svnl_dalpha(i,j); - } - svnl_dalpha(i,j) *= weight; - } - } - } - - return; -} - - -//prints forces and stress from DeePKS (LCAO) -void DeePKS_domain::check_f_delta( - const int nat, - ModuleBase::matrix& f_delta, - ModuleBase::matrix& svnl_dalpha) -{ - ModuleBase::TITLE("LCAO_Deepks", "check_F_delta"); - - std::ofstream ofs("F_delta.dat"); - ofs< key_tuple; // used in nlm_save_k - -void DeePKS_domain::cal_f_delta_k( - const std::vector>>& dm, /**<[in] density matrix*/ - const UnitCell& ucell, - const LCAO_Orbitals& orb, - const Grid_Driver& GridD, - const Parallel_Orbitals& pv, - const int lmaxd, - const int nks, - const std::vector>& kvec_d, - std::vector>>>>& nlm_save_k, - double** gedm, - ModuleBase::IntArray* inl_index, - ModuleBase::matrix& f_delta, - const bool isstress, - ModuleBase::matrix& svnl_dalpha) -{ - ModuleBase::TITLE("LCAO_Deepks", "cal_f_delta_hf_k_new"); - ModuleBase::timer::tick("LCAO_Deepks","cal_f_delta_hf_k_new"); - f_delta.zero_out(); - - const double Rcut_Alpha = orb.Alpha[0].getRcut(); - const int nrow = pv.nrow; - - for (int T0 = 0; T0 < ucell.ntype; T0++) - { - Atom* atom0 = &ucell.atoms[T0]; - for (int I0 =0; I0< atom0->na; I0++) - { - const int iat = ucell.itia2iat(T0,I0); - const ModuleBase::Vector3 tau0 = atom0->tau[I0]; - GridD.Find_atom(ucell, atom0->tau[I0] ,T0, I0); - - for (int ad1=0; ad1 tau1 = GridD.getAdjacentTau(ad1); - const Atom* atom1 = &ucell.atoms[T1]; - const int nw1_tot = atom1->nw*PARAM.globalv.npol; - const double Rcut_AO1 = orb.Phi[T1].getRcut(); - - ModuleBase::Vector3 dR1(GridD.getBox(ad1).x, GridD.getBox(ad1).y, GridD.getBox(ad1).z); - - for (int ad2=0; ad2 < GridD.getAdjacentNum()+1 ; ad2++) - { - const int T2 = GridD.getType(ad2); - const int I2 = GridD.getNatom(ad2); - const int ibt2 = ucell.itia2iat(T2,I2); - const ModuleBase::Vector3 tau2 = GridD.getAdjacentTau(ad2); - const Atom* atom2 = &ucell.atoms[T2]; - const int nw2_tot = atom2->nw*PARAM.globalv.npol; - ModuleBase::Vector3 dR2(GridD.getBox(ad2).x, GridD.getBox(ad2).y, GridD.getBox(ad2).z); - - const double Rcut_AO2 = orb.Phi[T2].getRcut(); - const double dist1 = (tau1-tau0).norm() * ucell.lat0; - const double dist2 = (tau2-tau0).norm() * ucell.lat0; - - if (dist1 > Rcut_Alpha + Rcut_AO1 - || dist2 > Rcut_Alpha + Rcut_AO2) - { - continue; - } - - double r0[3]; - double r1[3]; - if(isstress) - { - r1[0] = ( tau1.x - tau0.x) ; - r1[1] = ( tau1.y - tau0.y) ; - r1[2] = ( tau1.z - tau0.z) ; - r0[0] = ( tau2.x - tau0.x) ; - r0[1] = ( tau2.y - tau0.y) ; - r0[2] = ( tau2.z - tau0.z) ; - } - - auto row_indexes = pv.get_indexes_row(ibt1); - auto col_indexes = pv.get_indexes_col(ibt2); - if(row_indexes.size() * col_indexes.size() == 0) { continue; -} - - hamilt::AtomPair dm_pair(ibt1, ibt2, (dR2-dR1).x, (dR2-dR1).y, (dR2-dR1).z, &pv); - dm_pair.allocate(nullptr, 1); - for(int ik=0;ik kphase = std::complex(cosp, sinp); - if(ModuleBase::GlobalFunc::IS_COLUMN_MAJOR_KS_SOLVER(PARAM.inp.ks_solver)) - { - dm_pair.add_from_matrix(dm[ik].data(), pv.get_row_size(), kphase, 1); - } - else - { - dm_pair.add_from_matrix(dm[ik].data(), pv.get_col_size(), kphase, 0); - } - } - - const double* dm_current = dm_pair.get_pointer(); - - for (int iw1=0; iw1 nlm1 = nlm_save_k[iat][key_1][row_indexes[iw1]][0]; - std::vector> nlm2; - nlm2.resize(3); - for(int dim=0;dim<3;dim++) - { - nlm2[dim] = nlm_save_k[iat][key_2][col_indexes[iw2]][dim+1]; - } - - assert(nlm1.size()==nlm2[0].size()); - - if(!PARAM.inp.deepks_equiv) - { - int ib=0; - for (int L0 = 0; L0 <= orb.Alpha[0].getLmax();++L0) - { - for (int N0 = 0;N0 < orb.Alpha[0].getNchi(L0);++N0) - { - const int inl = inl_index[T0](I0, L0, N0); - const int nm = 2*L0+1; - for (int m1 = 0;m1 < nm; ++m1) - { - for (int m2 = 0; m2 < nm; ++m2) - { - for(int dim=0;dim<3;dim++) - { - nlm[dim] += gedm[inl][m1*nm+m2]*nlm1[ib+m1]*nlm2[dim][ib+m2]; - } - } - } - ib+=nm; - } - } - assert(ib==nlm1.size()); - } - else - { - int nproj = 0; - for(int il = 0; il < lmaxd + 1; il++) - { - nproj += (2 * il + 1) * orb.Alpha[0].getNchi(il); - } - for(int iproj = 0; iproj < nproj; iproj ++) - { - for(int jproj = 0; jproj < nproj; jproj ++) - { - for(int dim=0;dim<3;dim++) - { - nlm[dim] += gedm[iat][iproj*nproj+jproj] * nlm1[iproj] * nlm2[dim][jproj]; - } - } - } - } - - // Pulay term is plus - f_delta(ibt2, 0) += 2.0 * *dm_current * nlm[0]; - f_delta(ibt2, 1) += 2.0 * *dm_current * nlm[1]; - f_delta(ibt2, 2) += 2.0 * *dm_current * nlm[2]; - - // HF term is minus, only one projector for each atom force. - f_delta(iat, 0) -= 2.0 * *dm_current * nlm[0]; - f_delta(iat, 1) -= 2.0 * *dm_current * nlm[1]; - f_delta(iat, 2) -= 2.0 * *dm_current * nlm[2]; - - if(isstress) - { - nlm1 = nlm_save_k[iat][key_2][col_indexes[iw2]][0]; - for(int i=0;i<3;i++) - { - nlm2[i] = nlm_save_k[iat][key_1][row_indexes[iw1]][i+1]; - } - - assert(nlm1.size()==nlm2[0].size()); - - if(!PARAM.inp.deepks_equiv) - { - int ib=0; - for (int L0 = 0; L0 <= orb.Alpha[0].getLmax();++L0) - { - for (int N0 = 0;N0 < orb.Alpha[0].getNchi(L0);++N0) - { - const int inl = inl_index[T0](I0, L0, N0); - const int nm = 2*L0+1; - for (int m1 = 0;m1 < nm; ++m1) - { - for (int m2 = 0; m2 < nm; ++m2) - { - for(int dim=0;dim<3;dim++) - { - nlm_t[dim] += gedm[inl][m1*nm+m2]*nlm1[ib+m1]*nlm2[dim][ib+m2]; - } - } - } - ib+=nm; - } - } - assert(ib==nlm1.size()); - } - else - { - int nproj = 0; - for(int il = 0; il < lmaxd + 1; il++) - { - nproj += (2 * il + 1) * orb.Alpha[0].getNchi(il); - } - for(int iproj = 0; iproj < nproj; iproj ++) - { - for(int jproj = 0; jproj < nproj; jproj ++) - { - for(int dim=0;dim<3;dim++) - { - nlm_t[dim] += gedm[iat][iproj*nproj+jproj] * nlm1[iproj] * nlm2[dim][jproj]; - } - } - } - } - - for(int ipol=0;ipol<3;ipol++) - { - svnl_dalpha(0,ipol) -= *dm_current * (nlm[0] * r0[ipol] + nlm_t[0] * r1[ipol])* -1.0; - svnl_dalpha(1,ipol) -= *dm_current * (nlm[1] * r0[ipol] + nlm_t[1] * r1[ipol])* -1.0; - svnl_dalpha(2,ipol) -= *dm_current * (nlm[2] * r0[ipol] + nlm_t[2] * r1[ipol])* -1.0; - } - - } - dm_current++; - }//iw2 - }//iw1 - }//ad2 - }//ad1 - }//end I0 - }//end T0 - - if(isstress) - { - assert(ucell.omega>0.0); - const double weight = ucell.lat0 / ucell.omega ; - for(int i=0;i<3;++i) - { - for(int j=0;j<3;++j) - { - if(j>i) { svnl_dalpha(j,i) = svnl_dalpha(i,j); -} - svnl_dalpha(i,j) *= weight ; - } - } - } - ModuleBase::timer::tick("LCAO_Deepks","cal_f_delta_hf_k_new"); - return; -} - -#endif diff --git a/source/module_hamilt_lcao/module_deepks/deepks_force.cpp b/source/module_hamilt_lcao/module_deepks/deepks_force.cpp new file mode 100644 index 0000000000..069109229f --- /dev/null +++ b/source/module_hamilt_lcao/module_deepks/deepks_force.cpp @@ -0,0 +1,381 @@ +// wenfei 2022-1-11 +// This file contains subroutines for calculating F_delta, +#include "module_parameter/parameter.h" +// which is defind as sum_mu,nu rho_mu,nu d/dX (V(D)) + +// There are 2 subroutines in this file: +// 1. cal_f_delta_gamma, which is used for gamma point calculation +// 2. check_f_delta, which prints F_delta into F_delta.dat for checking + +#ifdef __DEEPKS + +#include "deepks_force.h" +#include "module_base/constants.h" +#include "module_base/libm/libm.h" +#include "module_base/timer.h" +#include "module_base/vector3.h" +#include "module_hamilt_lcao/module_hcontainer/atom_pair.h" + +template +void DeePKS_domain::cal_f_delta(const std::vector>& dm, + const UnitCell& ucell, + const LCAO_Orbitals& orb, + const Grid_Driver& GridD, + const Parallel_Orbitals& pv, + const int lmaxd, + const int nks, + const std::vector>& kvec_d, + std::vector*> psialpha, + double** gedm, + ModuleBase::IntArray* inl_index, + ModuleBase::matrix& f_delta, + const bool isstress, + ModuleBase::matrix& svnl_dalpha) +{ + ModuleBase::TITLE("DeePKS_domain", "cal_f_delta"); + + f_delta.zero_out(); + + const double Rcut_Alpha = orb.Alpha[0].getRcut(); + + const int nrow = pv.nrow; + + for (int T0 = 0; T0 < ucell.ntype; T0++) + { + Atom* atom0 = &ucell.atoms[T0]; + for (int I0 = 0; I0 < atom0->na; I0++) + { + const int iat = ucell.itia2iat(T0, I0); + const ModuleBase::Vector3 tau0 = atom0->tau[I0]; + GridD.Find_atom(ucell, atom0->tau[I0], T0, I0); + + for (int ad1 = 0; ad1 < GridD.getAdjacentNum() + 1; ++ad1) + { + const int T1 = GridD.getType(ad1); + const int I1 = GridD.getNatom(ad1); + const int ibt1 = ucell.itia2iat(T1, I1); + const ModuleBase::Vector3 tau1 = GridD.getAdjacentTau(ad1); + const Atom* atom1 = &ucell.atoms[T1]; + const int nw1_tot = atom1->nw * PARAM.globalv.npol; + const double Rcut_AO1 = orb.Phi[T1].getRcut(); + + ModuleBase::Vector3 dR1(GridD.getBox(ad1).x, GridD.getBox(ad1).y, GridD.getBox(ad1).z); + + for (int ad2 = 0; ad2 < GridD.getAdjacentNum() + 1; ad2++) + { + const int T2 = GridD.getType(ad2); + const int I2 = GridD.getNatom(ad2); + const int ibt2 = ucell.itia2iat(T2, I2); + const ModuleBase::Vector3 tau2 = GridD.getAdjacentTau(ad2); + const Atom* atom2 = &ucell.atoms[T2]; + const int nw2_tot = atom2->nw * PARAM.globalv.npol; + ModuleBase::Vector3 dR2(GridD.getBox(ad2).x, GridD.getBox(ad2).y, GridD.getBox(ad2).z); + + const double Rcut_AO2 = orb.Phi[T2].getRcut(); + const double dist1 = (tau1 - tau0).norm() * ucell.lat0; + const double dist2 = (tau2 - tau0).norm() * ucell.lat0; + + if (dist1 > Rcut_Alpha + Rcut_AO1 || dist2 > Rcut_Alpha + Rcut_AO2) + { + continue; + } + + double r0[3]; + double r1[3]; + if (isstress) + { + r1[0] = (tau1.x - tau0.x); + r1[1] = (tau1.y - tau0.y); + r1[2] = (tau1.z - tau0.z); + r0[0] = (tau2.x - tau0.x); + r0[1] = (tau2.y - tau0.y); + r0[2] = (tau2.z - tau0.z); + } + + auto row_indexes = pv.get_indexes_row(ibt1); + auto col_indexes = pv.get_indexes_col(ibt2); + + if (row_indexes.size() * col_indexes.size() == 0) + { + continue; + } + + int dRx; + int dRy; + int dRz; + if constexpr (std::is_same::value) // for gamma-only + { + dRx = 0; + dRy = 0; + dRz = 0; + } + else // for multi-k + { + dRx = dR2.x - dR1.x; + dRy = dR2.y - dR1.y; + dRz = dR2.z - dR1.z; + } + ModuleBase::Vector3 dR(dRx, dRy, dRz); + + hamilt::AtomPair dm_pair(ibt1, ibt2, dRx, dRy, dRz, &pv); + + dm_pair.allocate(nullptr, 1); + + if constexpr (std::is_same::value) // for gamma-only + { + for (int is = 0; is < dm.size(); is++) + { + if (ModuleBase::GlobalFunc::IS_COLUMN_MAJOR_KS_SOLVER(PARAM.inp.ks_solver)) + { + dm_pair.add_from_matrix(dm[is].data(), pv.get_row_size(), 1.0, 1); + } + else + { + dm_pair.add_from_matrix(dm[is].data(), pv.get_col_size(), 1.0, 0); + } + } + } + else // for multi-k + { + for (int ik = 0; ik < nks; ik++) + { + const double arg = -(kvec_d[ik] * dR) * ModuleBase::TWO_PI; + double sinp, cosp; + ModuleBase::libm::sincos(arg, &sinp, &cosp); + const std::complex kphase = std::complex(cosp, sinp); + if (ModuleBase::GlobalFunc::IS_COLUMN_MAJOR_KS_SOLVER(PARAM.inp.ks_solver)) + { + dm_pair.add_from_matrix(dm[ik].data(), pv.get_row_size(), kphase, 1); + } + else + { + dm_pair.add_from_matrix(dm[ik].data(), pv.get_col_size(), kphase, 0); + } + } + } + + const double* dm_current = dm_pair.get_pointer(); + + for (int iw1 = 0; iw1 < row_indexes.size(); ++iw1) + { + for (int iw2 = 0; iw2 < col_indexes.size(); ++iw2) + { + double nlm[3] = {0, 0, 0}; + double nlm_t[3] = {0, 0, 0}; // for stress + + hamilt::BaseMatrix* overlap_1 = psialpha[0]->find_matrix(iat, ibt1, dR1); + hamilt::BaseMatrix* overlap_2 = psialpha[0]->find_matrix(iat, ibt2, dR2); + std::vector*> grad_overlap_1(3); + std::vector*> grad_overlap_2(3); + for (int i = 0; i < 3; ++i) + { + grad_overlap_1[i] = psialpha[i + 1]->find_matrix(iat, ibt1, dR1); + grad_overlap_2[i] = psialpha[i + 1]->find_matrix(iat, ibt2, dR2); + } + + assert(overlap_1->get_col_size() == overlap_2->get_col_size()); + + if (!PARAM.inp.deepks_equiv) + { + int ib = 0; + for (int L0 = 0; L0 <= orb.Alpha[0].getLmax(); ++L0) + { + for (int N0 = 0; N0 < orb.Alpha[0].getNchi(L0); ++N0) + { + const int inl = inl_index[T0](I0, L0, N0); + const int nm = 2 * L0 + 1; + for (int m1 = 0; m1 < nm; ++m1) + { + for (int m2 = 0; m2 < nm; ++m2) + { + for (int dim = 0; dim < 3; dim++) + { + nlm[dim] + += gedm[inl][m1 * nm + m2] + * overlap_1->get_value(row_indexes[iw1], ib + m1) + * grad_overlap_2[dim]->get_value(col_indexes[iw2], ib + m2); + } + } + } + ib += nm; + } + } + assert(ib == overlap_1->get_col_size()); + } + else + { + int nproj = 0; + for (int il = 0; il < lmaxd + 1; il++) + { + nproj += (2 * il + 1) * orb.Alpha[0].getNchi(il); + } + for (int iproj = 0; iproj < nproj; iproj++) + { + for (int jproj = 0; jproj < nproj; jproj++) + { + for (int dim = 0; dim < 3; dim++) + { + nlm[dim] += gedm[iat][iproj * nproj + jproj] + * overlap_1->get_value(row_indexes[iw1], iproj) + * grad_overlap_2[dim]->get_value(col_indexes[iw2], jproj); + } + } + } + } + + // HF term is minus, only one projector for each atom force. + f_delta(iat, 0) -= 2.0 * *dm_current * nlm[0]; + f_delta(iat, 1) -= 2.0 * *dm_current * nlm[1]; + f_delta(iat, 2) -= 2.0 * *dm_current * nlm[2]; + + // Pulay term is plus, only one projector for each atom force. + f_delta(ibt2, 0) += 2.0 * *dm_current * nlm[0]; + f_delta(ibt2, 1) += 2.0 * *dm_current * nlm[1]; + f_delta(ibt2, 2) += 2.0 * *dm_current * nlm[2]; + + if (isstress) + { + if (!PARAM.inp.deepks_equiv) + { + int ib = 0; + for (int L0 = 0; L0 <= orb.Alpha[0].getLmax(); ++L0) + { + for (int N0 = 0; N0 < orb.Alpha[0].getNchi(L0); ++N0) + { + const int inl = inl_index[T0](I0, L0, N0); + const int nm = 2 * L0 + 1; + + for (int m1 = 0; m1 < nm; ++m1) + { + for (int m2 = 0; m2 < nm; ++m2) + { + for (int dim = 0; dim < 3; ++dim) + { + nlm_t[dim] += gedm[inl][m1 * nm + m2] + * overlap_2->get_value(col_indexes[iw2], ib + m1) + * grad_overlap_1[dim]->get_value(row_indexes[iw1], + ib + m2); + } + } + } + ib += nm; + } + } + assert(ib == overlap_2->get_col_size()); + } + else + { + int nproj = 0; + for (int il = 0; il < lmaxd + 1; il++) + { + nproj += (2 * il + 1) * orb.Alpha[0].getNchi(il); + } + for (int iproj = 0; iproj < nproj; iproj++) + { + for (int jproj = 0; jproj < nproj; jproj++) + { + for (int dim = 0; dim < 3; dim++) + { + nlm_t[dim] += gedm[iat][iproj * nproj + jproj] + * overlap_2->get_value(col_indexes[iw2], iproj) + * grad_overlap_1[dim]->get_value(row_indexes[iw1], jproj); + } + } + } + } + + for (int ipol = 0; ipol < 3; ipol++) + { + for (int jpol = ipol; jpol < 3; jpol++) + { + svnl_dalpha(ipol, jpol) + += *dm_current * (nlm[ipol] * r0[jpol] + nlm_t[ipol] * r1[jpol]); + } + } + } + dm_current++; + } // iw2 + } // iw1 + } // ad2 + } // ad1 + } // end I0 + } // end T0 + + if (isstress) + { + assert(ucell.omega > 0.0); + const double weight = ucell.lat0 / ucell.omega; + // use upper triangle to make symmetric stress tensor + for (int i = 0; i < 3; ++i) + { + for (int j = 0; j < 3; ++j) + { + if (j > i) + { + svnl_dalpha(j, i) = svnl_dalpha(i, j); + } + svnl_dalpha(i, j) *= weight; + } + } + } + + return; +} + +// prints forces and stress from DeePKS (LCAO) +void DeePKS_domain::check_f_delta(const int nat, ModuleBase::matrix& f_delta, ModuleBase::matrix& svnl_dalpha) +{ + ModuleBase::TITLE("LCAO_Deepks", "check_F_delta"); + + std::ofstream ofs("F_delta.dat"); + ofs << std::setprecision(10); + + for (int iat = 0; iat < nat; iat++) + { + ofs << f_delta(iat, 0) << " " << f_delta(iat, 1) << " " << f_delta(iat, 2) << std::endl; + } + + std::ofstream ofs1("stress_delta.dat"); + ofs1 << std::setprecision(10); + for (int ipol = 0; ipol < 3; ipol++) + { + for (int jpol = 0; jpol < 3; jpol++) + { + ofs1 << svnl_dalpha(ipol, jpol) << " "; + } + ofs1 << std::endl; + } + return; +} + +template void DeePKS_domain::cal_f_delta(const std::vector>& dm, + const UnitCell& ucell, + const LCAO_Orbitals& orb, + const Grid_Driver& GridD, + const Parallel_Orbitals& pv, + const int lmaxd, + const int nks, + const std::vector>& kvec_d, + std::vector*> psialpha, + double** gedm, + ModuleBase::IntArray* inl_index, + ModuleBase::matrix& f_delta, + const bool isstress, + ModuleBase::matrix& svnl_dalpha); + +template void DeePKS_domain::cal_f_delta>(const std::vector>>& dm, + const UnitCell& ucell, + const LCAO_Orbitals& orb, + const Grid_Driver& GridD, + const Parallel_Orbitals& pv, + const int lmaxd, + const int nks, + const std::vector>& kvec_d, + std::vector*> psialpha, + double** gedm, + ModuleBase::IntArray* inl_index, + ModuleBase::matrix& f_delta, + const bool isstress, + ModuleBase::matrix& svnl_dalpha); + +#endif diff --git a/source/module_hamilt_lcao/module_deepks/deepks_force.h b/source/module_hamilt_lcao/module_deepks/deepks_force.h index 58474b9d56..e6f5b382d0 100644 --- a/source/module_hamilt_lcao/module_deepks/deepks_force.h +++ b/source/module_hamilt_lcao/module_deepks/deepks_force.h @@ -20,21 +20,19 @@ namespace DeePKS_domain { //------------------------ - // LCAO_deepks_fgamma.cpp - // LCAO_deepks_fk.cpp + // LCAO_deepks_force.cpp //------------------------ // This file contains subroutines for calculating F_delta, // which is defind as sum_mu,nu rho_mu,nu d/dX (V(D)) - // There are 3 subroutines in this file: - // 1. cal_f_delta_gamma, which is used for gamma point calculation - // 2. cal_f_delta_k, which is used for multi-k calculation - // 3. check_f_delta, which prints F_delta into F_delta.dat for checking + // There are 2 subroutines in this file: + // 1. cal_f_delta, which is used for F_delta calculation + // 2. check_f_delta, which prints F_delta into F_delta.dat for checking -// for gamma only, pulay and HF terms of force are calculated together -void cal_f_delta_gamma( - const std::vector>& dm, +template +void cal_f_delta( + const std::vector>& dm, const UnitCell& ucell, const LCAO_Orbitals& orb, const Grid_Driver& GridD, @@ -42,27 +40,7 @@ void cal_f_delta_gamma( const int lmaxd, const int nks, const std::vector>& kvec_d, - std::vector>>>>& nlm_save, - double** gedm, - ModuleBase::IntArray* inl_index, - ModuleBase::matrix& f_delta, - const bool isstress, - ModuleBase::matrix& svnl_dalpha); - -// for multi-k, pulay and HF terms of force are calculated together - -typedef std::tuple key_tuple; - -void cal_f_delta_k( - const std::vector>>& dm, /**<[in] density matrix*/ - const UnitCell& ucell, - const LCAO_Orbitals& orb, - const Grid_Driver& GridD, - const Parallel_Orbitals& pv, - const int lmaxd, - const int nks, - const std::vector>& kvec_d, - std::vector>>>>& nlm_save_k, + std::vector*> psialpha, double** gedm, ModuleBase::IntArray* inl_index, ModuleBase::matrix& f_delta, diff --git a/source/module_hamilt_lcao/module_deepks/orbital_precalc.cpp b/source/module_hamilt_lcao/module_deepks/orbital_precalc.cpp index da8dcdd539..a8fec8f9e9 100644 --- a/source/module_hamilt_lcao/module_deepks/orbital_precalc.cpp +++ b/source/module_hamilt_lcao/module_deepks/orbital_precalc.cpp @@ -33,11 +33,11 @@ void LCAO_Deepks::cal_orbital_precalc(const std::vector>& dm_hl, this->init_orbital_pdm_shell(nks); - for (int T0 = 0; T0 < ucell.ntype; T0++) + for (int T0 = 0; T0 < ucell.ntype; T0++) { Atom* atom0 = &ucell.atoms[T0]; - for (int I0 = 0; I0 < atom0->na; I0++) + for (int I0 = 0; I0 < atom0->na; I0++) { const int iat = ucell.itia2iat(T0, I0); const ModuleBase::Vector3 tau0 = atom0->tau[I0]; @@ -47,9 +47,9 @@ void LCAO_Deepks::cal_orbital_precalc(const std::vector>& dm_hl, std::vector trace_alpha_row; std::vector trace_alpha_col; int ib = 0; - for (int L0 = 0; L0 <= orb.Alpha[0].getLmax(); ++L0) + for (int L0 = 0; L0 <= orb.Alpha[0].getLmax(); ++L0) { - for (int N0 = 0; N0 < orb.Alpha[0].getNchi(L0); ++N0) + for (int N0 = 0; N0 < orb.Alpha[0].getNchi(L0); ++N0) { const int inl = this->inl_index[T0](I0, L0, N0); const int nm = 2 * L0 + 1; @@ -67,33 +67,27 @@ void LCAO_Deepks::cal_orbital_precalc(const std::vector>& dm_hl, } const int trace_alpha_size = trace_alpha_row.size(); - for (int ad1 = 0; ad1 < GridD.getAdjacentNum() + 1; ++ad1) + for (int ad1 = 0; ad1 < GridD.getAdjacentNum() + 1; ++ad1) { const int T1 = GridD.getType(ad1); const int I1 = GridD.getNatom(ad1); const int ibt1 = ucell.itia2iat(T1, I1); - const ModuleBase::Vector3 tau1 - = GridD.getAdjacentTau(ad1); + const ModuleBase::Vector3 tau1 = GridD.getAdjacentTau(ad1); const Atom* atom1 = &ucell.atoms[T1]; const int nw1_tot = atom1->nw * PARAM.globalv.npol; const double Rcut_AO1 = orb.Phi[T1].getRcut(); const double dist1 = (tau1 - tau0).norm() * ucell.lat0; - if (dist1 >= Rcut_Alpha + Rcut_AO1) + if (dist1 >= Rcut_Alpha + Rcut_AO1) { continue; } - ModuleBase::Vector3 dR1(GridD.getBox(ad1).x, - GridD.getBox(ad1).y, - GridD.getBox(ad1).z); - - key_tuple key_1(ibt1, dR1.x, dR1.y, dR1.z); + ModuleBase::Vector3 dR1(GridD.getBox(ad1).x, GridD.getBox(ad1).y, GridD.getBox(ad1).z); if constexpr (std::is_same>::value) { - if (this->nlm_save_k[iat].find(key_1) - == this->nlm_save_k[iat].end()) + if (this->psialpha[0]->find_matrix(iat, ibt1, dR1.x, dR1.y, dR1.z) == nullptr) { continue; } @@ -103,67 +97,53 @@ void LCAO_Deepks::cal_orbital_precalc(const std::vector>& dm_hl, const int row_size = row_indexes.size(); - if (row_size == 0) + if (row_size == 0) { continue; } std::vector s_1t(trace_alpha_size * row_size); - std::vector g_1dmt(nks * trace_alpha_size * row_size, 0.0); + std::vector g_1dmt(nks * trace_alpha_size * row_size, 0.0); - for (int irow = 0; irow < row_size; irow++) - { - double* row_ptr; - if constexpr (std::is_same::value) - { - row_ptr = this->nlm_save[iat][ad1][row_indexes[irow]][0].data(); - } - else - { - row_ptr = this->nlm_save_k[iat][key_1][row_indexes[irow]][0].data(); - } - for (int i = 0; i < trace_alpha_size; i++) + for (int irow = 0; irow < row_size; irow++) + { + hamilt::BaseMatrix* row_ptr = this->psialpha[0]->find_matrix(iat, ibt1, dR1); + for (int i = 0; i < trace_alpha_size; i++) { - s_1t[i * row_size + irow] = row_ptr[trace_alpha_row[i]]; + s_1t[i * row_size + irow] = row_ptr->get_value(row_indexes[irow], trace_alpha_row[i]); } } - for (int ad2 = 0; ad2 < GridD.getAdjacentNum() + 1; ad2++) + for (int ad2 = 0; ad2 < GridD.getAdjacentNum() + 1; ad2++) { const int T2 = GridD.getType(ad2); const int I2 = GridD.getNatom(ad2); const int ibt2 = ucell.itia2iat(T2, I2); if constexpr (std::is_same>::value) // Why only for multi-k? { - if (ibt1 > ibt2) + if (ibt1 > ibt2) { continue; } } - const ModuleBase::Vector3 tau2 - = GridD.getAdjacentTau(ad2); + const ModuleBase::Vector3 tau2 = GridD.getAdjacentTau(ad2); const Atom* atom2 = &ucell.atoms[T2]; const int nw2_tot = atom2->nw * PARAM.globalv.npol; const double Rcut_AO2 = orb.Phi[T2].getRcut(); const double dist2 = (tau2 - tau0).norm() * ucell.lat0; - if (dist2 >= Rcut_Alpha + Rcut_AO2) + if (dist2 >= Rcut_Alpha + Rcut_AO2) { continue; } - ModuleBase::Vector3 dR2(GridD.getBox(ad2).x, - GridD.getBox(ad2).y, - GridD.getBox(ad2).z); + ModuleBase::Vector3 dR2(GridD.getBox(ad2).x, GridD.getBox(ad2).y, GridD.getBox(ad2).z); - key_tuple key_2(ibt2, dR2.x, dR2.y, dR2.z); - if constexpr (std::is_same>::value) { - if (this->nlm_save_k[iat].find(key_2) - == this->nlm_save_k[iat].end()) + if (this->psialpha[0]->find_matrix(iat, ibt2, dR2.x, dR2.y, dR2.z) == nullptr) { continue; } @@ -171,27 +151,18 @@ void LCAO_Deepks::cal_orbital_precalc(const std::vector>& dm_hl, auto col_indexes = pv->get_indexes_col(ibt2); const int col_size = col_indexes.size(); - if (col_size == 0) - { - continue; - } + if (col_size == 0) + { + continue; + } std::vector s_2t(trace_alpha_size * col_size); - for (int icol = 0; icol < col_size; icol++) + for (int icol = 0; icol < col_size; icol++) { - double* col_ptr; - if constexpr (std::is_same::value) - { - col_ptr = this->nlm_save[iat][ad2][col_indexes[icol]][0].data(); - } - else - { - col_ptr = this->nlm_save_k[iat][key_2][col_indexes[icol]][0].data(); - } - for (int i = 0; i < trace_alpha_size; i++) + hamilt::BaseMatrix* col_ptr = this->psialpha[0]->find_matrix(iat, ibt2, dR2); + for (int i = 0; i < trace_alpha_size; i++) { - s_2t[i * col_size + icol] - = col_ptr[trace_alpha_col[i]]; + s_2t[i * col_size + icol] = col_ptr->get_value(col_indexes[icol], trace_alpha_col[i]); } } @@ -201,69 +172,51 @@ void LCAO_Deepks::cal_orbital_precalc(const std::vector>& dm_hl, if constexpr (std::is_same::value) { - for (int is = 0; is < PARAM.inp.nspin; is++) + for (int is = 0; is < PARAM.inp.nspin; is++) { - hamilt::AtomPair dm_pair(ibt1, - ibt2, - 0, - 0, - 0, - pv); + hamilt::AtomPair dm_pair(ibt1, ibt2, 0, 0, 0, pv); dm_pair.allocate(dm_array.data(), 0); - if (ModuleBase::GlobalFunc::IS_COLUMN_MAJOR_KS_SOLVER(PARAM.inp.ks_solver)) + if (ModuleBase::GlobalFunc::IS_COLUMN_MAJOR_KS_SOLVER(PARAM.inp.ks_solver)) { - dm_pair.add_from_matrix(dm_hl[0][is].c, - pv->get_row_size(), - 1.0, - 1); - } - else + dm_pair.add_from_matrix(dm_hl[0][is].c, pv->get_row_size(), 1.0, 1); + } + else { - dm_pair.add_from_matrix(dm_hl[0][is].c, - pv->get_col_size(), - 1.0, - 0); + dm_pair.add_from_matrix(dm_hl[0][is].c, pv->get_col_size(), 1.0, 0); } } // is } else { - for (int ik = 0; ik < nks; ik++) + for (int ik = 0; ik < nks; ik++) { hamilt::AtomPair dm_pair(ibt1, - ibt2, - (dR2 - dR1).x, - (dR2 - dR1).y, - (dR2 - dR1).z, - pv); + ibt2, + (dR2 - dR1).x, + (dR2 - dR1).y, + (dR2 - dR1).z, + pv); dm_pair.allocate(&dm_array[ik * row_size * col_size], 0); const double arg - = -(kvec_d[ik] * (dR2 - dR1)) * ModuleBase::TWO_PI; + = -(kvec_d[ik] * ModuleBase::Vector3(dR2 - dR1)) * ModuleBase::TWO_PI; double sinp, cosp; ModuleBase::libm::sincos(arg, &sinp, &cosp); - const std::complex kphase - = std::complex(cosp, sinp); + const std::complex kphase = std::complex(cosp, sinp); - if (ModuleBase::GlobalFunc::IS_COLUMN_MAJOR_KS_SOLVER(PARAM.inp.ks_solver)) + if (ModuleBase::GlobalFunc::IS_COLUMN_MAJOR_KS_SOLVER(PARAM.inp.ks_solver)) { - dm_pair.add_from_matrix(dm_hl[0][ik].c, - pv->get_row_size(), - kphase, - 1); - } - else + dm_pair.add_from_matrix(dm_hl[0][ik].c, pv->get_row_size(), kphase, 1); + } + else { - dm_pair.add_from_matrix(dm_hl[0][ik].c, - pv->get_col_size(), - kphase, - 0); + dm_pair.add_from_matrix(dm_hl[0][ik].c, pv->get_col_size(), kphase, 0); } } // ik } @@ -272,9 +225,9 @@ void LCAO_Deepks::cal_orbital_precalc(const std::vector>& dm_hl, constexpr char transa = 'T', transb = 'N'; double gemm_alpha = 1.0, gemm_beta = 1.0; - if constexpr (std::is_same>::value) + if constexpr (std::is_same>::value) { - if (ibt1 < ibt2) + if (ibt1 < ibt2) { gemm_alpha = 2.0; } @@ -302,9 +255,9 @@ void LCAO_Deepks::cal_orbital_precalc(const std::vector>& dm_hl, int ib = 0, index = 0, inc = 1; - for (int L0 = 0; L0 <= orb.Alpha[0].getLmax(); ++L0) + for (int L0 = 0; L0 <= orb.Alpha[0].getLmax(); ++L0) { - for (int N0 = 0; N0 < orb.Alpha[0].getNchi(L0); ++N0) + for (int N0 = 0; N0 < orb.Alpha[0].getNchi(L0); ++N0) { const int inl = this->inl_index[T0](I0, L0, N0); const int nm = 2 * L0 + 1; @@ -315,10 +268,10 @@ void LCAO_Deepks::cal_orbital_precalc(const std::vector>& dm_hl, { orbital_pdm_shell[ik][0][inl][m1 * nm + m2] += ddot_(&row_size, - p_g1dmt + index * row_size * nks, - &inc, - s_1t.data() + index * row_size, - &inc); + p_g1dmt + index * row_size * nks, + &inc, + s_1t.data() + index * row_size, + &inc); index++; } } @@ -330,11 +283,11 @@ void LCAO_Deepks::cal_orbital_precalc(const std::vector>& dm_hl, } } #ifdef __MPI - for (int iks = 0; iks < nks; iks++) + for (int iks = 0; iks < nks; iks++) { for (int hl = 0; hl < 1; hl++) { - for (int inl = 0; inl < this->inlmax; inl++) + for (int inl = 0; inl < this->inlmax; inl++) { Parallel_Reduce::reduce_all(this->orbital_pdm_shell[iks][hl][inl], (2 * this->lmaxd + 1) * (2 * this->lmaxd + 1)); @@ -348,16 +301,16 @@ void LCAO_Deepks::cal_orbital_precalc(const std::vector>& dm_hl, int nlmax = this->inlmax / nat; std::vector orbital_pdm_shell_vector; - for (int nl = 0; nl < nlmax; ++nl) + for (int nl = 0; nl < nlmax; ++nl) { std::vector kiammv; - for (int iks = 0; iks < nks; ++iks) + for (int iks = 0; iks < nks; ++iks) { std::vector iammv; for (int hl = 0; hl < 1; ++hl) { std::vector ammv; - for (int iat = 0; iat < nat; ++iat) + for (int iat = 0; iat < nat; ++iat) { int inl = iat * nlmax + nl; int nm = 2 * this->inl_l[inl] + 1; @@ -367,12 +320,11 @@ void LCAO_Deepks::cal_orbital_precalc(const std::vector>& dm_hl, { for (int m2 = 0; m2 < nm; ++m2) // m1 = 1 for s, 3 for p, 5 for d { - mmv.push_back( - this->orbital_pdm_shell[iks][hl][inl][m1 * nm + m2]); + mmv.push_back(this->orbital_pdm_shell[iks][hl][inl][m1 * nm + m2]); } } - torch::Tensor mm = torch::tensor(mmv, - torch::TensorOptions().dtype(torch::kFloat64)).reshape({nm, nm}); // nm*nm + torch::Tensor mm + = torch::tensor(mmv, torch::TensorOptions().dtype(torch::kFloat64)).reshape({nm, nm}); // nm*nm ammv.push_back(mm); } @@ -390,11 +342,10 @@ void LCAO_Deepks::cal_orbital_precalc(const std::vector>& dm_hl, // einsum for each nl: std::vector orbital_precalc_vector; - for (int nl = 0; nl < nlmax; ++nl) + for (int nl = 0; nl < nlmax; ++nl) { orbital_precalc_vector.push_back( - at::einsum("kiamn, avmn->kiav", - {orbital_pdm_shell_vector[nl], this->gevdm_vector[nl]})); + at::einsum("kiamn, avmn->kiav", {orbital_pdm_shell_vector[nl], this->gevdm_vector[nl]})); } this->orbital_precalc_tensor = torch::cat(orbital_precalc_vector, -1); diff --git a/source/module_hamilt_lcao/module_deepks/test/LCAO_deepks_test.cpp b/source/module_hamilt_lcao/module_deepks/test/LCAO_deepks_test.cpp index ae8384057c..77bcc9a6cf 100644 --- a/source/module_hamilt_lcao/module_deepks/test/LCAO_deepks_test.cpp +++ b/source/module_hamilt_lcao/module_deepks/test/LCAO_deepks_test.cpp @@ -126,11 +126,11 @@ void test_deepks::check_gdmx() this->ld.init_gdmx(ucell.nat); if (PARAM.sys.gamma_only_local) { - this->ld.cal_gdmx(dm_new, ucell, ORB, Test_Deepks::GridD, kv.get_nkstot(), kv.kvec_d, 0); + this->ld.cal_gdmx(dm_new, ucell, ORB, Test_Deepks::GridD, kv.get_nkstot(), kv.kvec_d, this->ld.psialpha, 0); } else { - this->ld.cal_gdmx(dm_k_new, ucell, ORB, Test_Deepks::GridD, kv.get_nkstot(), kv.kvec_d, 0); + this->ld.cal_gdmx(dm_k_new, ucell, ORB, Test_Deepks::GridD, kv.get_nkstot(), kv.kvec_d, this->ld.psialpha, 0); } this->ld.check_gdmx(ucell.nat); @@ -241,7 +241,7 @@ void test_deepks::check_f_delta() svnl_dalpha.create(3, 3); if (PARAM.sys.gamma_only_local) { - const int nks=1; + const int nks = 1; ld.cal_f_delta_gamma(dm_new, ucell, ORB, Test_Deepks::GridD, nks, kv.kvec_d, 1, svnl_dalpha); } else diff --git a/source/module_hamilt_lcao/module_deepks/v_delta_precalc.cpp b/source/module_hamilt_lcao/module_deepks/v_delta_precalc.cpp index 6bb4c0ac6a..5ffe41a22d 100644 --- a/source/module_hamilt_lcao/module_deepks/v_delta_precalc.cpp +++ b/source/module_hamilt_lcao/module_deepks/v_delta_precalc.cpp @@ -14,7 +14,6 @@ #include "module_hamilt_lcao/module_hcontainer/atom_pair.h" #include "module_parameter/parameter.h" - // calculates v_delta_precalc[nks,nlocal,nlocal,NAt,NDscrpt] = gvdm * v_delta_pdm_shell; // v_delta_pdm_shell[nks,nlocal,nlocal,Inl,nm*nm] = overlap * overlap; // for deepks_v_delta = 1 @@ -33,266 +32,263 @@ void LCAO_Deepks::cal_v_delta_precalc(const int nlocal, this->cal_gvdm(nat); const double Rcut_Alpha = orb.Alpha[0].getRcut(); - this->init_v_delta_pdm_shell(nks,nlocal); - + this->init_v_delta_pdm_shell(nks, nlocal); + for (int T0 = 0; T0 < ucell.ntype; T0++) { - Atom* atom0 = &ucell.atoms[T0]; - - for (int I0 =0; I0< atom0->na; I0++) + Atom* atom0 = &ucell.atoms[T0]; + + for (int I0 = 0; I0 < atom0->na; I0++) { - const int iat = ucell.itia2iat(T0,I0); + const int iat = ucell.itia2iat(T0, I0); const ModuleBase::Vector3 tau0 = atom0->tau[I0]; - GridD.Find_atom(ucell, atom0->tau[I0] ,T0, I0); + GridD.Find_atom(ucell, atom0->tau[I0], T0, I0); - for (int ad1=0; ad1 tau1 = GridD.getAdjacentTau(ad1); - const Atom* atom1 = &ucell.atoms[T1]; - const int nw1_tot = atom1->nw*PARAM.globalv.npol; - const double Rcut_AO1 = orb.Phi[T1].getRcut(); + const Atom* atom1 = &ucell.atoms[T1]; + const int nw1_tot = atom1->nw * PARAM.globalv.npol; + const double Rcut_AO1 = orb.Phi[T1].getRcut(); - const double dist1 = (tau1-tau0).norm() * ucell.lat0; + const double dist1 = (tau1 - tau0).norm() * ucell.lat0; if (dist1 >= Rcut_Alpha + Rcut_AO1) { continue; } - - ModuleBase::Vector3 dR1(GridD.getBox(ad1).x, - GridD.getBox(ad1).y, - GridD.getBox(ad1).z); - key_tuple key_1(ibt1, dR1.x, dR1.y, dR1.z); + ModuleBase::Vector3 dR1(GridD.getBox(ad1).x, GridD.getBox(ad1).y, GridD.getBox(ad1).z); if constexpr (std::is_same>::value) { - if (this->nlm_save_k[iat].find(key_1) - == this->nlm_save_k[iat].end()) + if (this->psialpha[0]->find_matrix(iat, ibt1, dR1.x, dR1.y, dR1.z) == nullptr) { continue; } } - for (int ad2=0; ad2 < GridD.getAdjacentNum()+1 ; ad2++) - { - const int T2 = GridD.getType(ad2); - const int I2 = GridD.getNatom(ad2); + for (int ad2 = 0; ad2 < GridD.getAdjacentNum() + 1; ad2++) + { + const int T2 = GridD.getType(ad2); + const int I2 = GridD.getNatom(ad2); const int ibt2 = ucell.itia2iat(T2, I2); - const int start2 = ucell.itiaiw2iwt(T2, I2, 0); - const ModuleBase::Vector3 tau2 = GridD.getAdjacentTau(ad2); - const Atom* atom2 = &ucell.atoms[T2]; - const int nw2_tot = atom2->nw*PARAM.globalv.npol; - - const double Rcut_AO2 = orb.Phi[T2].getRcut(); - const double dist2 = (tau2-tau0).norm() * ucell.lat0; - - if (dist2 >= Rcut_Alpha + Rcut_AO2) - { - continue; - } - - ModuleBase::Vector3 dR2(GridD.getBox(ad2).x, - GridD.getBox(ad2).y, - GridD.getBox(ad2).z); - key_tuple key_2(ibt2, dR2.x, dR2.y, dR2.z); + const int start2 = ucell.itiaiw2iwt(T2, I2, 0); + const ModuleBase::Vector3 tau2 = GridD.getAdjacentTau(ad2); + const Atom* atom2 = &ucell.atoms[T2]; + const int nw2_tot = atom2->nw * PARAM.globalv.npol; + + const double Rcut_AO2 = orb.Phi[T2].getRcut(); + const double dist2 = (tau2 - tau0).norm() * ucell.lat0; + + if (dist2 >= Rcut_Alpha + Rcut_AO2) + { + continue; + } + + ModuleBase::Vector3 dR2(GridD.getBox(ad2).x, GridD.getBox(ad2).y, GridD.getBox(ad2).z); if constexpr (std::is_same>::value) { - if (this->nlm_save_k[iat].find(key_2) - == this->nlm_save_k[iat].end()) + if (this->psialpha[0]->find_matrix(iat, ibt2, dR2.x, dR2.y, dR2.z) == nullptr) { continue; } } - for (int iw1=0; iw1global2local_row(iw1_all); - if(iw1_local < 0) {continue;} - const int iw1_0 = iw1/PARAM.globalv.npol; - for (int iw2=0; iw2global2local_col(iw2_all); - if(iw2_local < 0) {continue;} - const int iw2_0 = iw2/PARAM.globalv.npol; - - std::vector nlm1; - std::vector nlm2; - if constexpr (std::is_same::value) - { - nlm1 = this->nlm_save[iat][ad1][iw1][0]; - nlm2 = this->nlm_save[iat][ad2][iw2][0]; - } - else + for (int iw1 = 0; iw1 < nw1_tot; ++iw1) + { + const int iw1_all = start1 + iw1; // this is \mu + const int iw1_local = pv->global2local_row(iw1_all); + if (iw1_local < 0) + { + continue; + } + for (int iw2 = 0; iw2 < nw2_tot; ++iw2) + { + const int iw2_all = start2 + iw2; // this is \nu + const int iw2_local = pv->global2local_col(iw2_all); + if (iw2_local < 0) { - nlm1 = this->nlm_save_k[iat][key_1][iw1][0]; - nlm2 = this->nlm_save_k[iat][key_2][iw2][0]; + continue; } - assert(nlm1.size()==nlm2.size()); + hamilt::BaseMatrix* overlap_1 = psialpha[0]->find_matrix(iat, ibt1, dR1); + hamilt::BaseMatrix* overlap_2 = psialpha[0]->find_matrix(iat, ibt2, dR2); + assert(overlap_1->get_col_size() == overlap_2->get_col_size()); + for (int ik = 0; ik < nks; ik++) { - int ib=0; + int ib = 0; std::complex kphase = std::complex(1.0, 0.0); if constexpr (std::is_same>::value) { - const double arg = - (kvec_d[ik] * (dR1-dR2) ) * ModuleBase::TWO_PI; + const double arg + = -(kvec_d[ik] * ModuleBase::Vector3(dR1 - dR2)) * ModuleBase::TWO_PI; double sinp, cosp; ModuleBase::libm::sincos(arg, &sinp, &cosp); kphase = std::complex(cosp, sinp); } - for (int L0 = 0; L0 <= orb.Alpha[0].getLmax();++L0) + for (int L0 = 0; L0 <= orb.Alpha[0].getLmax(); ++L0) { - for (int N0 = 0;N0 < orb.Alpha[0].getNchi(L0);++N0) + for (int N0 = 0; N0 < orb.Alpha[0].getNchi(L0); ++N0) { const int inl = this->inl_index[T0](I0, L0, N0); - const int nm = 2*L0+1; - - for (int m1=0; m1::value) { - this->v_delta_pdm_shell[ik][iw1_all][iw2_all][inl][m1*nm+m2] += nlm1[ib+m1]*nlm2[ib+m2]; + this->v_delta_pdm_shell[ik][iw1_all][iw2_all][inl][m1 * nm + m2] + += overlap_1->get_value(iw1, ib + m1) + * overlap_2->get_value(iw2, ib + m2); } else { - this->v_delta_pdm_shell_complex[ik][iw1_all][iw2_all][inl][m1*nm+m2] += nlm1[ib+m1]*nlm2[ib+m2]*kphase; + this->v_delta_pdm_shell_complex[ik][iw1_all][iw2_all][inl] + [m1 * nm + m2] + += overlap_1->get_value(iw1, ib + m1) + * overlap_2->get_value(iw2, ib + m2) * kphase; } } } - ib+=nm; + ib += nm; } - } - } //ik - }//iw2 - }//iw1 - }//ad2 - }//ad1 - + } + } // ik + } // iw2 + } // iw1 + } // ad2 + } // ad1 } } #ifdef __MPI - const int mn_size=(2 * this->lmaxd + 1) * (2 * this->lmaxd + 1); + const int mn_size = (2 * this->lmaxd + 1) * (2 * this->lmaxd + 1); for (int ik = 0; ik < nks; ik++) { - for(int inl = 0; inl < this->inlmax; inl++) + for (int inl = 0; inl < this->inlmax; inl++) { - for(int mu = 0; mu < nlocal ; mu++) + for (int mu = 0; mu < nlocal; mu++) { - for(int nu=0; nu< nlocal ; nu++) + for (int nu = 0; nu < nlocal; nu++) { if constexpr (std::is_same::value) { - Parallel_Reduce::reduce_all(this->v_delta_pdm_shell[ik][mu][nu][inl],mn_size); + Parallel_Reduce::reduce_all(this->v_delta_pdm_shell[ik][mu][nu][inl], mn_size); } else { - Parallel_Reduce::reduce_all(this->v_delta_pdm_shell_complex[ik][mu][nu][inl],mn_size); + Parallel_Reduce::reduce_all(this->v_delta_pdm_shell_complex[ik][mu][nu][inl], mn_size); } } } } } -#endif - +#endif + // transfer v_delta_pdm_shell to v_delta_pdm_shell_vector - - int nlmax = this->inlmax/nat; - + + int nlmax = this->inlmax / nat; + std::vector v_delta_pdm_shell_vector; - for(int nl = 0; nl < nlmax; ++nl) + for (int nl = 0; nl < nlmax; ++nl) { std::vector kuuammv; - for(int iks = 0; iks < nks; ++iks) + for (int iks = 0; iks < nks; ++iks) { std::vector uuammv; - for(int mu = 0; mu < nlocal; ++mu) + for (int mu = 0; mu < nlocal; ++mu) { std::vector uammv; - for(int nu =0 ; nu < nlocal; ++nu) + for (int nu = 0; nu < nlocal; ++nu) { std::vector ammv; - for (int iat=0; iatinl_l[inl]+1; + int inl = iat * nlmax + nl; + int nm = 2 * this->inl_l[inl] + 1; std::vector mmv; - - for (int m1=0; m1::value) { - mmv.push_back(this->v_delta_pdm_shell[iks][mu][nu][inl][m1*nm+m2]); + mmv.push_back(this->v_delta_pdm_shell[iks][mu][nu][inl][m1 * nm + m2]); } else { - mmv.push_back(this->v_delta_pdm_shell_complex[iks][mu][nu][inl][m1*nm+m2]); + mmv.push_back(this->v_delta_pdm_shell_complex[iks][mu][nu][inl][m1 * nm + m2]); } } } if constexpr (std::is_same::value) { - torch::Tensor mm = torch::tensor(mmv, torch::TensorOptions().dtype(torch::kFloat64) ).reshape({nm, nm}); //nm*nm + torch::Tensor mm = torch::tensor(mmv, torch::TensorOptions().dtype(torch::kFloat64)) + .reshape({nm, nm}); // nm*nm ammv.push_back(mm); } else { - torch::Tensor mm = torch::from_blob(mmv.data(), {nm, nm}, torch::TensorOptions().dtype(torch::kComplexDouble)).clone(); //nm*nm + torch::Tensor mm = torch::from_blob(mmv.data(), + {nm, nm}, + torch::TensorOptions().dtype(torch::kComplexDouble)) + .clone(); // nm*nm ammv.push_back(mm); } } - torch::Tensor amm = torch::stack(ammv, 0); - uammv.push_back(amm); + torch::Tensor amm = torch::stack(ammv, 0); + uammv.push_back(amm); } - torch::Tensor uamm = torch::stack(uammv, 0); + torch::Tensor uamm = torch::stack(uammv, 0); uuammv.push_back(uamm); } - torch::Tensor uuamm = torch::stack(uuammv, 0); + torch::Tensor uuamm = torch::stack(uuammv, 0); kuuammv.push_back(uuamm); } - torch::Tensor kuuamm = torch::stack(kuuammv, 0); + torch::Tensor kuuamm = torch::stack(kuuammv, 0); v_delta_pdm_shell_vector.push_back(kuuamm); } - + assert(v_delta_pdm_shell_vector.size() == nlmax); - - //einsum for each nl: + + // einsum for each nl: std::vector v_delta_precalc_vector; - for (int nl = 0; nl::value) { - v_delta_precalc_vector.push_back(at::einsum("kxyamn, avmn->kxyav", {v_delta_pdm_shell_vector[nl], this->gevdm_vector[nl]})); + v_delta_precalc_vector.push_back( + at::einsum("kxyamn, avmn->kxyav", {v_delta_pdm_shell_vector[nl], this->gevdm_vector[nl]})); } else { torch::Tensor gevdm_vector_complex = this->gevdm_vector[nl].to(torch::kComplexDouble); - v_delta_precalc_vector.push_back(at::einsum("kxyamn, avmn->kxyav", {v_delta_pdm_shell_vector[nl], gevdm_vector_complex})); + v_delta_precalc_vector.push_back( + at::einsum("kxyamn, avmn->kxyav", {v_delta_pdm_shell_vector[nl], gevdm_vector_complex})); } } this->v_delta_precalc_tensor = torch::cat(v_delta_precalc_vector, -1); - this->del_v_delta_pdm_shell(nks,nlocal); + this->del_v_delta_pdm_shell(nks, nlocal); - //check_v_delta_precalc(nlocal,nat); - // timeval t_end; - // gettimeofday(&t_end,NULL); - // std::cout<<"calculate v_delta_precalc time:\t"<<(double)(t_end.tv_sec-t_start.tv_sec) + (double)(t_end.tv_usec-t_start.tv_usec)/1000000.0< -void LCAO_Deepks::check_v_delta_precalc(const int nat, const int nks,const int nlocal) +void LCAO_Deepks::check_v_delta_precalc(const int nat, const int nks, const int nlocal) { std::ofstream ofs("v_delta_precalc.dat"); ofs << std::setprecision(10); @@ -302,22 +298,24 @@ void LCAO_Deepks::check_v_delta_precalc(const int nat, const int nks,const int n { for (int nu = 0; nu < nlocal; ++nu) { - for (int iat = 0;iat < nat;++iat) + for (int iat = 0; iat < nat; ++iat) { - for(int p=0; pdes_per_atom; ++p) + for (int p = 0; p < this->des_per_atom; ++p) { if constexpr (std::is_same::value) { - ofs << this->v_delta_precalc_tensor.index({iks, mu, nu, iat, p }).item().toDouble() << " " ; + ofs << this->v_delta_precalc_tensor.index({iks, mu, nu, iat, p}).item().toDouble() << " "; } else { auto tensor_value = this->v_delta_precalc_tensor.index({iks, mu, nu, iat, p}); - ofs << std::complex(torch::real(tensor_value).item(), torch::imag(tensor_value).item()) << " " ; + ofs << std::complex(torch::real(tensor_value).item(), + torch::imag(tensor_value).item()) + << " "; } } } - ofs << std::endl; + ofs << std::endl; } } } diff --git a/source/module_hamilt_lcao/module_hcontainer/atom_pair.cpp b/source/module_hamilt_lcao/module_hcontainer/atom_pair.cpp index d35553a2f8..cef77a34ac 100644 --- a/source/module_hamilt_lcao/module_hcontainer/atom_pair.cpp +++ b/source/module_hamilt_lcao/module_hcontainer/atom_pair.cpp @@ -347,6 +347,10 @@ void AtomPair::set_size(const int& col_size_in, const int& row_size_in) { this->col_size = col_size_in; this->row_size = row_size_in; + for (int i = 0; i < this->values.size(); i++) + { + this->values[i].set_size(row_size_in, col_size_in); + } } // get paraV for check diff --git a/source/module_hamilt_lcao/module_hcontainer/base_matrix.cpp b/source/module_hamilt_lcao/module_hcontainer/base_matrix.cpp index a7531a1245..4b1850801a 100644 --- a/source/module_hamilt_lcao/module_hcontainer/base_matrix.cpp +++ b/source/module_hamilt_lcao/module_hcontainer/base_matrix.cpp @@ -216,6 +216,14 @@ size_t BaseMatrix::get_memory_size() const return memory_size; } +// set size +template +void BaseMatrix::set_size(const int& nrow, const int& ncol) +{ + this->nrow_local = nrow; + this->ncol_local = ncol; +} + // T of BaseMatrix can be double or complex template class BaseMatrix; template class BaseMatrix>; diff --git a/source/module_hamilt_lcao/module_hcontainer/base_matrix.h b/source/module_hamilt_lcao/module_hcontainer/base_matrix.h index 227666ebc3..bd47d49d79 100644 --- a/source/module_hamilt_lcao/module_hcontainer/base_matrix.h +++ b/source/module_hamilt_lcao/module_hcontainer/base_matrix.h @@ -80,7 +80,7 @@ class BaseMatrix */ size_t get_memory_size() const; - /** + /** * @brief get col_size for this matrix */ int get_col_size() const {return ncol_local;}; @@ -88,6 +88,10 @@ class BaseMatrix * @brief get row_size for this matrix */ int get_row_size() const {return nrow_local;}; + /** + * @brief set col_size and row_size + */ + void set_size(const int& col_size_in, const int& row_size_in); private: bool allocated = false; diff --git a/source/module_io/read_input_item_deepks.cpp b/source/module_io/read_input_item_deepks.cpp index a641812d4c..f2d53c94d7 100644 --- a/source/module_io/read_input_item_deepks.cpp +++ b/source/module_io/read_input_item_deepks.cpp @@ -67,9 +67,9 @@ void ReadInput::item_deepks() }; item.check_value = [](const Input_Item& item, const Parameter& para) { if (para.input.deepks_out_unittest){ - if (para.input.cal_force != 1) + if (para.input.cal_force != 1 || para.input.cal_stress != 1) { - ModuleBase::WARNING_QUIT("ReadInput", "force is required in generating deepks unittest"); + ModuleBase::WARNING_QUIT("ReadInput", "force and stress are required in generating deepks unittest"); } } }; diff --git a/tests/deepks/602_NO_deepks_d_H2O_md_lda2pbe/result.ref b/tests/deepks/602_NO_deepks_d_H2O_md_lda2pbe/result.ref index c197e2c934..481746130e 100644 --- a/tests/deepks/602_NO_deepks_d_H2O_md_lda2pbe/result.ref +++ b/tests/deepks/602_NO_deepks_d_H2O_md_lda2pbe/result.ref @@ -1,9 +1,9 @@ -etotref -465.9986234579140 -etotperatomref -155.3328744860 +etotref -465.9986234576679 +etotperatomref -155.3328744859 totalforceref 5.535106 -totalstressref 1.520003 +totalstressref 1.522353 totaldes 2.163703 -deepks_e_dm -57.885718058035934 -deepks_f_label 19.095598453044364 -deepks_s_label 19.250415261605152 -totaltimeref 12.68 +deepks_e_dm -57.8857180593137 +deepks_f_label 19.09559844689178 +deepks_s_label 19.250590727951906 +totaltimeref 12.58 diff --git a/tests/deepks/602_NO_deepks_d_H2O_scf_lda2pbe/result.ref b/tests/deepks/602_NO_deepks_d_H2O_scf_lda2pbe/result.ref index bfbc3f2cf0..7e71a478e6 100644 --- a/tests/deepks/602_NO_deepks_d_H2O_scf_lda2pbe/result.ref +++ b/tests/deepks/602_NO_deepks_d_H2O_scf_lda2pbe/result.ref @@ -1,9 +1,9 @@ -etotref -466.0342709734272262 +etotref -466.0342709734266577 etotperatomref -155.3447569911 totalforceref 3.194893 -totalstressref 1.190505 +totalstressref 1.190889 totaldes 2.319513 -deepks_e_dm -57.521935314706305 -deepks_f_label 19.875352737513325 -deepks_s_label 19.58791026543513 -totaltimeref 5.81 +deepks_e_dm -57.521935314706134 +deepks_f_label 19.87535273614641 +deepks_s_label 19.587939082569576 +totaltimeref 5.75