diff --git a/source/Makefile.Objects b/source/Makefile.Objects index 8e6fb95677..972a2e4269 100644 --- a/source/Makefile.Objects +++ b/source/Makefile.Objects @@ -230,6 +230,7 @@ OBJS_ELECSTAT_LCAO=elecstate_lcao.o\ elecstate_lcao_tddft.o\ elecstate_lcao_cal_tau.o\ density_matrix.o\ + density_matrix_io.o\ cal_dm_psi.o\ OBJS_ESOLVER=esolver.o\ @@ -571,8 +572,6 @@ OBJS_LCAO=evolve_elec.o\ stress_tools.o\ edm.o\ pulay_force_stress_center2.o\ - fvnl_dbeta_gamma.o\ - fvnl_dbeta_k.o\ grid_init.o\ spar_dh.o\ spar_exx.o\ diff --git a/source/module_elecstate/CMakeLists.txt b/source/module_elecstate/CMakeLists.txt index 40b43a7b99..1923b03317 100644 --- a/source/module_elecstate/CMakeLists.txt +++ b/source/module_elecstate/CMakeLists.txt @@ -35,6 +35,7 @@ if(ENABLE_LCAO) elecstate_lcao_cal_tau.cpp potentials/H_TDDFT_pw.cpp module_dm/density_matrix.cpp + module_dm/density_matrix_io.cpp module_dm/cal_dm_psi.cpp ) endif() diff --git a/source/module_elecstate/module_dm/density_matrix.cpp b/source/module_elecstate/module_dm/density_matrix.cpp index 92083170eb..56475e6840 100644 --- a/source/module_elecstate/module_dm/density_matrix.cpp +++ b/source/module_elecstate/module_dm/density_matrix.cpp @@ -22,6 +22,7 @@ DensityMatrix::~DensityMatrix() { delete it; } + delete[] this->dmr_tmp_; } template @@ -50,381 +51,9 @@ DensityMatrix::DensityMatrix(const Parallel_Orbitals* paraV_in, const in ModuleBase::Memory::record("DensityMatrix::DMK", this->_DMK.size() * this->_DMK[0].size() * sizeof(TK)); } -// initialize density matrix DMR from UnitCell (mainly used in UnitTest) -template -void DensityMatrix::init_DMR(Grid_Driver* GridD_in, const UnitCell* ucell) -{ - ModuleBase::TITLE("DensityMatrix", "init_DMR"); - // ensure _DMR is empty - for (auto& it: this->_DMR) - { - delete it; - } - this->_DMR.clear(); - // construct a new DMR - hamilt::HContainer* tmp_DMR; - tmp_DMR = new hamilt::HContainer(this->_paraV); - // set up a HContainer - for (int iat1 = 0; iat1 < ucell->nat; iat1++) - { - auto tau1 = ucell->get_tau(iat1); - int T1, I1; - ucell->iat2iait(iat1, &I1, &T1); - AdjacentAtomInfo adjs; - GridD_in->Find_atom(*ucell, tau1, T1, I1, &adjs); - // std::cout << "adjs.adj_num: " <itia2iat(T2, I2); - if (this->_paraV->get_row_size(iat1) <= 0 || this->_paraV->get_col_size(iat2) <= 0) - { - continue; - } - ModuleBase::Vector3& R_index = adjs.box[ad]; - // std::cout << "R_index: " << R_index.x << " " << R_index.y << " " << R_index.z << std::endl; - hamilt::AtomPair tmp_ap(iat1, iat2, R_index, this->_paraV); - tmp_DMR->insert_pair(tmp_ap); - } - } - // allocate the memory of BaseMatrix in SR, and set the new values to zero - if (std::is_same::value) - { - tmp_DMR->fix_gamma(); - } - tmp_DMR->allocate(nullptr, true); - this->_DMR.push_back(tmp_DMR); - // add another DMR if nspin==2 - if (this->_nspin == 2) - { - hamilt::HContainer* tmp_DMR1; - tmp_DMR1 = new hamilt::HContainer(*tmp_DMR); - this->_DMR.push_back(tmp_DMR1); - } - ModuleBase::Memory::record("DensityMatrix::DMR", this->_DMR.size() * this->_DMR[0]->get_memory_size()); -} - -// initialize density matrix DMR from UnitCell and RA (mainly used in UnitTest) -template -void DensityMatrix::init_DMR(Record_adj& ra, const UnitCell* ucell) -{ - ModuleBase::TITLE("DensityMatrix", "init_DMR"); - // ensure _DMR is empty - for (auto& it: this->_DMR) - { - delete it; - } - this->_DMR.clear(); - // construct a new DMR - hamilt::HContainer* tmp_DMR; - tmp_DMR = new hamilt::HContainer(this->_paraV); - // set up a HContainer - for (int iat1 = 0; iat1 < ucell->nat; iat1++) - { - auto tau1 = ucell->get_tau(iat1); - int T1, I1; - ucell->iat2iait(iat1, &I1, &T1); - for (int ad = 0; ad < ra.na_each[iat1]; ++ad) - { - const int T2 = ra.info[iat1][ad][3]; - const int I2 = ra.info[iat1][ad][4]; - int iat2 = ucell->itia2iat(T2, I2); - if (this->_paraV->get_row_size(iat1) <= 0 || this->_paraV->get_col_size(iat2) <= 0) - { - continue; - } - hamilt::AtomPair tmp_ap(iat1, - iat2, - ra.info[iat1][ad][0], - ra.info[iat1][ad][1], - ra.info[iat1][ad][2], - this->_paraV); - tmp_DMR->insert_pair(tmp_ap); - } - } - // allocate the memory of BaseMatrix in SR, and set the new values to zero - if (std::is_same::value) - { - tmp_DMR->fix_gamma(); - } - tmp_DMR->allocate(nullptr, true); - this->_DMR.push_back(tmp_DMR); - // add another DMR if nspin==2 - if (this->_nspin == 2) - { - hamilt::HContainer* tmp_DMR1; - tmp_DMR1 = new hamilt::HContainer(*tmp_DMR); - this->_DMR.push_back(tmp_DMR1); - } - ModuleBase::Memory::record("DensityMatrix::DMR", this->_DMR.size() * this->_DMR[0]->get_memory_size()); -} - -// initialize density matrix DMR from another HContainer (mainly used) -template -void DensityMatrix::init_DMR(const hamilt::HContainer& DMR_in) -{ - ModuleBase::TITLE("DensityMatrix", "init_DMR"); - // ensure _DMR is empty - for (auto& it: this->_DMR) - { - delete it; - } - this->_DMR.clear(); - // set up a HContainer using another one - for (int is = 0; is < this->_nspin; ++is) // loop over spin - { - hamilt::HContainer* tmp_DMR; - tmp_DMR = new hamilt::HContainer(DMR_in); - // zero.out - tmp_DMR->set_zero(); - this->_DMR.push_back(tmp_DMR); - } - ModuleBase::Memory::record("DensityMatrix::DMR", this->_DMR.size() * this->_DMR[0]->get_memory_size()); -} - -template -void DensityMatrix::init_DMR(const hamilt::HContainer& DMR_in) -{ - ModuleBase::TITLE("DensityMatrix", "init_DMR"); - // ensure _DMR is empty - for (auto& it: this->_DMR) - { - delete it; - } - this->_DMR.clear(); - // set up a HContainer using another one - int size_ap = DMR_in.size_atom_pairs(); - if (size_ap > 0) - { - const Parallel_Orbitals* paraV_ = DMR_in.get_atom_pair(0).get_paraV(); - hamilt::HContainer* tmp_DMR = new hamilt::HContainer(paraV_); - for (int iap = 0; iap < size_ap; iap++) - { - const int iat1 = DMR_in.get_atom_pair(iap).get_atom_i(); - const int iat2 = DMR_in.get_atom_pair(iap).get_atom_j(); - for (int ir = 0; ir < DMR_in.get_atom_pair(iap).get_R_size(); ir++) - { - const ModuleBase::Vector3 R_index = DMR_in.get_atom_pair(iap).get_R_index(ir); - hamilt::AtomPair tmp_ap(iat1, iat2, R_index, paraV_); - tmp_DMR->insert_pair(tmp_ap); - } - } - tmp_DMR->allocate(nullptr, true); - this->_DMR.push_back(tmp_DMR); - if (this->_nspin == 2) - { - hamilt::HContainer* tmp_DMR1 = new hamilt::HContainer(*tmp_DMR); - this->_DMR.push_back(tmp_DMR1); - } - } - ModuleBase::Memory::record("DensityMatrix::DMR", this->_DMR.size() * this->_DMR[0]->get_memory_size()); -} - -// get _DMR pointer -template -hamilt::HContainer* DensityMatrix::get_DMR_pointer(const int ispin) const -{ -#ifdef __DEBUG - assert(ispin > 0 && ispin <= this->_nspin); -#endif - return this->_DMR[ispin - 1]; -} - -// get _DMK[ik] pointer -template -TK* DensityMatrix::get_DMK_pointer(const int ik) const -{ -#ifdef __DEBUG - assert(ik < this->_nk * this->_nspin); -#endif - return const_cast(this->_DMK[ik].data()); -} - -// set DMK using a pointer -template -void DensityMatrix::set_DMK_pointer(const int ik, TK* DMK_in) -{ -#ifdef __DEBUG - assert(ik < this->_nk * this->_nspin); -#endif - this->_DMK[ik].assign(DMK_in, DMK_in + this->_paraV->nrow * this->_paraV->ncol); -} - -// set _DMK element -template -void DensityMatrix::set_DMK(const int ispin, const int ik, const int i, const int j, const TK value) -{ -#ifdef __DEBUG - assert(ispin > 0 && ispin <= this->_nspin); - assert(ik >= 0 && ik < this->_nk); -#endif - // consider transpose col=>row - this->_DMK[ik + this->_nk * (ispin - 1)][i * this->_paraV->nrow + j] = value; -} - -// set _DMK element -template -void DensityMatrix::set_DMK_zero() -{ - for (int ik = 0; ik < _nspin * _nk; ik++) - { - ModuleBase::GlobalFunc::ZEROS(this->_DMK[ik].data(), - this->_paraV->get_row_size() * this->_paraV->get_col_size()); - } -} - -// get a matrix element of density matrix dm(k) -template -TK DensityMatrix::get_DMK(const int ispin, const int ik, const int i, const int j) const -{ -#ifdef __DEBUG - assert(ispin > 0 && ispin <= this->_nspin); -#endif - // consider transpose col=>row - return this->_DMK[ik + this->_nk * (ispin - 1)][i * this->_paraV->nrow + j]; -} - -// get _DMK nks, nrow, ncol -template -int DensityMatrix::get_DMK_nks() const -{ -#ifdef __DEBUG - assert(this->_DMK.size() == _nk * _nspin); -#endif - return _nk * _nspin; -} - -template -int DensityMatrix::get_DMK_size() const -{ -#ifdef __DEBUG - assert(this->_DMK.size() != 0); -#endif - return this->_DMK.size(); -} - -template -int DensityMatrix::get_DMK_nrow() const -{ -#ifdef __DEBUG - assert(this->_DMK.size() != 0); -#endif - return this->_paraV->nrow; -} - -template -int DensityMatrix::get_DMK_ncol() const -{ -#ifdef __DEBUG - assert(this->_DMK.size() != 0); -#endif - return this->_paraV->ncol; -} - -template -void DensityMatrix::save_DMR() -{ - ModuleBase::TITLE("DensityMatrix", "save_DMR"); - ModuleBase::timer::tick("DensityMatrix", "save_DMR"); - - const int nnr = this->_DMR[0]->get_nnr(); - // allocate if _DMR_save is empty - if (_DMR_save.size() == 0) - { - _DMR_save.resize(this->_DMR.size()); - } - // resize if _DMR_save[is].size is not equal to _DMR.size - for (int is = 0; is < _DMR_save.size(); is++) - { - if (_DMR_save[is].size() != nnr) - { - _DMR_save[is].resize(nnr); - } - } - // save _DMR to _DMR_save - for (int is = 0; is < this->_DMR.size(); is++) - { - TR* DMR_pointer = this->_DMR[is]->get_wrapper(); - TR* DMR_save_pointer = _DMR_save[is].data(); - // set to zero - ModuleBase::GlobalFunc::ZEROS(DMR_save_pointer, nnr); - for (int i = 0; i < nnr; i++) - { - DMR_save_pointer[i] = DMR_pointer[i]; - } - } - - ModuleBase::timer::tick("DensityMatrix", "save_DMR"); -} - -// calculate DMR from DMK using add_element -template -void DensityMatrix::cal_DMR_test() -{ - for (int is = 1; is <= this->_nspin; ++is) - { - int ik_begin = this->_nk * (is - 1); // jump this->_nk for spin_down if nspin==2 - hamilt::HContainer* tmp_DMR = this->_DMR[is - 1]; - // set zero since this function is called in every scf step - tmp_DMR->set_zero(); -#ifdef _OPENMP -#pragma omp parallel for -#endif - for (int iap = 0; iap < tmp_DMR->size_atom_pairs(); ++iap) - { - hamilt::AtomPair& tmp_ap = tmp_DMR->get_atom_pair(iap); - int iat1 = tmp_ap.get_atom_i(); - int iat2 = tmp_ap.get_atom_j(); - // get global indexes of whole matrix for each atom in this process - int row_ap = this->_paraV->atom_begin_row[iat1]; - int col_ap = this->_paraV->atom_begin_col[iat2]; - if (row_ap == -1 || col_ap == -1) - { - throw std::string("Atom-pair not belong this process"); - } - for (int ir = 0; ir < tmp_ap.get_R_size(); ++ir) - { - const ModuleBase::Vector3 r_index = tmp_ap.get_R_index(ir); - hamilt::BaseMatrix* tmp_matrix = tmp_ap.find_matrix(r_index); -#ifdef __DEBUG - if (tmp_matrix == nullptr) - { - std::cout << "tmp_matrix is nullptr" << std::endl; - continue; - } -#endif - std::complex tmp_res; - // loop over k-points - for (int ik = 0; ik < this->_nk; ++ik) - { - // cal k_phase - // if TK==std::complex, kphase is e^{ikR} - const ModuleBase::Vector3 dR(r_index[0], r_index[1], r_index[2]); - const double arg = (this->_kvec_d[ik] * dR) * ModuleBase::TWO_PI; - double sinp, cosp; - ModuleBase::libm::sincos(arg, &sinp, &cosp); - std::complex kphase = std::complex(cosp, sinp); - // set DMR element - for (int i = 0; i < this->_paraV->get_row_size(iat1); ++i) - { - for (int j = 0; j < this->_paraV->get_col_size(iat2); ++j) - { - // since DMK is column-major, we need to transpose it col=>row - tmp_res - = kphase * this->_DMK[ik_begin + ik][(col_ap + j) * this->_paraV->nrow + row_ap + i]; - tmp_matrix->add_element(i, j, tmp_res.real()); - } - } - } - } - } - } -} - // calculate DMR from DMK using blas for multi-k calculation template <> -void DensityMatrix, double>::cal_DMR() +void DensityMatrix, double>::cal_DMR(const int ik_in) { ModuleBase::TITLE("DensityMatrix", "cal_DMR"); @@ -478,6 +107,8 @@ void DensityMatrix, double>::cal_DMR() { for (int ik = 0; ik < this->_nk; ++ik) { + if(ik_in >= 0 && ik_in != ik) { continue; +} // cal k_phase // if TK==std::complex, kphase is e^{ikR} const ModuleBase::Vector3 dR(r_index[0], r_index[1], r_index[2]); @@ -522,6 +153,8 @@ void DensityMatrix, double>::cal_DMR() tmp_DMR.assign(tmp_ap.get_size(), std::complex(0.0, 0.0)); for (int ik = 0; ik < this->_nk; ++ik) { + if(ik_in >= 0 && ik_in != ik) { continue; +} // cal k_phase // if TK==std::complex, kphase is e^{ikR} const ModuleBase::Vector3 dR(r_index[0], r_index[1], r_index[2]); @@ -589,103 +222,86 @@ void DensityMatrix, double>::cal_DMR() ModuleBase::timer::tick("DensityMatrix", "cal_DMR"); } -// calculate DMR from DMK using blas for multi-k calculation, without summing over k-points +// calculate DMR from DMK using blas for multi-k calculation +template <> +void DensityMatrix::cal_DMR_full(hamilt::HContainer>* dmR_out)const{} template <> -void DensityMatrix, double>::cal_DMR(const int ik) +void DensityMatrix, double>::cal_DMR_full(hamilt::HContainer>* dmR_out)const { - ModuleBase::TITLE("DensityMatrix", "cal_DMR"); - - // To check whether DMR has been initialized -#ifdef __DEBUG - assert(!this->_DMR.empty() && "DMR has not been initialized!"); -#endif + ModuleBase::TITLE("DensityMatrix", "cal_DMR_full"); - ModuleBase::timer::tick("DensityMatrix", "cal_DMR"); + ModuleBase::timer::tick("DensityMatrix", "cal_DMR_full"); int ld_hk = this->_paraV->nrow; int ld_hk2 = 2 * ld_hk; - for (int is = 1; is <= this->_nspin; ++is) - { - int ik_begin = this->_nk * (is - 1); // jump this->_nk for spin_down if nspin==2 - hamilt::HContainer* tmp_DMR = this->_DMR[is - 1]; - // set zero since this function is called in every scf step - tmp_DMR->set_zero(); + hamilt::HContainer>* tmp_DMR = dmR_out; + // set zero since this function is called in every scf step + tmp_DMR->set_zero(); #ifdef _OPENMP #pragma omp parallel for #endif - for (int i = 0; i < tmp_DMR->size_atom_pairs(); ++i) + for (int i = 0; i < tmp_DMR->size_atom_pairs(); ++i) + { + auto& tmp_ap = tmp_DMR->get_atom_pair(i); + int iat1 = tmp_ap.get_atom_i(); + int iat2 = tmp_ap.get_atom_j(); + // get global indexes of whole matrix for each atom in this process + int row_ap = this->_paraV->atom_begin_row[iat1]; + int col_ap = this->_paraV->atom_begin_col[iat2]; + for (int ir = 0; ir < tmp_ap.get_R_size(); ++ir) { - hamilt::AtomPair& tmp_ap = tmp_DMR->get_atom_pair(i); - int iat1 = tmp_ap.get_atom_i(); - int iat2 = tmp_ap.get_atom_j(); - // get global indexes of whole matrix for each atom in this process - int row_ap = this->_paraV->atom_begin_row[iat1]; - int col_ap = this->_paraV->atom_begin_col[iat2]; - if (row_ap == -1 || col_ap == -1) + const ModuleBase::Vector3 r_index = tmp_ap.get_R_index(ir); + auto* tmp_matrix = tmp_ap.find_matrix(r_index); +#ifdef __DEBUG + if (tmp_matrix == nullptr) { - throw std::string("Atom-pair not belong this process"); + std::cout << "tmp_matrix is nullptr" << std::endl; + continue; } - for (int ir = 0; ir < tmp_ap.get_R_size(); ++ir) - { - const ModuleBase::Vector3 r_index = tmp_ap.get_R_index(ir); - hamilt::BaseMatrix* tmp_matrix = tmp_ap.find_matrix(r_index); -#ifdef __DEBUG - if (tmp_matrix == nullptr) - { - std::cout << "tmp_matrix is nullptr" << std::endl; - continue; - } #endif - // Remove loop over k-points and directly use the provided ik - if (PARAM.inp.nspin != 4) + // loop over k-points + // calculate full matrix for complex density matrix + for (int ik = 0; ik < this->_nk; ++ik) + { + // cal k_phase + // if TK==std::complex, kphase is e^{ikR} + const ModuleBase::Vector3 dR(r_index[0], r_index[1], r_index[2]); + const double arg = (this->_kvec_d[ik] * dR) * ModuleBase::TWO_PI; + double sinp, cosp; + ModuleBase::libm::sincos(arg, &sinp, &cosp); + std::complex kphase = std::complex(cosp, sinp); + // set DMR element + std::complex* tmp_DMR_pointer = tmp_matrix->get_pointer(); + const std::complex* tmp_DMK_pointer = this->_DMK[ik].data(); + double* DMK_real_pointer = nullptr; + double* DMK_imag_pointer = nullptr; + // jump DMK to fill DMR + // DMR is row-major, DMK is column-major + tmp_DMK_pointer += col_ap * this->_paraV->nrow + row_ap; + for (int mu = 0; mu < this->_paraV->get_row_size(iat1); ++mu) { - // cal k_phase - // if TK==std::complex, kphase is e^{ikR} - const ModuleBase::Vector3 dR(r_index[0], r_index[1], r_index[2]); - const double arg = (this->_kvec_d[ik] * dR) * ModuleBase::TWO_PI; - double sinp, cosp; - ModuleBase::libm::sincos(arg, &sinp, &cosp); - std::complex kphase = std::complex(cosp, sinp); - // set DMR element - double* tmp_DMR_pointer = tmp_matrix->get_pointer(); - std::complex* tmp_DMK_pointer = this->_DMK[ik + ik_begin].data(); - double* DMK_real_pointer = nullptr; - double* DMK_imag_pointer = nullptr; - // jump DMK to fill DMR - // DMR is row-major, DMK is column-major - tmp_DMK_pointer += col_ap * this->_paraV->nrow + row_ap; - for (int mu = 0; mu < this->_paraV->get_row_size(iat1); ++mu) - { - DMK_real_pointer = (double*)tmp_DMK_pointer; - DMK_imag_pointer = DMK_real_pointer + 1; - BlasConnector::axpy(this->_paraV->get_col_size(iat2), - kphase.real(), - DMK_real_pointer, - ld_hk2, - tmp_DMR_pointer, - 1); - // "-" since i^2 = -1 - BlasConnector::axpy(this->_paraV->get_col_size(iat2), - -kphase.imag(), - DMK_imag_pointer, - ld_hk2, - tmp_DMR_pointer, - 1); - tmp_DMK_pointer += 1; - tmp_DMR_pointer += this->_paraV->get_col_size(iat2); - } + BlasConnector::axpy(this->_paraV->get_col_size(iat2), + kphase, + tmp_DMK_pointer, + ld_hk, + tmp_DMR_pointer, + 1); + tmp_DMK_pointer += 1; + tmp_DMR_pointer += this->_paraV->get_col_size(iat2); } } } } - ModuleBase::timer::tick("DensityMatrix", "cal_DMR"); + ModuleBase::timer::tick("DensityMatrix", "cal_DMR_full"); } // calculate DMR from DMK using blas for gamma-only calculation template <> -void DensityMatrix::cal_DMR() +void DensityMatrix::cal_DMR(const int ik_in) { ModuleBase::TITLE("DensityMatrix", "cal_DMR"); + assert(ik_in == -1 || ik_in == 0); + // To check whether DMR has been initialized #ifdef __DEBUG assert(!this->_DMR.empty() && "DMR has not been initialized!"); @@ -756,159 +372,81 @@ void DensityMatrix::cal_DMR() ModuleBase::timer::tick("DensityMatrix", "cal_DMR"); } -// merge density matrix DMR with different spin +// switch_dmr template -void DensityMatrix::sum_DMR_spin() +void DensityMatrix::switch_dmr(const int mode) { - ModuleBase::TITLE("DensityMatrix", "sum_DMR_spin"); - if (this->_nspin == 1) + ModuleBase::TITLE("DensityMatrix", "switch_dmr"); + if (this->_nspin != 2) { return; } - ModuleBase::timer::tick("DensityMatrix", "sum_DMR_spin"); - if (this->_nspin == 2) + else { - hamilt::HContainer* tmp_DMR_up = this->_DMR[0]; - hamilt::HContainer* tmp_DMR_down = this->_DMR[1]; - for (int i = 0; i < tmp_DMR_up->size_atom_pairs(); ++i) + ModuleBase::timer::tick("DensityMatrix", "switch_dmr"); + switch(mode) { - hamilt::AtomPair& tmp_ap_up = tmp_DMR_up->get_atom_pair(i); - hamilt::AtomPair& tmp_ap_down = tmp_DMR_down->get_atom_pair(i); - for (int ir = 0; ir < tmp_ap_up.get_R_size(); ++ir) + case 0: + // switch to original density matrix + if (this->dmr_tmp_ != nullptr && this->dmr_origin_.size() != 0) { - const ModuleBase::Vector3 r_index = tmp_ap_up.get_R_index(ir); - hamilt::BaseMatrix* tmp_matrix_up = tmp_ap_up.find_matrix(r_index); - hamilt::BaseMatrix* tmp_matrix_down = tmp_ap_down.find_matrix(r_index); - TR* ptr_up = tmp_matrix_up->get_pointer(); - TR* ptr_down = tmp_matrix_down->get_pointer(); - for (int i = 0; i < tmp_ap_up.get_size(); ++i) + this->_DMR[0]->allocate(this->dmr_origin_.data(), false); + delete[] this->dmr_tmp_; + this->dmr_tmp_ = nullptr; + } + // else: do nothing + break; + case 1: + // switch to total magnetization density matrix, dmr_up + dmr_down + if(this->dmr_tmp_ == nullptr) + { + const size_t size = this->_DMR[0]->get_nnr(); + this->dmr_tmp_ = new TR[size]; + this->dmr_origin_.resize(size); + for (int i = 0; i < size; ++i) { - ptr_up[i] += ptr_down[i]; + this->dmr_origin_[i] = this->_DMR[0]->get_wrapper()[i]; + this->dmr_tmp_[i] = this->dmr_origin_[i] + this->_DMR[1]->get_wrapper()[i]; } + this->_DMR[0]->allocate(this->dmr_tmp_, false); } - } - } - ModuleBase::timer::tick("DensityMatrix", "sum_DMR_spin"); -} - -// read *.dmk into density matrix dm(k) -template -void DensityMatrix::read_DMK(const std::string directory, const int ispin, const int ik) -{ - ModuleBase::TITLE("DensityMatrix", "read_DMK"); -#ifdef __DEBUG - assert(ispin > 0 && ispin <= this->_nspin); -#endif - // read - std::string fn; - fn = directory + "SPIN" + std::to_string(ispin) + "_" + std::to_string(ik) + ".dmk"; - // - bool quit_abacus = false; - - std::ifstream ifs; - - ifs.open(fn.c_str()); - if (!ifs) - { - quit_abacus = true; - } - else - { - // if the number is not match, - // quit the program or not. - bool quit = false; - - ModuleBase::CHECK_DOUBLE(ifs, this->_kvec_d[ik].x, quit); - ModuleBase::CHECK_DOUBLE(ifs, this->_kvec_d[ik].y, quit); - ModuleBase::CHECK_DOUBLE(ifs, this->_kvec_d[ik].z, quit); - ModuleBase::CHECK_INT(ifs, this->_paraV->nrow); - ModuleBase::CHECK_INT(ifs, this->_paraV->ncol); - } // If file exist, read in data. - // Finish reading the first part of density matrix. - - for (int i = 0; i < this->_paraV->nrow; ++i) - { - for (int j = 0; j < this->_paraV->ncol; ++j) - { - ifs >> this->_DMK[ik + this->_nk * (ispin - 1)][i * this->_paraV->ncol + j]; - } - } - ifs.close(); -} - -// output density matrix dm(k) into *.dmk -template <> -void DensityMatrix::write_DMK(const std::string directory, const int ispin, const int ik) -{ - ModuleBase::TITLE("DensityMatrix", "write_DMK"); -#ifdef __DEBUG - assert(ispin > 0 && ispin <= this->_nspin); -#endif - // write - std::string fn; - fn = directory + "SPIN" + std::to_string(ispin) + "_" + std::to_string(ik) + ".dmk"; - std::ofstream ofs; - ofs.open(fn.c_str()); - if (!ofs) - { - ModuleBase::WARNING("elecstate::write_dmk", "Can't create DENSITY MATRIX File!"); - } - ofs << this->_kvec_d[ik].x << " " << this->_kvec_d[ik].y << " " << this->_kvec_d[ik].z << std::endl; - ofs << "\n " << this->_paraV->nrow << " " << this->_paraV->ncol << std::endl; - - ofs << std::setprecision(3); - ofs << std::scientific; - - for (int i = 0; i < this->_paraV->nrow; ++i) - { - for (int j = 0; j < this->_paraV->ncol; ++j) - { - if (j % 8 == 0) + else { - ofs << "\n"; + const size_t size = this->_DMR[0]->get_nnr(); + for (int i = 0; i < size; ++i) + { + this->dmr_tmp_[i] = this->dmr_origin_[i] + this->_DMR[1]->get_wrapper()[i]; + } } - ofs << " " << this->_DMK[ik + this->_nk * (ispin - 1)][i * this->_paraV->ncol + j]; - } - } - - ofs.close(); -} - -template <> -void DensityMatrix, double>::write_DMK(const std::string directory, const int ispin, const int ik) -{ - ModuleBase::TITLE("DensityMatrix", "write_DMK"); -#ifdef __DEBUG - assert(ispin > 0 && ispin <= this->_nspin); -#endif - // write - std::string fn; - fn = directory + "SPIN" + std::to_string(ispin) + "_" + std::to_string(ik) + ".dmk"; - std::ofstream ofs; - ofs.open(fn.c_str()); - if (!ofs) - { - ModuleBase::WARNING("elecstate::write_dmk", "Can't create DENSITY MATRIX File!"); - } - ofs << this->_kvec_d[ik].x << " " << this->_kvec_d[ik].y << " " << this->_kvec_d[ik].z << std::endl; - ofs << "\n " << this->_paraV->nrow << " " << this->_paraV->ncol << std::endl; - - ofs << std::setprecision(3); - ofs << std::scientific; - - for (int i = 0; i < this->_paraV->nrow; ++i) - { - for (int j = 0; j < this->_paraV->ncol; ++j) - { - if (j % 8 == 0) + break; + case 2: + // switch to magnetization density matrix, dmr_up - dmr_down + if(this->dmr_tmp_ == nullptr) { - ofs << "\n"; + const size_t size = this->_DMR[0]->get_nnr(); + this->dmr_tmp_ = new TR[size]; + this->dmr_origin_.resize(size); + for (int i = 0; i < size; ++i) + { + this->dmr_origin_[i] = this->_DMR[0]->get_wrapper()[i]; + this->dmr_tmp_[i] = this->dmr_origin_[i] - this->_DMR[1]->get_wrapper()[i]; + } + this->_DMR[0]->allocate(this->dmr_tmp_, false); } - ofs << " " << this->_DMK[ik + this->_nk * (ispin - 1)][i * this->_paraV->ncol + j].real(); + else + { + const size_t size = this->_DMR[0]->get_nnr(); + for (int i = 0; i < size; ++i) + { + this->dmr_tmp_[i] = this->dmr_origin_[i] - this->_DMR[1]->get_wrapper()[i]; + } + } + break; + default: + throw std::string("Unknown mode in switch_dmr"); } + ModuleBase::timer::tick("DensityMatrix", "switch_dmr"); } - - ofs.close(); } // T of HContainer can be double or complex diff --git a/source/module_elecstate/module_dm/density_matrix.h b/source/module_elecstate/module_dm/density_matrix.h index f712a8e4c3..d3a4d1f919 100644 --- a/source/module_elecstate/module_dm/density_matrix.h +++ b/source/module_elecstate/module_dm/density_matrix.h @@ -176,23 +176,23 @@ class DensityMatrix /** * @brief calculate density matrix DMR from dm(k) using blas::axpy + * if ik_in < 0, calculate all k-points + * if ik_in >= 0, calculate only one k-point without summing over k-points */ - void cal_DMR(); + void cal_DMR(const int ik_in = -1); /** - * @brief calculate density matrix DMR from dm(k) using blas::axpy for multi-k calculation, without summing over k-points + * @brief calculate complex density matrix DMR with both real and imaginary part for noncollinear-spin calculation + * the stored dm(k) has been used to calculate the passin DMR + * @param dmR_out pointer of HContainer object to store the calculated complex DMR */ - void cal_DMR(const int ik); + void cal_DMR_full(hamilt::HContainer>* dmR_out) const; /** - * @brief calculate density matrix DMR from dm(k) using base_matrix->add_element() + * @brief (Only nspin=2) switch DMR to total density matrix or magnetization density matrix + * @param mode 0 - original density matrix; 1 - total density matrix; 2 - magnetization density matrix */ - void cal_DMR_test(); // for reference during development - - /** - * @brief merge density matrix DMR with different spin - */ - void sum_DMR_spin(); + void switch_dmr(const int mode); /** * @brief write density matrix dm(ik) into *.dmk @@ -273,6 +273,9 @@ class DensityMatrix */ int _nk = 0; + /// temporary pointers for switch DMR, only used with nspin=2 + std::vector dmr_origin_; + TR* dmr_tmp_ = nullptr; }; diff --git a/source/module_elecstate/module_dm/density_matrix_io.cpp b/source/module_elecstate/module_dm/density_matrix_io.cpp new file mode 100644 index 0000000000..36548ae314 --- /dev/null +++ b/source/module_elecstate/module_dm/density_matrix_io.cpp @@ -0,0 +1,446 @@ +#include "density_matrix.h" + +#include "module_parameter/parameter.h" +#include "module_base/libm/libm.h" +#include "module_base/memory.h" +#include "module_base/timer.h" +#include "module_base/tool_title.h" +#include "module_cell/klist.h" + +namespace elecstate +{ + +// initialize density matrix DMR from UnitCell (mainly used in UnitTest) +template +void DensityMatrix::init_DMR(Grid_Driver* GridD_in, const UnitCell* ucell) +{ + ModuleBase::TITLE("DensityMatrix", "init_DMR"); + // ensure _DMR is empty + for (auto& it: this->_DMR) + { + delete it; + } + this->_DMR.clear(); + // construct a new DMR + hamilt::HContainer* tmp_DMR; + tmp_DMR = new hamilt::HContainer(this->_paraV); + // set up a HContainer + for (int iat1 = 0; iat1 < ucell->nat; iat1++) + { + auto tau1 = ucell->get_tau(iat1); + int T1, I1; + ucell->iat2iait(iat1, &I1, &T1); + AdjacentAtomInfo adjs; + GridD_in->Find_atom(*ucell, tau1, T1, I1, &adjs); + // std::cout << "adjs.adj_num: " <itia2iat(T2, I2); + if (this->_paraV->get_row_size(iat1) <= 0 || this->_paraV->get_col_size(iat2) <= 0) + { + continue; + } + ModuleBase::Vector3& R_index = adjs.box[ad]; + // std::cout << "R_index: " << R_index.x << " " << R_index.y << " " << R_index.z << std::endl; + hamilt::AtomPair tmp_ap(iat1, iat2, R_index, this->_paraV); + tmp_DMR->insert_pair(tmp_ap); + } + } + // allocate the memory of BaseMatrix in SR, and set the new values to zero + if (std::is_same::value) + { + tmp_DMR->fix_gamma(); + } + tmp_DMR->allocate(nullptr, true); + this->_DMR.push_back(tmp_DMR); + // add another DMR if nspin==2 + if (this->_nspin == 2) + { + hamilt::HContainer* tmp_DMR1; + tmp_DMR1 = new hamilt::HContainer(*tmp_DMR); + this->_DMR.push_back(tmp_DMR1); + } + ModuleBase::Memory::record("DensityMatrix::DMR", this->_DMR.size() * this->_DMR[0]->get_memory_size()); +} + +/// initialize density matrix DMR from UnitCell and RA (mainly used in UnitTest) +template +void DensityMatrix::init_DMR(Record_adj& ra, const UnitCell* ucell) +{ + ModuleBase::TITLE("DensityMatrix", "init_DMR"); + // ensure _DMR is empty + for (auto& it: this->_DMR) + { + delete it; + } + this->_DMR.clear(); + // construct a new DMR + hamilt::HContainer* tmp_DMR; + tmp_DMR = new hamilt::HContainer(this->_paraV); + // set up a HContainer + for (int iat1 = 0; iat1 < ucell->nat; iat1++) + { + auto tau1 = ucell->get_tau(iat1); + int T1, I1; + ucell->iat2iait(iat1, &I1, &T1); + for (int ad = 0; ad < ra.na_each[iat1]; ++ad) + { + const int T2 = ra.info[iat1][ad][3]; + const int I2 = ra.info[iat1][ad][4]; + int iat2 = ucell->itia2iat(T2, I2); + if (this->_paraV->get_row_size(iat1) <= 0 || this->_paraV->get_col_size(iat2) <= 0) + { + continue; + } + hamilt::AtomPair tmp_ap(iat1, + iat2, + ra.info[iat1][ad][0], + ra.info[iat1][ad][1], + ra.info[iat1][ad][2], + this->_paraV); + tmp_DMR->insert_pair(tmp_ap); + } + } + // allocate the memory of BaseMatrix in SR, and set the new values to zero + if (std::is_same::value) + { + tmp_DMR->fix_gamma(); + } + tmp_DMR->allocate(nullptr, true); + this->_DMR.push_back(tmp_DMR); + // add another DMR if nspin==2 + if (this->_nspin == 2) + { + hamilt::HContainer* tmp_DMR1; + tmp_DMR1 = new hamilt::HContainer(*tmp_DMR); + this->_DMR.push_back(tmp_DMR1); + } + ModuleBase::Memory::record("DensityMatrix::DMR", this->_DMR.size() * this->_DMR[0]->get_memory_size()); +} + +// initialize density matrix DMR from another HContainer (mainly used) +template +void DensityMatrix::init_DMR(const hamilt::HContainer& DMR_in) +{ + ModuleBase::TITLE("DensityMatrix", "init_DMR"); + // ensure _DMR is empty + for (auto& it: this->_DMR) + { + delete it; + } + this->_DMR.clear(); + // set up a HContainer using another one + for (int is = 0; is < this->_nspin; ++is) // loop over spin + { + hamilt::HContainer* tmp_DMR; + tmp_DMR = new hamilt::HContainer(DMR_in); + // zero.out + tmp_DMR->set_zero(); + this->_DMR.push_back(tmp_DMR); + } + ModuleBase::Memory::record("DensityMatrix::DMR", this->_DMR.size() * this->_DMR[0]->get_memory_size()); +} + +template +void DensityMatrix::init_DMR(const hamilt::HContainer& DMR_in) +{ + ModuleBase::TITLE("DensityMatrix", "init_DMR"); + // ensure _DMR is empty + for (auto& it: this->_DMR) + { + delete it; + } + this->_DMR.clear(); + // set up a HContainer using another one + int size_ap = DMR_in.size_atom_pairs(); + if (size_ap > 0) + { + const Parallel_Orbitals* paraV_ = DMR_in.get_atom_pair(0).get_paraV(); + hamilt::HContainer* tmp_DMR = new hamilt::HContainer(paraV_); + for (int iap = 0; iap < size_ap; iap++) + { + const int iat1 = DMR_in.get_atom_pair(iap).get_atom_i(); + const int iat2 = DMR_in.get_atom_pair(iap).get_atom_j(); + for (int ir = 0; ir < DMR_in.get_atom_pair(iap).get_R_size(); ir++) + { + const ModuleBase::Vector3 R_index = DMR_in.get_atom_pair(iap).get_R_index(ir); + hamilt::AtomPair tmp_ap(iat1, iat2, R_index, paraV_); + tmp_DMR->insert_pair(tmp_ap); + } + } + tmp_DMR->allocate(nullptr, true); + this->_DMR.push_back(tmp_DMR); + if (this->_nspin == 2) + { + hamilt::HContainer* tmp_DMR1 = new hamilt::HContainer(*tmp_DMR); + this->_DMR.push_back(tmp_DMR1); + } + } + ModuleBase::Memory::record("DensityMatrix::DMR", this->_DMR.size() * this->_DMR[0]->get_memory_size()); +} + +// get _DMR pointer +template +hamilt::HContainer* DensityMatrix::get_DMR_pointer(const int ispin) const +{ +#ifdef __DEBUG + assert(ispin > 0 && ispin <= this->_nspin); +#endif + return this->_DMR[ispin - 1]; +} + +// get _DMK[ik] pointer +template +TK* DensityMatrix::get_DMK_pointer(const int ik) const +{ +#ifdef __DEBUG + assert(ik < this->_nk * this->_nspin); +#endif + return const_cast(this->_DMK[ik].data()); +} + +// set DMK using a pointer +template +void DensityMatrix::set_DMK_pointer(const int ik, TK* DMK_in) +{ +#ifdef __DEBUG + assert(ik < this->_nk * this->_nspin); +#endif + this->_DMK[ik].assign(DMK_in, DMK_in + this->_paraV->nrow * this->_paraV->ncol); +} + +// set _DMK element +template +void DensityMatrix::set_DMK(const int ispin, const int ik, const int i, const int j, const TK value) +{ +#ifdef __DEBUG + assert(ispin > 0 && ispin <= this->_nspin); + assert(ik >= 0 && ik < this->_nk); +#endif + // consider transpose col=>row + this->_DMK[ik + this->_nk * (ispin - 1)][i * this->_paraV->nrow + j] = value; +} + +// set _DMK element +template +void DensityMatrix::set_DMK_zero() +{ + for (int ik = 0; ik < _nspin * _nk; ik++) + { + ModuleBase::GlobalFunc::ZEROS(this->_DMK[ik].data(), + this->_paraV->get_row_size() * this->_paraV->get_col_size()); + } +} + +// get a matrix element of density matrix dm(k) +template +TK DensityMatrix::get_DMK(const int ispin, const int ik, const int i, const int j) const +{ +#ifdef __DEBUG + assert(ispin > 0 && ispin <= this->_nspin); +#endif + // consider transpose col=>row + return this->_DMK[ik + this->_nk * (ispin - 1)][i * this->_paraV->nrow + j]; +} + +// get _DMK nks, nrow, ncol +template +int DensityMatrix::get_DMK_nks() const +{ +#ifdef __DEBUG + assert(this->_DMK.size() == _nk * _nspin); +#endif + return _nk * _nspin; +} + +template +int DensityMatrix::get_DMK_size() const +{ +#ifdef __DEBUG + assert(this->_DMK.size() != 0); +#endif + return this->_DMK.size(); +} + +template +int DensityMatrix::get_DMK_nrow() const +{ +#ifdef __DEBUG + assert(this->_DMK.size() != 0); +#endif + return this->_paraV->nrow; +} + +template +int DensityMatrix::get_DMK_ncol() const +{ +#ifdef __DEBUG + assert(this->_DMK.size() != 0); +#endif + return this->_paraV->ncol; +} + +template +void DensityMatrix::save_DMR() +{ + ModuleBase::TITLE("DensityMatrix", "save_DMR"); + ModuleBase::timer::tick("DensityMatrix", "save_DMR"); + + const int nnr = this->_DMR[0]->get_nnr(); + // allocate if _DMR_save is empty + if (_DMR_save.size() == 0) + { + _DMR_save.resize(this->_DMR.size()); + } + // resize if _DMR_save[is].size is not equal to _DMR.size + for (int is = 0; is < _DMR_save.size(); is++) + { + if (_DMR_save[is].size() != nnr) + { + _DMR_save[is].resize(nnr); + } + } + // save _DMR to _DMR_save + for (int is = 0; is < this->_DMR.size(); is++) + { + TR* DMR_pointer = this->_DMR[is]->get_wrapper(); + TR* DMR_save_pointer = _DMR_save[is].data(); + // set to zero + ModuleBase::GlobalFunc::ZEROS(DMR_save_pointer, nnr); + for (int i = 0; i < nnr; i++) + { + DMR_save_pointer[i] = DMR_pointer[i]; + } + } + + ModuleBase::timer::tick("DensityMatrix", "save_DMR"); +} + +// read *.dmk into density matrix dm(k) +template +void DensityMatrix::read_DMK(const std::string directory, const int ispin, const int ik) +{ + ModuleBase::TITLE("DensityMatrix", "read_DMK"); +#ifdef __DEBUG + assert(ispin > 0 && ispin <= this->_nspin); +#endif + // read + std::string fn; + fn = directory + "SPIN" + std::to_string(ispin) + "_" + std::to_string(ik) + ".dmk"; + // + bool quit_abacus = false; + + std::ifstream ifs; + + ifs.open(fn.c_str()); + if (!ifs) + { + quit_abacus = true; + } + else + { + // if the number is not match, + // quit the program or not. + bool quit = false; + + ModuleBase::CHECK_DOUBLE(ifs, this->_kvec_d[ik].x, quit); + ModuleBase::CHECK_DOUBLE(ifs, this->_kvec_d[ik].y, quit); + ModuleBase::CHECK_DOUBLE(ifs, this->_kvec_d[ik].z, quit); + ModuleBase::CHECK_INT(ifs, this->_paraV->nrow); + ModuleBase::CHECK_INT(ifs, this->_paraV->ncol); + } // If file exist, read in data. + // Finish reading the first part of density matrix. + + for (int i = 0; i < this->_paraV->nrow; ++i) + { + for (int j = 0; j < this->_paraV->ncol; ++j) + { + ifs >> this->_DMK[ik + this->_nk * (ispin - 1)][i * this->_paraV->ncol + j]; + } + } + ifs.close(); +} + +// output density matrix dm(k) into *.dmk +template <> +void DensityMatrix::write_DMK(const std::string directory, const int ispin, const int ik) +{ + ModuleBase::TITLE("DensityMatrix", "write_DMK"); +#ifdef __DEBUG + assert(ispin > 0 && ispin <= this->_nspin); +#endif + // write + std::string fn; + fn = directory + "SPIN" + std::to_string(ispin) + "_" + std::to_string(ik) + ".dmk"; + std::ofstream ofs; + ofs.open(fn.c_str()); + if (!ofs) + { + ModuleBase::WARNING("elecstate::write_dmk", "Can't create DENSITY MATRIX File!"); + } + ofs << this->_kvec_d[ik].x << " " << this->_kvec_d[ik].y << " " << this->_kvec_d[ik].z << std::endl; + ofs << "\n " << this->_paraV->nrow << " " << this->_paraV->ncol << std::endl; + + ofs << std::setprecision(3); + ofs << std::scientific; + + for (int i = 0; i < this->_paraV->nrow; ++i) + { + for (int j = 0; j < this->_paraV->ncol; ++j) + { + if (j % 8 == 0) + { + ofs << "\n"; + } + ofs << " " << this->_DMK[ik + this->_nk * (ispin - 1)][i * this->_paraV->ncol + j]; + } + } + + ofs.close(); +} + +template <> +void DensityMatrix, double>::write_DMK(const std::string directory, const int ispin, const int ik) +{ + ModuleBase::TITLE("DensityMatrix", "write_DMK"); +#ifdef __DEBUG + assert(ispin > 0 && ispin <= this->_nspin); +#endif + // write + std::string fn; + fn = directory + "SPIN" + std::to_string(ispin) + "_" + std::to_string(ik) + ".dmk"; + std::ofstream ofs; + ofs.open(fn.c_str()); + if (!ofs) + { + ModuleBase::WARNING("elecstate::write_dmk", "Can't create DENSITY MATRIX File!"); + } + ofs << this->_kvec_d[ik].x << " " << this->_kvec_d[ik].y << " " << this->_kvec_d[ik].z << std::endl; + ofs << "\n " << this->_paraV->nrow << " " << this->_paraV->ncol << std::endl; + + ofs << std::setprecision(3); + ofs << std::scientific; + + for (int i = 0; i < this->_paraV->nrow; ++i) + { + for (int j = 0; j < this->_paraV->ncol; ++j) + { + if (j % 8 == 0) + { + ofs << "\n"; + } + ofs << " " << this->_DMK[ik + this->_nk * (ispin - 1)][i * this->_paraV->ncol + j].real(); + } + } + + ofs.close(); +} + +// T of HContainer can be double or complex +template class DensityMatrix; // Gamma-Only case +template class DensityMatrix, double>; // Multi-k case +template class DensityMatrix, std::complex>; // For EXX in future + +} // namespace elecstate \ No newline at end of file diff --git a/source/module_elecstate/module_dm/test/CMakeLists.txt b/source/module_elecstate/module_dm/test/CMakeLists.txt index 515d5caa2d..addfb66cf0 100644 --- a/source/module_elecstate/module_dm/test/CMakeLists.txt +++ b/source/module_elecstate/module_dm/test/CMakeLists.txt @@ -11,7 +11,7 @@ endif() AddTest( TARGET dm_io_test_serial LIBS parameter ${math_libs} base device cell_info - SOURCES test_dm_io.cpp ../density_matrix.cpp + SOURCES test_dm_io.cpp ../density_matrix.cpp ../density_matrix_io.cpp ${ABACUS_SOURCE_DIR}/module_hamilt_lcao/module_hcontainer/base_matrix.cpp ${ABACUS_SOURCE_DIR}/module_hamilt_lcao/module_hcontainer/hcontainer.cpp ${ABACUS_SOURCE_DIR}/module_hamilt_lcao/module_hcontainer/atom_pair.cpp @@ -23,7 +23,7 @@ AddTest( AddTest( TARGET dm_constructor_test LIBS parameter ${math_libs} base device - SOURCES test_dm_constructor.cpp ../density_matrix.cpp tmp_mocks.cpp + SOURCES test_dm_constructor.cpp ../density_matrix.cpp ../density_matrix_io.cpp tmp_mocks.cpp ${ABACUS_SOURCE_DIR}/module_hamilt_lcao/module_hcontainer/base_matrix.cpp ${ABACUS_SOURCE_DIR}/module_hamilt_lcao/module_hcontainer/hcontainer.cpp ${ABACUS_SOURCE_DIR}/module_hamilt_lcao/module_hcontainer/atom_pair.cpp @@ -34,7 +34,7 @@ AddTest( AddTest( TARGET dm_init_test LIBS parameter ${math_libs} base device - SOURCES test_dm_R_init.cpp ../density_matrix.cpp tmp_mocks.cpp + SOURCES test_dm_R_init.cpp ../density_matrix.cpp ../density_matrix_io.cpp tmp_mocks.cpp ${ABACUS_SOURCE_DIR}/module_hamilt_lcao/module_hcontainer/base_matrix.cpp ${ABACUS_SOURCE_DIR}/module_hamilt_lcao/module_hcontainer/hcontainer.cpp ${ABACUS_SOURCE_DIR}/module_hamilt_lcao/module_hcontainer/atom_pair.cpp @@ -45,7 +45,7 @@ AddTest( AddTest( TARGET dm_cal_DMR_test LIBS parameter ${math_libs} base device - SOURCES test_cal_dm_R.cpp ../density_matrix.cpp tmp_mocks.cpp + SOURCES test_cal_dm_R.cpp ../density_matrix.cpp ../density_matrix_io.cpp tmp_mocks.cpp ${ABACUS_SOURCE_DIR}/module_hamilt_lcao/module_hcontainer/base_matrix.cpp ${ABACUS_SOURCE_DIR}/module_hamilt_lcao/module_hcontainer/hcontainer.cpp ${ABACUS_SOURCE_DIR}/module_hamilt_lcao/module_hcontainer/atom_pair.cpp diff --git a/source/module_elecstate/module_dm/test/test_cal_dm_R.cpp b/source/module_elecstate/module_dm/test/test_cal_dm_R.cpp index c0802a4942..f6456a4836 100644 --- a/source/module_elecstate/module_dm/test/test_cal_dm_R.cpp +++ b/source/module_elecstate/module_dm/test/test_cal_dm_R.cpp @@ -104,7 +104,7 @@ class DMTest : public testing::Test #endif }; -TEST_F(DMTest, cal_DMR_test) +TEST_F(DMTest, cal_DMR_full) { // get my rank of this process int my_rank = 0; @@ -123,51 +123,46 @@ TEST_F(DMTest, cal_DMR_test) } // initalize a kvectors, Gamma-only K_Vectors* kv = nullptr; - int nspin = 2; + int nspin = 4; int nks = 2; // since nspin = 2 kv = new K_Vectors; kv->set_nks(nks); kv->kvec_d.resize(nks); // construct DM - elecstate::DensityMatrix DM(paraV, nspin, kv->kvec_d, kv->get_nks() / nspin); + elecstate::DensityMatrix, double> DM(paraV, nspin, kv->kvec_d, kv->get_nks()); // set this->_DMK for (int is = 1; is <= nspin; is++) { - for (int ik = 0; ik < kv->get_nks() / nspin; ik++) + for (int ik = 0; ik < kv->get_nks(); ik++) { for (int i = 0; i < paraV->nrow; i++) { for (int j = 0; j < paraV->ncol; j++) { - DM.set_DMK(is, ik, i, j, 0.77); + DM.set_DMK(is, ik, i, j, std::complex(0.77, 0.77)); } } } } - // initialize this->_DMR - Grid_Driver gd(0,0); - DM.init_DMR(&gd, &ucell); - // set Gamma-only - for (int is = 1; is <= nspin; is++) - { - DM.get_DMR_pointer(is)->fix_gamma(); - } + // initialize dmR_full + hamilt::HContainer> dmR_full(ucell, paraV); // calculate this->_DMR std::chrono::high_resolution_clock::time_point start_time = std::chrono::high_resolution_clock::now(); - DM.cal_DMR_test(); + DM.cal_DMR_full(&dmR_full); std::chrono::high_resolution_clock::time_point end_time = std::chrono::high_resolution_clock::now(); std::chrono::duration elapsed_time = std::chrono::duration_cast>(end_time - start_time); std::cout << "my rank: " << my_rank << " elapsed time blas: " << elapsed_time.count() << std::endl; // compare the result - for (int i = 0; i < DM.get_DMR_pointer(1)->size_atom_pairs(); i++) + for (int i = 0; i < dmR_full.size_atom_pairs(); i++) { - double* ptr1 = DM.get_DMR_pointer(1)->get_atom_pair(i).get_HR_values(0, 0, 0).get_pointer(); + const std::complex* ptr1 = dmR_full.get_atom_pair(i).get_HR_values(0, 0, 0).get_pointer(); // - for (int j = 0; j < DM.get_DMR_pointer(1)->get_atom_pair(i).get_size(); j++) + for (int j = 0; j < dmR_full.get_atom_pair(i).get_size(); j++) { - // std::cout << "my rank: " << my_rank << " i: " << i << " j: " << j << " value: " << ptr1[j] << std::endl; - EXPECT_NEAR(ptr1[j], 0.77, 1e-10); + //std::cout << "my rank: " << my_rank << " i: " << i << " j: " << j << " value: " << ptr1[j] << std::endl; + EXPECT_NEAR(ptr1[j].real(), 1.54, 1e-10); + EXPECT_NEAR(ptr1[j].imag(), 1.54, 1e-10); } } delete kv; @@ -316,8 +311,8 @@ TEST_F(DMTest, cal_DMR_blas_complex) EXPECT_NEAR(ptr1[j], -0.77 * 2, 1e-10); } } - // merge DMR - DM.sum_DMR_spin(); + // calculate DMR_total + DM.switch_dmr(1); // compare the result for spin-up after sum for (int i = 0; i < DM.get_DMR_pointer(1)->size_atom_pairs(); i++) { @@ -329,6 +324,30 @@ TEST_F(DMTest, cal_DMR_blas_complex) EXPECT_NEAR(ptr1[j], -0.77 * 3, 1e-10); } } + // restore to normal DMR + DM.switch_dmr(0); + for (int i = 0; i < DM.get_DMR_pointer(1)->size_atom_pairs(); i++) + { + double* ptr1 = DM.get_DMR_pointer(1)->get_atom_pair(i).get_HR_values(1, 1, 1).get_pointer(); + // + for (int j = 0; j < DM.get_DMR_pointer(1)->get_atom_pair(i).get_size(); j++) + { + //std::cout << "my rank: " << my_rank << " i: " << i << " j: " << j << " value: " << ptr1[j] << std::endl; + EXPECT_NEAR(ptr1[j], -0.77, 1e-10); + } + } + // calculate DMR_differenct + DM.switch_dmr(2); + for (int i = 0; i < DM.get_DMR_pointer(1)->size_atom_pairs(); i++) + { + double* ptr1 = DM.get_DMR_pointer(1)->get_atom_pair(i).get_HR_values(1, 1, 1).get_pointer(); + // + for (int j = 0; j < DM.get_DMR_pointer(1)->get_atom_pair(i).get_size(); j++) + { + //std::cout << "my rank: " << my_rank << " i: " << i << " j: " << j << " value: " << ptr1[j] << std::endl; + EXPECT_NEAR(ptr1[j], 0.77, 1e-10); + } + } delete kv; } diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/CMakeLists.txt b/source/module_hamilt_lcao/hamilt_lcaodft/CMakeLists.txt index 72209f066e..ef3c266c64 100644 --- a/source/module_hamilt_lcao/hamilt_lcaodft/CMakeLists.txt +++ b/source/module_hamilt_lcao/hamilt_lcaodft/CMakeLists.txt @@ -21,8 +21,6 @@ if(ENABLE_LCAO) FORCE_k.cpp stress_tools.cpp edm.cpp - fvnl_dbeta_gamma.cpp - fvnl_dbeta_k.cpp grid_init.cpp spar_dh.cpp spar_exx.cpp diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/FORCE.h b/source/module_hamilt_lcao/hamilt_lcaodft/FORCE.h index 51d840089d..ecd6d11b6a 100644 --- a/source/module_hamilt_lcao/hamilt_lcaodft/FORCE.h +++ b/source/module_hamilt_lcao/hamilt_lcaodft/FORCE.h @@ -116,17 +116,6 @@ class Force_LCAO ModuleBase::matrix& stvnl_dphi, Record_adj* ra = nullptr); - void cal_fvnl_dbeta(const elecstate::DensityMatrix* dm, - const Parallel_Orbitals& pv, - const UnitCell& ucell, - const LCAO_Orbitals& orb, - const TwoCenterIntegrator& intor_orb_beta, - Grid_Driver& gd, - const bool isforce, - const bool isstress, - ModuleBase::matrix& fvnl_dbeta, - ModuleBase::matrix& svnl_dbeta); - //------------------------------------------- // forces related to local pseudopotentials //------------------------------------------- diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_STRESS.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_STRESS.cpp index a3258438cc..30782f9fbd 100644 --- a/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_STRESS.cpp +++ b/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_STRESS.cpp @@ -18,6 +18,8 @@ #include "module_hamilt_lcao/module_deepks/LCAO_deepks_io.h" // mohan add 2024-07-22 #endif #include "module_hamilt_lcao/hamilt_lcaodft/operator_lcao/dftu_lcao.h" +#include "module_hamilt_lcao/hamilt_lcaodft/operator_lcao/nonlocal_new.h" +#include "module_elecstate/elecstate_lcao.h" template Force_Stress_LCAO::Force_Stress_LCAO(Record_adj& ra, const int nat_in) : RA(&ra), f_pw(nat_in), nat(nat_in) @@ -168,6 +170,54 @@ void Force_Stress_LCAO::getForceStress(const bool isforce, orb, pv, kv); + // calculate force and stress for Nonlocal part + if(PARAM.inp.nspin == 1 || PARAM.inp.nspin == 2) + { + hamilt::NonlocalNew> tmp_nonlocal( + nullptr, + kv.kvec_d, + nullptr, + &GlobalC::ucell, + orb.cutoffs(), + &GlobalC::GridD, + two_center_bundle.overlap_orb_beta.get() + ); + + const auto* dm_p = dynamic_cast*>(pelec)->get_DM(); + if(PARAM.inp.nspin == 2) + { + const_cast*>(dm_p)->switch_dmr(1); + } + const hamilt::HContainer* dmr = dm_p->get_DMR_pointer(1); + tmp_nonlocal.cal_force_stress(isforce, isstress, dmr, fvnl_dbeta, svnl_dbeta); + if(PARAM.inp.nspin == 2) + { + const_cast*>(dm_p)->switch_dmr(0); + } + } + else if(PARAM.inp.nspin == 4) + { + hamilt::NonlocalNew, std::complex>> tmp_nonlocal( + nullptr, + kv.kvec_d, + nullptr, + &GlobalC::ucell, + orb.cutoffs(), + &GlobalC::GridD, + two_center_bundle.overlap_orb_beta.get() + ); + + // calculate temporary complex DMR for nonlocal force&stress + // In fact, only SOC part need the imaginary part of DMR for correct force&stress + const auto* dm_p = dynamic_cast>*>(pelec)->get_DM(); + hamilt::HContainer> tmp_dmr(dm_p->get_DMR_pointer(1)->get_paraV()); + std::vector ijrs = dm_p->get_DMR_pointer(1)->get_ijr_info(); + tmp_dmr.insert_ijrs(&ijrs); + tmp_dmr.allocate(); + dm_p->cal_DMR_full(&tmp_dmr); + tmp_nonlocal.cal_force_stress(isforce, isstress, &tmp_dmr, fvnl_dbeta, svnl_dbeta); + } + //! forces and stress from vdw // Peize Lin add 2014-04-04, update 2021-03-09 diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_gamma.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_gamma.cpp index db631841cd..dcf34c95b1 100644 --- a/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_gamma.cpp +++ b/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_gamma.cpp @@ -106,15 +106,6 @@ void Force_LCAO::allocate(const Parallel_Orbitals& pv, &GlobalC::GridD, nullptr); - LCAO_domain::build_Nonlocal_mu_new(pv, - fsr, - nullptr, - cal_deri, - GlobalC::ucell, - orb, - *(two_center_bundle.overlap_orb_beta), - &GlobalC::GridD); - // calculate asynchronous S matrix to output for Hefei-NAMD if (PARAM.inp.cal_syns) { @@ -228,17 +219,6 @@ void Force_LCAO::ftable(const bool isforce, //tvnl_dphi PulayForceStress::cal_pulay_fs(ftvnl_dphi, stvnl_dphi, *dm, ucell, pv, dHx, dHxy, isforce, isstress); - this->cal_fvnl_dbeta(dm, - pv, - ucell, - orb, - *(two_center_bundle.overlap_orb_beta), - GlobalC::GridD, - isforce, - isstress, - fvnl_dbeta, - svnl_dbeta); - // vl_dphi PulayForceStress::cal_pulay_fs(fvl_dphi, svl_dphi, *dm, ucell, pelec->pot, gint, isforce, isstress, false/*reset dm to gint*/); diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_k.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_k.cpp index 1c4b10f251..7f6d4d1c1c 100644 --- a/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_k.cpp +++ b/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_k.cpp @@ -131,16 +131,6 @@ void Force_LCAO>::allocate(const Parallel_Orbitals& pv, &GlobalC::GridD, nullptr); // delete lm.Hloc_fixedR - // calculate dVnl= in LCAO - LCAO_domain::build_Nonlocal_mu_new(pv, - fsr, - nullptr, - cal_deri, - GlobalC::ucell, - orb, - *(two_center_bundle.overlap_orb_beta), - &GlobalC::GridD); - // calculate asynchronous S matrix to output for Hefei-NAMD if (PARAM.inp.cal_syns) { @@ -329,17 +319,6 @@ void Force_LCAO>::ftable(const bool isforce, // vl_dphi PulayForceStress::cal_pulay_fs(fvl_dphi, svl_dphi, *dm, ucell, pelec->pot, gint, isforce, isstress, false/*reset dm to gint*/); - this->cal_fvnl_dbeta(dm, - pv, - ucell, - orb, - *(two_center_bundle.overlap_orb_beta), - GlobalC::GridD, - isforce, - isstress, - fvnl_dbeta, - svnl_dbeta); - #ifdef __DEEPKS if (PARAM.inp.deepks_scf) { diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/LCAO_init_basis.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/LCAO_init_basis.cpp index e0dc45bd6d..f500cdb016 100644 --- a/source/module_hamilt_lcao/hamilt_lcaodft/LCAO_init_basis.cpp +++ b/source/module_hamilt_lcao/hamilt_lcaodft/LCAO_init_basis.cpp @@ -79,7 +79,9 @@ void init_basis_lcao(Parallel_Orbitals& pv, try_nb += pv.set_nloc_wfc_Eij(PARAM.inp.nbands, GlobalV::ofs_running, GlobalV::ofs_warning); if (try_nb != 0) { - pv.set(nlocal, nlocal, 1, pv.blacs_ctxt); + // fall back to the minimum size, 1 or 2 (nspin=4) + const int min_size = (PARAM.inp.nspin == 4) ? 2 : 1; + pv.set(nlocal, nlocal, min_size, pv.blacs_ctxt); try_nb = pv.set_nloc_wfc_Eij(PARAM.inp.nbands, GlobalV::ofs_running, GlobalV::ofs_warning); } diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/edm.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/edm.cpp index c8b9e01b94..49074830a8 100644 --- a/source/module_hamilt_lcao/hamilt_lcaodft/edm.cpp +++ b/source/module_hamilt_lcao/hamilt_lcaodft/edm.cpp @@ -99,6 +99,5 @@ elecstate::DensityMatrix, double> Force_LCAO - -template <> -void Force_LCAO::cal_fvnl_dbeta(const elecstate::DensityMatrix* dm, - const Parallel_Orbitals& pv, - const UnitCell& ucell, - const LCAO_Orbitals& orb, - const TwoCenterIntegrator& intor_orb_beta, - Grid_Driver& gd, - const bool isforce, - const bool isstress, - ModuleBase::matrix& fvnl_dbeta, - ModuleBase::matrix& svnl_dbeta) -{ - ModuleBase::TITLE("Force_LCAO", "cal_fvnl_dbeta"); - ModuleBase::timer::tick("Force_LCAO", "cal_fvnl_dbeta"); - -// use schedule(dynamic) for load balancing because adj_num is various -#pragma omp parallel -{ - ModuleBase::matrix local_svnl_dbeta(3, 3); - #pragma omp for schedule(dynamic) - for (int iat = 0; iat < ucell.nat; iat++) - { - const int it = ucell.iat2it[iat]; - const int ia = ucell.iat2ia[iat]; - const int T0 = it; - const int I0 = ia; - double r0[3]{0}; - double r1[3]{0}; - - const ModuleBase::Vector3 tau0 = ucell.atoms[it].tau[ia]; - // find ajacent atom of atom ia - // gd.Find_atom( ucell.atoms[it].tau[ia] ); - AdjacentAtomInfo adjs; - gd.Find_atom(ucell, tau0, it, ia, &adjs); - const double Rcut_Beta = ucell.infoNL.Beta[it].get_rcut_max(); - - std::vector>>> nlm_tot; - nlm_tot.resize(adjs.adj_num + 1); // this saves and - - // Step 1 : Calculate and save and - for (int ad1 = 0; ad1 < adjs.adj_num + 1; ad1++) - { - const int T1 = adjs.ntype[ad1]; - const Atom* atom1 = &ucell.atoms[T1]; - const int I1 = adjs.natom[ad1]; - const int start1 = ucell.itiaiw2iwt(T1, I1, 0); - const ModuleBase::Vector3 tau1 = adjs.adjacent_tau[ad1]; - const double Rcut_AO1 = orb.Phi[T1].getRcut(); - - nlm_tot[ad1].clear(); - - const double dist1 = (tau1 - tau0).norm() * ucell.lat0; - if (dist1 > Rcut_Beta + Rcut_AO1) - { - continue; - } - - for (int iw1 = 0; iw1 < ucell.atoms[T1].nw; ++iw1) - { - const int iw1_all = start1 + iw1; - const int iw1_local = pv.global2local_row(iw1_all); - const int iw2_local = pv.global2local_col(iw1_all); - - if (iw1_local < 0 && iw2_local < 0) - { - continue; - } - - std::vector> nlm; - - int L1 = atom1->iw2l[iw1]; - int N1 = atom1->iw2n[iw1]; - int m1 = atom1->iw2m[iw1]; - - // convert m (0,1,...2l) to M (-l, -l+1, ..., l-1, l) - int M1 = (m1 % 2 == 0) ? -m1 / 2 : (m1 + 1) / 2; - - ModuleBase::Vector3 dtau = ucell.atoms[T0].tau[I0] - tau1; - - intor_orb_beta.snap(T1, L1, N1, M1, T0, dtau * ucell.lat0, true, nlm); - - assert(nlm.size() == 4); - nlm_tot[ad1].insert({iw1, nlm}); - } - } // ad - - // Step 2 : sum to get beta - for (int ad1 = 0; ad1 < adjs.adj_num + 1; ++ad1) - { - const int T1 = adjs.ntype[ad1]; - const Atom* atom1 = &ucell.atoms[T1]; - const int I1 = adjs.natom[ad1]; - const int start1 = ucell.itiaiw2iwt(T1, I1, 0); - const ModuleBase::Vector3 tau1 = adjs.adjacent_tau[ad1]; - const double Rcut_AO1 = orb.Phi[T1].getRcut(); - - for (int ad2 = 0; ad2 < adjs.adj_num + 1; ad2++) - { - const int T2 = adjs.ntype[ad2]; - const Atom* atom2 = &ucell.atoms[T2]; - const int I2 = adjs.natom[ad2]; - const int start2 = ucell.itiaiw2iwt(T2, I2, 0); - const ModuleBase::Vector3 tau2 = adjs.adjacent_tau[ad2]; - const double Rcut_AO2 = orb.Phi[T2].getRcut(); - - const double dist1 = (tau1 - tau0).norm() * ucell.lat0; - const double dist2 = (tau2 - tau0).norm() * ucell.lat0; - if (isstress) - { - r1[0] = (tau1.x - tau0.x); - r1[1] = (tau1.y - tau0.y); - r1[2] = (tau1.z - tau0.z); - r0[0] = (tau2.x - tau0.x); - r0[1] = (tau2.y - tau0.y); - r0[2] = (tau2.z - tau0.z); - } - - if (dist1 > Rcut_Beta + Rcut_AO1 || dist2 > Rcut_Beta + Rcut_AO2) - { - continue; - } - - for (int iw1 = 0; iw1 < ucell.atoms[T1].nw; ++iw1) - { - const int iw1_all = start1 + iw1; - const int iw1_local = pv.global2local_row(iw1_all); - if (iw1_local < 0) - { - continue; - } - for (int iw2 = 0; iw2 < ucell.atoms[T2].nw; ++iw2) - - { - const int iw2_all = start2 + iw2; - const int iw2_local = pv.global2local_col(iw2_all); - - if (iw2_local < 0) - { - continue; - } - - double nlm[3] = {0, 0, 0}; - double nlm_t[3] = {0, 0, 0}; // transpose - - const double* nlm1 = nlm_tot[ad1][iw1][0].data(); - const double* nlm2[3]; - for (int i = 0; i < 3; i++) - { - nlm2[i] = nlm_tot[ad2][iw2][i + 1].data(); - } - - const double* tmp_d = nullptr; - for (int no = 0; no < ucell.atoms[T0].ncpp.non_zero_count_soc[0]; no++) - { - const int p1 = ucell.atoms[T0].ncpp.index1_soc[0][no]; - const int p2 = ucell.atoms[T0].ncpp.index2_soc[0][no]; - ucell.atoms[T0].ncpp.get_d(0, p1, p2, tmp_d); - for (int ir = 0; ir < 3; ir++) - { - nlm[ir] += nlm2[ir][p2] * nlm1[p1] * (*tmp_d); - } - } - - if (isstress) - { - nlm1 = nlm_tot[ad2][iw2][0].data(); - for (int i = 0; i < 3; i++) - { - nlm2[i] = nlm_tot[ad1][iw1][i + 1].data(); - } - - const double* tmp_d = nullptr; - for (int no = 0; no < ucell.atoms[T0].ncpp.non_zero_count_soc[0]; no++) - { - const int p1 = ucell.atoms[T0].ncpp.index1_soc[0][no]; - const int p2 = ucell.atoms[T0].ncpp.index2_soc[0][no]; - ucell.atoms[T0].ncpp.get_d(0, p1, p2, tmp_d); - for (int ir = 0; ir < 3; ir++) - { - nlm_t[ir] += nlm2[ir][p1] * nlm1[p2] * (*tmp_d); - } - } - } - // dbeta is minus, that's consistent. - // only one projector for each atom force. - - double sum = 0.0; - for (int is = 0; is < PARAM.inp.nspin; ++is) - { - // sum += dm2d[is](iw2_local, iw1_local); - sum += dm->get_DMK(is + 1, 0, iw2_local, iw1_local); - } - sum *= 2.0; - - if (isforce) - { - fvnl_dbeta(iat, 0) -= sum * nlm[0]; - fvnl_dbeta(iat, 1) -= sum * nlm[1]; - fvnl_dbeta(iat, 2) -= sum * nlm[2]; - } - - if (isstress) - { - for (int ipol = 0; ipol < 3; ipol++) - { - for (int jpol = ipol; jpol < 3; jpol++) - { - local_svnl_dbeta(ipol, jpol) - += sum / 2.0 * (nlm[ipol] * r0[jpol] + nlm_t[ipol] * r1[jpol]); - } - } - } - } // iw2 - } // iw1 - } // ad2 - } // ad1 - } // iat - - // sum up local_svnl_dbeta to svnl_dbeta - if (isstress) - { - #pragma omp critical - { - for (int ipol = 0; ipol < 3; ipol++) - { - for (int jpol = ipol; jpol < 3; jpol++) - { - svnl_dbeta(ipol, jpol) += local_svnl_dbeta(ipol, jpol); - } - } - } - } -} - if (isstress) - { - StressTools::stress_fill(ucell.lat0, ucell.omega, svnl_dbeta); - } - - ModuleBase::timer::tick("Force_LCAO", "cal_fvnl_dbeta"); -} diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/fvnl_dbeta_k.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/fvnl_dbeta_k.cpp deleted file mode 100644 index b57ad57d73..0000000000 --- a/source/module_hamilt_lcao/hamilt_lcaodft/fvnl_dbeta_k.cpp +++ /dev/null @@ -1,634 +0,0 @@ -#include "FORCE.h" -#include "module_base/memory.h" -#include "module_parameter/parameter.h" -#include "module_base/parallel_reduce.h" -#include "module_base/timer.h" -#include "module_base/tool_threading.h" -#include "module_cell/module_neighbor/sltk_grid_driver.h" -#include "module_elecstate/cal_dm.h" -#include "module_elecstate/elecstate_lcao.h" -#include "module_elecstate/module_dm/cal_dm_psi.h" -#include "module_hamilt_pw/hamilt_pwdft/global.h" -#include "module_io/write_HS.h" -#include "module_hamilt_lcao/hamilt_lcaodft/stress_tools.h" - -#include -#include - -#ifdef __DEEPKS -#include "module_hamilt_lcao/module_deepks/LCAO_deepks.h" -#endif - -#ifdef _OPENMP -#include -#endif - -typedef std::tuple key_tuple; - -// must consider three-center H matrix. -template <> -void Force_LCAO>::cal_fvnl_dbeta(const elecstate::DensityMatrix, double>* dm, - const Parallel_Orbitals& pv, - const UnitCell& ucell, - const LCAO_Orbitals& orb, - const TwoCenterIntegrator& intor_orb_beta, - Grid_Driver& gd, - const bool isforce, - const bool isstress, - ModuleBase::matrix& fvnl_dbeta, - ModuleBase::matrix& svnl_dbeta) -{ - ModuleBase::TITLE("Force_LCAO", "cal_fvnl_dbeta"); - ModuleBase::timer::tick("Force_LCAO", "cal_fvnl_dbeta"); - - const int nspin = PARAM.inp.nspin; - const int nspin_DMR = (nspin == 2) ? 2 : 1; - const int npol = PARAM.globalv.npol; - - // Data structure for storing , for a detailed description - // check out the same data structure in build_Nonlocal_mu_new - std::vector>>>> nlm_tot; - - nlm_tot.resize(ucell.nat); - -#ifdef _OPENMP -// use schedule(dynamic) for load balancing because adj_num is various -#pragma omp parallel for schedule(dynamic) -#endif - for (int iat = 0; iat < ucell.nat; iat++) - { - - const int it = ucell.iat2it[iat]; - const int ia = ucell.iat2ia[iat]; - - // Step 1 : generate - // type of atom; distance; atomic basis; projectors - - const double Rcut_Beta = ucell.infoNL.Beta[it].get_rcut_max(); - const ModuleBase::Vector3 tau = ucell.atoms[it].tau[ia]; - AdjacentAtomInfo adjs; - gd.Find_atom(ucell, tau, it, ia, &adjs); - - nlm_tot[iat].clear(); - - for (int ad = 0; ad < adjs.adj_num + 1; ++ad) - { - const int T1 = adjs.ntype[ad]; - const int I1 = adjs.natom[ad]; - const int start1 = ucell.itiaiw2iwt(T1, I1, 0); - const double Rcut_AO1 = orb.Phi[T1].getRcut(); - - const ModuleBase::Vector3& tau1 = adjs.adjacent_tau[ad]; - const Atom* atom1 = &ucell.atoms[T1]; - const int nw1_tot = atom1->nw * npol; - - const ModuleBase::Vector3 dtau = tau1 - tau; - const double dist1 = dtau.norm2() * ucell.lat0 * ucell.lat0; - if (dist1 > (Rcut_Beta + Rcut_AO1) * (Rcut_Beta + Rcut_AO1)) - { - continue; - } - - std::unordered_map>> nlm_cur; - nlm_cur.clear(); - - for (int iw1 = 0; iw1 < nw1_tot; ++iw1) - { - const int iw1_all = start1 + iw1; - const int iw1_local = pv.global2local_row(iw1_all); - const int iw2_local = pv.global2local_col(iw1_all); - if (iw1_local < 0 && iw2_local < 0) - { - continue; - } - const int iw1_0 = iw1 / npol; - std::vector> nlm; - int L1 = atom1->iw2l[iw1_0]; - int N1 = atom1->iw2n[iw1_0]; - int m1 = atom1->iw2m[iw1_0]; - - // convert m (0,1,...2l) to M (-l, -l+1, ..., l-1, l) - int M1 = (m1 % 2 == 0) ? -m1 / 2 : (m1 + 1) / 2; - - ModuleBase::Vector3 dtau = tau - tau1; - intor_orb_beta.snap(T1, L1, N1, M1, it, dtau * ucell.lat0, true, nlm); - - nlm_cur.insert({iw1_all, nlm}); - } // end iw - const int iat1 = ucell.itia2iat(T1, I1); - const int rx1 = adjs.box[ad].x; - const int ry1 = adjs.box[ad].y; - const int rz1 = adjs.box[ad].z; - key_tuple key_1(iat1, rx1, ry1, rz1); - nlm_tot[iat][key_1] = nlm_cur; - } // end ad - } - - //======================================================= - // Step2: - // calculate sum_(L0,M0) beta - // and accumulate the value to Hloc_fixedR(i,j) - //======================================================= - int total_nnr = 0; -#ifdef _OPENMP -#pragma omp parallel reduction(+ : total_nnr) - { - ModuleBase::matrix local_svnl_dbeta(3, 3); - const int num_threads = omp_get_num_threads(); -#else - ModuleBase::matrix& local_svnl_dbeta = svnl_dbeta; -#endif - - ModuleBase::Vector3 tau1; - ModuleBase::Vector3 tau2; - ModuleBase::Vector3 dtau; - ModuleBase::Vector3 tau0; - ModuleBase::Vector3 dtau1; - ModuleBase::Vector3 dtau2; - - double rcut = 0.0; - double distance = 0.0; - - double rcut1 = 0.0; - double rcut2 = 0.0; - double distance1 = 0.0; - double distance2 = 0.0; - -#ifdef _OPENMP -// use schedule(dynamic) for load balancing because adj_num is various -#pragma omp for schedule(dynamic) -#endif - for (int iat1 = 0; iat1 < ucell.nat; iat1++) - { - const int T1 = ucell.iat2it[iat1]; - const Atom* atom1 = &ucell.atoms[T1]; - - { - const int I1 = ucell.iat2ia[iat1]; - tau1 = atom1->tau[I1]; - AdjacentAtomInfo adjs; - gd.Find_atom(ucell, tau1, T1, I1, &adjs); - const int start1 = ucell.itiaiw2iwt(T1, I1, 0); - int nnr = pv.nlocstart[iat1]; - - /* - !!!!!!!!!!!!!!!! - This optimization is also improving the performance of single thread. - Making memory access more linearly in the core loop - */ - bool iat_recorded = false; - bool force_updated = false; - // record iat of adjs - std::vector adj_iat; - // record fvnl_dbeta diff of adjs - std::vector adj_fvnl_dbeta; - if (isforce) - { - adj_iat.resize(adjs.adj_num + 1); - adj_fvnl_dbeta.resize((adjs.adj_num + 1) * 3, 0.0); - } - - for (int ad2 = 0; ad2 < adjs.adj_num + 1; ++ad2) - { - const int T2 = adjs.ntype[ad2]; - const Atom* atom2 = &ucell.atoms[T2]; - - const int I2 = adjs.natom[ad2]; - const int iat2 = ucell.itia2iat(T2, I2); - const int start2 = ucell.itiaiw2iwt(T2, I2, 0); - tau2 = adjs.adjacent_tau[ad2]; - - const int rx2 = adjs.box[ad2].x; - const int ry2 = adjs.box[ad2].y; - const int rz2 = adjs.box[ad2].z; - - dtau = tau2 - tau1; - distance = dtau.norm2() * (ucell.lat0 * ucell.lat0); - // this rcut is in order to make nnr consistent - // with other matrix. - rcut = (orb.Phi[T1].getRcut() + orb.Phi[T2].getRcut()) * (orb.Phi[T1].getRcut() + orb.Phi[T2].getRcut()); - - // check if this a adjacent atoms. - bool is_adj = false; - if (distance < rcut) - { - is_adj = true; - } - else if (distance >= rcut) - { - for (int ad0 = 0; ad0 < adjs.adj_num + 1; ++ad0) - { - const int T0 = adjs.ntype[ad0]; - if (ucell.infoNL.nproj[T0] == 0) - { - continue; - } - const int I0 = adjs.natom[ad0]; - // const int iat0 = ucell.itia2iat(T0, I0); - // const int start0 = ucell.itiaiw2iwt(T0, I0, 0); - - tau0 = adjs.adjacent_tau[ad0]; - dtau1 = tau0 - tau1; - distance1 = dtau1.norm() * ucell.lat0; - rcut1 = orb.Phi[T1].getRcut() + ucell.infoNL.Beta[T0].get_rcut_max(); - - dtau2 = tau0 - tau2; - distance2 = dtau2.norm() * ucell.lat0; - rcut2 = orb.Phi[T2].getRcut() + ucell.infoNL.Beta[T0].get_rcut_max(); - - if (distance1 < rcut1 && distance2 < rcut2) - { - is_adj = true; - break; - } - } - } - - if (is_adj) - { - // basematrix and its data pointer - if (pv.get_row_size(iat1) <= 0 || pv.get_col_size(iat2) <= 0) - { - continue; - } - std::vector tmp_matrix_ptr; - for (int is = 0; is < nspin_DMR; ++is) - { - auto* tmp_base_matrix = dm->get_DMR_pointer(is + 1)->find_matrix(iat1, iat2, rx2, ry2, rz2); - tmp_matrix_ptr.push_back(tmp_base_matrix->get_pointer()); - } - - for (int ad0 = 0; ad0 < adjs.adj_num + 1; ++ad0) - { - const int T0 = adjs.ntype[ad0]; - const int I0 = adjs.natom[ad0]; - const int iat = ucell.itia2iat(T0, I0); - if (!iat_recorded && isforce) - { - adj_iat[ad0] = iat; - } - - // mohan add 2010-12-19 - if (ucell.infoNL.nproj[T0] == 0) - { - continue; - } - - // const int I0 = gd.getNatom(ad0); - // const int start0 = ucell.itiaiw2iwt(T0, I0, 0); - tau0 = adjs.adjacent_tau[ad0]; - - dtau1 = tau0 - tau1; - dtau2 = tau0 - tau2; - const double distance1 = dtau1.norm2() * ucell.lat0 * ucell.lat0; - const double distance2 = dtau2.norm2() * ucell.lat0 * ucell.lat0; - - // seems a bug here!! mohan 2011-06-17 - rcut1 = (orb.Phi[T1].getRcut() + ucell.infoNL.Beta[T0].get_rcut_max()) * (orb.Phi[T1].getRcut() + ucell.infoNL.Beta[T0].get_rcut_max()); - rcut2 = (orb.Phi[T2].getRcut() + ucell.infoNL.Beta[T0].get_rcut_max()) * (orb.Phi[T2].getRcut() + ucell.infoNL.Beta[T0].get_rcut_max()); - - double r0[3]; - double r1[3]; - r1[0] = (tau1.x - tau0.x); - r1[1] = (tau1.y - tau0.y); - r1[2] = (tau1.z - tau0.z); - r0[0] = (tau2.x - tau0.x); - r0[1] = (tau2.y - tau0.y); - r0[2] = (tau2.z - tau0.z); - - if (distance1 >= rcut1 || distance2 >= rcut2) - { - continue; - } - - const int rx0 = adjs.box[ad0].x; - const int ry0 = adjs.box[ad0].y; - const int rz0 = adjs.box[ad0].z; - key_tuple key1(iat1, -rx0, -ry0, -rz0); - key_tuple key2(iat2, rx2 - rx0, ry2 - ry0, rz2 - rz0); - - int nnr_inner = 0; - int dhsize = pv.get_row_size(iat1) * pv.get_col_size(iat2); - std::vector> dhvnl(3, std::vector(dhsize, 0.0)); - std::vector> dhvnl1(3, std::vector(dhsize, 0.0)); - for (int j = 0; j < atom1->nw * npol; j++) - { - const int j0 = j / npol; // added by zhengdy-soc - const int iw1_all = start1 + j; - const int mu = pv.global2local_row(iw1_all); - if (mu < 0) - { - continue; - } - - for (int k = 0; k < atom2->nw * npol; k++) - { - const int k0 = k / npol; - const int iw2_all = start2 + k; - const int nu = pv.global2local_col(iw2_all); - if (nu < 0) - { - continue; - } - - if (nspin == 4) - { - const double* nlm_1 = nlm_tot[iat][key2][iw2_all][0].data(); - const double* nlm_2[3]; - for (int i = 0; i < 3; i++) - { - nlm_2[i] = nlm_tot[iat][key1][iw1_all][i + 1].data(); - } - int is0 = (j - j0 * npol) + (k - k0 * npol) * 2; - for (int no = 0; no < ucell.atoms[T0].ncpp.non_zero_count_soc[is0]; no++) - { - const int p1 = ucell.atoms[T0].ncpp.index1_soc[is0][no]; - const int p2 = ucell.atoms[T0].ncpp.index2_soc[is0][no]; - for (int ir = 0; ir < 3; ir++) - { - if (is0 == 0) - { - dhvnl[ir][nnr_inner] - += nlm_2[ir][p1] * nlm_1[p2] - * (ucell.atoms[T0].ncpp.d_so(0, p2, p1).real() - + ucell.atoms[T0].ncpp.d_so(3, p2, p1).real()) - * 0.5; - } - else if (is0 == 1) - { - dhvnl[ir][nnr_inner] - += nlm_2[ir][p1] * nlm_1[p2] - * (ucell.atoms[T0].ncpp.d_so(1, p2, p1).real() - + ucell.atoms[T0].ncpp.d_so(2, p2, p1).real()) - * 0.5; - } - else if (is0 == 2) - { - dhvnl[ir][nnr_inner] - += nlm_2[ir][p1] * nlm_1[p2] - * (-ucell.atoms[T0].ncpp.d_so(1, p2, p1).imag() - + ucell.atoms[T0].ncpp.d_so(2, p2, p1).imag()) - * 0.5; - } - else if (is0 == 3) - { - dhvnl[ir][nnr_inner] - += nlm_2[ir][p1] * nlm_1[p2] - * (ucell.atoms[T0].ncpp.d_so(0, p2, p1).real() - - ucell.atoms[T0].ncpp.d_so(3, p2, p1).real()) - * 0.5; - } - } - } - if (isstress) - { - const double* nlm_1 = nlm_tot[iat][key1][iw1_all][0].data(); - const double* nlm_2[3]; - for (int i = 0; i < 3; i++) - { - nlm_2[i] = nlm_tot[iat][key2][iw2_all][i + 1].data(); - } - int is0 = (j - j0 * npol) + (k - k0 * npol) * 2; - for (int no = 0; no < ucell.atoms[T0].ncpp.non_zero_count_soc[is0]; no++) - { - const int p1 = ucell.atoms[T0].ncpp.index1_soc[is0][no]; - const int p2 = ucell.atoms[T0].ncpp.index2_soc[is0][no]; - for (int ir = 0; ir < 3; ir++) - { - if (is0 == 0) - { - dhvnl1[ir][nnr_inner] - += nlm_2[ir][p1] * nlm_1[p2] - * (ucell.atoms[T0].ncpp.d_so(0, p2, p1).real() - + ucell.atoms[T0].ncpp.d_so(3, p2, p1).real()) - * 0.5; - } - else if (is0 == 1) - { - dhvnl1[ir][nnr_inner] - += nlm_2[ir][p1] * nlm_1[p2] - * (ucell.atoms[T0].ncpp.d_so(1, p2, p1).real() - + ucell.atoms[T0].ncpp.d_so(2, p2, p1).real()) - * 0.5; - } - else if (is0 == 2) - { - dhvnl1[ir][nnr_inner] - += nlm_2[ir][p1] * nlm_1[p2] - * (-ucell.atoms[T0].ncpp.d_so(1, p2, p1).imag() - + ucell.atoms[T0].ncpp.d_so(2, p2, p1).imag()) - * 0.5; - } - else if (is0 == 3) - { - dhvnl1[ir][nnr_inner] - += nlm_2[ir][p1] * nlm_1[p2] - * (ucell.atoms[T0].ncpp.d_so(0, p2, p1).real() - - ucell.atoms[T0].ncpp.d_so(3, p2, p1).real()) - * 0.5; - } - } - } - } - } - else if (nspin == 1 || nspin == 2) - { - // const Atom* atom0 = &ucell.atoms[T0]; - double nlm[3] = {0, 0, 0}; - const double* nlm_1 = nlm_tot[iat][key2][iw2_all][0].data(); - const double* nlm_2[3]; - for (int i = 0; i < 3; i++) - { - nlm_2[i] = nlm_tot[iat][key1][iw1_all][i + 1].data(); - } - - const double* tmp_d = nullptr; - for (int no = 0; no < ucell.atoms[T0].ncpp.non_zero_count_soc[0]; no++) - { - const int p1 = ucell.atoms[T0].ncpp.index1_soc[0][no]; - const int p2 = ucell.atoms[T0].ncpp.index2_soc[0][no]; - ucell.atoms[T0].ncpp.get_d(0, p1, p2, tmp_d); - for (int ir = 0; ir < 3; ir++) - { - nlm[ir] += nlm_2[ir][p2] * nlm_1[p1] * (*tmp_d); - } - } - - double nlm1[3] = {0, 0, 0}; - if (isstress) - { - const double* nlm_1 = nlm_tot[iat][key1][iw1_all][0].data(); - const double* nlm_2[3]; - for (int i = 0; i < 3; i++) - { - nlm_2[i] = nlm_tot[iat][key2][iw2_all][i + 1].data(); - } - - const double* tmp_d = nullptr; - for (int no = 0; no < ucell.atoms[T0].ncpp.non_zero_count_soc[0]; no++) - { - const int p1 = ucell.atoms[T0].ncpp.index1_soc[0][no]; - const int p2 = ucell.atoms[T0].ncpp.index2_soc[0][no]; - ucell.atoms[T0].ncpp.get_d(0, p1, p2, tmp_d); - for (int ir = 0; ir < 3; ir++) - { - nlm1[ir] += nlm_2[ir][p1] * nlm_1[p2] * (*tmp_d); - } - } - } - - /// only one projector for each atom force, but another projector for stress - force_updated = true; - // get DMR - double dm2d1 = 0.0; - for (int is = 0; is < nspin_DMR; ++is) - { - dm2d1 += tmp_matrix_ptr[is][nnr_inner]; - } - double dm2d2 = 2.0 * dm2d1; - // - for (int jpol = 0; jpol < 3; jpol++) - { - if (isforce) - { - adj_fvnl_dbeta[ad0 * 3 + jpol] -= dm2d2 * nlm[jpol]; - } - if (isstress) - { - for (int ipol = jpol; ipol < 3; ipol++) - { - local_svnl_dbeta(jpol, ipol) - += dm2d1 * (nlm[jpol] * r1[ipol] + nlm1[jpol] * r0[ipol]); - } - } - } - } - else - { - ModuleBase::WARNING_QUIT("Force_LCAO_k::cal_fvnl_dbeta_k", - "nspin must be 1, 2 or 4"); - } - //} - nnr_inner++; - } // k - } // j - if (nspin == 4) - { - force_updated = true; - if (isforce) - { - for (int ir = 0; ir < dhsize; ir++) - { - adj_fvnl_dbeta[ad0 * 3 + 0] -= 2.0 * tmp_matrix_ptr[0][ir] * dhvnl[0][ir]; - adj_fvnl_dbeta[ad0 * 3 + 1] -= 2.0 * tmp_matrix_ptr[0][ir] * dhvnl[1][ir]; - adj_fvnl_dbeta[ad0 * 3 + 2] -= 2.0 * tmp_matrix_ptr[0][ir] * dhvnl[2][ir]; - } - } - if (isstress) - { - for (int ir = 0; ir < dhsize; ir++) - { - for (int jpol = 0; jpol < 3; jpol++) - { - if (isstress) - { - for (int ipol = jpol; ipol < 3; ipol++) - { - local_svnl_dbeta(jpol, ipol) - += tmp_matrix_ptr[0][ir] - * (dhvnl[jpol][ir] * r1[ipol] + dhvnl1[jpol][ir] * r0[ipol]); - } - } - } - } - } - } - } // ad0 - - // outer circle : accumulate nnr - for (int j = 0; j < atom1->nw * npol; j++) - { - const int j0 = j / npol; // added by zhengdy-soc - const int iw1_all = start1 + j; - const int mu = pv.global2local_row(iw1_all); - if (mu < 0) - { - continue; - } - - // fix a serious bug: atom2[T2] -> atom2 - // mohan 2010-12-20 - for (int k = 0; k < atom2->nw * npol; k++) - { - const int k0 = k / npol; - const int iw2_all = start2 + k; - const int nu = pv.global2local_col(iw2_all); - if (nu < 0) - { - continue; - } - total_nnr++; - nnr++; - } - } - iat_recorded = true; - } // is_adj - } // ad2 - - // sum the diff to fvnl_dbeta - if (force_updated && isforce) - { -#ifdef _OPENMP - if (num_threads > 1) - { - for (int ad0 = 0; ad0 < adjs.adj_num + 1; ++ad0) - { -#pragma omp atomic - fvnl_dbeta(adj_iat[ad0], 0) += adj_fvnl_dbeta[ad0 * 3 + 0]; -#pragma omp atomic - fvnl_dbeta(adj_iat[ad0], 1) += adj_fvnl_dbeta[ad0 * 3 + 1]; -#pragma omp atomic - fvnl_dbeta(adj_iat[ad0], 2) += adj_fvnl_dbeta[ad0 * 3 + 2]; - } - } - else -#endif - { - for (int ad0 = 0; ad0 < adjs.adj_num + 1; ++ad0) - { - fvnl_dbeta(adj_iat[ad0], 0) += adj_fvnl_dbeta[ad0 * 3 + 0]; - fvnl_dbeta(adj_iat[ad0], 1) += adj_fvnl_dbeta[ad0 * 3 + 1]; - fvnl_dbeta(adj_iat[ad0], 2) += adj_fvnl_dbeta[ad0 * 3 + 2]; - } - } - } - } // I1 - } // T1 - -#ifdef _OPENMP - if (isstress) - { -#pragma omp critical(cal_fvnl_dbeta_k_new_reduce) - { - for (int l = 0; l < 3; l++) - { - for (int m = 0; m < 3; m++) - { - svnl_dbeta(l, m) += local_svnl_dbeta(l, m); - } - } - } - } - } -#endif - - assert(total_nnr == pv.nnr); - - if (isstress) - { - StressTools::stress_fill(ucell.lat0, ucell.omega, svnl_dbeta); - } - - ModuleBase::timer::tick("Force_LCAO", "cal_fvnl_dbeta"); - return; -} diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/nonlocal_force_stress.hpp b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/nonlocal_force_stress.hpp new file mode 100644 index 0000000000..b6cbe96786 --- /dev/null +++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/nonlocal_force_stress.hpp @@ -0,0 +1,442 @@ +#pragma once +#include "nonlocal_new.h" +#include "module_base/parallel_reduce.h" +#include "module_base/timer.h" + +namespace hamilt +{ + +template +void NonlocalNew>::cal_force_stress(const bool cal_force, + const bool cal_stress, + const HContainer* dmR, + ModuleBase::matrix& force, + ModuleBase::matrix& stress) +{ + ModuleBase::TITLE("NonlocalNew", "cal_force_stress"); + + // begin the calculation of force and stress + ModuleBase::timer::tick("NonlocalNew", "cal_force_stress"); + + const Parallel_Orbitals* paraV = dmR->get_paraV(); + const int npol = this->ucell->get_npol(); + std::vector stress_tmp(6, 0); + if (cal_force) + { + force.zero_out(); + } + // 1. calculate for each pair of atoms + // loop over all on-site atoms + int atom_index = 0; + for (int iat0 = 0; iat0 < this->ucell->nat; iat0++) + { + // skip the atoms without plus-U + auto tau0 = ucell->get_tau(iat0); + int I0 = 0; + ucell->iat2iait(iat0, &I0, &this->current_type); + + // first step: find the adjacent atoms and filter the real adjacent atoms + AdjacentAtomInfo adjs; + this->gridD->Find_atom(*ucell, tau0, this->current_type, I0, &adjs); + + std::vector is_adj(adjs.adj_num + 1, false); + for (int ad = 0; ad < adjs.adj_num + 1; ++ad) + { + const int T1 = adjs.ntype[ad]; + const int I1 = adjs.natom[ad]; + const int iat1 = ucell->itia2iat(T1, I1); + const ModuleBase::Vector3& R_index1 = adjs.box[ad]; + // choose the real adjacent atoms + // Note: the distance of atoms should less than the cutoff radius, + // When equal, the theoretical value of matrix element is zero, + // but the calculated value is not zero due to the numerical error, which would lead to result changes. + if (this->ucell->cal_dtau(iat0, iat1, R_index1).norm() * this->ucell->lat0 + < orb_cutoff_[T1] + this->ucell->infoNL.Beta[this->current_type].get_rcut_max()) + { + is_adj[ad] = true; + } + } + filter_adjs(is_adj, adjs); + + std::vector>> nlm_iat0(adjs.adj_num + 1); + for (int ad = 0; ad < adjs.adj_num + 1; ++ad) + { + const int T1 = adjs.ntype[ad]; + const int I1 = adjs.natom[ad]; + const int iat1 = ucell->itia2iat(T1, I1); + const ModuleBase::Vector3& tau1 = adjs.adjacent_tau[ad]; + const Atom* atom1 = &ucell->atoms[T1]; + + auto all_indexes = paraV->get_indexes_row(iat1); + auto col_indexes = paraV->get_indexes_col(iat1); + // insert col_indexes into all_indexes to get universal set with no repeat elements + all_indexes.insert(all_indexes.end(), col_indexes.begin(), col_indexes.end()); + std::sort(all_indexes.begin(), all_indexes.end()); + all_indexes.erase(std::unique(all_indexes.begin(), all_indexes.end()), all_indexes.end()); + for (int iw1l = 0; iw1l < all_indexes.size(); iw1l += npol) + { + const int iw1 = all_indexes[iw1l] / npol; + std::vector> nlm; + // nlm is a vector of vectors, but size of outer vector is only 1 here + // If we are calculating force, we need also to store the gradient + // and size of outer vector is then 4 + // inner loop : all projectors (L0,M0) + int L1 = atom1->iw2l[iw1]; + int N1 = atom1->iw2n[iw1]; + int m1 = atom1->iw2m[iw1]; + + // convert m (0,1,...2l) to M (-l, -l+1, ..., l-1, l) + int M1 = (m1 % 2 == 0) ? -m1 / 2 : (m1 + 1) / 2; + + ModuleBase::Vector3 dtau = tau0 - tau1; + intor_->snap(T1, L1, N1, M1, this->current_type, dtau * this->ucell->lat0, true /*cal_deri*/, nlm); + // select the elements of nlm with target_L + const int length = nlm[0].size(); + std::vector nlm_target(length * 4); + // rearrange the nlm_target to store the gradient + for(int index =0;index < length; index++) + { + for (int n = 0; n < 4; n++) // value, deri_x, deri_y, deri_z + { + nlm_target[index + n * length] = nlm[n][index]; + } + } + nlm_iat0[ad].insert({all_indexes[iw1l], nlm_target}); + } + } + + // second iteration to calculate force and stress + for (int ad1 = 0; ad1 < adjs.adj_num + 1; ++ad1) + { + const int T1 = adjs.ntype[ad1]; + const int I1 = adjs.natom[ad1]; + const int iat1 = ucell->itia2iat(T1, I1); + double* force_tmp1 = (cal_force) ? &force(iat1, 0) : nullptr; + double* force_tmp2 = (cal_force) ? &force(iat0, 0) : nullptr; + ModuleBase::Vector3& R_index1 = adjs.box[ad1]; + ModuleBase::Vector3 dis1 = adjs.adjacent_tau[ad1] - tau0; + for (int ad2 = 0; ad2 < adjs.adj_num + 1; ++ad2) + { + const int T2 = adjs.ntype[ad2]; + const int I2 = adjs.natom[ad2]; + const int iat2 = ucell->itia2iat(T2, I2); + ModuleBase::Vector3& R_index2 = adjs.box[ad2]; + ModuleBase::Vector3 dis2 = adjs.adjacent_tau[ad2] - tau0; + ModuleBase::Vector3 R_vector(R_index2[0] - R_index1[0], + R_index2[1] - R_index1[1], + R_index2[2] - R_index1[2]); + const hamilt::BaseMatrix* tmp = dmR->find_matrix(iat1, iat2, R_vector[0], R_vector[1], R_vector[2]); + int row_size = paraV->get_row_size(); + int col_size = paraV->get_col_size(); + if(row_size == 0 || col_size == 0) + { + continue; + } + // if not found , skip this pair of atoms + if (tmp != nullptr) + { + // calculate force + if (cal_force) { + this->cal_force_IJR(iat1, + iat2, + paraV, + nlm_iat0[ad1], + nlm_iat0[ad2], + tmp, + force_tmp1, + force_tmp2); + } + + // calculate stress + if (cal_stress) { + this->cal_stress_IJR(iat1, + iat2, + paraV, + nlm_iat0[ad1], + nlm_iat0[ad2], + tmp, + dis1, + dis2, + stress_tmp.data()); + } + } + } + } + } + + if (cal_force) + { +#ifdef __MPI + // sum up the occupation matrix + Parallel_Reduce::reduce_all(force.c, force.nr * force.nc); +#endif + for (int i = 0; i < force.nr * force.nc; i++) + { + force.c[i] *= 2.0; + } + } + + // stress renormalization + if (cal_stress) + { +#ifdef __MPI + // sum up the occupation matrix + Parallel_Reduce::reduce_all(stress_tmp.data(), 6); +#endif + const double weight = this->ucell->lat0 / this->ucell->omega; + for (int i = 0; i < 6; i++) + { + stress.c[i] = stress_tmp[i] * weight; + } + stress.c[8] = stress.c[5]; // stress(2,2) + stress.c[7] = stress.c[4]; // stress(2,1) + stress.c[6] = stress.c[2]; // stress(2,0) + stress.c[5] = stress.c[4]; // stress(1,2) + stress.c[4] = stress.c[3]; // stress(1,1) + stress.c[3] = stress.c[1]; // stress(1,0) + } + + ModuleBase::timer::tick("NonlocalNew", "cal_force_stress"); +} + +template <> +void NonlocalNew, std::complex>>::cal_force_IJR(const int& iat1, + const int& iat2, + const Parallel_Orbitals* paraV, + const std::unordered_map>& nlm1_all, + const std::unordered_map>& nlm2_all, + const hamilt::BaseMatrix>* dmR_pointer, + double* force1, + double* force2) +{ + // npol is the number of polarizations, + // 1 for non-magnetic (one Hamiltonian matrix only has spin-up or spin-down), + // 2 for magnetic (one Hamiltonian matrix has both spin-up and spin-down) + const int npol = this->ucell->get_npol(); + // --------------------------------------------- + // calculate the Nonlocal matrix for each pair of orbitals + // --------------------------------------------- + auto row_indexes = paraV->get_indexes_row(iat1); + auto col_indexes = paraV->get_indexes_col(iat2); + // step_trace = 0 for NSPIN=2; ={0, 1, local_col, local_col+1} for NSPIN=4 + std::vector step_trace(npol * npol, 0); + if (npol == 2) { + step_trace[1] = 1; + step_trace[2] = col_indexes.size(); + step_trace[3] = col_indexes.size() + 1; + } + // calculate the local matrix + const std::complex* tmp_d = nullptr; + const std::complex* dm_pointer = dmR_pointer->get_pointer(); + for (int iw1l = 0; iw1l < row_indexes.size(); iw1l += npol) + { + const std::vector& nlm1 = nlm1_all.find(row_indexes[iw1l])->second; + const int length = nlm1.size() / 4; + for (int iw2l = 0; iw2l < col_indexes.size(); iw2l += npol) + { + const std::vector& nlm2 = nlm2_all.find(col_indexes[iw2l])->second; +#ifdef __DEBUG + assert(nlm1.size() == nlm2.size()); +#endif + std::vector> nlm_tmp(12, ModuleBase::ZERO); + for (int is = 0; is < 4; ++is) + { + for (int no = 0; no < this->ucell->atoms[this->current_type].ncpp.non_zero_count_soc[is]; no++) + { + const int p1 = this->ucell->atoms[this->current_type].ncpp.index1_soc[is][no]; + const int p2 = this->ucell->atoms[this->current_type].ncpp.index2_soc[is][no]; + this->ucell->atoms[this->current_type].ncpp.get_d(is, p1, p2, tmp_d); + nlm_tmp[is*3] += nlm1[p1 + length] * nlm2[p2] * (*tmp_d); + nlm_tmp[is*3+1] += nlm1[p1 + length * 2] * nlm2[p2] * (*tmp_d); + nlm_tmp[is*3+2] += nlm1[p1 + length * 3] * nlm2[p2] * (*tmp_d); + } + } + // calculate the force, transfer nlm_tmp to pauli matrix + for(int i = 0; i < 3; i++) + { + double tmp = (dm_pointer[step_trace[0]] * nlm_tmp[i] + + dm_pointer[step_trace[1]] * nlm_tmp[i+3] + + dm_pointer[step_trace[2]] * nlm_tmp[i+6] + + dm_pointer[step_trace[3]] * nlm_tmp[i+9]).real(); + force1[i] += tmp; + force2[i] -= tmp; + } + dm_pointer += npol; + } + dm_pointer += (npol - 1) * col_indexes.size(); + } +} + +template <> +void NonlocalNew, std::complex>>::cal_stress_IJR(const int& iat1, + const int& iat2, + const Parallel_Orbitals* paraV, + const std::unordered_map>& nlm1_all, + const std::unordered_map>& nlm2_all, + const hamilt::BaseMatrix>* dmR_pointer, + const ModuleBase::Vector3& dis1, + const ModuleBase::Vector3& dis2, + double* stress) +{ + // npol is the number of polarizations, + // 1 for non-magnetic (one Hamiltonian matrix only has spin-up or spin-down), + // 2 for magnetic (one Hamiltonian matrix has both spin-up and spin-down) + const int npol = this->ucell->get_npol(); + const int npol2 = npol * npol; + // --------------------------------------------- + // calculate the Nonlocal matrix for each pair of orbitals + // --------------------------------------------- + auto row_indexes = paraV->get_indexes_row(iat1); + auto col_indexes = paraV->get_indexes_col(iat2); + // step_trace = 0 for NSPIN=2; ={0, 1, local_col, local_col+1} for NSPIN=4 + std::vector step_trace(npol2, 0); + if (npol == 2) { + step_trace[1] = 1; + step_trace[2] = col_indexes.size(); + step_trace[3] = col_indexes.size() + 1; + } + // calculate the local matrix + const std::complex* tmp_d = nullptr; + const std::complex* dm_pointer = dmR_pointer->get_pointer(); + for (int iw1l = 0; iw1l < row_indexes.size(); iw1l += npol) + { + const std::vector& nlm1 = nlm1_all.find(row_indexes[iw1l])->second; + const int length = nlm1.size() / npol2; + for (int iw2l = 0; iw2l < col_indexes.size(); iw2l += npol) + { + const std::vector& nlm2 = nlm2_all.find(col_indexes[iw2l])->second; +#ifdef __DEBUG + assert(nlm1.size() == nlm2.size()); +#endif + std::vector> nlm_tmp(npol2 * 6, ModuleBase::ZERO); + for (int is = 0; is < 4; ++is) + { + for (int no = 0; no < this->ucell->atoms[this->current_type].ncpp.non_zero_count_soc[is]; no++) + { + const int p1 = this->ucell->atoms[this->current_type].ncpp.index1_soc[is][no]; + const int p2 = this->ucell->atoms[this->current_type].ncpp.index2_soc[is][no]; + this->ucell->atoms[this->current_type].ncpp.get_d(is, p1, p2, tmp_d); + nlm_tmp[is*6] += (nlm1[p1 + length] * dis1.x * nlm2[p2] + nlm1[p1] * nlm2[p2 + length] * dis2.x) * (*tmp_d); + nlm_tmp[is*6+1] += (nlm1[p1 + length] * dis1.y * nlm2[p2] + nlm1[p1] * nlm2[p2 + length] * dis2.y) * (*tmp_d); + nlm_tmp[is*6+2] += (nlm1[p1 + length] * dis1.z * nlm2[p2] + nlm1[p1] * nlm2[p2 + length] * dis2.z) * (*tmp_d); + nlm_tmp[is*6+3] += (nlm1[p1 + length * 2] * dis1.y * nlm2[p2] + nlm1[p1] * nlm2[p2 + length * 2] * dis2.y) * (*tmp_d); + nlm_tmp[is*6+4] += (nlm1[p1 + length * 2] * dis1.z * nlm2[p2] + nlm1[p1] * nlm2[p2 + length * 2] * dis2.z) * (*tmp_d); + nlm_tmp[is*6+5] += (nlm1[p1 + length * 3] * dis1.z * nlm2[p2] + nlm1[p1] * nlm2[p2 + length * 3] * dis2.z) * (*tmp_d); + } + } + // calculate the force, transfer nlm_tmp to pauli matrix + for(int i = 0; i < 6; i++) + { + stress[i] += (dm_pointer[step_trace[0]] * nlm_tmp[i] + + dm_pointer[step_trace[1]] * nlm_tmp[i+6] + + dm_pointer[step_trace[2]] * nlm_tmp[i+12] + + dm_pointer[step_trace[3]] * nlm_tmp[i+18]).real(); + } + dm_pointer += npol; + } + dm_pointer += (npol - 1) * col_indexes.size(); + } +} + +template +void NonlocalNew>::cal_force_IJR(const int& iat1, + const int& iat2, + const Parallel_Orbitals* paraV, + const std::unordered_map>& nlm1_all, + const std::unordered_map>& nlm2_all, + const hamilt::BaseMatrix* dmR_pointer, + double* force1, + double* force2) +{ + // --------------------------------------------- + // calculate the Nonlocal matrix for each pair of orbitals + // --------------------------------------------- + auto row_indexes = paraV->get_indexes_row(iat1); + auto col_indexes = paraV->get_indexes_col(iat2); + // calculate the local matrix + const double* tmp_d = nullptr; + const double* dm_pointer = dmR_pointer->get_pointer(); + for (int iw1l = 0; iw1l < row_indexes.size(); iw1l++) + { + const std::vector& nlm1 = nlm1_all.find(row_indexes[iw1l])->second; + const int length = nlm1.size() / 4; + for (int iw2l = 0; iw2l < col_indexes.size(); iw2l++) + { + const std::vector& nlm2 = nlm2_all.find(col_indexes[iw2l])->second; +#ifdef __DEBUG + assert(nlm1.size() == nlm2.size()); +#endif + std::vector nlm_tmp(3, 0.0); + for (int no = 0; no < this->ucell->atoms[this->current_type].ncpp.non_zero_count_soc[0]; no++) + { + const int p1 = this->ucell->atoms[this->current_type].ncpp.index1_soc[0][no]; + const int p2 = this->ucell->atoms[this->current_type].ncpp.index2_soc[0][no]; + this->ucell->atoms[this->current_type].ncpp.get_d(0, p1, p2, tmp_d); + nlm_tmp[0] += nlm1[p1 + length] * nlm2[p2] * (*tmp_d); + nlm_tmp[1] += nlm1[p1 + length * 2] * nlm2[p2] * (*tmp_d); + nlm_tmp[2] += nlm1[p1 + length * 3] * nlm2[p2] * (*tmp_d); + } + // calculate the force, transfer nlm_tmp to pauli matrix + for(int i = 0; i < 3; i++) + { + force1[i] += dm_pointer[0] * nlm_tmp[i]; + force2[i] -= dm_pointer[0] * nlm_tmp[i]; + } + dm_pointer++; + } + } +} + +template +void NonlocalNew>::cal_stress_IJR(const int& iat1, + const int& iat2, + const Parallel_Orbitals* paraV, + const std::unordered_map>& nlm1_all, + const std::unordered_map>& nlm2_all, + const hamilt::BaseMatrix* dmR_pointer, + const ModuleBase::Vector3& dis1, + const ModuleBase::Vector3& dis2, + double* stress) +{ + // --------------------------------------------- + // calculate the Nonlocal matrix for each pair of orbitals + // --------------------------------------------- + auto row_indexes = paraV->get_indexes_row(iat1); + auto col_indexes = paraV->get_indexes_col(iat2); + // calculate the local matrix + const double* tmp_d = nullptr; + const double* dm_pointer = dmR_pointer->get_pointer(); + for (int iw1l = 0; iw1l < row_indexes.size(); iw1l++) + { + const std::vector& nlm1 = nlm1_all.find(row_indexes[iw1l])->second; + const int length = nlm1.size() / 4; + for (int iw2l = 0; iw2l < col_indexes.size(); iw2l++) + { + const std::vector& nlm2 = nlm2_all.find(col_indexes[iw2l])->second; +#ifdef __DEBUG + assert(nlm1.size() == nlm2.size()); +#endif + std::vector nlm_tmp(6, 0.0); + for (int no = 0; no < this->ucell->atoms[this->current_type].ncpp.non_zero_count_soc[0]; no++) + { + const int p1 = this->ucell->atoms[this->current_type].ncpp.index1_soc[0][no]; + const int p2 = this->ucell->atoms[this->current_type].ncpp.index2_soc[0][no]; + this->ucell->atoms[this->current_type].ncpp.get_d(0, p1, p2, tmp_d); + nlm_tmp[0] += (nlm1[p1 + length] * dis1.x * nlm2[p2] + nlm1[p1] * nlm2[p2 + length] * dis2.x) * (*tmp_d); + nlm_tmp[1] += (nlm1[p1 + length] * dis1.y * nlm2[p2] + nlm1[p1] * nlm2[p2 + length] * dis2.y) * (*tmp_d); + nlm_tmp[2] += (nlm1[p1 + length] * dis1.z * nlm2[p2] + nlm1[p1] * nlm2[p2 + length] * dis2.z) * (*tmp_d); + nlm_tmp[3] += (nlm1[p1 + length * 2] * dis1.y * nlm2[p2] + nlm1[p1] * nlm2[p2 + length * 2] * dis2.y) * (*tmp_d); + nlm_tmp[4] += (nlm1[p1 + length * 2] * dis1.z * nlm2[p2] + nlm1[p1] * nlm2[p2 + length * 2] * dis2.z) * (*tmp_d); + nlm_tmp[5] += (nlm1[p1 + length * 3] * dis1.z * nlm2[p2] + nlm1[p1] * nlm2[p2 + length * 3] * dis2.z) * (*tmp_d); + } + // calculate the force, transfer nlm_tmp to pauli matrix + for(int i = 0; i < 6; i++) + { + stress[i] += dm_pointer[0] * nlm_tmp[i]; + } + dm_pointer++; + } + } +} + +} // namespace hamilt diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/nonlocal_new.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/nonlocal_new.cpp index 17aea64b3d..ea856eed4e 100644 --- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/nonlocal_new.cpp +++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/nonlocal_new.cpp @@ -22,11 +22,15 @@ hamilt::NonlocalNew>::NonlocalNew( { this->cal_type = calculation_type::lcao_fixed; this->ucell = ucell_in; + this->gridD = GridD_in; #ifdef __DEBUG assert(this->ucell != nullptr); #endif // initialize HR to allocate sparse Nonlocal matrix memory - this->initialize_HR(GridD_in); + if(hR_in != nullptr) + { + this->initialize_HR(GridD_in); + } } // destructor @@ -175,7 +179,7 @@ void hamilt::NonlocalNew>::calculate_HR() int M1 = (m1 % 2 == 0) ? -m1 / 2 : (m1 + 1) / 2; ModuleBase::Vector3 dtau = tau0 - tau1; - intor_->snap(T1, L1, N1, M1, T0, dtau * this->ucell->lat0, 0 /*cal_deri*/, nlm); + intor_->snap(T1, L1, N1, M1, T0, dtau * this->ucell->lat0, false /*cal_deri*/, nlm); nlm_tot[ad].insert({all_indexes[iw1l], nlm[0]}); } } @@ -319,6 +323,8 @@ void hamilt::NonlocalNew>::contributeHR() return; } +#include "nonlocal_force_stress.hpp" + template class hamilt::NonlocalNew>; template class hamilt::NonlocalNew, double>>; template class hamilt::NonlocalNew, std::complex>>; diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/nonlocal_new.h b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/nonlocal_new.h index 8206a28895..8816c36a42 100644 --- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/nonlocal_new.h +++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/nonlocal_new.h @@ -54,6 +54,12 @@ class NonlocalNew> : public OperatorLCAO */ virtual void contributeHR() override; + void cal_force_stress(const bool cal_force, + const bool cal_stress, + const HContainer* dmR, + ModuleBase::matrix& force, + ModuleBase::matrix& stress); + virtual void set_HR_fixed(void*) override; private: @@ -95,6 +101,32 @@ class NonlocalNew> : public OperatorLCAO const std::unordered_map>& nlm2_all, TR* data_pointer); + Grid_Driver* gridD = nullptr; + int current_type = 0; + /** + * @brief calculate the atomic Force of atom pair + */ + void cal_force_IJR(const int& iat1, + const int& iat2, + const Parallel_Orbitals* paraV, + const std::unordered_map>& nlm1_all, + const std::unordered_map>& nlm2_all, + const hamilt::BaseMatrix* dmR_pointer, + double* force1, + double* force2); + /** + * @brief calculate the Stress of atom pair + */ + void cal_stress_IJR(const int& iat1, + const int& iat2, + const Parallel_Orbitals* paraV, + const std::unordered_map>& nlm1_all, + const std::unordered_map>& nlm2_all, + const hamilt::BaseMatrix* dmR_pointer, + const ModuleBase::Vector3& dis1, + const ModuleBase::Vector3& dis2, + double* stress); + std::vector adjs_all; }; diff --git a/source/module_lr/dm_trans/dmr_complex.cpp b/source/module_lr/dm_trans/dmr_complex.cpp index 7a400f3bcc..d5dc550bb1 100644 --- a/source/module_lr/dm_trans/dmr_complex.cpp +++ b/source/module_lr/dm_trans/dmr_complex.cpp @@ -5,7 +5,7 @@ namespace elecstate { template<> - void DensityMatrix, std::complex>::cal_DMR() + void DensityMatrix, std::complex>::cal_DMR(int ik_in) { ModuleBase::TITLE("DensityMatrix", "cal_DMR"); ModuleBase::timer::tick("DensityMatrix", "cal_DMR"); @@ -45,6 +45,7 @@ namespace elecstate if (PARAM.inp.nspin != 4) { for (int ik = 0; ik < this->_nk; ++ik) { + if (ik_in >= 0 && ik_in != ik) continue; // cal k_phase // if TK==std::complex, kphase is e^{ikR} const ModuleBase::Vector3 dR(r_index[0], r_index[1], r_index[2]); diff --git a/tests/deepks/602_NO_deepks_d_H2O_md_lda2pbe/result.ref b/tests/deepks/602_NO_deepks_d_H2O_md_lda2pbe/result.ref index df309753f6..c197e2c934 100644 --- a/tests/deepks/602_NO_deepks_d_H2O_md_lda2pbe/result.ref +++ b/tests/deepks/602_NO_deepks_d_H2O_md_lda2pbe/result.ref @@ -1,9 +1,9 @@ -etotref -465.998623450138 -etotperatomref -155.3328744834 -totalforceref 5.535421 -totalstressref 1.520071 +etotref -465.9986234579140 +etotperatomref -155.3328744860 +totalforceref 5.535106 +totalstressref 1.520003 totaldes 2.163703 -deepks_e_dm -57.88571808911911 -deepks_f_label 19.095622987658235 -deepks_s_label 19.250436793943024 -totaltimeref 19.73 +deepks_e_dm -57.885718058035934 +deepks_f_label 19.095598453044364 +deepks_s_label 19.250415261605152 +totaltimeref 12.68 diff --git a/tests/integrate/240_NO_KP_15_SO/INPUT b/tests/integrate/240_NO_KP_15_SO/INPUT index 68227114a3..b74396f89e 100644 --- a/tests/integrate/240_NO_KP_15_SO/INPUT +++ b/tests/integrate/240_NO_KP_15_SO/INPUT @@ -21,6 +21,8 @@ scf_nmax 100 #cal_stress 1 #noncolin 1 lspinorb 1 +cal_force 1 +cal_stress 1 #Parameters (LCAO) basis_type lcao diff --git a/tests/integrate/240_NO_KP_15_SO/KPT b/tests/integrate/240_NO_KP_15_SO/KPT index b5b3bdb1ae..28006d5e2d 100644 --- a/tests/integrate/240_NO_KP_15_SO/KPT +++ b/tests/integrate/240_NO_KP_15_SO/KPT @@ -1,4 +1,4 @@ K_POINTS 0 Gamma -1 1 2 0 0 0 +2 2 2 0 0 0 diff --git a/tests/integrate/240_NO_KP_15_SO/STRU b/tests/integrate/240_NO_KP_15_SO/STRU index 2b0d9ce20c..79211e7719 100644 --- a/tests/integrate/240_NO_KP_15_SO/STRU +++ b/tests/integrate/240_NO_KP_15_SO/STRU @@ -19,7 +19,7 @@ Direct //Cartesian or Direct coordinate. As 0 1 -0.2500000 0.2500000 0.25000000 0 0 0 +0.2600000 0.2400000 0.245000000 0 0 0 Ga //Element Label 0 diff --git a/tests/integrate/240_NO_KP_15_SO/result.ref b/tests/integrate/240_NO_KP_15_SO/result.ref index 3ca29bdb2b..f937ab8b7b 100644 --- a/tests/integrate/240_NO_KP_15_SO/result.ref +++ b/tests/integrate/240_NO_KP_15_SO/result.ref @@ -1,3 +1,5 @@ -etotref -1956.233240048506 -etotperatomref -978.1166200243 -totaltimeref 3.17 +etotref -1964.0663947982982336 +etotperatomref -982.0331973991 +totalforceref 0.162298 +totalstressref 1877.059021 +totaltimeref 2.87 diff --git a/tests/integrate/260_NO_DJ_PK_PU_SO/result.ref b/tests/integrate/260_NO_DJ_PK_PU_SO/result.ref index 8b8a5ca6f6..3c6533ae0f 100644 --- a/tests/integrate/260_NO_DJ_PK_PU_SO/result.ref +++ b/tests/integrate/260_NO_DJ_PK_PU_SO/result.ref @@ -1,5 +1,5 @@ -etotref -6789.2817503491551179 +etotref -6789.2817503491569369 etotperatomref -3394.6408751746 -totalforceref 11.319866 -totalstressref 4893.060045 -totaltimeref 5.40 +totalforceref 11.329526 +totalstressref 4894.902744 +totaltimeref 4.70 diff --git a/tests/integrate/308_NO_GO_CF_RE/result.ref b/tests/integrate/308_NO_GO_CF_RE/result.ref index c5a751c523..6925e496c2 100644 --- a/tests/integrate/308_NO_GO_CF_RE/result.ref +++ b/tests/integrate/308_NO_GO_CF_RE/result.ref @@ -1,8 +1,8 @@ -etotref -195.5641752854862148 +etotref -195.5641752854540130 etotperatomref -97.7820876427 -totalforceref 15.805668 +totalforceref 15.805546 totalstressref 1875.626439 pointgroupref C_2v spacegroupref D_2h nksibzref 1 -totaltimeref 3.56 +totaltimeref 0.69 diff --git a/tests/integrate/320_NO_GO_MD_NVT/result.ref b/tests/integrate/320_NO_GO_MD_NVT/result.ref index 06d27bae34..990d88c942 100644 --- a/tests/integrate/320_NO_GO_MD_NVT/result.ref +++ b/tests/integrate/320_NO_GO_MD_NVT/result.ref @@ -1,5 +1,5 @@ -etotref -196.5943381700034 -etotperatomref -98.2971690850 -totalforceref 4.029842 -totalstressref 1484.529510 -totaltimeref +1.7855 +etotref -196.5943379249982 +etotperatomref -98.2971689625 +totalforceref 4.029872 +totalstressref 1484.529901 +totaltimeref 1.56 diff --git a/tests/integrate/934_NO_Si2_dzp_neq_GPU/result.ref b/tests/integrate/934_NO_Si2_dzp_neq_GPU/result.ref index 66c8d04b01..1a9c186e92 100644 --- a/tests/integrate/934_NO_Si2_dzp_neq_GPU/result.ref +++ b/tests/integrate/934_NO_Si2_dzp_neq_GPU/result.ref @@ -1,6 +1,6 @@ -etotref -200.9596900406533280 +etotref -200.9596900406574775 etotperatomref -100.4798450203 -totalforceref 143.742152 +totalforceref 143.738992 totalstressref 18521.013103 pointgroupref C_1 spacegroupref S_2 diff --git a/tests/integrate/934_NO_Si2_dzp_neq_ns2_GPU/result.ref b/tests/integrate/934_NO_Si2_dzp_neq_ns2_GPU/result.ref index 46619fa8c3..b4a213a7f2 100644 --- a/tests/integrate/934_NO_Si2_dzp_neq_ns2_GPU/result.ref +++ b/tests/integrate/934_NO_Si2_dzp_neq_ns2_GPU/result.ref @@ -1,6 +1,6 @@ -etotref -200.9596872554600964 +etotref -200.9596872554638480 etotperatomref -100.4798436277 -totalforceref 143.742152 +totalforceref 143.738990 totalstressref 18521.013099 pointgroupref C_1 spacegroupref S_2 diff --git a/tests/integrate/934_NO_Si2_tzdp_neq_GPU/result.ref b/tests/integrate/934_NO_Si2_tzdp_neq_GPU/result.ref index cb23324273..4ba15bc604 100644 --- a/tests/integrate/934_NO_Si2_tzdp_neq_GPU/result.ref +++ b/tests/integrate/934_NO_Si2_tzdp_neq_GPU/result.ref @@ -1,6 +1,6 @@ -etotref -202.0113293338589529 +etotref -202.0113293338656604 etotperatomref -101.0056646669 -totalforceref 154.397572 +totalforceref 154.397454 totalstressref 8606.355128 pointgroupref C_1 spacegroupref S_2 diff --git a/tests/integrate/934_NO_Si2_tzdp_neq_ns2_GPU/result.ref b/tests/integrate/934_NO_Si2_tzdp_neq_ns2_GPU/result.ref index 2885572b5e..a0857f8416 100644 --- a/tests/integrate/934_NO_Si2_tzdp_neq_ns2_GPU/result.ref +++ b/tests/integrate/934_NO_Si2_tzdp_neq_ns2_GPU/result.ref @@ -1,6 +1,6 @@ -etotref -202.0113267424005130 +etotref -202.0113267424063963 etotperatomref -101.0056633712 -totalforceref 154.397574 +totalforceref 154.397456 totalstressref 8606.356131 pointgroupref C_1 spacegroupref S_2