diff --git a/source/module_lr/ao_to_mo_transformer/ao_to_mo.h b/source/module_lr/ao_to_mo_transformer/ao_to_mo.h index 79c6ee99b2..23d42271d8 100644 --- a/source/module_lr/ao_to_mo_transformer/ao_to_mo.h +++ b/source/module_lr/ao_to_mo_transformer/ao_to_mo.h @@ -7,7 +7,10 @@ #endif namespace LR { +#ifndef MO_TYPE_H +#define MO_TYPE_H enum MO_TYPE { OO, VO, VV }; +#endif template void ao_to_mo_forloop_serial( const std::vector& mat_ao, @@ -15,7 +18,7 @@ namespace LR const int& nocc, const int& nvirt, T* const mat_mo, - MO_TYPE type = VO); + const MO_TYPE type = VO); template void ao_to_mo_blas( const std::vector& mat_ao, @@ -24,7 +27,7 @@ namespace LR const int& nvirt, T* const mat_mo, const bool add_on = true, - MO_TYPE type = VO); + const MO_TYPE type = VO); #ifdef __MPI template void ao_to_mo_pblas( @@ -38,6 +41,6 @@ namespace LR const Parallel_2D& pmat_mo, T* const mat_mo, const bool add_on = true, - MO_TYPE type = VO); + const MO_TYPE type = VO); #endif } \ No newline at end of file diff --git a/source/module_lr/ao_to_mo_transformer/ao_to_mo_parallel.cpp b/source/module_lr/ao_to_mo_transformer/ao_to_mo_parallel.cpp index dbd0525e74..2cadd4ac0c 100644 --- a/source/module_lr/ao_to_mo_transformer/ao_to_mo_parallel.cpp +++ b/source/module_lr/ao_to_mo_transformer/ao_to_mo_parallel.cpp @@ -21,7 +21,7 @@ namespace LR const Parallel_2D& pmat_mo, double* mat_mo, const bool add_on, - MO_TYPE type) + const MO_TYPE type) { ModuleBase::TITLE("hamilt_lrtd", "ao_to_mo_pblas"); assert(pmat_ao.comm() == pcoeff.comm() && pmat_ao.comm() == pmat_mo.comm()); @@ -79,7 +79,7 @@ namespace LR const Parallel_2D& pmat_mo, std::complex* const mat_mo, const bool add_on, - MO_TYPE type) + const MO_TYPE type) { ModuleBase::TITLE("hamilt_lrtd", "cal_AX_plas"); assert(pmat_ao.comm() == pcoeff.comm() && pmat_ao.comm() == pmat_mo.comm()); diff --git a/source/module_lr/ao_to_mo_transformer/ao_to_mo_serial.cpp b/source/module_lr/ao_to_mo_transformer/ao_to_mo_serial.cpp index 82e5475f6c..04573381a7 100644 --- a/source/module_lr/ao_to_mo_transformer/ao_to_mo_serial.cpp +++ b/source/module_lr/ao_to_mo_transformer/ao_to_mo_serial.cpp @@ -11,7 +11,7 @@ namespace LR const int& nocc, const int& nvirt, double* mat_mo, - MO_TYPE type) + const MO_TYPE type) { ModuleBase::TITLE("hamilt_lrtd", "ao_to_mo_forloop_serial"); const int nks = mat_ao.size(); @@ -49,7 +49,7 @@ namespace LR const int& nocc, const int& nvirt, std::complex* const mat_mo, - MO_TYPE type) + const MO_TYPE type) { ModuleBase::TITLE("hamilt_lrtd", "ao_to_mo_forloop_serial"); const int nks = mat_ao.size(); @@ -88,7 +88,7 @@ namespace LR const int& nvirt, double* mat_mo, const bool add_on, - MO_TYPE type) + const MO_TYPE type) { ModuleBase::TITLE("hamilt_lrtd", "ao_to_mo_blas"); const int nks = mat_ao.size(); @@ -129,7 +129,7 @@ namespace LR const int& nvirt, std::complex* const mat_mo, const bool add_on, - MO_TYPE type) + const MO_TYPE type) { ModuleBase::TITLE("hamilt_lrtd", "ao_to_mo_blas"); const int nks = mat_ao.size(); diff --git a/source/module_lr/dm_trans/dm_trans.h b/source/module_lr/dm_trans/dm_trans.h index 77cb0870f1..a02f2999d0 100644 --- a/source/module_lr/dm_trans/dm_trans.h +++ b/source/module_lr/dm_trans/dm_trans.h @@ -8,7 +8,12 @@ #endif namespace LR { - // use templates in the future. + +#ifndef MO_TYPE_H +#define MO_TYPE_H + enum MO_TYPE { OO, VO, VV }; +#endif + #ifdef __MPI /// @brief calculate the 2d-block transition density matrix in AO basis using p?gemm /// \f[ \tilde{\rho}_{\mu_j\mu_b}=\sum_{jb}c_{j,\mu_j}X_{jb}c^*_{b,\mu_b} \f] @@ -22,8 +27,8 @@ namespace LR const int nocc, const int nvirt, const Parallel_2D& pmat, - const bool renorm_k = true, - const int nspin = 1); + const T factor = (T)1.0, + const MO_TYPE type = MO_TYPE::VO); #endif /// @brief calculate the 2d-block transition density matrix in AO basis using ?gemm @@ -32,8 +37,8 @@ namespace LR const T* const X_istate, const psi::Psi& c, const int& nocc, const int& nvirt, - const bool renorm_k = true, - const int nspin = 1); + const T factor = (T)1.0, + const MO_TYPE type = MO_TYPE::VO); // for test /// @brief calculate the 2d-block transition density matrix in AO basis using for loop (for test) @@ -42,6 +47,6 @@ namespace LR const T* const X_istate, const psi::Psi& c, const int& nocc, const int& nvirt, - const bool renorm_k = true, - const int nspin = 1); + const T factor = (T)1.0, + const MO_TYPE type = MO_TYPE::VO); } \ No newline at end of file diff --git a/source/module_lr/dm_trans/dm_trans_parallel.cpp b/source/module_lr/dm_trans/dm_trans_parallel.cpp index 701bd87621..861a8f8ceb 100644 --- a/source/module_lr/dm_trans/dm_trans_parallel.cpp +++ b/source/module_lr/dm_trans/dm_trans_parallel.cpp @@ -18,8 +18,8 @@ std::vector cal_dm_trans_pblas(const double* const X_istate, const int nocc, const int nvirt, const Parallel_2D& pmat, - const bool renorm_k, - const int nspin) + const double factor, + const MO_TYPE type) { ModuleBase::TITLE("hamilt_lrtd", "cal_dm_trans_pblas"); assert(px.comm() == pc.comm() && px.comm() == pmat.comm()); @@ -27,6 +27,12 @@ std::vector cal_dm_trans_pblas(const double* const X_istate, assert(pmat.get_local_size() > 0); const int nks = c.get_nk(); + const int i1 = 1; + const int ivirt = nocc + 1; + const int nmo1 = type == MO_TYPE::VV ? nvirt : nocc; + const int nmo2 = type == MO_TYPE::OO ? nocc : nvirt; + const int imo1 = type == MO_TYPE::VV ? ivirt : i1; + const int imo2 = type == MO_TYPE::OO ? i1 : ivirt; std::vector dm_trans(nks, container::Tensor(DAT::DT_DOUBLE, DEV::CpuDevice, { pmat.get_col_size(), pmat.get_row_size() })); @@ -34,8 +40,7 @@ std::vector cal_dm_trans_pblas(const double* const X_istate, { c.fix_k(isk); const int x_start = isk * px.get_local_size(); - int i1 = 1; - int ivirt = nocc + 1; + char transa = 'N'; char transb = 'T'; const double alpha = 1.0; @@ -43,19 +48,19 @@ std::vector cal_dm_trans_pblas(const double* const X_istate, // 1. [X*C_occ^T]^T=C_occ*X^T Parallel_2D pXc; // nvirt*naos - LR_Util::setup_2d_division(pXc, px.get_block_size(), naos, nvirt, px.blacs_ctxt); + LR_Util::setup_2d_division(pXc, px.get_block_size(), naos, nmo2, px.blacs_ctxt); container::Tensor Xc(DAT::DT_DOUBLE, DEV::CpuDevice, {pXc.get_col_size(), pXc.get_row_size()}); // row is "inside"(memory contiguity) for pblas Xc.zero(); - pdgemm_(&transa, &transb, &naos, &nvirt, &nocc, - &alpha, c.get_pointer(), &i1, &i1, pc.desc, + pdgemm_(&transa, &transb, &naos, &nmo2, &nmo1, + &alpha, c.get_pointer(), &i1, &imo1, pc.desc, X_istate + x_start, &i1, &i1, px.desc, &beta, Xc.data(), &i1, &i1, pXc.desc); // 2. C_virt*[X*C_occ^T] - pdgemm_(&transa, &transb, &naos, &naos, &nvirt, - &alpha, c.get_pointer(), &i1, &ivirt, pc.desc, + pdgemm_(&transa, &transb, &naos, &naos, &nmo2, + &factor, c.get_pointer(), &i1, &imo2, pc.desc, Xc.data(), &i1, &i1, pXc.desc, &beta, dm_trans[isk].data(), &i1, &i1, pmat.desc); } @@ -70,14 +75,20 @@ std::vector cal_dm_trans_pblas(const std::complex* co const int nocc, const int nvirt, const Parallel_2D& pmat, - const bool renorm_k, - const int nspin) + const std::complex factor, + const MO_TYPE type) { ModuleBase::TITLE("hamilt_lrtd", "cal_dm_trans_pblas"); assert(px.comm() == pc.comm() && px.comm() == pmat.comm()); assert(px.blacs_ctxt == pc.blacs_ctxt && px.blacs_ctxt == pmat.blacs_ctxt); assert(pmat.get_local_size() > 0); const int nks = c.get_nk(); + const int i1 = 1; + const int ivirt = nocc + 1; + const int nmo1 = type == MO_TYPE::VV ? nvirt : nocc; + const int nmo2 = type == MO_TYPE::OO ? nocc : nvirt; + const int imo1 = type == MO_TYPE::VV ? ivirt : i1; + const int imo2 = type == MO_TYPE::OO ? i1 : ivirt; std::vector dm_trans(nks, container::Tensor(DAT::DT_COMPLEX_DOUBLE, DEV::CpuDevice, {pmat.get_col_size(), pmat.get_row_size()})); @@ -85,8 +96,6 @@ std::vector cal_dm_trans_pblas(const std::complex* co { c.fix_k(isk); const int x_start = isk * px.get_local_size(); - int i1 = 1; - int ivirt = nocc + 1; // ============== C_virt * X * C_occ^\dagger============= // char transa = 'N'; @@ -114,24 +123,23 @@ std::vector cal_dm_trans_pblas(const std::complex* co char transa = 'N'; char transb = 'C'; Parallel_2D pXc; - LR_Util::setup_2d_division(pXc, px.get_block_size(), nvirt, naos, px.blacs_ctxt); + LR_Util::setup_2d_division(pXc, px.get_block_size(), nmo2, naos, px.blacs_ctxt); container::Tensor Xc(DAT::DT_COMPLEX_DOUBLE, DEV::CpuDevice, {pXc.get_col_size(), pXc.get_row_size()}); // row is "inside"(memory contiguity) for pblas Xc.zero(); - std::complex alpha(1.0, 0.0); + const std::complex alpha(1.0, 0.0); const std::complex beta(0.0, 0.0); - pzgemm_(&transa, &transb, &nvirt, &naos, &nocc, &alpha, + pzgemm_(&transa, &transb, &nmo2, &naos, &nmo1, &alpha, X_istate + x_start, &i1, &i1, px.desc, - c.get_pointer(), &i1, &i1, pc.desc, + c.get_pointer(), &i1, &imo1, pc.desc, &beta, Xc.data>(), &i1, &i1, pXc.desc); // 2. [X*C_occ^\dagger]^TC_virt^T - alpha.real(renorm_k ? 1.0 / static_cast(nks) : 1.0); transa = transb = 'T'; - pzgemm_(&transa, &transb, &naos, &naos, &nvirt, - &alpha, Xc.data>(), &i1, &i1, pXc.desc, - c.get_pointer(), &i1, &ivirt, pc.desc, + pzgemm_(&transa, &transb, &naos, &naos, &nmo2, + &factor, Xc.data>(), &i1, &i1, pXc.desc, + c.get_pointer(), &i1, &imo2, pc.desc, &beta, dm_trans[isk].data>(), &i1, &i1, pmat.desc); } return dm_trans; diff --git a/source/module_lr/dm_trans/dm_trans_serial.cpp b/source/module_lr/dm_trans/dm_trans_serial.cpp index 2a098c08c4..cf3571bfc3 100644 --- a/source/module_lr/dm_trans/dm_trans_serial.cpp +++ b/source/module_lr/dm_trans/dm_trans_serial.cpp @@ -10,12 +10,17 @@ namespace LR const psi::Psi& c, const int& nocc, const int& nvirt, - const bool renorm_k, - const int nspin) + const double factor, + const MO_TYPE type) { // cxc_out_test(X_istate, c); ModuleBase::TITLE("hamilt_lrtd", "cal_dm_trans_forloop"); const int nks = c.get_nk(); + const int imo1 = type == MO_TYPE::VV ? nocc : 0; + const int imo2 = type == MO_TYPE::OO ? 0 : nocc; + const int nmo1 = type == MO_TYPE::VV ? nvirt : nocc; + const int nmo2 = type == MO_TYPE::OO ? nocc : nvirt; + const int naos = c.get_nbasis(); std::vector dm_trans(nks, container::Tensor(DAT::DT_DOUBLE, DEV::CpuDevice, { naos, naos })); for (auto& dm : dm_trans)ModuleBase::GlobalFunc::ZEROS(dm.data(), naos * naos); @@ -23,16 +28,16 @@ namespace LR for (size_t isk = 0;isk < nks;++isk) { c.fix_k(isk); - const int x_start = isk * nocc * nvirt; + const int x_start = isk * nmo1 * nmo2; for (size_t mu = 0;mu < naos;++mu) { for (size_t nu = 0;nu < naos;++nu) { // loop for ks states - for (size_t j = 0;j < nocc;++j) + for (size_t p = 0;p < nmo1;++p) { - for (size_t b = 0; b < nvirt;++b) - dm_trans[isk].data()[mu * naos + nu] += c(j, mu) * X_istate[x_start + j * nvirt + b] * c(nocc + b, nu); + for (size_t q = 0; q < nmo2;++q) + dm_trans[isk].data()[mu * naos + nu] += c(imo1 + p, mu) * X_istate[x_start + p * nmo2 + q] * c(imo2 + q, nu) * factor; } } } @@ -45,30 +50,34 @@ namespace LR const psi::Psi>& c, const int& nocc, const int& nvirt, - const bool renorm_k, - const int nspin) + const std::complex factor, + const MO_TYPE type) { // cxc_out_test(X_istate, c); ModuleBase::TITLE("hamilt_lrtd", "cal_dm_trans_forloop"); const int nks = c.get_nk(); - int naos = c.get_nbasis(); + const int naos = c.get_nbasis(); + const int imo1 = type == MO_TYPE::VV ? nocc : 0; + const int imo2 = type == MO_TYPE::OO ? 0 : nocc; + const int nmo1 = type == MO_TYPE::VV ? nvirt : nocc; + const int nmo2 = type == MO_TYPE::OO ? nocc : nvirt; std::vector dm_trans(nks, container::Tensor(DAT::DT_COMPLEX_DOUBLE, DEV::CpuDevice, { naos, naos })); for (auto& dm : dm_trans)ModuleBase::GlobalFunc::ZEROS(dm.data>(), naos * naos); // loop for AOs for (size_t isk = 0;isk < nks;++isk) { c.fix_k(isk); - const int x_start = isk * nocc * nvirt; + const int x_start = isk * nmo1 * nmo2; for (size_t mu = 0;mu < naos;++mu) { for (size_t nu = 0;nu < naos;++nu) { // loop for ks states - for (size_t j = 0;j < nocc;++j) + for (size_t p = 0;p < nmo1;++p) { - for (size_t b = 0; b < nvirt;++b) + for (size_t q = 0; q < nmo2;++q) dm_trans[isk].data>()[nu * naos + mu] += - std::conj(c(j, mu)) * X_istate[x_start + j * nvirt + b] * c(nocc + b, nu) / static_cast(nks / nspin); + std::conj(c(imo1 + p, mu)) * X_istate[x_start + p * nmo2 + q] * c(imo2 + q, nu) * factor; } } } @@ -82,29 +91,33 @@ namespace LR const psi::Psi& c, const int& nocc, const int& nvirt, - const bool renorm_k, - const int nspin) + const double factor, + const MO_TYPE type) { ModuleBase::TITLE("hamilt_lrtd", "cal_dm_trans_blas"); const int nks = c.get_nk(); - int naos = c.get_nbasis(); + const int naos = c.get_nbasis(); + const int imo1 = type == MO_TYPE::VV ? nocc : 0; + const int imo2 = type == MO_TYPE::OO ? 0 : nocc; + const int nmo1 = type == MO_TYPE::VV ? nvirt : nocc; + const int nmo2 = type == MO_TYPE::OO ? nocc : nvirt; std::vector dm_trans(nks, container::Tensor(DAT::DT_DOUBLE, DEV::CpuDevice, { naos, naos })); for (size_t isk = 0;isk < nks;++isk) { c.fix_k(isk); - const int x_start = isk * nocc * nvirt; + const int x_start = isk * nmo1 * nmo2; // 1. [X*C_occ^T]^T=C_occ*X^T char transa = 'N'; char transb = 'T'; const double alpha = 1.0; const double beta = 0.0; - container::Tensor Xc(DAT::DT_DOUBLE, DEV::CpuDevice, { nvirt, naos }); - dgemm_(&transa, &transb, &naos, &nvirt, &nocc, &alpha, - c.get_pointer(), &naos, X_istate + x_start, &nvirt, + container::Tensor Xc(DAT::DT_DOUBLE, DEV::CpuDevice, { nmo2, naos }); + dgemm_(&transa, &transb, &naos, &nmo2, &nmo1, &alpha, + c.get_pointer(imo1), &naos, X_istate + x_start, &nmo2, &beta, Xc.data(), &naos); // 2. C_virt*[X*C_occ^T] - dgemm_(&transa, &transb, &naos, &naos, &nvirt, &alpha, - c.get_pointer(nocc), &naos, Xc.data(), &naos, &beta, + dgemm_(&transa, &transb, &naos, &naos, &nmo2, &factor, + c.get_pointer(imo2), &naos, Xc.data(), &naos, &beta, dm_trans[isk].data(), &naos); } return dm_trans; @@ -116,21 +129,25 @@ namespace LR const psi::Psi>& c, const int& nocc, const int& nvirt, - const bool renorm_k, - const int nspin) + const std::complex factor, + const MO_TYPE type) { ModuleBase::TITLE("hamilt_lrtd", "cal_dm_trans_blas"); const int nks = c.get_nk(); - int naos = c.get_nbasis(); + const int naos = c.get_nbasis(); + const int imo1 = type == MO_TYPE::VV ? nocc : 0; + const int imo2 = type == MO_TYPE::OO ? 0 : nocc; + const int nmo1 = type == MO_TYPE::VV ? nvirt : nocc; + const int nmo2 = type == MO_TYPE::OO ? nocc : nvirt; std::vector dm_trans(nks, container::Tensor(DAT::DT_COMPLEX_DOUBLE, DEV::CpuDevice, { naos, naos })); for (size_t isk = 0;isk < nks;++isk) { c.fix_k(isk); - const int x_start = isk * nocc * nvirt; + const int x_start = isk * nmo1 * nmo2; char transa = 'N'; char transb = 'C'; - std::complex alpha(1.0, 0.0); + const std::complex alpha(1.0, 0.0); const std::complex beta(0.0, 0.0); // ============== C_virt * X * C_occ^\dagger============= @@ -148,15 +165,14 @@ namespace LR // ============== [C_virt * X * C_occ^\dagger]^T============= // ============== = [C_occ^* * X^T * C_virt^T]^T============= // 1. X*C_occ^\dagger - container::Tensor Xc(DAT::DT_COMPLEX_DOUBLE, DEV::CpuDevice, { naos, nvirt }); - zgemm_(&transa, &transb, &nvirt, &naos, &nocc, &alpha, - X_istate + x_start, &nvirt, c.get_pointer(), &naos, - &beta, Xc.data>(), &nvirt); + container::Tensor Xc(DAT::DT_COMPLEX_DOUBLE, DEV::CpuDevice, { naos, nmo2 }); + zgemm_(&transa, &transb, &nmo2, &naos, &nmo1, &alpha, + X_istate + x_start, &nmo2, c.get_pointer(imo1), &naos, + &beta, Xc.data>(), &nmo2); // 2. [X*C_occ^\dagger]^TC_virt^T transa = transb = 'T'; - alpha.real(renorm_k ? 1.0 / static_cast(nks / nspin) : 1.0); - zgemm_(&transa, &transb, &naos, &naos, &nvirt, &alpha, - Xc.data>(), &nvirt, c.get_pointer(nocc), &naos, &beta, + zgemm_(&transa, &transb, &naos, &naos, &nmo2, &factor, + Xc.data>(), &nmo2, c.get_pointer(imo2), &naos, &beta, dm_trans[isk].data>(), &naos); } return dm_trans; diff --git a/source/module_lr/dm_trans/test/dm_trans_test.cpp b/source/module_lr/dm_trans/test/dm_trans_test.cpp index 06e412155f..8a40f08c61 100644 --- a/source/module_lr/dm_trans/test/dm_trans_test.cpp +++ b/source/module_lr/dm_trans/test/dm_trans_test.cpp @@ -61,19 +61,29 @@ TEST_F(DMTransTest, DoubleSerial) { for (auto s : this->sizes) { - psi::Psi X(s.nks, nstate, s.nocc * s.nvirt, nullptr, false); - set_rand(X.get_pointer(), nstate * s.nks * s.nocc * s.nvirt); + psi::Psi X_vo(s.nks, nstate, s.nocc * s.nvirt, nullptr, false); + set_rand(X_vo.get_pointer(), nstate * s.nks * s.nocc * s.nvirt); + psi::Psi X_oo(s.nks, nstate, s.nocc * s.nocc, nullptr, false); + set_rand(X_oo.get_pointer(), nstate * s.nks * s.nocc * s.nocc); + psi::Psi X_vv(s.nks, nstate, s.nvirt * s.nvirt, nullptr, false); + set_rand(X_vv.get_pointer(), nstate * s.nks * s.nvirt * s.nvirt); for (int istate = 0;istate < nstate;++istate) { - int size_c = s.nks * (s.nocc + s.nvirt) * s.naos; + const int size_c = s.nks * (s.nocc + s.nvirt) * s.naos; std::vector temp(s.nks, s.naos); psi::Psi c(s.nks, s.nocc + s.nvirt, s.naos, temp.data(), true); set_rand(c.get_pointer(), size_c); - X.fix_b(istate); - const std::vector& dm_for = LR::cal_dm_trans_forloop_serial(X.get_pointer(), c, s.nocc, s.nvirt); - const std::vector& dm_blas = LR::cal_dm_trans_blas(X.get_pointer(), c, s.nocc, s.nvirt); - for (int isk = 0;isk < s.nks;++isk) check_eq(dm_for[isk].data(), dm_blas[isk].data(), s.naos * s.naos); + auto test = [&](psi::Psi& X, const LR::MO_TYPE type) + { + X.fix_b(istate); + const std::vector& dm_for = LR::cal_dm_trans_forloop_serial(X.get_pointer(), c, s.nocc, s.nvirt, 1., type); + const std::vector& dm_blas = LR::cal_dm_trans_blas(X.get_pointer(), c, s.nocc, s.nvirt, 1., type); + for (int isk = 0;isk < s.nks;++isk) check_eq(dm_for[isk].data(), dm_blas[isk].data(), s.naos * s.naos); + }; + test(X_vo, LR::MO_TYPE::VO); + test(X_oo, LR::MO_TYPE::OO); + test(X_vv, LR::MO_TYPE::VV); } } @@ -82,19 +92,29 @@ TEST_F(DMTransTest, ComplexSerial) { for (auto s : this->sizes) { - psi::Psi> X(s.nks, nstate, s.nocc * s.nvirt, nullptr, false); - set_rand(X.get_pointer(), nstate * s.nks * s.nocc * s.nvirt); + psi::Psi> X_vo(s.nks, nstate, s.nocc * s.nvirt, nullptr, false); + set_rand(X_vo.get_pointer(), nstate * s.nks * s.nocc * s.nvirt); + psi::Psi> X_oo(s.nks, nstate, s.nocc * s.nocc, nullptr, false); + set_rand(X_oo.get_pointer(), nstate * s.nks * s.nocc * s.nocc); + psi::Psi> X_vv(s.nks, nstate, s.nvirt * s.nvirt, nullptr, false); + set_rand(X_vv.get_pointer(), nstate * s.nks * s.nvirt * s.nvirt); for (int istate = 0;istate < nstate;++istate) { - int size_c = s.nks * (s.nocc + s.nvirt) * s.naos; + const int size_c = s.nks * (s.nocc + s.nvirt) * s.naos; std::vector temp(s.nks, s.naos); psi::Psi> c(s.nks, s.nocc + s.nvirt, s.naos, temp.data(), true); set_rand(c.get_pointer(), size_c); - X.fix_b(istate); - const std::vector& dm_for = LR::cal_dm_trans_forloop_serial(X.get_pointer(), c, s.nocc, s.nvirt); - const std::vector& dm_blas = LR::cal_dm_trans_blas(X.get_pointer(), c, s.nocc, s.nvirt); - for (int isk = 0;isk < s.nks;++isk) check_eq(dm_for[isk].data>(), dm_blas[isk].data>(), s.naos * s.naos); + auto test = [&](psi::Psi>& X, const LR::MO_TYPE type) + { + X.fix_b(istate); + const std::vector& dm_for = LR::cal_dm_trans_forloop_serial(X.get_pointer(), c, s.nocc, s.nvirt, std::complex(1., 0.), type); + const std::vector& dm_blas = LR::cal_dm_trans_blas(X.get_pointer(), c, s.nocc, s.nvirt, std::complex(1., 0.), type); + for (int isk = 0;isk < s.nks;++isk) check_eq(dm_for[isk].data>(), dm_blas[isk].data>(), s.naos * s.naos); + }; + test(X_vo, LR::MO_TYPE::VO); + test(X_oo, LR::MO_TYPE::OO); + test(X_vv, LR::MO_TYPE::VV); } } @@ -107,54 +127,60 @@ TEST_F(DMTransTest, DoubleParallel) { // c: nao*nbands in para2d, nbands*nao in psi (row-para and constructed: nao) // X: nvirt*nocc in para2d, nocc*nvirt in psi (row-para and constructed: nvirt) - Parallel_2D px; - LR_Util::setup_2d_division(px, s.nb, s.nvirt, s.nocc); + Parallel_2D px_vo, px_oo, px_vv; + LR_Util::setup_2d_division(px_vo, s.nb, s.nvirt, s.nocc); + LR_Util::setup_2d_division(px_oo, s.nb, s.nocc, s.nocc, px_vo.blacs_ctxt); + LR_Util::setup_2d_division(px_vv, s.nb, s.nvirt, s.nvirt, px_vo.blacs_ctxt); + + psi::Psi X_vo(s.nks, nstate, px_vo.get_local_size(), nullptr, false); + set_rand(X_vo.get_pointer(), nstate * s.nks * px_vo.get_local_size()); + psi::Psi X_oo(s.nks, nstate, px_oo.get_local_size(), nullptr, false); + set_rand(X_oo.get_pointer(), nstate * s.nks * px_oo.get_local_size()); + psi::Psi X_vv(s.nks, nstate, px_vv.get_local_size(), nullptr, false); + set_rand(X_vv.get_pointer(), nstate * s.nks * px_vv.get_local_size()); - std::vector temp_1(s.nks, px.get_local_size()); - psi::Psi X(s.nks, nstate, px.get_local_size(), temp_1.data(), false); Parallel_2D pc; - LR_Util::setup_2d_division(pc, s.nb, s.naos, s.nocc + s.nvirt, px.blacs_ctxt); + LR_Util::setup_2d_division(pc, s.nb, s.naos, s.nocc + s.nvirt, px_vo.blacs_ctxt); std::vector temp_2(s.nks, pc.get_row_size()); psi::Psi c(s.nks, pc.get_col_size(), pc.get_row_size(), temp_2.data(), true); Parallel_2D pmat; - LR_Util::setup_2d_division(pmat, s.nb, s.naos, s.naos, px.blacs_ctxt); + LR_Util::setup_2d_division(pmat, s.nb, s.naos, s.naos, px_vo.blacs_ctxt); - EXPECT_EQ(px.dim0, pc.dim0); - EXPECT_EQ(px.dim1, pc.dim1); - EXPECT_GE(s.nvirt, px.dim0); - EXPECT_GE(s.nocc, px.dim1); + EXPECT_EQ(px_vo.dim0, pc.dim0); + EXPECT_EQ(px_vo.dim1, pc.dim1); + EXPECT_GE(s.nvirt, px_vo.dim0); + EXPECT_GE(s.nocc, px_vo.dim1); EXPECT_GE(s.naos, pc.dim0); - set_rand(X.get_pointer(), nstate * s.nks * px.get_local_size()); //set X and X_full - psi::Psi X_full(s.nks, nstate, s.nocc * s.nvirt, nullptr, false); // allocate X_full - for (int istate = 0;istate < nstate;++istate) - { - X.fix_b(istate); - X_full.fix_b(istate); - for (int isk = 0;isk < s.nks;++isk) + psi::Psi X_full_vo(s.nks, nstate, s.nocc * s.nvirt, nullptr, false); // allocate X_full + psi::Psi X_full_oo(s.nks, nstate, s.nocc * s.nocc, nullptr, false); // allocate X_full + psi::Psi X_full_vv(s.nks, nstate, s.nvirt * s.nvirt, nullptr, false); // allocate X_full + + auto gather = [&](const psi::Psi& X, psi::Psi& X_full, const Parallel_2D& px, const int dim1, const int dim2) { - X.fix_k(isk); - X_full.fix_k(isk); - LR_Util::gather_2d_to_full(px, X.get_pointer(), X_full.get_pointer(), false, s.nvirt, s.nocc); - } - } + for (int istate = 0;istate < nstate;++istate) + { + X.fix_b(istate); + X_full.fix_b(istate); + for (int isk = 0;isk < s.nks;++isk) + { + X.fix_k(isk); + X_full.fix_k(isk); + LR_Util::gather_2d_to_full(px, X.get_pointer(), X_full.get_pointer(), false, dim1, dim2); + } + } + }; + gather(X_vo, X_full_vo, px_vo, s.nvirt, s.nocc); + gather(X_oo, X_full_oo, px_oo, s.nocc, s.nocc); + gather(X_vv, X_full_vv, px_vv, s.nvirt, s.nvirt); + for (int istate = 0;istate < nstate;++istate) { c.fix_k(0); set_rand(c.get_pointer(), s.nks * pc.get_local_size()); // set c - X.fix_b(istate); - X_full.fix_b(istate); - - std::vector dm_pblas_loc = LR::cal_dm_trans_pblas(X.get_pointer(), px, c, pc, s.naos, s.nocc, s.nvirt, pmat); - - // gather dm and output - std::vector dm_gather(s.nks, container::Tensor(DAT::DT_DOUBLE, DEV::CpuDevice, { s.naos, s.naos })); - for (int isk = 0;isk < s.nks;++isk) - LR_Util::gather_2d_to_full(pmat, dm_pblas_loc[isk].data(), dm_gather[isk].data(), false, s.naos, s.naos); - - // compare to global matrix + // gather C std::vector temp(s.nks, s.naos); psi::Psi c_full(s.nks, s.nocc + s.nvirt, s.naos, temp.data(), true); for (int isk = 0;isk < s.nks;++isk) @@ -163,11 +189,26 @@ TEST_F(DMTransTest, DoubleParallel) c_full.fix_k(isk); LR_Util::gather_2d_to_full(pc, c.get_pointer(), c_full.get_pointer(), false, s.naos, s.nocc + s.nvirt); } - if (my_rank == 0) - { - const std::vector& dm_full = LR::cal_dm_trans_blas(X_full.get_pointer(), c_full, s.nocc, s.nvirt); - for (int isk = 0;isk < s.nks;++isk) check_eq(dm_full[isk].data(), dm_gather[isk].data(), s.naos * s.naos); - } + + auto test = [&](psi::Psi& X, psi::Psi& X_full, const Parallel_2D& px, const LR::MO_TYPE type) + { + X.fix_b(istate); + X_full.fix_b(istate); + std::vector dm_pblas_loc = LR::cal_dm_trans_pblas(X.get_pointer(), px, c, pc, s.naos, s.nocc, s.nvirt, pmat, (double)1.0 / (double)s.nks, type); + std::vector dm_gather(s.nks, container::Tensor(DAT::DT_DOUBLE, DEV::CpuDevice, { s.naos, s.naos })); + for (int isk = 0;isk < s.nks;++isk) + { + LR_Util::gather_2d_to_full(pmat, dm_pblas_loc[isk].data(), dm_gather[isk].data(), false, s.naos, s.naos); + } + if (my_rank == 0) + { + const std::vector& dm_full = LR::cal_dm_trans_blas(X_full.get_pointer(), c_full, s.nocc, s.nvirt, (double)1.0 / (double)s.nks, type); + for (int isk = 0;isk < s.nks;++isk) check_eq(dm_full[isk].data(), dm_gather[isk].data(), s.naos * s.naos); + } + }; + test(X_vo, X_full_vo, px_vo, LR::MO_TYPE::VO); + test(X_oo, X_full_oo, px_oo, LR::MO_TYPE::OO); + test(X_vv, X_full_vv, px_vv, LR::MO_TYPE::VV); } } } @@ -177,45 +218,52 @@ TEST_F(DMTransTest, ComplexParallel) { // c: nao*nbands in para2d, nbands*nao in psi (row-para and constructed: nao) // X: nvirt*nocc in para2d, nocc*nvirt in psi (row-para and constructed: nvirt) - Parallel_2D px; - LR_Util::setup_2d_division(px, s.nb, s.nvirt, s.nocc); - psi::Psi> X(s.nks, nstate, px.get_local_size(), nullptr, false); + Parallel_2D px_vo, px_oo, px_vv; + LR_Util::setup_2d_division(px_vo, s.nb, s.nvirt, s.nocc); + LR_Util::setup_2d_division(px_oo, s.nb, s.nocc, s.nocc, px_vo.blacs_ctxt); + LR_Util::setup_2d_division(px_vv, s.nb, s.nvirt, s.nvirt, px_vo.blacs_ctxt); + + psi::Psi> X_vo(s.nks, nstate, px_vo.get_local_size(), nullptr, false); + set_rand(X_vo.get_pointer(), nstate * s.nks * px_vo.get_local_size()); + psi::Psi> X_oo(s.nks, nstate, px_oo.get_local_size(), nullptr, false); + set_rand(X_oo.get_pointer(), nstate * s.nks * px_oo.get_local_size()); + psi::Psi> X_vv(s.nks, nstate, px_vv.get_local_size(), nullptr, false); + set_rand(X_vv.get_pointer(), nstate * s.nks * px_vv.get_local_size()); + Parallel_2D pc; - LR_Util::setup_2d_division(pc, s.nb, s.naos, s.nocc + s.nvirt, px.blacs_ctxt); + LR_Util::setup_2d_division(pc, s.nb, s.naos, s.nocc + s.nvirt, px_vo.blacs_ctxt); std::vector temp(s.nks, pc.get_row_size()); psi::Psi> c(s.nks, pc.get_col_size(), pc.get_row_size(), temp.data(), true); Parallel_2D pmat; - LR_Util::setup_2d_division(pmat, s.nb, s.naos, s.naos, px.blacs_ctxt); + LR_Util::setup_2d_division(pmat, s.nb, s.naos, s.naos, px_vo.blacs_ctxt); - set_rand(X.get_pointer(), nstate * s.nks * px.get_local_size()); //set X and X_full - psi::Psi> X_full(s.nks, nstate, s.nocc * s.nvirt, nullptr, false); // allocate X_full - for (int istate = 0;istate < nstate;++istate) - { - X.fix_b(istate); - X_full.fix_b(istate); - for (int isk = 0;isk < s.nks;++isk) + psi::Psi> X_full_vo(s.nks, nstate, s.nocc * s.nvirt, nullptr, false); // allocate X_full + psi::Psi> X_full_oo(s.nks, nstate, s.nocc * s.nocc, nullptr, false); // allocate X_full + psi::Psi> X_full_vv(s.nks, nstate, s.nvirt * s.nvirt, nullptr, false); // allocate X_full + + auto gather = [&](const psi::Psi>& X, psi::Psi>& X_full, const Parallel_2D& px, const int dim1, const int dim2) { - X.fix_k(isk); - X_full.fix_k(isk); - LR_Util::gather_2d_to_full(px, X.get_pointer(), X_full.get_pointer(), false, s.nvirt, s.nocc); - } - } + for (int istate = 0;istate < nstate;++istate) + { + X.fix_b(istate); + X_full.fix_b(istate); + for (int isk = 0;isk < s.nks;++isk) + { + X.fix_k(isk); + X_full.fix_k(isk); + LR_Util::gather_2d_to_full(px, X.get_pointer(), X_full.get_pointer(), false, dim1, dim2); + } + } + }; + gather(X_vo, X_full_vo, px_vo, s.nvirt, s.nocc); + gather(X_oo, X_full_oo, px_oo, s.nocc, s.nocc); + gather(X_vv, X_full_vv, px_vv, s.nvirt, s.nvirt); + for (int istate = 0;istate < nstate;++istate) { c.fix_k(0); set_rand(c.get_pointer(), s.nks * pc.get_local_size()); // set c - - X.fix_b(istate); - X_full.fix_b(istate); - - std::vector dm_pblas_loc = LR::cal_dm_trans_pblas(X.get_pointer(), px, c, pc, s.naos, s.nocc, s.nvirt, pmat); - - // gather dm and output - std::vector dm_gather(s.nks, container::Tensor(DAT::DT_COMPLEX_DOUBLE, DEV::CpuDevice, { s.naos, s.naos })); - for (int isk = 0;isk < s.nks;++isk) - LR_Util::gather_2d_to_full(pmat, dm_pblas_loc[isk].data>(), dm_gather[isk].data>(), false, s.naos, s.naos); - // compare to global matrix std::vector ngk_temp_2(s.nks, s.naos); psi::Psi> c_full(s.nks, s.nocc + s.nvirt, s.naos, ngk_temp_2.data(), true); @@ -225,11 +273,26 @@ TEST_F(DMTransTest, ComplexParallel) c_full.fix_k(isk); LR_Util::gather_2d_to_full(pc, c.get_pointer(), c_full.get_pointer(), false, s.naos, s.nocc + s.nvirt); } - if (my_rank == 0) - { - std::vector dm_full = LR::cal_dm_trans_blas(X_full.get_pointer(), c_full, s.nocc, s.nvirt); - for (int isk = 0;isk < s.nks;++isk) check_eq(dm_full[isk].data>(), dm_gather[isk].data>(), s.naos * s.naos); - } + + auto test = [&](psi::Psi>& X, psi::Psi>& X_full, const Parallel_2D& px, const LR::MO_TYPE type) + { + X.fix_b(istate); + X_full.fix_b(istate); + std::vector dm_pblas_loc = LR::cal_dm_trans_pblas(X.get_pointer(), px, c, pc, s.naos, s.nocc, s.nvirt, pmat, std::complex(1.0, 0.0) / (double)s.nks, type); + std::vector dm_gather(s.nks, container::Tensor(DAT::DT_COMPLEX_DOUBLE, DEV::CpuDevice, { s.naos, s.naos })); + for (int isk = 0;isk < s.nks;++isk) + { + LR_Util::gather_2d_to_full(pmat, dm_pblas_loc[isk].data>(), dm_gather[isk].data>(), false, s.naos, s.naos); + } + if (my_rank == 0) + { + const std::vector& dm_full = LR::cal_dm_trans_blas(X_full.get_pointer(), c_full, s.nocc, s.nvirt, std::complex(1.0, 0.0) / (double)s.nks, type); + for (int isk = 0;isk < s.nks;++isk) check_eq(dm_full[isk].data>(), dm_gather[isk].data>(), s.naos * s.naos); + } + }; + test(X_vo, X_full_vo, px_vo, LR::MO_TYPE::VO); + test(X_oo, X_full_oo, px_oo, LR::MO_TYPE::OO); + test(X_vv, X_full_vv, px_vv, LR::MO_TYPE::VV); } } } diff --git a/source/module_lr/hamilt_casida.h b/source/module_lr/hamilt_casida.h index b28ad9fde8..75c8ddd554 100644 --- a/source/module_lr/hamilt_casida.h +++ b/source/module_lr/hamilt_casida.h @@ -119,10 +119,10 @@ namespace LR { const auto psi_ks_is = LR_Util::get_psi_spin(psi_ks_in, is, nk); #ifdef __MPI - std::vector dm_trans_2d = cal_dm_trans_pblas(X, pX[is], psi_ks_is, pc_in, naos, nocc[is], nvirt[is], pmat_in); + std::vector dm_trans_2d = cal_dm_trans_pblas(X, pX[is], psi_ks_is, pc_in, naos, nocc[is], nvirt[is], pmat_in, (T)1.0 / (T)nk); if (this->tdm_sym) for (auto& t : dm_trans_2d) LR_Util::matsym(t.data(), naos, pmat_in); #else - std::vector dm_trans_2d = cal_dm_trans_blas(X, psi_ks_is, nocc[is], nvirt[is]); + std::vector dm_trans_2d = cal_dm_trans_blas(X, psi_ks_is, nocc[is], nvirt[is], (T)1.0 / (T)nk); if (this->tdm_sym) for (auto& t : dm_trans_2d) LR_Util::matsym(t.data(), naos); #endif // LR_Util::print_tensor(dm_trans_2d[0], "dm_trans_2d[0]", &pmat_in); diff --git a/source/module_lr/hamilt_ulr.hpp b/source/module_lr/hamilt_ulr.hpp index c058d14803..40e09b50f0 100644 --- a/source/module_lr/hamilt_ulr.hpp +++ b/source/module_lr/hamilt_ulr.hpp @@ -73,10 +73,10 @@ namespace LR const auto psi_ks_is = LR_Util::get_psi_spin(psi_ks_in, is, nk); // LR_Util::print_value(X, pX_in[is].get_local_size()); #ifdef __MPI - std::vector dm_trans_2d = cal_dm_trans_pblas(X, pX[is], psi_ks_is, pc_in, naos, nocc[is], nvirt[is], pmat_in); + std::vector dm_trans_2d = cal_dm_trans_pblas(X, pX[is], psi_ks_is, pc_in, naos, nocc[is], nvirt[is], pmat_in, (T)1.0 / (T)nk); if (this->tdm_sym) for (auto& t : dm_trans_2d) LR_Util::matsym(t.data(), naos, pmat_in); #else - std::vector dm_trans_2d = cal_dm_trans_blas(X, psi_ks_is, nocc[is], nvirt[is]); + std::vector dm_trans_2d = cal_dm_trans_blas(X, psi_ks_is, nocc[is], nvirt[is], (T)1.0 / (T)nk); if (this->tdm_sym) for (auto& t : dm_trans_2d) LR_Util::matsym(t.data(), naos); #endif // LR_Util::print_tensor(dm_trans_2d[0], "DMtrans(k=0)", &pmat_in); diff --git a/source/module_lr/lr_spectrum.cpp b/source/module_lr/lr_spectrum.cpp index 0a2c5735f6..5815d7b201 100644 --- a/source/module_lr/lr_spectrum.cpp +++ b/source/module_lr/lr_spectrum.cpp @@ -18,10 +18,10 @@ elecstate::DensityMatrix LR::LR_Spectrum::cal_transition_density_matrix const int offset_x = offset_b + is * nk * this->pX[0].get_local_size(); //1. transition density #ifdef __MPI - std::vector dm_trans_2d = cal_dm_trans_pblas(X + offset_x, this->pX[is], psi_ks[is], this->pc, this->naos, this->nocc[is], this->nvirt[is], this->pmat); + std::vector dm_trans_2d = cal_dm_trans_pblas(X + offset_x, this->pX[is], psi_ks[is], this->pc, this->naos, this->nocc[is], this->nvirt[is], this->pmat, (T)1.0 / (T)nk); // if (this->tdm_sym) for (auto& t : dm_trans_2d) LR_Util::matsym(t.data(), naos, pmat); #else - std::vector dm_trans_2d = cal_dm_trans_blas(X + offset_x, this->psi_ks[is], this->nocc[is], this->nvirt[is]); + std::vector dm_trans_2d = cal_dm_trans_blas(X + offset_x, this->psi_ks[is], this->nocc[is], this->nvirt[is], (T)1.0 / (T)nk); // if (this->tdm_sym) for (auto& t : dm_trans_2d) LR_Util::matsym(t.data(), naos); #endif for (int ik = 0;ik < this->nk;++ik) { DM_trans.set_DMK_pointer(ik + is * nk, dm_trans_2d[ik].data()); }