deepmodeling
diff --git a/‎source/module_hamilt_lcao/module_deepks/LCAO_deepks_interface.cpp‎
Lines changed: 63 additions & 36 deletions b/‎source/module_hamilt_lcao/module_deepks/LCAO_deepks_interface.cpp‎
Lines changed: 63 additions & 36 deletions
diff --git a/‎source/module_hamilt_lcao/module_deepks/deepks_vdrpre.cpp‎
Lines changed: 198 additions & 22 deletions b/‎source/module_hamilt_lcao/module_deepks/deepks_vdrpre.cpp‎
Lines changed: 198 additions & 22 deletions
@@ -274,14 +274,13 @@ void LCAO_Deepks_Interface<TK, TR>::out_deepks_labels(const double& etot,
             }                                                                 // end deepks_out_labels == 1
         }                                                                     // end bandgap label
 
-        // not add deepks_out_labels = 2 for HR yet
         // H(R) matrix part, for HR, base will not be calculated since they are HContainer objects
         if (PARAM.inp.deepks_v_delta < 0)
         {
             // set the output
             const double sparse_threshold = 1e-10;
             const int precision = 8;
-            const std::string file_hrtot = PARAM.globalv.global_out_dir + "deepks_hrtot.csr";
+            const std::string file_hrtot = PARAM.globalv.global_out_dir + (PARAM.inp.deepks_out_labels == 1 ? "deepks_hrtot.csr" : "hamiltonian_r.csr");
             hamilt::HContainer<TR>* hR_tot = (p_ham->getHR());
 
             if (rank == 0)
@@ -290,47 +289,75 @@ void LCAO_Deepks_Interface<TK, TR>::out_deepks_labels(const double& etot,
                 ofs_hr << "Matrix Dimension of H(R): " << hR_tot->get_nbasis() << std::endl;
                 ofs_hr << "Matrix number of H(R): " << hR_tot->size_R_loop() << std::endl;
                 hamilt::Output_HContainer<TR> out_hr(hR_tot, ofs_hr, sparse_threshold, precision);
-                out_hr.write();
+                out_hr.write(true); // write all the matrices, including empty ones
                 ofs_hr.close();
             }
 
             if (PARAM.inp.deepks_scf)
             {
-                const std::string file_vdeltar = PARAM.globalv.global_out_dir + "deepks_hrdelta.csr";
-                hamilt::HContainer<TR>* h_deltaR = p_ham->get_V_delta_R();
-
-                if (rank == 0)
+                if (PARAM.inp.deepks_out_labels == 1)
                 {
-                    std::ofstream ofs_hr(file_vdeltar, std::ios::out);
-                    ofs_hr << "Matrix Dimension of H_delta(R): " << h_deltaR->get_nbasis() << std::endl;
-                    ofs_hr << "Matrix number of H_delta(R): " << h_deltaR->size_R_loop() << std::endl;
-                    hamilt::Output_HContainer<TR> out_hr(h_deltaR, ofs_hr, sparse_threshold, precision);
-                    out_hr.write();
-                    ofs_hr.close();
-                }
+                    const std::string file_vdeltar = PARAM.globalv.global_out_dir + "deepks_hrdelta.csr";
+                    hamilt::HContainer<TR>* h_deltaR = p_ham->get_V_delta_R();
+
+                    if (rank == 0)
+                    {
+                        std::ofstream ofs_hr(file_vdeltar, std::ios::out);
+                        ofs_hr << "Matrix Dimension of H_delta(R): " << h_deltaR->get_nbasis() << std::endl;
+                        ofs_hr << "Matrix number of H_delta(R): " << h_deltaR->size_R_loop() << std::endl;
+                        hamilt::Output_HContainer<TR> out_hr(h_deltaR, ofs_hr, sparse_threshold, precision);
+                        out_hr.write(true); // write all the matrices, including empty ones
+                        ofs_hr.close();
+                    }
 
-                torch::Tensor phialpha_r_out;
-                torch::Tensor R_query;
-                DeePKS_domain::prepare_phialpha_r(nlocal,
-                                                  lmaxd,
-                                                  inlmax,
-                                                  nat,
-                                                  phialpha,
-                                                  ucell,
-                                                  orb,
-                                                  *ParaV,
-                                                  GridD,
-                                                  phialpha_r_out,
-                                                  R_query);
-                const std::string file_phialpha_r = PARAM.globalv.global_out_dir + "deepks_phialpha_r.npy";
-                const std::string file_R_query = PARAM.globalv.global_out_dir + "deepks_R_query.npy";
-                LCAO_deepks_io::save_tensor2npy<double>(file_phialpha_r, phialpha_r_out, rank);
-                LCAO_deepks_io::save_tensor2npy<int>(file_R_query, R_query, rank);
-
-                torch::Tensor gevdm_out;
-                DeePKS_domain::prepare_gevdm(nat, lmaxd, inlmax, orb, gevdm, gevdm_out);
-                const std::string file_gevdm = PARAM.globalv.global_out_dir + "deepks_gevdm.npy";
-                LCAO_deepks_io::save_tensor2npy<double>(file_gevdm, gevdm_out, rank);
+                    if (PARAM.inp.deepks_v_delta == -1)
+                    {
+                        int R_size = DeePKS_domain::get_R_size(*h_deltaR);
+                        torch::Tensor vdr_precalc;
+                        DeePKS_domain::cal_vdr_precalc(nlocal,
+                                                        lmaxd,
+                                                        inlmax,
+                                                        nat,
+                                                        nks,
+                                                        R_size,
+                                                        inl2l,
+                                                        kvec_d,
+                                                        phialpha,
+                                                        gevdm,
+                                                        inl_index,
+                                                        ucell,
+                                                        orb,
+                                                        *ParaV,
+                                                        GridD,
+                                                        vdr_precalc);
+
+                        const std::string file_vdrpre = PARAM.globalv.global_out_dir + "deepks_vdrpre.npy";
+                        LCAO_deepks_io::save_tensor2npy<double>(file_vdrpre, vdr_precalc, rank);
+                    }
+                    else if (PARAM.inp.deepks_v_delta == -2)
+                    {
+                        int R_size = DeePKS_domain::get_R_size(*h_deltaR);
+                        torch::Tensor phialpha_r_out;
+                        DeePKS_domain::prepare_phialpha_r(nlocal,
+                                                        lmaxd,
+                                                        inlmax,
+                                                        nat,
+                                                        R_size,
+                                                        phialpha,
+                                                        ucell,
+                                                        orb,
+                                                        *ParaV,
+                                                        GridD,
+                                                        phialpha_r_out);
+                        const std::string file_phialpha_r = PARAM.globalv.global_out_dir + "deepks_phialpha_r.npy";
+                        LCAO_deepks_io::save_tensor2npy<double>(file_phialpha_r, phialpha_r_out, rank);
+
+                        torch::Tensor gevdm_out;
+                        DeePKS_domain::prepare_gevdm(nat, lmaxd, inlmax, orb, gevdm, gevdm_out);
+                        const std::string file_gevdm = PARAM.globalv.global_out_dir + "deepks_gevdm.npy";
+                        LCAO_deepks_io::save_tensor2npy<double>(file_gevdm, gevdm_out, rank);
+                    }
+                }
             }
         }
 
 
@@ -14,32 +14,25 @@
 #include "module_parameter/parameter.h"
 
 void DeePKS_domain::prepare_phialpha_r(const int nlocal,
-                                     const int lmaxd,
-                                     const int inlmax,
-                                     const int nat,
-                                     const std::vector<hamilt::HContainer<double>*> phialpha,
-                                     const UnitCell& ucell,
-                                     const LCAO_Orbitals& orb,
-                                     const Parallel_Orbitals& pv,
-                                     const Grid_Driver& GridD,
-                                     torch::Tensor& phialpha_r_out,
-                                     torch::Tensor& R_query)
+                                       const int lmaxd,
+                                       const int inlmax,
+                                       const int nat,
+                                       const int R_size,
+                                       const std::vector<hamilt::HContainer<double>*> phialpha,
+                                       const UnitCell& ucell,
+                                       const LCAO_Orbitals& orb,
+                                       const Parallel_Orbitals& pv,
+                                       const Grid_Driver& GridD,
+                                       torch::Tensor& phialpha_r_out)
 {
     ModuleBase::TITLE("DeePKS_domain", "prepare_phialpha_r");
     ModuleBase::timer::tick("DeePKS_domain", "prepare_phialpha_r");
     constexpr torch::Dtype dtype = torch::kFloat64;
     int nlmax = inlmax / nat;
     int mmax = 2 * lmaxd + 1;
-    auto size_R = static_cast<long>(phialpha[0]->size_R_loop());
-    phialpha_r_out = torch::zeros({size_R, nat, nlmax, nlocal, mmax}, dtype);
-    R_query = torch::zeros({size_R, 3}, torch::kInt32);
-    auto accessor = phialpha_r_out.accessor<double, 5>();
-    auto R_accessor = R_query.accessor<int, 2>();
 
-    for (int iR = 0; iR < size_R; ++iR)
-    {
-        phialpha[0]->loop_R(iR, R_accessor[iR][0], R_accessor[iR][1], R_accessor[iR][2]);
-    }
+    phialpha_r_out = torch::zeros({R_size, R_size, R_size, nat, nlmax, nlocal, mmax}, dtype);
+    auto accessor = phialpha_r_out.accessor<double, 7>();
 
     DeePKS_domain::iterate_ad1(
         ucell,
@@ -81,18 +74,22 @@ void DeePKS_domain::prepare_phialpha_r(const int nlocal,
                         const int nm = 2 * L0 + 1;
                         for (int m1 = 0; m1 < nm; ++m1) // nm = 1 for s, 3 for p, 5 for d
                         {
-                            accessor[iR][iat][nl][iw1_all][m1] += overlap->get_value(iw1, ib + m1);
+                            int iRx = DeePKS_domain::mapping_R(dR.x);
+                            int iRy = DeePKS_domain::mapping_R(dR.y);
+                            int iRz = DeePKS_domain::mapping_R(dR.z);
+                            accessor[iRx][iRy][iRz][iat][nl][iw1_all][m1]
+                                += overlap->get_value(iw1, ib + m1);
                         }
                         ib += nm;
                         nl++;
                     }
                 }
-            }     // end iw
+            } // end iw
         }
     );
 
 #ifdef __MPI
-    int size = size_R * nat * nlmax * nlocal * mmax;
+    int size = R_size * R_size * R_size * nat * nlmax * nlocal * mmax;
     double* data_ptr = phialpha_r_out.data_ptr<double>();
     Parallel_Reduce::reduce_all(data_ptr, size);
 
@@ -101,4 +98,183 @@ void DeePKS_domain::prepare_phialpha_r(const int nlocal,
     ModuleBase::timer::tick("DeePKS_domain", "prepare_phialpha_r");
     return;
 }
+
+/// Builds the tensor `vdr_precalc` from products of <phi|alpha> overlaps, resolved by
+/// the cell-index difference dR = dR1 - dR2.
+///
+/// Steps performed here:
+///  1. For every pair of neighbours (ibt1, dR1), (ibt2, dR2) handed over by
+///     iterate_ad2 for a centre atom `iat`, accumulate
+///       overlap_1(iw1, ib + m1) * overlap_2(iw2, ib + m2)
+///     into vdr_pdm[iRx][iRy][iRz][iw1_all][iw2_all][inl][m1][m2], where
+///     (iRx, iRy, iRz) = mapping_R applied to each component of dR.
+///  2. Under __MPI, sum vdr_pdm over ranks with Parallel_Reduce::reduce_all.
+///  3. For each nl in [0, inlmax / nat), take every (inlmax / nat)-th entry along the
+///     inl axis starting at nl, trim both m axes to 2 * inl2l[nl] + 1, contract with
+///     gevdm[nl] through einsum "pqrxyamn, avmn->pqrxyav", and concatenate the
+///     results along the last axis.
+///
+/// @param nlocal    extent of both orbital axes of the intermediate tensor
+/// @param lmaxd     the two m axes are allocated with extent 2 * lmaxd + 1
+/// @param inlmax    extent of the inl axis
+/// @param nat       divides inlmax to obtain the number of nl slices
+/// @param nks       not used in this function body
+/// @param R_size    extent of each of the three R axes; every mapped component of
+///                  dR1 - dR2 is assumed to be < R_size (the accessor is not
+///                  bounds-checked here) -- TODO confirm with the caller
+/// @param inl2l     indexed with nl in [0, inlmax / nat)
+/// @param kvec_d    not used in this function body
+/// @param phialpha  only phialpha[0] is read
+/// @param gevdm     gevdm[nl] is contracted against the nl-th slice
+/// @param inl_index inl_index[T0](I0, L0, N0) yields the inl index
+/// @param vdr_precalc output tensor, overwritten
+void DeePKS_domain::cal_vdr_precalc(const int nlocal,
+                                    const int lmaxd,
+                                    const int inlmax,
+                                    const int nat,
+                                    const int nks,
+                                    const int R_size,
+                                    const std::vector<int>& inl2l,
+                                    const std::vector<ModuleBase::Vector3<double>>& kvec_d,
+                                    const std::vector<hamilt::HContainer<double>*> phialpha,
+                                    const std::vector<torch::Tensor> gevdm,
+                                    const ModuleBase::IntArray* inl_index,
+                                    const UnitCell& ucell,
+                                    const LCAO_Orbitals& orb,
+                                    const Parallel_Orbitals& pv,
+                                    const Grid_Driver& GridD,
+                                    torch::Tensor& vdr_precalc)
+{
+    ModuleBase::TITLE("DeePKS_domain", "calc_vdr_precalc");
+    ModuleBase::timer::tick("DeePKS_domain", "calc_vdr_precalc");
+
+    const int mmax = 2 * lmaxd + 1;
+    torch::Tensor vdr_pdm = torch::zeros({R_size, R_size, R_size, nlocal, nlocal, inlmax, mmax, mmax},
+                                         torch::TensorOptions().dtype(torch::kFloat64));
+    auto accessor = vdr_pdm.accessor<double, 8>();
+
+    DeePKS_domain::iterate_ad2(
+        ucell,
+        GridD,
+        orb,
+        false, // no trace_alpha
+        [&](const int iat,
+            const ModuleBase::Vector3<double>& tau0,
+            const int ibt1,
+            const ModuleBase::Vector3<double>& tau1,
+            const int start1,
+            const int nw1_tot,
+            ModuleBase::Vector3<int> dR1,
+            const int ibt2,
+            const ModuleBase::Vector3<double>& tau2,
+            const int start2,
+            const int nw2_tot,
+            ModuleBase::Vector3<int> dR2)
+        {
+            const int T0 = ucell.iat2it[iat];
+            const int I0 = ucell.iat2ia[iat];
+
+            // Look each block up once and reuse the pointer for the null test.
+            hamilt::BaseMatrix<double>* overlap_1 = phialpha[0]->find_matrix(iat, ibt1, dR1);
+            hamilt::BaseMatrix<double>* overlap_2 = phialpha[0]->find_matrix(iat, ibt2, dR2);
+            if (overlap_1 == nullptr || overlap_2 == nullptr)
+            {
+                return; // to next loop
+            }
+            assert(overlap_1->get_col_size() == overlap_2->get_col_size());
+
+            // The R axes are addressed by the mapped components of dR1 - dR2.
+            const ModuleBase::Vector3<int> dR = dR1 - dR2;
+            const int iRx = DeePKS_domain::mapping_R(dR.x);
+            const int iRy = DeePKS_domain::mapping_R(dR.y);
+            const int iRz = DeePKS_domain::mapping_R(dR.z);
+
+            for (int iw1 = 0; iw1 < nw1_tot; ++iw1)
+            {
+                const int iw1_all = start1 + iw1; // this is \mu
+                if (pv.global2local_row(iw1_all) < 0)
+                {
+                    continue; // row not mapped to a local index on this process
+                }
+                for (int iw2 = 0; iw2 < nw2_tot; ++iw2)
+                {
+                    const int iw2_all = start2 + iw2; // this is \nu
+                    if (pv.global2local_col(iw2_all) < 0)
+                    {
+                        continue; // column not mapped to a local index on this process
+                    }
+
+                    int ib = 0; // running column offset of the current (L0, N0) block
+                    for (int L0 = 0; L0 <= orb.Alpha[0].getLmax(); ++L0)
+                    {
+                        for (int N0 = 0; N0 < orb.Alpha[0].getNchi(L0); ++N0)
+                        {
+                            const int inl = inl_index[T0](I0, L0, N0);
+                            const int nm = 2 * L0 + 1; // nm = 1 for s, 3 for p, 5 for d
+
+                            for (int m1 = 0; m1 < nm; ++m1)
+                            {
+                                for (int m2 = 0; m2 < nm; ++m2)
+                                {
+                                    accessor[iRx][iRy][iRz][iw1_all][iw2_all][inl][m1][m2]
+                                        += overlap_1->get_value(iw1, ib + m1)
+                                           * overlap_2->get_value(iw2, ib + m2);
+                                }
+                            }
+                            ib += nm;
+                        }
+                    }
+                } // iw2
+            }     // iw1
+        }
+    );
+
+#ifdef __MPI
+    // The element count is a product of eight extents and can exceed the range of int,
+    // while the count handed to reduce_all here is an int. Reduce in int-sized chunks;
+    // the reduction is applied per chunk, so splitting the buffer covers every element.
+    const long long total_size = static_cast<long long>(vdr_pdm.numel());
+    const long long max_chunk = 1LL << 30;
+    double* data_ptr = vdr_pdm.data_ptr<double>();
+    for (long long offset = 0; offset < total_size; offset += max_chunk)
+    {
+        const long long remaining = total_size - offset;
+        const int chunk = static_cast<int>(remaining < max_chunk ? remaining : max_chunk);
+        Parallel_Reduce::reduce_all(data_ptr + offset, chunk);
+    }
+#endif
+
+    // transfer vdr_pdm to vdr_pdm_vector: one strided slice along the inl axis per nl
+    const int nlmax = inlmax / nat;
+    std::vector<torch::Tensor> vdr_pdm_vector;
+    for (int nl = 0; nl < nlmax; ++nl)
+    {
+        const int nm = 2 * inl2l[nl] + 1;
+        torch::Tensor vdr_pdm_sliced = vdr_pdm.slice(5, nl, inlmax, nlmax).slice(6, 0, nm, 1).slice(7, 0, nm, 1);
+        vdr_pdm_vector.push_back(vdr_pdm_sliced);
+    }
+
+    assert(static_cast<int>(vdr_pdm_vector.size()) == nlmax);
+
+    // einsum for each nl:
+    std::vector<torch::Tensor> vdr_vector;
+    for (int nl = 0; nl < nlmax; ++nl)
+    {
+        vdr_vector.push_back(at::einsum("pqrxyamn, avmn->pqrxyav", {vdr_pdm_vector[nl], gevdm[nl]}));
+    }
+
+    vdr_precalc = torch::cat(vdr_vector, -1);
+
+    ModuleBase::timer::tick("DeePKS_domain", "calc_vdr_precalc");
+    return;
+}
+
+int DeePKS_domain::mapping_R(int R)
+{
+    // Interleave the signed index R onto the natural numbers so that it can be used
+    // directly as an index into a torch::Tensor:
+    //   R     :  0,  1, -1,  2, -2, ...
+    //   index :  0,  1,  2,  3,  4, ...
+    // i.e. index(R) = 2R - 1 for R > 0 and index(R) = -2R for R <= 0.
+    return (R > 0) ? (2 * R - 1) : (-2 * R);
+}
+
+template <typename T>
+int DeePKS_domain::get_R_size(const hamilt::HContainer<T>& hcontainer)
+{
+    // Returns the extent needed along one R axis of a tensor indexed through
+    // mapping_R(): one more than the largest mapped index over every component of
+    // every R vector that loop_R() reports for hcontainer.
+    //
+    // size_R_loop() is queried once up front rather than in the loop condition on
+    // every iteration; an empty container simply skips the loop.
+    const int n_R = static_cast<int>(hcontainer.size_R_loop());
+    int R_size = 0;
+    for (int iR = 0; iR < n_R; ++iR)
+    {
+        ModuleBase::Vector3<int> R_vec;
+        hcontainer.loop_R(iR, R_vec.x, R_vec.y, R_vec.z);
+        // mapping_R is not monotonic in R (negative values map above positive ones of
+        // the same magnitude), so compare the mapped components rather than the raw ones.
+        const int mapped_max = std::max({DeePKS_domain::mapping_R(R_vec.x),
+                                         DeePKS_domain::mapping_R(R_vec.y),
+                                         DeePKS_domain::mapping_R(R_vec.z)});
+        R_size = std::max(R_size, mapped_max + 1);
+    }
+    // Debug-only check: a container without any R vector leaves R_size at 0.
+    assert(R_size > 0);
+    return R_size;
+}
+
+// Explicit instantiations of get_R_size for HContainer<double> and
+// HContainer<std::complex<double>>.
+template int DeePKS_domain::get_R_size<double>(const hamilt::HContainer<double>& hcontainer);
+template int DeePKS_domain::get_R_size<std::complex<double>>(
+    const hamilt::HContainer<std::complex<double>>& hcontainer);
 #endif