Refactor: Simplify some functions in DeePKS. (#6102)

ErjieWu · web-flow · commit ab2e27a27ae7 · 2025-04-03T11:40:38.000+08:00
* Simplify some functions in DeePKS.

* clang-format change.
diff --git a/source/module_hamilt_lcao/module_deepks/LCAO_deepks.h b/source/module_hamilt_lcao/module_deepks/LCAO_deepks.h
@@ -73,7 +73,7 @@ class LCAO_Deepks
     int inlmax = 0;                  // tot. number {i,n,l} - atom, n, l
     int n_descriptor;                // natoms * des_per_atom, size of descriptor(projector) basis set
     int des_per_atom;                // \sum_L{Nchi(L)*(2L+1)}
-    std::vector<int> inl2l;          // inl2l[inl] = l of descriptor with inl_index
+    std::vector<int> inl2l;          // inl2l[inl] = l of descriptor inl; independent of iat, so inl2l[inl] == inl2l[nl]
     ModuleBase::IntArray* inl_index; // caoyu add 2021-05-07
 
     bool init_pdm = false; // for DeePKS NSCF calculation, set init_pdm to skip the calculation of pdm in SCF iteration
diff --git a/source/module_hamilt_lcao/module_deepks/LCAO_deepks_interface.cpp b/source/module_hamilt_lcao/module_deepks/LCAO_deepks_interface.cpp
@@ -64,8 +64,6 @@ void LCAO_Deepks_Interface<TK, TR>::out_deepks_labels(const double& etot,
     {
         // this part is for integrated test of deepks
         // so it is printed even if deepks_out_labels is not used
-        DeePKS_domain::update_dmr(kvec_d, dm->get_DMK_vector(), ucell, orb, *ParaV, GridD, dmr);
-
         DeePKS_domain::cal_pdm<
             TK>(init_pdm, inlmax, lmaxd, inl2l, inl_index, kvec_d, dmr, phialpha, ucell, orb, GridD, *ParaV, pdm);
 
diff --git a/source/module_hamilt_lcao/module_deepks/deepks_force.cpp b/source/module_hamilt_lcao/module_deepks/deepks_force.cpp
@@ -151,6 +151,13 @@ void DeePKS_domain::cal_f_delta(const hamilt::HContainer<double>* dmr,
                                                     += gedm[inl][m1 * nm + m2]
                                                         * overlap_1->get_value(row_indexes[iw1], ib + m1)
                                                         * grad_overlap_2[dim]->get_value(col_indexes[iw2], ib + m2);
+                                                if (isstress)
+                                                {
+                                                    nlm_t[dim] += gedm[inl][m1 * nm + m2]
+                                                                    * overlap_2->get_value(col_indexes[iw2], ib + m1)
+                                                                    * grad_overlap_1[dim]->get_value(row_indexes[iw1],
+                                                                                                    ib + m2);
+                                                }
                                             }
                                         }
                                     }
@@ -175,6 +182,12 @@ void DeePKS_domain::cal_f_delta(const hamilt::HContainer<double>* dmr,
                                         nlm[dim] += gedm[iat][iproj * nproj + jproj]
                                                     * overlap_1->get_value(row_indexes[iw1], iproj)
                                                     * grad_overlap_2[dim]->get_value(col_indexes[iw2], jproj);
+                                        if (isstress)
+                                        {
+                                            nlm_t[dim] += gedm[iat][iproj * nproj + jproj]
+                                                            * overlap_2->get_value(col_indexes[iw2], iproj)
+                                                            * grad_overlap_1[dim]->get_value(row_indexes[iw1], jproj);
+                                        }
                                     }
                                 }
                             }
@@ -192,54 +205,6 @@ void DeePKS_domain::cal_f_delta(const hamilt::HContainer<double>* dmr,
 
                         if (isstress)
                         {
-                            if (!PARAM.inp.deepks_equiv)
-                            {
-                                int ib = 0;
-                                for (int L0 = 0; L0 <= orb.Alpha[0].getLmax(); ++L0)
-                                {
-                                    for (int N0 = 0; N0 < orb.Alpha[0].getNchi(L0); ++N0)
-                                    {
-                                        const int inl = inl_index[T0](I0, L0, N0);
-                                        const int nm = 2 * L0 + 1;
-                                        for (int m1 = 0; m1 < nm; ++m1)
-                                        {
-                                            for (int m2 = 0; m2 < nm; ++m2)
-                                            {
-                                                for (int dim = 0; dim < 3; ++dim)
-                                                {
-                                                    nlm_t[dim] += gedm[inl][m1 * nm + m2]
-                                                                    * overlap_2->get_value(col_indexes[iw2], ib + m1)
-                                                                    * grad_overlap_1[dim]->get_value(row_indexes[iw1],
-                                                                                                    ib + m2);
-                                                }
-                                            }
-                                        }
-                                        ib += nm;
-                                    }
-                                }
-                                assert(ib == overlap_2->get_col_size());
-                            }
-                            else
-                            {
-                                int nproj = 0;
-                                for (int il = 0; il < lmaxd + 1; il++)
-                                {
-                                    nproj += (2 * il + 1) * orb.Alpha[0].getNchi(il);
-                                }
-                                for (int iproj = 0; iproj < nproj; iproj++)
-                                {
-                                    for (int jproj = 0; jproj < nproj; jproj++)
-                                    {
-                                        for (int dim = 0; dim < 3; dim++)
-                                        {
-                                            nlm_t[dim] += gedm[iat][iproj * nproj + jproj]
-                                                            * overlap_2->get_value(col_indexes[iw2], iproj)
-                                                            * grad_overlap_1[dim]->get_value(row_indexes[iw1], jproj);
-                                        }
-                                    }
-                                }
-                            }
-
                             for (int ipol = 0; ipol < 3; ipol++)
                             {
                                 for (int jpol = ipol; jpol < 3; jpol++)
diff --git a/source/module_hamilt_lcao/module_deepks/deepks_fpre.cpp b/source/module_hamilt_lcao/module_deepks/deepks_fpre.cpp
@@ -211,36 +211,10 @@ void DeePKS_domain::cal_gvx(const int nat,
         int nlmax = inlmax / nat;
         for (int nl = 0; nl < nlmax; ++nl)
         {
-            std::vector<torch::Tensor> bmmv;
-            for (int ibt = 0; ibt < nat; ++ibt)
-            {
-                std::vector<torch::Tensor> xmmv;
-                for (int i = 0; i < 3; ++i)
-                {
-                    std::vector<torch::Tensor> ammv;
-                    for (int iat = 0; iat < nat; ++iat)
-                    {
-                        int inl = iat * nlmax + nl;
-                        int nm = 2 * inl2l[inl] + 1;
-                        std::vector<double> mmv;
-                        for (int m1 = 0; m1 < nm; ++m1)
-                        {
-                            for (int m2 = 0; m2 < nm; ++m2)
-                            {
-                                mmv.push_back(accessor[i][ibt][inl][m1][m2]);
-                            }
-                        } // nm^2
-                        torch::Tensor mm = torch::tensor(mmv, torch::TensorOptions().dtype(torch::kFloat64))
-                                               .reshape({nm, nm}); // nm*nm
-                        ammv.push_back(mm);
-                    }
-                    torch::Tensor amm = torch::stack(ammv, 0); // nat*nm*nm
-                    xmmv.push_back(amm);
-                }
-                torch::Tensor bmm = torch::stack(xmmv, 0); // 3*nat*nm*nm
-                bmmv.push_back(bmm);
-            }
-            gdmr.push_back(torch::stack(bmmv, 0)); // nbt*3*nat*nm*nm
+            int nm = 2 * inl2l[nl] + 1;
+            torch::Tensor gdmx_sliced
+                = gdmx.slice(2, nl, inlmax, nlmax).slice(3, 0, nm, 1).slice(4, 0, nm, 1).permute({1, 0, 2, 3, 4});
+            gdmr.push_back(gdmx_sliced);
         }
 
         assert(gdmr.size() == nlmax);
diff --git a/source/module_hamilt_lcao/module_deepks/deepks_iterate.cpp b/source/module_hamilt_lcao/module_deepks/deepks_iterate.cpp
@@ -16,36 +16,34 @@ void DeePKS_domain::iterate_ad1(const UnitCell& ucell,
                                                    ModuleBase::Vector3<int> /*dR*/)> callback)
 {
     const double Rcut_Alpha = orb.Alpha[0].getRcut();
-    for (int T0 = 0; T0 < ucell.ntype; T0++)
+    for (int iat = 0; iat < ucell.nat; iat++)
     {
+        const int T0 = ucell.iat2it[iat];
+        const int I0 = ucell.iat2ia[iat];
         Atom* atom0 = &ucell.atoms[T0];
-        for (int I0 = 0; I0 < atom0->na; I0++)
+        const ModuleBase::Vector3<double> tau0 = atom0->tau[I0];
+        GridD.Find_atom(ucell, tau0, T0, I0);
+        for (int ad = 0; ad < GridD.getAdjacentNum() + 1; ++ad)
         {
-            const int iat = ucell.itia2iat(T0, I0);
-            const ModuleBase::Vector3<double> tau0 = atom0->tau[I0];
-            GridD.Find_atom(ucell, tau0, T0, I0);
-            for (int ad = 0; ad < GridD.getAdjacentNum() + 1; ++ad)
+            const int T1 = GridD.getType(ad);
+            const int I1 = GridD.getNatom(ad);
+            const int ibt = ucell.itia2iat(T1, I1);
+            const int start = ucell.itiaiw2iwt(T1, I1, 0);
+
+            const ModuleBase::Vector3<double> tau1 = GridD.getAdjacentTau(ad);
+            const Atom* atom1 = &ucell.atoms[T1];
+            const int nw1_tot = atom1->nw * PARAM.globalv.npol;
+            const double Rcut_AO1 = orb.Phi[T1].getRcut();
+            const double dist1 = (tau1 - tau0).norm() * ucell.lat0;
+
+            if (dist1 > Rcut_Alpha + Rcut_AO1)
             {
-                const int T1 = GridD.getType(ad);
-                const int I1 = GridD.getNatom(ad);
-                const int ibt = ucell.itia2iat(T1, I1); // on which chi_mu is located
-                const int start = ucell.itiaiw2iwt(T1, I1, 0);
-
-                const ModuleBase::Vector3<double> tau1 = GridD.getAdjacentTau(ad);
-                const Atom* atom1 = &ucell.atoms[T1];
-                const int nw1_tot = atom1->nw * PARAM.globalv.npol;
-                const double Rcut_AO1 = orb.Phi[T1].getRcut();
-                const double dist1 = (tau1 - tau0).norm() * ucell.lat0;
-
-                if (dist1 > Rcut_Alpha + Rcut_AO1)
-                {
-                    continue;
-                }
+                continue;
+            }
 
-                ModuleBase::Vector3<int> dR(GridD.getBox(ad).x, GridD.getBox(ad).y, GridD.getBox(ad).z);
+            ModuleBase::Vector3<int> dR(GridD.getBox(ad).x, GridD.getBox(ad).y, GridD.getBox(ad).z);
 
-                callback(iat, tau0, ibt, tau1, start, nw1_tot, dR);
-            }
+            callback(iat, tau0, ibt, tau1, start, nw1_tot, dR);
         }
     }
 }
@@ -174,7 +172,8 @@ void DeePKS_domain::iterate_ad2(const UnitCell& ucell,
 
                 callback(iat, tau0, ibt1, tau1, start1, nw1_tot, dR1, ibt2, tau2, start2, nw2_tot, dR2);
             }
-        });
+        }
+    );
 }
 
 #endif
diff --git a/source/module_hamilt_lcao/module_deepks/deepks_orbpre.cpp b/source/module_hamilt_lcao/module_deepks/deepks_orbpre.cpp
@@ -187,11 +187,12 @@ void DeePKS_domain::cal_orbital_precalc(const std::vector<TH>& dm_hl,
                     for (int ik = 0; ik < dm_hl.size(); ik++)
                     {
                         dm_pair.allocate(&dm_array[ik * row_size * col_size], 0);
-                        
+
                         std::complex<double> kphase = std::complex<double>(1, 0);
                         if (std::is_same<TK, std::complex<double>>::value)
                         {
-                            const double arg = -(kvec_d[ik] * ModuleBase::Vector3<double>(dR1 - dR2)) * ModuleBase::TWO_PI;
+                            const double arg
+                                = -(kvec_d[ik] * ModuleBase::Vector3<double>(dR1 - dR2)) * ModuleBase::TWO_PI;
                             kphase = std::complex<double>(cos(arg), sin(arg));
                         }
                         TK* kphase_ptr = reinterpret_cast<TK*>(&kphase);
@@ -274,33 +275,10 @@ void DeePKS_domain::cal_orbital_precalc(const std::vector<TH>& dm_hl,
     std::vector<torch::Tensor> orbital_pdm_vector;
     for (int nl = 0; nl < nlmax; ++nl)
     {
-        std::vector<torch::Tensor> kammv;
-        for (int iks = 0; iks < nks; ++iks)
-        {
-            std::vector<torch::Tensor> ammv;
-            for (int iat = 0; iat < nat; ++iat)
-            {
-                int inl = iat * nlmax + nl;
-                int nm = 2 * inl2l[inl] + 1;
-                std::vector<double> mmv;
-
-                for (int m1 = 0; m1 < nm; ++m1) // m1 = 1 for s, 3 for p, 5 for d
-                {
-                    for (int m2 = 0; m2 < nm; ++m2) // m1 = 1 for s, 3 for p, 5 for d
-                    {
-                        mmv.push_back(accessor[iks][inl][m1][m2]);
-                    }
-                }
-                torch::Tensor mm
-                    = torch::tensor(mmv, torch::TensorOptions().dtype(torch::kFloat64)).reshape({nm, nm}); // nm*nm
-
-                ammv.push_back(mm);
-            }
-            torch::Tensor amm = torch::stack(ammv, 0);
-            kammv.push_back(amm);
-        }
-        torch::Tensor kamm = torch::stack(kammv, 0);
-        orbital_pdm_vector.push_back(kamm);
+        int nm = 2 * inl2l[nl] + 1;
+        torch::Tensor orbital_pdm_sliced
+            = orbital_pdm.slice(1, nl, inlmax, nlmax).slice(2, 0, nm, 1).slice(3, 0, nm, 1);
+        orbital_pdm_vector.push_back(orbital_pdm_sliced);
     }
 
     assert(orbital_pdm_vector.size() == nlmax);
diff --git a/source/module_hamilt_lcao/module_deepks/deepks_pdm.cpp b/source/module_hamilt_lcao/module_deepks/deepks_pdm.cpp
@@ -96,6 +96,9 @@ void DeePKS_domain::update_dmr(const std::vector<ModuleBase::Vector3<double>>& k
                                hamilt::HContainer<double>* dmr_deepks)
 {
     dmr_deepks->set_zero();
+    // record which (ibt1, ibt2, dR) combinations have already been processed
+    std::vector<std::tuple<int, int, int, int, int>> calculated_pairs(0);
+
     DeePKS_domain::iterate_ad2(
         ucell,
         GridD,
@@ -134,6 +137,15 @@ void DeePKS_domain::update_dmr(const std::vector<ModuleBase::Vector3<double>>& k
             }
             ModuleBase::Vector3<int> dR(dRx, dRy, dRz);
 
+            // avoid duplicate calculation
+            if (std::find(calculated_pairs.begin(), calculated_pairs.end(),
+                          std::make_tuple(ibt1, ibt2, dR.x, dR.y, dR.z))
+                != calculated_pairs.end())
+            {
+                return;
+            }
+            calculated_pairs.push_back(std::make_tuple(ibt1, ibt2, dR.x, dR.y, dR.z));
+
             dm_pair.find_R(dR);
             hamilt::BaseMatrix<double>* dmr_ptr = dm_pair.find_matrix(dR);
             dmr_ptr->set_zero(); // must reset to zero to avoid accumulation!
@@ -222,7 +234,7 @@ void DeePKS_domain::cal_pdm(bool& init_pdm,
         Atom* atom0 = &ucell.atoms[T0];
         const ModuleBase::Vector3<double> tau0 = atom0->tau[I0];
         AdjacentAtomInfo adjs;
-        GridD.Find_atom(ucell, atom0->tau[I0], T0, I0, &adjs);
+        GridD.Find_atom(ucell, tau0, T0, I0, &adjs);
 
         // trace alpha orbital
         std::vector<int> trace_alpha_row;
diff --git a/source/module_hamilt_lcao/module_deepks/deepks_spre.cpp b/source/module_hamilt_lcao/module_deepks/deepks_spre.cpp
@@ -202,34 +202,13 @@ void DeePKS_domain::cal_gvepsl(const int nat,
     auto accessor = gdmepsl.accessor<double, 4>();
     if (rank == 0)
     {
-        // make gdmx as tensor
+        // make gdmepsl as tensor
         int nlmax = inlmax / nat;
         for (int nl = 0; nl < nlmax; ++nl)
         {
-            std::vector<torch::Tensor> bmmv;
-            for (int i = 0; i < 6; ++i)
-            {
-                std::vector<torch::Tensor> ammv;
-                for (int iat = 0; iat < nat; ++iat)
-                {
-                    int inl = iat * nlmax + nl;
-                    int nm = 2 * inl2l[inl] + 1;
-                    std::vector<double> mmv;
-                    for (int m1 = 0; m1 < nm; ++m1)
-                    {
-                        for (int m2 = 0; m2 < nm; ++m2)
-                        {
-                            mmv.push_back(accessor[i][inl][m1][m2]);
-                        }
-                    } // nm^2
-                    torch::Tensor mm
-                        = torch::tensor(mmv, torch::TensorOptions().dtype(torch::kFloat64)).reshape({nm, nm}); // nm*nm
-                    ammv.push_back(mm);
-                }
-                torch::Tensor bmm = torch::stack(ammv, 0); // nat*nm*nm
-                bmmv.push_back(bmm);
-            }
-            gdmepsl_vector.push_back(torch::stack(bmmv, 0)); // nbt*3*nat*nm*nm
+            int nm = 2 * inl2l[nl] + 1;
+            torch::Tensor gdmepsl_sliced = gdmepsl.slice(1, nl, inlmax, nlmax).slice(2, 0, nm, 1).slice(3, 0, nm, 1);
+            gdmepsl_vector.push_back(gdmepsl_sliced);
         }
         assert(gdmepsl_vector.size() == nlmax);
 
diff --git a/source/module_hamilt_lcao/module_deepks/deepks_vdelta.cpp b/source/module_hamilt_lcao/module_deepks/deepks_vdelta.cpp
@@ -106,10 +106,6 @@ void DeePKS_domain::collect_h_mat(const Parallel_Orbitals& pv,
                     }
                 }
             }
-            else
-            {
-                // do nothing
-            }
 
             Parallel_Reduce::reduce_all(lineH.data(), nlocal - i);
 
@@ -146,12 +142,11 @@ template void DeePKS_domain::cal_e_delta_band<std::complex<double>>(
     const Parallel_Orbitals* pv,
     double& e_delta_band);
 
-template void DeePKS_domain::collect_h_mat<double, ModuleBase::matrix>(
-    const Parallel_Orbitals& pv,
-    const std::vector<std::vector<double>>& h_in,
-    std::vector<ModuleBase::matrix>& h_out,
-    const int nlocal,
-    const int nks);
+template void DeePKS_domain::collect_h_mat<double, ModuleBase::matrix>(const Parallel_Orbitals& pv,
+                                                                       const std::vector<std::vector<double>>& h_in,
+                                                                       std::vector<ModuleBase::matrix>& h_out,
+                                                                       const int nlocal,
+                                                                       const int nks);
 
 template void DeePKS_domain::collect_h_mat<std::complex<double>, ModuleBase::ComplexMatrix>(
     const Parallel_Orbitals& pv,
diff --git a/source/module_hamilt_lcao/module_deepks/deepks_vdpre.cpp b/source/module_hamilt_lcao/module_deepks/deepks_vdpre.cpp

Original file line number	Diff line number	Diff line change
`@@ -64,8 +64,6 @@ void LCAO_Deepks_Interface<TK, TR>::out_deepks_labels(const double& etot,`
`64`	`64`	`{`
`65`	`65`	`// this part is for integrated test of deepks`
`66`	`66`	`// so it is printed even if deepks_out_labels is not used`
`67`		`- DeePKS_domain::update_dmr(kvec_d, dm->get_DMK_vector(), ucell, orb, *ParaV, GridD, dmr);`
`68`		`-`
`69`	`67`	`DeePKS_domain::cal_pdm<`
`70`	`68`	`TK>(init_pdm, inlmax, lmaxd, inl2l, inl_index, kvec_d, dmr, phialpha, ucell, orb, GridD, *ParaV, pdm);`
`71`	`69`