Add HR precalc functions for DeePKS and fix some bugs.

ErjieWu · ErjieWu · commit bce3cf2595a6 · 2025-03-26T16:42:45.000+08:00
diff --git a/source/Makefile.Objects b/source/Makefile.Objects
@@ -208,6 +208,7 @@ OBJS_DEEPKS=LCAO_deepks.o\
         deepks_orbpre.o\
         deepks_vdelta.o\
         deepks_vdpre.o\
+        deepks_vdrpre.o\
         deepks_hmat.o\
         deepks_pdm.o\
         deepks_phialpha.o\
diff --git a/source/module_hamilt_lcao/module_deepks/CMakeLists.txt b/source/module_hamilt_lcao/module_deepks/CMakeLists.txt
@@ -11,6 +11,7 @@ if(ENABLE_DEEPKS)
       deepks_orbpre.cpp
       deepks_vdelta.cpp
       deepks_vdpre.cpp
+      deepks_vdrpre.cpp
       deepks_hmat.cpp
       deepks_pdm.cpp
       deepks_phialpha.cpp
diff --git a/source/module_hamilt_lcao/module_deepks/LCAO_deepks.h b/source/module_hamilt_lcao/module_deepks/LCAO_deepks.h
@@ -15,6 +15,7 @@
 #include "deepks_spre.h"
 #include "deepks_vdelta.h"
 #include "deepks_vdpre.h"
+#include "deepks_vdrpre.h"
 #include "module_base/complexmatrix.h"
 #include "module_base/intarray.h"
 #include "module_base/matrix.h"
diff --git a/source/module_hamilt_lcao/module_deepks/LCAO_deepks_interface.cpp b/source/module_hamilt_lcao/module_deepks/LCAO_deepks_interface.cpp
@@ -314,34 +314,18 @@ void LCAO_Deepks_Interface<TK, TR>::out_deepks_labels(const double& etot,
                     ofs_hr.close();
                 }
 
-                const std::string file_vdrpre = PARAM.globalv.global_out_dir + "deepks_vdrpre.csr";
-                std::vector<hamilt::HContainer<TR>*> h_deltaR_pre(inlmax);
-                for (int i = 0; i < inlmax; i++)
-                {
-                    h_deltaR_pre[i] = new hamilt::HContainer<TR>(*hR_tot);
-                    h_deltaR_pre[i]->set_zero();
-                }
-                // DeePKS_domain::cal_vdr_precalc<TR>();
-                if (rank == 0)
-                {
-                    std::ofstream ofs_hrp(file_vdrpre, std::ios::out);
-                    for (int iat = 0; iat < nat; iat++)
-                    {
-                        ofs_hrp << "- Index of atom: " << iat << std::endl;
-                        for (int nl = 0; nl < nlmax; nl++)
-                        {
-                            int inl = iat * nlmax + nl;
-                            ofs_hrp << "-- Index of nl: " << nl << std::endl;
-                            ofs_hrp << "Matrix Dimension of H_delta(R): " << h_deltaR_pre[inl]->get_nbasis() << std::endl;
-                            ofs_hrp << "Matrix number of H_delta(R): " << h_deltaR_pre[inl]->size_R_loop() << std::endl;
-                            hamilt::Output_HContainer<TR> out_hrp(h_deltaR_pre[inl], ofs_hrp, sparse_threshold, precision);
-                            out_hrp.write();
-                            ofs_hrp << std::endl;
-                        }
-                        ofs_hrp << std::endl;
-                    }
-                    ofs_hrp.close();
-                }
+                torch::Tensor phialpha_r_out;
+                torch::Tensor R_query;
+                DeePKS_domain::prepare_phialpha_r(nlocal, lmaxd, inlmax, nat, phialpha, ucell, orb, *ParaV, GridD, phialpha_r_out, R_query);
+                const std::string file_phialpha_r = PARAM.globalv.global_out_dir + "deepks_phialpha_r.npy";
+                const std::string file_R_query = PARAM.globalv.global_out_dir + "deepks_R_query.npy";
+                LCAO_deepks_io::save_tensor2npy<double>(file_phialpha_r, phialpha_r_out, rank);
+                LCAO_deepks_io::save_tensor2npy<int>(file_R_query, R_query, rank);
+
+                torch::Tensor gevdm_out;
+                DeePKS_domain::prepare_gevdm(nat, lmaxd, inlmax, orb, gevdm, gevdm_out);
+                const std::string file_gevdm = PARAM.globalv.global_out_dir + "deepks_gevdm.npy";
+                LCAO_deepks_io::save_tensor2npy<double>(file_gevdm, gevdm_out, rank);
             }
         }
 
diff --git a/source/module_hamilt_lcao/module_deepks/LCAO_deepks_io.cpp b/source/module_hamilt_lcao/module_deepks/LCAO_deepks_io.cpp
@@ -275,18 +275,18 @@ void LCAO_deepks_io::save_tensor2npy(const std::string& file_name, const torch::
 
     std::vector<T> data(tensor.numel());
 
-    if constexpr (std::is_same<T, double>::value)
-    {
-        std::memcpy(data.data(), tensor.data_ptr<double>(), tensor.numel() * sizeof(double));
-    }
-    else
+    if constexpr (std::is_same<T, std::complex<double>>::value)
     {
         auto tensor_data = tensor.data_ptr<c10::complex<double>>();
         for (size_t i = 0; i < tensor.numel(); ++i)
         {
             data[i] = std::complex<double>(tensor_data[i].real(), tensor_data[i].imag());
         }
     }
+    else
+    {
+        std::memcpy(data.data(), tensor.data_ptr<T>(), tensor.numel() * sizeof(T));
+    }
 
     npy::SaveArrayAsNumpy(file_name, false, shape.size(), shape.data(), data);
 }
@@ -313,6 +313,10 @@ template void LCAO_deepks_io::save_npy_h<std::complex<double>>(const std::vector
                                                                const int nks,
                                                                const int rank);
 
+template void LCAO_deepks_io::save_tensor2npy<int>(const std::string& file_name,
+                                                   const torch::Tensor& tensor,
+                                                   const int rank);
+
 template void LCAO_deepks_io::save_tensor2npy<double>(const std::string& file_name,
                                                       const torch::Tensor& tensor,
                                                       const int rank);
diff --git a/source/module_hamilt_lcao/module_deepks/deepks_vdpre.cpp b/source/module_hamilt_lcao/module_deepks/deepks_vdpre.cpp
@@ -220,8 +220,8 @@ void DeePKS_domain::cal_v_delta_precalc(const int nlocal,
     std::vector<torch::Tensor> v_delta_precalc_vector;
     for (int nl = 0; nl < nlmax; ++nl)
     {
-        torch::Tensor gevdm_complex = gevdm[nl].to(dtype);
-        v_delta_precalc_vector.push_back(at::einsum("kxyamn, avmn->kxyav", {v_delta_pdm_vector[nl], gevdm[nl]}));
+        torch::Tensor gevdm_totype = gevdm[nl].to(dtype);
+        v_delta_precalc_vector.push_back(at::einsum("kxyamn, avmn->kxyav", {v_delta_pdm_vector[nl], gevdm_totype}));
     }
 
     v_delta_precalc = torch::cat(v_delta_precalc_vector, -1);
@@ -296,6 +296,8 @@ void DeePKS_domain::prepare_phialpha(const int nlocal,
     int nlmax = inlmax / nat;
     int mmax = 2 * lmaxd + 1;
     phialpha_out = torch::zeros({nat, nlmax, nks, nlocal, mmax}, dtype);
+    auto accessor
+        = phialpha_out.accessor<std::conditional_t<std::is_same<TK, double>::value, double, c10::complex<double>>, 5>();
 
     DeePKS_domain::iterate_ad1(
         ucell,
@@ -348,13 +350,13 @@ void DeePKS_domain::prepare_phialpha(const int nlocal,
                             {
                                 if constexpr (std::is_same<TK, double>::value)
                                 {
-                                    phialpha_out[iat][nl][ik][iw1_all][m1] = overlap->get_value(iw1, ib + m1);
+                                    accessor[iat][nl][ik][iw1_all][m1] = overlap->get_value(iw1, ib + m1);
                                 }
                                 else
                                 {
                                     c10::complex<double> tmp;
                                     tmp = overlap->get_value(iw1, ib + m1) * kphase;
-                                    phialpha_out.index_put_({iat, nl, ik, iw1_all, m1}, tmp);
+                                    accessor[iat][nl][ik][iw1_all][m1] += tmp;
                                 }
                             }
                             ib += nm;
diff --git a/source/module_hamilt_lcao/module_deepks/deepks_vdrpre.cpp b/source/module_hamilt_lcao/module_deepks/deepks_vdrpre.cpp
@@ -0,0 +1,104 @@
+//  prepare_phialpha_r : prepare phialpha_r for outputting npy file
+
+#ifdef __DEEPKS
+
+#include "deepks_vdrpre.h"
+
+#include "LCAO_deepks_io.h" // mohan add 2024-07-22
+#include "deepks_iterate.h"
+#include "module_base/blas_connector.h"
+#include "module_base/constants.h"
+#include "module_base/libm/libm.h"
+#include "module_base/parallel_reduce.h"
+#include "module_hamilt_lcao/module_hcontainer/atom_pair.h"
+#include "module_parameter/parameter.h"
+
+/**
+ * @brief Gather the overlap values stored in phialpha[0] into a dense tensor, together with
+ *        the list of lattice vectors R, so that both can be written to npy files.
+ *
+ * @param nlocal    extent of the 4th dimension of phialpha_r_out (indexed by the global orbital index)
+ * @param lmaxd     defines mmax = 2 * lmaxd + 1, the extent of the last dimension
+ * @param inlmax    defines nlmax = inlmax / nat, the extent of the 3rd dimension
+ * @param nat       extent of the 2nd dimension (indexed by iat)
+ * @param phialpha  only phialpha[0] is read
+ * @param ucell     forwarded to iterate_ad1
+ * @param orb       forwarded to iterate_ad1; orb.Alpha[0] defines the (L0, N0) blocks
+ * @param pv        used to skip orbitals with a negative local row or column index
+ * @param GridD     forwarded to iterate_ad1
+ * @param[out] phialpha_r_out  float64 tensor of shape {size_R, nat, nlmax, nlocal, mmax}
+ * @param[out] R_query         int32 tensor of shape {size_R, 3}, row iR filled by loop_R(iR, ...)
+ */
+void DeePKS_domain::prepare_phialpha_r(const int nlocal,
+                                     const int lmaxd,
+                                     const int inlmax,
+                                     const int nat,
+                                     const std::vector<hamilt::HContainer<double>*> phialpha,
+                                     const UnitCell& ucell,
+                                     const LCAO_Orbitals& orb,
+                                     const Parallel_Orbitals& pv,
+                                     const Grid_Driver& GridD,
+                                     torch::Tensor& phialpha_r_out,
+                                     torch::Tensor& R_query)
+{
+    ModuleBase::TITLE("DeePKS_domain", "prepare_phialpha_r");
+    ModuleBase::timer::tick("DeePKS_domain", "prepare_phialpha_r");
+    constexpr torch::Dtype dtype = torch::kFloat64;
+    const int nlmax = inlmax / nat;
+    const int mmax = 2 * lmaxd + 1;
+    const auto size_R = static_cast<long>(phialpha[0]->size_R_loop());
+    phialpha_r_out = torch::zeros({size_R, nat, nlmax, nlocal, mmax}, dtype);
+    R_query = torch::zeros({size_R, 3}, torch::kInt32);
+    auto accessor = phialpha_r_out.accessor<double, 5>();
+    auto R_accessor = R_query.accessor<int, 2>();
+
+    // Row iR of R_query receives the three components reported by loop_R for that index.
+    for (int iR = 0; iR < size_R; ++iR)
+    {
+        phialpha[0]->loop_R(iR, R_accessor[iR][0], R_accessor[iR][1], R_accessor[iR][2]);
+    }
+
+    DeePKS_domain::iterate_ad1(
+        ucell,
+        GridD,
+        orb,
+        false, // no trace_alpha
+        [&](const int iat,
+            const ModuleBase::Vector3<double>& tau0,
+            const int ibt,
+            const ModuleBase::Vector3<double>& tau,
+            const int start,
+            const int nw_tot,
+            ModuleBase::Vector3<int> dR)
+        {
+            // Both lookups depend only on (iat, ibt, dR), so they are done once per
+            // atom pair instead of once per orbital.
+            hamilt::BaseMatrix<double>* overlap = phialpha[0]->find_matrix(iat, ibt, dR);
+            if (overlap == nullptr)
+            {
+                return; // to next loop
+            }
+            const int iR = phialpha[0]->find_R(dR);
+            if (iR < 0)
+            {
+                return; // never index the tensor with a negative R index
+            }
+
+            // middle loop : all atomic basis on the adjacent atom ad
+            for (int iw1 = 0; iw1 < nw_tot; ++iw1)
+            {
+                const int iw1_all = start + iw1;
+                const int iw1_local = pv.global2local_row(iw1_all);
+                const int iw2_local = pv.global2local_col(iw1_all);
+                // presumably negative means the orbital is not held by this rank - TODO confirm
+                if (iw1_local < 0 || iw2_local < 0)
+                {
+                    continue;
+                }
+
+                int ib = 0; // column offset of the current (L0, N0) block inside overlap
+                int nl = 0; // running index of the (L0, N0) block
+                for (int L0 = 0; L0 <= orb.Alpha[0].getLmax(); ++L0)
+                {
+                    for (int N0 = 0; N0 < orb.Alpha[0].getNchi(L0); ++N0)
+                    {
+                        const int nm = 2 * L0 + 1;
+                        for (int m1 = 0; m1 < nm; ++m1) // nm = 1 for s, 3 for p, 5 for d
+                        {
+                            accessor[iR][iat][nl][iw1_all][m1] += overlap->get_value(iw1, ib + m1);
+                        }
+                        ib += nm;
+                        nl++;
+                    }
+                }
+            }     // end iw
+        }
+    );
+
+#ifdef __MPI
+    // The element count is kept in a long and the reduction is issued in pieces:
+    // reduce_all is called with an int count (as before), and the product
+    // size_R * nat * nlmax * nlocal * mmax can exceed INT_MAX.
+    const long total = size_R * nat * nlmax * nlocal * mmax;
+    constexpr long max_chunk = 1L << 30;
+    double* data_ptr = phialpha_r_out.data_ptr<double>();
+    for (long offset = 0; offset < total; offset += max_chunk)
+    {
+        const long remaining = total - offset;
+        const int count = static_cast<int>(remaining < max_chunk ? remaining : max_chunk);
+        Parallel_Reduce::reduce_all(data_ptr + offset, count);
+    }
+
+#endif
+
+    ModuleBase::timer::tick("DeePKS_domain", "prepare_phialpha_r");
+    return;
+}
+#endif
diff --git a/source/module_hamilt_lcao/module_deepks/deepks_vdrpre.h b/source/module_hamilt_lcao/module_deepks/deepks_vdrpre.h
@@ -0,0 +1,45 @@
+#ifndef DEEPKS_VDRPRE_H
+#define DEEPKS_VDRPRE_H
+
+#ifdef __DEEPKS
+
+#include "module_base/complexmatrix.h"
+#include "module_base/intarray.h"
+#include "module_base/matrix.h"
+#include "module_base/timer.h"
+#include "module_basis/module_ao/parallel_orbitals.h"
+#include "module_basis/module_nao/two_center_integrator.h"
+#include "module_cell/module_neighbor/sltk_grid_driver.h"
+#include "module_hamilt_lcao/module_hcontainer/hcontainer.h"
+
+#include <torch/script.h>
+#include <torch/torch.h>
+
+namespace DeePKS_domain
+{
+//------------------------
+// deepks_vdrpre.cpp
+//------------------------
+
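+// This file contains 1 subroutine used when training with the v_delta_r label,
+// prepare_phialpha_r : gathers the overlap values stored in phialpha into
+//                         phialpha_r_out (size_R x nat x nlmax x nlocal x mmax, double)
+//                         and the lattice vectors R into R_query (size_R x 3, int),
+//                         so that both can be written to npy files.
+//                         v_delta_r_precalc (which equals gevdm * v_delta_pdm,
+//                         v_delta_pdm = overlap * overlap) is not computed by this routine.
+
+// for deepks_v_delta = -1
+// prepares phialpha_r and R_query for output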
+void prepare_phialpha_r(const int nlocal,
+                        const int lmaxd,
+                        const int inlmax,
+                        const int nat,
+                        const std::vector<hamilt::HContainer<double>*> phialpha,
+                        const UnitCell& ucell,
+                        const LCAO_Orbitals& orb,
+                        const Parallel_Orbitals& pv,
+                        const Grid_Driver& GridD,
+                        torch::Tensor& phialpha_r_out,
+                        torch::Tensor& R_query);
+
+} // namespace DeePKS_domain
+#endif
+#endif

Original file line number	Diff line number	Diff line change
`@@ -220,8 +220,8 @@ void DeePKS_domain::cal_v_delta_precalc(const int nlocal,`
`220`	`220`	`std::vector<torch::Tensor> v_delta_precalc_vector;`
`221`	`221`	`for (int nl = 0; nl < nlmax; ++nl)`
`222`	`222`	`{`
`223`		`- torch::Tensor gevdm_complex = gevdm[nl].to(dtype);`
`224`		`- v_delta_precalc_vector.push_back(at::einsum("kxyamn, avmn->kxyav", {v_delta_pdm_vector[nl], gevdm[nl]}));`
	`223`	`+ torch::Tensor gevdm_totype = gevdm[nl].to(dtype);`
	`224`	`+ v_delta_precalc_vector.push_back(at::einsum("kxyamn, avmn->kxyav", {v_delta_pdm_vector[nl], gevdm_totype}));`
`225`	`225`	`}`
`226`	`226`
`227`	`227`	`v_delta_precalc = torch::cat(v_delta_precalc_vector, -1);`
`@@ -296,6 +296,8 @@ void DeePKS_domain::prepare_phialpha(const int nlocal,`
`296`	`296`	`int nlmax = inlmax / nat;`
`297`	`297`	`int mmax = 2 * lmaxd + 1;`
`298`	`298`	`phialpha_out = torch::zeros({nat, nlmax, nks, nlocal, mmax}, dtype);`
	`299`	`+ auto accessor`
	`300`	`+ = phialpha_out.accessor<std::conditional_t<std::is_same<TK, double>::value, double, c10::complex<double>>, 5>();`
`299`	`301`
`300`	`302`	`DeePKS_domain::iterate_ad1(`
`301`	`303`	`ucell,`
`@@ -348,13 +350,13 @@ void DeePKS_domain::prepare_phialpha(const int nlocal,`
`348`	`350`	`{`
`349`	`351`	`if constexpr (std::is_same<TK, double>::value)`
`350`	`352`	`{`
`351`		`- phialpha_out[iat][nl][ik][iw1_all][m1] = overlap->get_value(iw1, ib + m1);`
	`353`	`+ accessor[iat][nl][ik][iw1_all][m1] = overlap->get_value(iw1, ib + m1);`
`352`	`354`	`}`
`353`	`355`	`else`
`354`	`356`	`{`
`355`	`357`	`c10::complex<double> tmp;`
`356`	`358`	`tmp = overlap->get_value(iw1, ib + m1) * kphase;`
`357`		`- phialpha_out.index_put_({iat, nl, ik, iw1_all, m1}, tmp);`
	`359`	`+ accessor[iat][nl][ik][iw1_all][m1] += tmp;`
`358`	`360`	`}`
`359`	`361`	`}`
`360`	`362`	`ib += nm;`