diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_gamma.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_gamma.cpp
index e71dc14b72..41b26e7e9f 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_gamma.cpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_gamma.cpp
@@ -237,18 +237,21 @@ void Force_LCAO<double>::ftable(const bool isforce,
 
         GlobalC::ld.cal_gedm(ucell.nat);
 
-        DeePKS_domain::cal_f_delta_gamma(dm_gamma,
-                                         ucell,
-                                         orb,
-                                         gd,
+        const int nks=1;
+		DeePKS_domain::cal_f_delta_gamma(dm_gamma, 
+				                         ucell, 
+				                         orb, 
+				                         gd, 
                                          *this->ParaV,
                                          GlobalC::ld.lmaxd,
+                                         nks, 
+				                         kv->kvec_d, 
                                          GlobalC::ld.nlm_save,
                                          GlobalC::ld.gedm,
                                          GlobalC::ld.inl_index,
                                          GlobalC::ld.F_delta,
-                                         isstress,
-                                         svnl_dalpha);
+				                         isstress, 
+				                         svnl_dalpha);
 
 #ifdef __MPI
         Parallel_Reduce::reduce_all(GlobalC::ld.F_delta.c, GlobalC::ld.F_delta.nr * GlobalC::ld.F_delta.nc);
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/deepks_lcao.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/deepks_lcao.cpp
index 18e19d071e..3d8f8260fb 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/deepks_lcao.cpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/deepks_lcao.cpp
@@ -158,7 +158,7 @@ void DeePKS<OperatorLCAO<double, double>>::contributeHR()
     {
         ModuleBase::timer::tick("DeePKS", "contributeHR");
         const Parallel_Orbitals* pv = this->hsk->get_pv();
-        GlobalC::ld.cal_projected_DM(this->DM, *this->ucell, *ptr_orb_, *(this->gd));
+        GlobalC::ld.cal_projected_DM<double>(this->DM, *this->ucell, *ptr_orb_, *(this->gd));
         GlobalC::ld.cal_descriptor(this->ucell->nat);
         GlobalC::ld.cal_gedm(this->ucell->nat);
         // recalculate the H_V_delta
@@ -186,7 +186,7 @@ void DeePKS<OperatorLCAO<std::complex<double>, double>>::contributeHR()
     {
         ModuleBase::timer::tick("DeePKS", "contributeHR");
 
-        GlobalC::ld.cal_projected_DM(this->DM, *this->ucell, *ptr_orb_, *this->gd);
+        GlobalC::ld.cal_projected_DM<std::complex<double>>(this->DM, *this->ucell, *ptr_orb_, *this->gd);
         GlobalC::ld.cal_descriptor(this->ucell->nat);
         // calculate dE/dD
         GlobalC::ld.cal_gedm(this->ucell->nat);
@@ -219,7 +219,7 @@ void DeePKS<OperatorLCAO<std::complex<double>, std::complex<double>>>::contribut
     {
         ModuleBase::timer::tick("DeePKS", "contributeHR");
 
-        GlobalC::ld.cal_projected_DM(this->DM, *this->ucell, *ptr_orb_, *this->gd);
+        GlobalC::ld.cal_projected_DM<std::complex<double>>(this->DM, *this->ucell, *ptr_orb_, *this->gd);
         GlobalC::ld.cal_descriptor(this->ucell->nat);
         // calculate dE/dD
         GlobalC::ld.cal_gedm(this->ucell->nat);
diff --git a/source/module_hamilt_lcao/module_deepks/LCAO_deepks.cpp b/source/module_hamilt_lcao/module_deepks/LCAO_deepks.cpp
index 1676838282..2e34264ab7 100644
--- a/source/module_hamilt_lcao/module_deepks/LCAO_deepks.cpp
+++ b/source/module_hamilt_lcao/module_deepks/LCAO_deepks.cpp
@@ -512,14 +512,13 @@ void LCAO_Deepks::del_v_delta_pdm_shell(const int nks,const int nlocal)
     return;
 }
 
-void LCAO_Deepks::dpks_cal_e_delta_band(const std::vector<std::vector<double>>& dm, const int nks)
+template <typename TK>
+void LCAO_Deepks::dpks_cal_e_delta_band(const std::vector<std::vector<TK>>& dm, const int nks)
 {
     this->cal_e_delta_band(dm, nks);
 }
 
-void LCAO_Deepks::dpks_cal_e_delta_band(const std::vector<std::vector<std::complex<double>>>& dm, const int nks)
-{
-    this->cal_e_delta_band(dm, nks);
-}
+template void LCAO_Deepks::dpks_cal_e_delta_band<double>(const std::vector<std::vector<double>>& dm, const int nks);
+template void LCAO_Deepks::dpks_cal_e_delta_band<std::complex<double>>(const std::vector<std::vector<std::complex<double>>>& dm, const int nks);
 
 #endif
diff --git a/source/module_hamilt_lcao/module_deepks/LCAO_deepks.h b/source/module_hamilt_lcao/module_deepks/LCAO_deepks.h
index 179938f4ef..89276b7835 100644
--- a/source/module_hamilt_lcao/module_deepks/LCAO_deepks.h
+++ b/source/module_hamilt_lcao/module_deepks/LCAO_deepks.h
@@ -284,11 +284,10 @@ class LCAO_Deepks
 
     // There are 6 subroutines in this file:
     // 1. cal_projected_DM, which is used for calculating pdm for gamma point calculation
-    // 2. cal_projected_DM_k, counterpart of 1, for multi-k
-    // 3. check_projected_dm, which prints pdm to descriptor.dat
+    // 2. check_projected_dm, which prints pdm to descriptor.dat
 
-    // 4. cal_gdmx, calculating gdmx (and optionally gdm_epsl for stress) for gamma point
-    // 5. check_gdmx, which prints gdmx to a series of .dat files
+    // 3. cal_gdmx, calculating gdmx (and optionally gdm_epsl for stress) for gamma point
+    // 4. check_gdmx, which prints gdmx to a series of .dat files
 
   public:
     /** 
@@ -299,28 +298,14 @@ class LCAO_Deepks
      *    2. SCF calculation of DeePKS with init_chg = file and pdm has been read for restarting SCF
      *    3. Relax/Cell-Relax/MD calculation, non-first step will use the convergence pdm from the last step as initial pdm
      */
-    void cal_projected_DM(const elecstate::DensityMatrix<double, double>* dm,
-                          const UnitCell& ucell,
-                          const LCAO_Orbitals& orb,
-                          const Grid_Driver& GridD);
-
-    void cal_projected_DM(const elecstate::DensityMatrix<std::complex<double>, double>* dm,
+    template <typename TK>
+    void cal_projected_DM(const elecstate::DensityMatrix<TK, double>* dm,
                           const UnitCell& ucell,
                           const LCAO_Orbitals& orb,
                           const Grid_Driver& GridD);
 
     void check_projected_dm();
 
-    void cal_projected_DM_equiv(const elecstate::DensityMatrix<double, double>* dm,
-                                const UnitCell& ucell,
-                                const LCAO_Orbitals& orb,
-                                const Grid_Driver& GridD);
-
-    void cal_projected_DM_k_equiv(const elecstate::DensityMatrix<std::complex<double>, double>* dm,
-                                  const UnitCell& ucell,
-                                  const LCAO_Orbitals& orb,
-                                  const Grid_Driver& GridD);
-
     // calculate the gradient of pdm with regard to atomic positions
     // d/dX D_{Inl,mm'}
     template <typename TK>
@@ -358,21 +343,18 @@ class LCAO_Deepks
     // tr (rho * V_delta)
 
     // Four subroutines are contained in the file:
-    // 5. cal_e_delta_band : calculates e_delta_bands for gamma only
-    // 6. cal_e_delta_band_k : counterpart of 4, for multi-k
+    // 5. cal_e_delta_band : calculates e_delta_bands
 
   public:
     /// calculate tr(\rho V_delta)
     // void cal_e_delta_band(const std::vector<ModuleBase::matrix>& dm/**<[in] density matrix*/);
-    void cal_e_delta_band(const std::vector<std::vector<double>>& dm /**<[in] density matrix*/, const int /*nks*/);
-    // void cal_e_delta_band_k(const std::vector<ModuleBase::ComplexMatrix>& dm/**<[in] density matrix*/,
-    //     const int nks);
-    void cal_e_delta_band(const std::vector<std::vector<std::complex<double>>>& dm /**<[in] density matrix*/,
-                            const int nks);
+    template <typename TK>
+    void cal_e_delta_band(const std::vector<std::vector<TK>>& dm /**<[in] density matrix*/, const int nks);
 
     //! a temporary interface for cal_e_delta_band and cal_e_delta_band_k
-    void dpks_cal_e_delta_band(const std::vector<std::vector<double>>& dm, const int nks);
-    void dpks_cal_e_delta_band(const std::vector<std::vector<std::complex<double>>>& dm, const int nks);
+    template <typename TK>
+    void dpks_cal_e_delta_band(const std::vector<std::vector<TK>>& dm, const int nks);
+    
 
     //-------------------
     // LCAO_deepks_odelta.cpp
@@ -381,17 +363,11 @@ class LCAO_Deepks
     // This file contains subroutines for calculating O_delta,
     // which corresponds to the correction of the band gap.
 
-    // There are two subroutines in this file:
-    // 1. cal_o_delta, which is used for gamma point calculation
-    // 2. cal_o_delta_k, which is used for multi-k calculation
-
   public:
-    void cal_o_delta(const std::vector<std::vector<ModuleBase::matrix>>&
+    template <typename TK, typename TH>
+    void cal_o_delta(const std::vector<std::vector<TH>>&
                          dm_hl /**<[in] modified density matrix that contains HOMO and LUMO only*/,
                      const int nks);
-    void cal_o_delta(const std::vector<std::vector<ModuleBase::ComplexMatrix>>&
-                           dm_hl /**<[in] modified density matrix that contains HOMO and LUMO only*/,
-                       const int nks);
 
     //-------------------
     // LCAO_deepks_torch.cpp
diff --git a/source/module_hamilt_lcao/module_deepks/LCAO_deepks_interface.cpp b/source/module_hamilt_lcao/module_deepks/LCAO_deepks_interface.cpp
index edb255de93..7639463f72 100644
--- a/source/module_hamilt_lcao/module_deepks/LCAO_deepks_interface.cpp
+++ b/source/module_hamilt_lcao/module_deepks/LCAO_deepks_interface.cpp
@@ -117,7 +117,7 @@ void LCAO_Deepks_Interface<TK, TR>::out_deepks_labels(const double& etot,
                 }
                 
                 ld->cal_orbital_precalc<TK,TH>(dm_bandgap, nat, nks, kvec_d, ucell, orb, GridD);
-                ld->cal_o_delta(dm_bandgap, nks);
+                ld->cal_o_delta<TK,TH>(dm_bandgap, nks);
 
                 // save obase and orbital_precalc
                 LCAO_deepks_io::save_npy_orbital_precalc(nat, 
@@ -249,7 +249,7 @@ void LCAO_Deepks_Interface<TK, TR>::out_deepks_labels(const double& etot,
         // when deepks_scf is on, the init pdm should be same as the out pdm, so we should not recalculate the pdm
 		if(!PARAM.inp.deepks_scf) 
 		{
-			ld->cal_projected_DM(dm, ucell, orb, GridD);
+			ld->cal_projected_DM<TK>(dm, ucell, orb, GridD);
 		}
 
         ld->check_projected_dm(); // print out the projected dm for NSCF calculaiton
diff --git a/source/module_hamilt_lcao/module_deepks/LCAO_deepks_odelta.cpp b/source/module_hamilt_lcao/module_deepks/LCAO_deepks_odelta.cpp
index eae3c12220..016e6317b8 100644
--- a/source/module_hamilt_lcao/module_deepks/LCAO_deepks_odelta.cpp
+++ b/source/module_hamilt_lcao/module_deepks/LCAO_deepks_odelta.cpp
@@ -4,63 +4,29 @@
 //which is defind as sum_mu,nu rho^{hl}_mu,nu <chi_mu|alpha>V(D)<alpha|chi_nu>
 //where rho^{hl}_mu,nu = C_{L\mu}C_{L\nu} - C_{H\mu}C_{H\nu}, L for LUMO, H for HOMO
 
-//There are two subroutines in this file:
-//1. cal_o_delta, which is used for gamma point calculation
-//2. cal_o_delta_k, which is used for multi-k calculation
-
 #ifdef __DEEPKS
 
 #include "LCAO_deepks.h"
 #include "module_base/parallel_reduce.h"
 
-void LCAO_Deepks::cal_o_delta(const std::vector<std::vector<ModuleBase::matrix>>& dm_hl, const int nks)
+template <typename TK, typename TH>
+void LCAO_Deepks::cal_o_delta(const std::vector<std::vector<TH>>& dm_hl, const int nks)
 {
     ModuleBase::TITLE("LCAO_Deepks", "cal_o_delta");
-    this->o_delta.zero_out();
-    for (int hl = 0; hl < 1; ++hl)
-    {
-        for (int i = 0; i < PARAM.globalv.nlocal; ++i)
-        {
-            for (int j = 0; j < PARAM.globalv.nlocal; ++j)
-            {
-                const int mu = pv->global2local_row(j);
-                const int nu = pv->global2local_col(i);
-            
-                if (mu >= 0 && nu >= 0)
-                {                
-                    const int index = nu * pv->nrow + mu;
-                    for (int is = 0; is < PARAM.inp.nspin; ++is)
-                    {
-                        this->o_delta(0,hl) += dm_hl[hl][is](nu, mu) * this->H_V_delta[0][index];
-                    }
-                }
-            }
-        }
-        Parallel_Reduce::reduce_all(this->o_delta(0, hl));
-    }
-    return;
-}
 
-
-//calculating the correction of (LUMO-HOMO) energies, i.e., band gap corrections
-//for multi_k calculations
-void LCAO_Deepks::cal_o_delta(const std::vector<std::vector<ModuleBase::ComplexMatrix>>& dm_hl,
-    const int nks)
-{
-    ModuleBase::TITLE("LCAO_Deepks", "cal_o_delta_k");
-    
-    for(int ik=0; ik<nks; ik++)
+    this->o_delta.zero_out();
+    for (int ik = 0; ik < nks; ik++)
     {
-        for (int hl=0; hl<1; hl++)
+        for (int hl = 0; hl < 1; ++hl)
         {
-            std::complex<double> o_delta_k=std::complex<double>(0.0,0.0);
+            TK o_delta_tmp = TK(0.0);
             for (int i = 0; i < PARAM.globalv.nlocal; ++i)
             {
                 for (int j = 0; j < PARAM.globalv.nlocal; ++j)
                 {
                     const int mu = pv->global2local_row(j);
                     const int nu = pv->global2local_col(i);
-            
+                
                     if (mu >= 0 && nu >= 0)
                     {                
                         int iic;
@@ -72,16 +38,40 @@ void LCAO_Deepks::cal_o_delta(const std::vector<std::vector<ModuleBase::ComplexM
                         {
                             iic = mu * pv->ncol + nu;
                         }
-                        o_delta_k += dm_hl[hl][ik](nu, mu) * this->H_V_delta_k[ik][iic];
+                        if constexpr (std::is_same<TK, double>::value)
+                        {
+                            for (int is = 0; is < PARAM.inp.nspin; ++is)
+                            {
+                                o_delta_tmp += dm_hl[hl][is](nu, mu) * this->H_V_delta[0][iic];
+                            }
+                        }
+                        else
+                        {
+                            o_delta_tmp += dm_hl[hl][ik](nu, mu) * this->H_V_delta_k[ik][iic];
+                        }
                     }
-                } //end j
-            } //end i
-            Parallel_Reduce::reduce_all(o_delta_k);
-            this->o_delta(ik,hl) = o_delta_k.real();
-        }// end hl
-    }// end nks
-    
+                }
+            }
+            Parallel_Reduce::reduce_all(o_delta_tmp);
+            if constexpr (std::is_same<TK, double>::value)
+            {
+                this->o_delta(ik,hl) = o_delta_tmp;
+            }
+            else
+            {
+                this->o_delta(ik,hl) = o_delta_tmp.real();
+            }
+        }
+    }
     return;
 }
 
+template void LCAO_Deepks::cal_o_delta<double, ModuleBase::matrix>(
+    const std::vector<std::vector<ModuleBase::matrix>>& dm_hl, 
+    const int nks);
+
+template void LCAO_Deepks::cal_o_delta<std::complex<double>, ModuleBase::ComplexMatrix>(
+    const std::vector<std::vector<ModuleBase::ComplexMatrix>>& dm_hl, 
+    const int nks);
+
 #endif
diff --git a/source/module_hamilt_lcao/module_deepks/LCAO_deepks_pdm.cpp b/source/module_hamilt_lcao/module_deepks/LCAO_deepks_pdm.cpp
index 8c098ffa2e..62dfc76aef 100644
--- a/source/module_hamilt_lcao/module_deepks/LCAO_deepks_pdm.cpp
+++ b/source/module_hamilt_lcao/module_deepks/LCAO_deepks_pdm.cpp
@@ -8,14 +8,11 @@
 //It also contains subroutines for printing pdm and gdmx
 //for checking purpose
 
-//There are 6 subroutines in this file:
-//1. cal_projected_DM, which is used for calculating pdm for gamma point calculation
-//2. cal_projected_DM_k, counterpart of 1, for multi-k
+//There are 3 subroutines in this file:
+//1. read_projected_DM, which reads pdm from file
+//2. cal_projected_DM, which is used for calculating pdm 
 //3. check_projected_dm, which prints pdm to descriptor.dat
 
-//4. cal_gdmx, calculating gdmx (and optionally gdm_epsl for stress) for gamma point
-//5. check_gdmx, which prints gdmx to a series of .dat files
-
 #ifdef __DEEPKS
 
 #include "LCAO_deepks.h"
@@ -49,7 +46,7 @@ void LCAO_Deepks::read_projected_DM(bool read_pdm_file, bool is_equiv, const Num
 
         if (!ifs)
         {
-            ModuleBase::WARNING_QUIT("LCAO_Deepks::cal_projected_DM", "Cannot find the file deepks_projdm.dat");
+            ModuleBase::WARNING_QUIT("LCAO_Deepks::read_projected_DM", "Cannot find the file deepks_projdm.dat");
         }
         for(int inl=0;inl<this->inlmax;inl++)
         {
@@ -66,10 +63,13 @@ void LCAO_Deepks::read_projected_DM(bool read_pdm_file, bool is_equiv, const Num
 
 //this subroutine performs the calculation of projected density matrices
 //pdm_m,m'=\sum_{mu,nu} rho_{mu,nu} <chi_mu|alpha_m><alpha_m'|chi_nu>
-void LCAO_Deepks::cal_projected_DM(const elecstate::DensityMatrix<double, double>* dm,
-                                   const UnitCell& ucell,
-                                   const LCAO_Orbitals& orb,
-                                   const Grid_Driver& GridD)
+template <typename TK>
+void LCAO_Deepks::cal_projected_DM(
+    const elecstate::DensityMatrix<TK, double>* dm, 
+    const UnitCell &ucell,
+    const LCAO_Orbitals &orb,
+    const Grid_Driver& GridD)
+
 {
     ModuleBase::TITLE("LCAO_Deepks", "cal_projected_DM");
 
@@ -106,7 +106,6 @@ void LCAO_Deepks::cal_projected_DM(const elecstate::DensityMatrix<double, double
     }
 
     const double Rcut_Alpha = orb.Alpha[0].getRcut();
-    int nrow = this->pv->nrow;
     for (int T0 = 0; T0 < ucell.ntype; T0++)
     {
 		Atom* atom0 = &ucell.atoms[T0]; 
@@ -174,18 +173,38 @@ void LCAO_Deepks::cal_projected_DM(const elecstate::DensityMatrix<double, double
                     continue;
                 }
 
+                ModuleBase::Vector3<double> dR1(GridD.getBox(ad1).x, GridD.getBox(ad1).y, GridD.getBox(ad1).z); 
+                key_tuple key_1(ibt1,dR1.x,dR1.y,dR1.z);
+                if constexpr (std::is_same<TK, std::complex<double>>::value)
+                {
+                    if(this->nlm_save_k[iat].find(key_1) == this->nlm_save_k[iat].end()) 
+                    { 
+                        continue;
+                    }
+                }
+
                 auto row_indexes = pv->get_indexes_row(ibt1);
                 const int row_size = row_indexes.size();
-                if(row_size == 0) { continue;
-}
+                if(row_size == 0) 
+                { 
+                    continue;
+                }
 
                 // no possible to unexist key
                 std::vector<double> s_1t(trace_alpha_size * row_size);
                 std::vector<double> g_1dmt(trace_alpha_size * row_size, 0.0);
                 for(int irow=0;irow<row_size;irow++)
                 {
-                    const double* row_ptr = this->nlm_save[iat][ad1][row_indexes[irow]][0].data();
-                    
+                    const double* row_ptr;
+                    if constexpr (std::is_same<TK, double>::value)
+                    {
+                        row_ptr = this->nlm_save[iat][ad1][row_indexes[irow]][0].data();
+                    }
+                    else
+                    {
+                        row_ptr = this->nlm_save_k[iat][key_1][row_indexes[irow]][0].data();
+                    }
+
                     for(int i=0;i<trace_alpha_size;i++)
                     {
                         s_1t[i * row_size + irow] = row_ptr[trace_alpha_row[i]];
@@ -201,6 +220,16 @@ void LCAO_Deepks::cal_projected_DM(const elecstate::DensityMatrix<double, double
 					const Atom* atom2 = &ucell.atoms[T2];
 					const int nw2_tot = atom2->nw*PARAM.globalv.npol;
 					
+                    ModuleBase::Vector3<double> dR2(GridD.getBox(ad2).x, GridD.getBox(ad2).y, GridD.getBox(ad2).z);
+					key_tuple key_2(ibt2,dR2.x,dR2.y,dR2.z);
+                    if constexpr (std::is_same<TK, std::complex<double>>::value)
+                    {
+                        if (this->nlm_save_k[iat].find(key_2) == this->nlm_save_k[iat].end()) 
+                        { 
+                            continue;
+                        }
+                    }
+
 					const double Rcut_AO2 = orb.Phi[T2].getRcut();
                 	const double dist2 = (tau2-tau0).norm() * ucell.lat0;
 
@@ -211,14 +240,24 @@ void LCAO_Deepks::cal_projected_DM(const elecstate::DensityMatrix<double, double
 
                     auto col_indexes = pv->get_indexes_col(ibt2);
                     const int col_size = col_indexes.size();
-                    if(col_size == 0) { continue;
-}
+                    if(col_size == 0) 
+                    { 
+                        continue;
+                    }
 
                     std::vector<double> s_2t(trace_alpha_size * col_size);
                     // no possible to unexist key
                     for(int icol=0;icol<col_size;icol++)
                     {
-                        const double* col_ptr = this->nlm_save[iat][ad2][col_indexes[icol]][0].data();
+                        const double* col_ptr;
+                        if constexpr (std::is_same<TK, double>::value)
+                        {
+                            col_ptr = this->nlm_save[iat][ad2][col_indexes[icol]][0].data();
+                        }
+                        else
+                        {
+                            col_ptr = this->nlm_save_k[iat][key_2][col_indexes[icol]][0].data();
+                        }
                         for(int i=0;i<trace_alpha_size;i++)
                         {
                             s_2t[i * col_size + icol] = col_ptr[trace_alpha_col[i]];
@@ -229,7 +268,20 @@ void LCAO_Deepks::cal_projected_DM(const elecstate::DensityMatrix<double, double
                     const double* dm_current = nullptr;
                     for(int is=0;is<dm->get_DMR_vector().size();is++)
                     {
-                        auto* tmp = dm->get_DMR_vector()[is]->find_matrix(ibt1, ibt2, 0, 0, 0);
+                        int dRx, dRy, dRz;
+                        if constexpr (std::is_same<TK, double>::value)
+                        {
+                            dRx = 0;
+                            dRy = 0;
+                            dRz = 0;
+                        }
+                        else
+                        {
+                            dRx = dR2.x - dR1.x;
+                            dRy = dR2.y - dR1.y;
+                            dRz = dR2.z - dR1.z;
+                        }
+                        auto* tmp = dm->get_DMR_vector()[is]->find_matrix(ibt1, ibt2, dRx, dRy, dRz);
                         if(tmp == nullptr)
                         {
                             // in case of no deepks_scf but out_deepks_label, size of DMR would mismatch with deepks-orbitals
@@ -242,263 +294,11 @@ void LCAO_Deepks::cal_projected_DM(const elecstate::DensityMatrix<double, double
                             dm_array[idm] += dm_current[idm];
                         }
                     }
-                    if(dm_current == nullptr) { continue; //skip the long range DM pair more than nonlocal term
-}
-                    dm_current = dm_array.data();
-                    //dgemm for s_2t and dm_current to get g_1dmt
-                    constexpr char transa='T', transb='N';
-                    const double gemm_alpha = 1.0, gemm_beta = 1.0;
-                    dgemm_(
-                        &transa, &transb, 
-                        &row_size, 
-                        &trace_alpha_size, 
-                        &col_size, 
-                        &gemm_alpha, 
-                        dm_current, 
-                        &col_size,
-                        s_2t.data(),  
-                        &col_size,     
-                        &gemm_beta,      
-                        g_1dmt.data(),    
-                        &row_size);
-				}//ad2
-                // do dot of g_1dmt and s_1t to get orbital_pdm_shell
-                if(!PARAM.inp.deepks_equiv)
-                {
-                    int ib=0, index=0, inc=1;
-                    for (int L0 = 0; L0 <= orb.Alpha[0].getLmax();++L0)
-                    {
-                        for (int N0 = 0;N0 < orb.Alpha[0].getNchi(L0);++N0)
-                        {
-                            const int inl = this->inl_index[T0](I0, L0, N0);
-                            const int nm = 2*L0+1;
-                    
-                            for (int m1=0; m1<nm; ++m1) // m1 = 1 for s, 3 for p, 5 for d
-                            {
-                                for (int m2=0; m2<nm; ++m2) // m1 = 1 for s, 3 for p, 5 for d
-                                {
-                                    int ind = m1*nm + m2;
-                                    pdm[inl][ind] += 
-                                        ddot_(&row_size, g_1dmt.data()+index*row_size, &inc, s_1t.data()+index*row_size, &inc);
-                                    index++;
-                                }
-                            }
-                            ib+=nm;
-                        }
-                    }
-                }
-                else
-                {
-                    int index = 0, inc=1;
-                    int nproj = 0;
-                    for(int il = 0; il < this->lmaxd + 1; il++)
-                    {
-                        nproj += (2 * il + 1) * orb.Alpha[0].getNchi(il);
-                    }
-                    for(int iproj = 0; iproj < nproj; iproj ++)
-                    {
-                        for(int jproj = 0; jproj < nproj; jproj ++)
-                        {
-                            pdm[iat][iproj * nproj + jproj] += 
-                                ddot_(&row_size, g_1dmt.data()+index*row_size, &inc, s_1t.data()+index*row_size, &inc);
-                            index ++;
-                        }
-                    }
-                }
-			}//ad1
-        }//I0
-    }//T0
-
-#ifdef __MPI
-    allsum_deepks(this->inlmax,pdm_size,this->pdm);
-#endif
-    ModuleBase::timer::tick("LCAO_Deepks","cal_projected_DM"); 
-    return;
-}
-
-void LCAO_Deepks::cal_projected_DM(const elecstate::DensityMatrix<std::complex<double>, double>* dm,
-                                   const UnitCell& ucell,
-                                   const LCAO_Orbitals& orb,
-                                   const Grid_Driver& GridD)
-{
-    // if pdm has been initialized, skip the calculation
-    if(this->init_pdm)
-    {
-        this->init_pdm = false;
-        return;
-    }
-
-    int pdm_size = 0;
-    if(!PARAM.inp.deepks_equiv)
-    {
-        pdm_size = (this->lmaxd * 2 + 1) * (this->lmaxd * 2 + 1);
-    }
-    else
-    {
-        int nproj = 0;
-        for(int il = 0; il < this->lmaxd + 1; il++)
-        {
-            nproj += (2 * il + 1) * orb.Alpha[0].getNchi(il);
-        }
-        pdm_size = nproj * nproj;        
-    }
-
-    //check for skipping
-    //if(dm.size() == 0 || dm[0].size() == 0)
-    //{
-    //    return;
-    //}
-    ModuleBase::timer::tick("LCAO_Deepks","cal_projected_DM_k");
-
-    for(int inl=0;inl<inlmax;inl++)
-    {
-        ModuleBase::GlobalFunc::ZEROS(pdm[inl],pdm_size);
-    }
-
-    const double Rcut_Alpha = orb.Alpha[0].getRcut();
-    for (int T0 = 0; T0 < ucell.ntype; T0++)
-    {
-		Atom* atom0 = &ucell.atoms[T0]; 
-        for (int I0 =0; I0< atom0->na; I0++)
-        {
-            const int iat = ucell.itia2iat(T0,I0);
-            const ModuleBase::Vector3<double> tau0 = atom0->tau[I0];
-            GridD.Find_atom(ucell, atom0->tau[I0] ,T0, I0);
-
-            //trace alpha orbital
-            std::vector<int> trace_alpha_row;
-            std::vector<int> trace_alpha_col;
-            if(!PARAM.inp.deepks_equiv)
-            {
-                int ib=0;
-                for (int L0 = 0; L0 <= orb.Alpha[0].getLmax();++L0)
-                {
-                    for (int N0 = 0;N0 < orb.Alpha[0].getNchi(L0);++N0)
-                    {
-                        const int inl = this->inl_index[T0](I0, L0, N0);
-                        const int nm = 2*L0+1;
-                
-                        for (int m1=0; m1<nm; ++m1) // m1 = 1 for s, 3 for p, 5 for d
-                        {
-                            for (int m2=0; m2<nm; ++m2) // m1 = 1 for s, 3 for p, 5 for d
-                            {
-                                trace_alpha_row.push_back(ib+m1);
-                                trace_alpha_col.push_back(ib+m2);
-                            }
-                        }
-                        ib+=nm;
-                    }
-                }
-            }
-            else
-            {
-                int nproj = 0;
-                for(int il = 0; il < this->lmaxd + 1; il++)
-                {
-                    nproj += (2 * il + 1) * orb.Alpha[0].getNchi(il);
-                }
-                for(int iproj = 0; iproj < nproj; iproj ++)
-                {
-                    for(int jproj = 0; jproj < nproj; jproj ++)
-                    {
-                        trace_alpha_row.push_back(iproj);
-                        trace_alpha_col.push_back(jproj);
+                    if(dm_current == nullptr) 
+                    { 
+                        continue; //skip the long range DM pair more than nonlocal term
                     }
-                }                
-            }
-            const int trace_alpha_size = trace_alpha_row.size();
 
-            for (int ad1=0; ad1<GridD.getAdjacentNum()+1 ; ++ad1)
-            {
-                const int T1 = GridD.getType(ad1);
-                const int I1 = GridD.getNatom(ad1);
-                const int ibt1 = ucell.itia2iat(T1,I1);
-                const int start1 = ucell.itiaiw2iwt(T1, I1, 0);
-                const ModuleBase::Vector3<double> tau1 = GridD.getAdjacentTau(ad1);
-				const Atom* atom1 = &ucell.atoms[T1];
-				const int nw1_tot = atom1->nw*PARAM.globalv.npol;
-				const double Rcut_AO1 = orb.Phi[T1].getRcut();
-                const double dist1 = (tau1-tau0).norm() * ucell.lat0;
-                if (dist1 >= Rcut_Alpha + Rcut_AO1)
-                {
-                    continue;
-                }
-
-                ModuleBase::Vector3<double> dR1(GridD.getBox(ad1).x, GridD.getBox(ad1).y, GridD.getBox(ad1).z); 
-
-                auto row_indexes = pv->get_indexes_row(ibt1);
-                const int row_size = row_indexes.size();
-                if(row_size == 0) { continue;
-}
-
-                key_tuple key_1(ibt1,dR1.x,dR1.y,dR1.z);
-                if(this->nlm_save_k[iat].find(key_1) == this->nlm_save_k[iat].end()) { continue;
-}
-                std::vector<double> s_1t(trace_alpha_size * row_size);
-                std::vector<double> g_1dmt(trace_alpha_size * row_size, 0.0);
-                for(int irow=0;irow<row_size;irow++)
-                {
-                    const double* row_ptr = this->nlm_save_k[iat][key_1][row_indexes[irow]][0].data();
-                    for(int i=0;i<trace_alpha_size;i++)
-                    {
-                        s_1t[i * row_size + irow] = row_ptr[trace_alpha_row[i]];
-                    }
-                }
-
-				for (int ad2=0; ad2 < GridD.getAdjacentNum()+1 ; ad2++)
-				{
-					const int T2 = GridD.getType(ad2);
-					const int I2 = GridD.getNatom(ad2);
-                    const int ibt2 = ucell.itia2iat(T2,I2);
-					const ModuleBase::Vector3<double> tau2 = GridD.getAdjacentTau(ad2);
-					const Atom* atom2 = &ucell.atoms[T2];
-					const int nw2_tot = atom2->nw*PARAM.globalv.npol;
-                    ModuleBase::Vector3<double> dR2(GridD.getBox(ad2).x, GridD.getBox(ad2).y, GridD.getBox(ad2).z);
-					
-					const double Rcut_AO2 = orb.Phi[T2].getRcut();
-                	const double dist2 = (tau2-tau0).norm() * ucell.lat0;
-
-					if (dist2 >= Rcut_Alpha + Rcut_AO2)
-					{
-						continue;
-					}
-
-                    auto col_indexes = pv->get_indexes_col(ibt2);
-                    const int col_size = col_indexes.size();
-                    if(col_size == 0) { continue;
-}
-
-                    std::vector<double> s_2t(trace_alpha_size * col_size);
-                    key_tuple key_2(ibt2,dR2.x,dR2.y,dR2.z);
-                    if(this->nlm_save_k[iat].find(key_2) == this->nlm_save_k[iat].end()) { continue;
-}
-                    for(int icol=0;icol<col_size;icol++)
-                    {
-                        const double* col_ptr = this->nlm_save_k[iat][key_2][col_indexes[icol]][0].data();
-                        for(int i=0;i<trace_alpha_size;i++)
-                        {
-                            s_2t[i * col_size + icol] = col_ptr[trace_alpha_col[i]];
-                        }
-                    }
-                    // prepare DM_gamma from DMR
-                    std::vector<double> dm_array(row_size*col_size, 0.0);
-                    const double* dm_current;
-                    for(int is=0;is<dm->get_DMR_vector().size();is++)
-                    {
-                        auto tmp_matrix = dm->get_DMR_vector()[is]->find_matrix(ibt1, ibt2, (dR2-dR1).x, (dR2-dR1).y, (dR2-dR1).z);
-                        if(tmp_matrix == nullptr)
-                        {
-                            dm_current = nullptr;
-                            break;
-                        }
-                        dm_current = tmp_matrix->get_pointer();
-                        for(int idm=0;idm<row_size*col_size;idm++)
-                        {
-                            dm_array[idm] += dm_current[idm];
-                        }
-                    }
-                    if(dm_current == nullptr) { continue;
-}
                     dm_current = dm_array.data();
                     //dgemm for s_2t and dm_current to get g_1dmt
                     constexpr char transa='T', transb='N';
@@ -558,7 +358,7 @@ void LCAO_Deepks::cal_projected_DM(const elecstate::DensityMatrix<std::complex<d
                                 ddot_(&row_size, g_1dmt.data()+index*row_size, &inc, s_1t.data()+index*row_size, &inc);
                             index ++;
                         }
-                    }                    
+                    }
                 }
 			}//ad1
         }//I0
@@ -567,9 +367,8 @@ void LCAO_Deepks::cal_projected_DM(const elecstate::DensityMatrix<std::complex<d
 #ifdef __MPI
     allsum_deepks(this->inlmax,pdm_size,this->pdm);
 #endif
-    ModuleBase::timer::tick("LCAO_Deepks","cal_projected_DM_k");
+    ModuleBase::timer::tick("LCAO_Deepks","cal_projected_DM"); 
     return;
-    
 }
 
 void LCAO_Deepks::check_projected_dm()
@@ -589,4 +388,16 @@ void LCAO_Deepks::check_projected_dm()
     }
 }
 
+template void LCAO_Deepks::cal_projected_DM<double>(
+    const elecstate::DensityMatrix<double, double>* dm, 
+    const UnitCell& ucell, 
+    const LCAO_Orbitals& orb, 
+    const Grid_Driver& GridD);
+
+template void LCAO_Deepks::cal_projected_DM<std::complex<double>>(
+    const elecstate::DensityMatrix<std::complex<double>, double>* dm, 
+    const UnitCell& ucell, 
+    const LCAO_Orbitals& orb, 
+    const Grid_Driver& GridD);
+
 #endif
diff --git a/source/module_hamilt_lcao/module_deepks/LCAO_deepks_vdelta.cpp b/source/module_hamilt_lcao/module_deepks/LCAO_deepks_vdelta.cpp
index 9a1f151f70..9bf6b08c21 100644
--- a/source/module_hamilt_lcao/module_deepks/LCAO_deepks_vdelta.cpp
+++ b/source/module_hamilt_lcao/module_deepks/LCAO_deepks_vdelta.cpp
@@ -6,8 +6,7 @@
 //tr (rho * V_delta)
 
 //Four subroutines are contained in the file:
-//5. cal_e_delta_band : calculates e_delta_bands for gamma only
-//6. cal_e_delta_band_k : counterpart of 4, for multi-k
+//5. cal_e_delta_band : calculates e_delta_bands
 
 #ifdef __DEEPKS
 
@@ -17,43 +16,12 @@
 #include "module_base/timer.h"
 
 //calculating sum of correction band energies
-//for gamma_only calculations
-void LCAO_Deepks::cal_e_delta_band(const std::vector<std::vector<double>>& dm, const int /*nks*/)
+template <typename TK>
+void LCAO_Deepks::cal_e_delta_band(const std::vector<std::vector<TK>>& dm, const int nks)
 {
     ModuleBase::TITLE("LCAO_Deepks", "cal_e_delta_band");
-    this->e_delta_band = 0;
-    for (int i = 0; i < PARAM.globalv.nlocal; ++i)
-    {
-        for (int j = 0; j < PARAM.globalv.nlocal; ++j)
-        {
-            const int mu = pv->global2local_row(j);
-            const int nu = pv->global2local_col(i);
-            
-            if (mu >= 0 && nu >= 0)
-            {                
-                const int index = nu * pv->nrow + mu;
-                for (int is = 0; is < dm.size(); ++is)  //dm.size() == PARAM.inp.nspin
-                {
-                    //this->e_delta_band += dm[is](nu, mu) * this->H_V_delta[index];
-					this->e_delta_band += dm[is][nu*this->pv->nrow+mu] * this->H_V_delta[0][index];
-                }
-            }
-        }
-    }
-#ifdef __MPI
-    Parallel_Reduce::reduce_all(this->e_delta_band);
-#endif
-    return;
-}
-
-//calculating sum of correction band energies
-//for multi_k calculations
-void LCAO_Deepks::cal_e_delta_band(const std::vector<std::vector<std::complex<double>>>& dm,
-    const int nks)
-{
-    ModuleBase::TITLE("LCAO_Deepks", "cal_e_delta_band");
-	ModuleBase::timer::tick("LCAO_Deepks","cal_e_delta_band_k");
-    std::complex<double> e_delta_band_k=std::complex<double>(0.0,0.0);
+    ModuleBase::timer::tick("LCAO_Deepks","cal_e_delta_band");
+    TK e_delta_band_tmp = TK(0);
     for (int i = 0; i < PARAM.globalv.nlocal; ++i)
     {
         for (int j = 0; j < PARAM.globalv.nlocal; ++j)
@@ -72,21 +40,40 @@ void LCAO_Deepks::cal_e_delta_band(const std::vector<std::vector<std::complex<do
                 {
                     iic = mu * pv->ncol + nu;
                 }
-                for(int ik=0;ik<nks;ik++)
+                if constexpr (std::is_same<TK, double>::value)
+                {
+                    for (int is = 0; is < dm.size(); ++is)  //dm.size() == PARAM.inp.nspin
+                    {
+                        e_delta_band_tmp += dm[is][nu * this->pv->nrow + mu] * this->H_V_delta[0][iic];
+                    }
+                }
+                else
                 {
-                    //e_delta_band_k += dm[ik](nu, mu) * this->H_V_delta_k[ik][iic];
-					e_delta_band_k += dm[ik][nu * this->pv->nrow + mu] * this->H_V_delta_k[ik][iic];
+                    for(int ik = 0; ik < nks; ik++)
+                    {
+                        e_delta_band_tmp += dm[ik][nu * this->pv->nrow + mu] * this->H_V_delta_k[ik][iic];
+                    }
                 }
+                
             }
         }
     }
-
-    this->e_delta_band = e_delta_band_k.real();
+    if constexpr (std::is_same<TK, double>::value)
+    {
+        this->e_delta_band = e_delta_band_tmp;
+    }
+    else
+    {
+        this->e_delta_band = e_delta_band_tmp.real();
+    }
 #ifdef __MPI
     Parallel_Reduce::reduce_all(this->e_delta_band);
 #endif
-	ModuleBase::timer::tick("LCAO_Deepks","cal_e_delta_band_k");
+    ModuleBase::timer::tick("LCAO_Deepks","cal_e_delta_band");
     return;
 }
 
+template void LCAO_Deepks::cal_e_delta_band<double>(const std::vector<std::vector<double>>& dm, const int nks);
+template void LCAO_Deepks::cal_e_delta_band<std::complex<double>>(const std::vector<std::vector<std::complex<double>>>& dm, const int nks);
+
 #endif
diff --git a/source/module_hamilt_lcao/module_deepks/cal_gdmx.cpp b/source/module_hamilt_lcao/module_deepks/cal_gdmx.cpp
index cbbbac3b42..b83e40f2ec 100644
--- a/source/module_hamilt_lcao/module_deepks/cal_gdmx.cpp
+++ b/source/module_hamilt_lcao/module_deepks/cal_gdmx.cpp
@@ -14,6 +14,11 @@
 /// be calculated: 
 /// gdm_epsl = d/d\epsilon_{ab} * 
 ///           sum_{mu,nu} rho_{mu,nu} <chi_mu|alpha_m><alpha_m'|chi_nu>
+
+//There are 2 subroutines in this file:
+//1. cal_gdmx, calculating gdmx (and optionally gdm_epsl for stress) for gamma point
+//2. check_gdmx, which prints gdmx to a series of .dat files
+
 template <typename TK>
 void LCAO_Deepks::cal_gdmx(const std::vector<std::vector<TK>>& dm,
                            const UnitCell& ucell,
diff --git a/source/module_hamilt_lcao/module_deepks/deepks_fgamma.cpp b/source/module_hamilt_lcao/module_deepks/deepks_fgamma.cpp
index 8649aeb978..60077ebdac 100644
--- a/source/module_hamilt_lcao/module_deepks/deepks_fgamma.cpp
+++ b/source/module_hamilt_lcao/module_deepks/deepks_fgamma.cpp
@@ -16,6 +16,7 @@
 #include "module_hamilt_lcao/module_hcontainer/atom_pair.h"
 #include "module_base/libm/libm.h"
 
+typedef std::tuple<int, int, int, int> key_tuple;
 
 //force for gamma only calculations
 //Pulay and HF terms are calculated together
@@ -23,9 +24,11 @@ void DeePKS_domain::cal_f_delta_gamma(
     const std::vector<std::vector<double>>& dm,
     const UnitCell& ucell,
     const LCAO_Orbitals& orb,
-    const Grid_Driver& gd,
+    const Grid_Driver& GridD,
     const Parallel_Orbitals& pv,
     const int lmaxd,
+    const int nks,
+    const std::vector<ModuleBase::Vector3<double>> &kvec_d,
     std::vector<std::vector<std::unordered_map<int, std::vector<std::vector<double>>>>>& nlm_save,
     double** gedm,
     ModuleBase::IntArray* inl_index,
@@ -34,6 +37,9 @@ void DeePKS_domain::cal_f_delta_gamma(
     ModuleBase::matrix& svnl_dalpha)
 {
     ModuleBase::TITLE("DeePKS_domain", "cal_f_delta_gamma");
+
+    using TK = double; // temporary used, will be replaced by the template parameter
+
     f_delta.zero_out();
 
     const double Rcut_Alpha = orb.Alpha[0].getRcut();
@@ -47,26 +53,29 @@ void DeePKS_domain::cal_f_delta_gamma(
         {
             const int iat = ucell.itia2iat(T0,I0);
             const ModuleBase::Vector3<double> tau0 = atom0->tau[I0];
-            gd.Find_atom(ucell, atom0->tau[I0] ,T0, I0);
+            GridD.Find_atom(ucell, atom0->tau[I0] ,T0, I0);
 
-            for (int ad1=0; ad1<gd.getAdjacentNum()+1 ; ++ad1)
+            for (int ad1=0; ad1<GridD.getAdjacentNum()+1 ; ++ad1)
             {
-                const int T1 = gd.getType(ad1);
-                const int I1 = gd.getNatom(ad1);
+                const int T1 = GridD.getType(ad1);
+                const int I1 = GridD.getNatom(ad1);
                 const int ibt1 = ucell.itia2iat(T1,I1);
-                const ModuleBase::Vector3<double> tau1 = gd.getAdjacentTau(ad1);
+                const ModuleBase::Vector3<double> tau1 = GridD.getAdjacentTau(ad1);
                 const Atom* atom1 = &ucell.atoms[T1];
                 const int nw1_tot = atom1->nw*PARAM.globalv.npol;
                 const double Rcut_AO1 = orb.Phi[T1].getRcut();
 
-                for (int ad2=0; ad2 < gd.getAdjacentNum()+1 ; ad2++)
+                ModuleBase::Vector3<double> dR1(GridD.getBox(ad1).x, GridD.getBox(ad1).y, GridD.getBox(ad1).z);
+
+                for (int ad2=0; ad2 < GridD.getAdjacentNum()+1 ; ad2++)
                 {
-                    const int T2 = gd.getType(ad2);
-                    const int I2 = gd.getNatom(ad2);
+                    const int T2 = GridD.getType(ad2);
+                    const int I2 = GridD.getNatom(ad2);
                     const int ibt2 = ucell.itia2iat(T2,I2);
-                    const ModuleBase::Vector3<double> tau2 = gd.getAdjacentTau(ad2);
+                    const ModuleBase::Vector3<double> tau2 = GridD.getAdjacentTau(ad2);
                     const Atom* atom2 = &ucell.atoms[T2];
                     const int nw2_tot = atom2->nw*PARAM.globalv.npol;
+                    ModuleBase::Vector3<double> dR2(GridD.getBox(ad2).x, GridD.getBox(ad2).y, GridD.getBox(ad2).z);
                     
                     const double Rcut_AO2 = orb.Phi[T2].getRcut();
                     const double dist1 = (tau1-tau0).norm() * ucell.lat0;
@@ -98,21 +107,58 @@ void DeePKS_domain::cal_f_delta_gamma(
 						continue;
 					}
 
-                    hamilt::AtomPair<double> dm_pair(ibt1, ibt2, 0, 0, 0, &pv);
+                    int dRx;
+                    int dRy;
+                    int dRz;
+                    if constexpr (std::is_same<TK, double>::value)
+                    {
+                        dRx = 0;
+                        dRy = 0;
+                        dRz = 0;
+                    }
+                    else
+                    {
+                        dRx = dR2.x - dR1.x;
+                        dRy = dR2.y - dR1.y;
+                        dRz = dR2.z - dR1.z;
+                    }
+
+                    hamilt::AtomPair<double> dm_pair(ibt1, ibt2, dRx, dRy, dRz, &pv);
 
                     dm_pair.allocate(nullptr, 1);
 
-                    for(int is=0;is<dm.size();is++)
+                    if constexpr (std::is_same<TK, double>::value)
                     {
-                        if(ModuleBase::GlobalFunc::IS_COLUMN_MAJOR_KS_SOLVER(PARAM.inp.ks_solver))
-                        {
-                            dm_pair.add_from_matrix(dm[is].data(), pv.get_row_size(), 1.0, 1);
-                        }
-                        else
+                        for(int is=0;is<dm.size();is++)
                         {
-                            dm_pair.add_from_matrix(dm[is].data(), pv.get_col_size(), 1.0, 0);
+                            if(ModuleBase::GlobalFunc::IS_COLUMN_MAJOR_KS_SOLVER(PARAM.inp.ks_solver))
+                            {
+                                dm_pair.add_from_matrix(dm[is].data(), pv.get_row_size(), 1.0, 1);
+                            }
+                            else
+                            {
+                                dm_pair.add_from_matrix(dm[is].data(), pv.get_col_size(), 1.0, 0);
+                            }
                         }
                     }
+                    else
+                    {
+                        // for(int ik=0;ik<nks;ik++)
+                        // {
+                        //     const double arg = - (kvec_d[ik] * (dR2-dR1) ) * ModuleBase::TWO_PI;
+                        //     double sinp, cosp;
+                        //     ModuleBase::libm::sincos(arg, &sinp, &cosp);
+                        //     const std::complex<double> kphase = std::complex<double>(cosp, sinp);
+                        //     if(ModuleBase::GlobalFunc::IS_COLUMN_MAJOR_KS_SOLVER(PARAM.inp.ks_solver))
+                        //     {
+                        //         dm_pair.add_from_matrix(dm[ik].data(), pv.get_row_size(), kphase, 1);
+                        //     }
+                        //     else
+                        //     {
+                        //         dm_pair.add_from_matrix(dm[ik].data(), pv.get_col_size(), kphase, 0);
+                        //     }
+                        // }
+                    }
 
                     const double* dm_current = dm_pair.get_pointer();
 
@@ -122,13 +168,27 @@ void DeePKS_domain::cal_f_delta_gamma(
                         {
                             double nlm[3]={0,0,0};
                             double nlm_t[3] = {0,0,0}; //for stress
-                            std::vector<double> nlm1 = nlm_save[iat][ad1][row_indexes[iw1]][0];
+                            key_tuple key_1(ibt1,dR1.x,dR1.y,dR1.z);
+                            key_tuple key_2(ibt2,dR2.x,dR2.y,dR2.z);
+                            std::vector<double> nlm1;
                             std::vector<std::vector<double>> nlm2;
                             nlm2.resize(3);
 
-                            for(int dim=0;dim<3;++dim)
+                            if constexpr (std::is_same<TK, double>::value)
+                            {
+                                nlm1 = nlm_save[iat][ad1][row_indexes[iw1]][0];
+                                for(int dim=0;dim<3;dim++)
+                                {
+                                    nlm2[dim] = nlm_save[iat][ad2][col_indexes[iw2]][dim+1];
+                                }
+                            }
+                            else
                             {
-                                nlm2[dim] = nlm_save[iat][ad2][col_indexes[iw2]][dim+1];
+                                // nlm1 = nlm_save_k[iat][key_1][row_indexes[iw1]][0];
+                                // for(int dim=0;dim<3;dim++)
+                                // {
+                                //     nlm2[dim] = nlm_save_k[iat][key_2][col_indexes[iw2]][dim+1];
+                                // }
                             }
 
                             assert(nlm1.size()==nlm2[0].size());
@@ -188,10 +248,21 @@ void DeePKS_domain::cal_f_delta_gamma(
 
                             if(isstress)
                             {
-                                nlm1 = nlm_save[iat][ad2][col_indexes[iw2]][0];
-                                for(int i=0;i<3;i++)
+                                if constexpr (std::is_same<TK, double>::value)
                                 {
-                                    nlm2[i] = nlm_save[iat][ad1][row_indexes[iw1]][i+1];
+                                    nlm1 = nlm_save[iat][ad2][col_indexes[iw2]][0];
+                                    for(int i=0;i<3;i++)
+                                    {
+                                        nlm2[i] = nlm_save[iat][ad1][row_indexes[iw1]][i+1];
+                                    }
+                                }
+                                else
+                                {
+                                    // nlm1 = nlm_save_k[iat][key_2][col_indexes[iw2]][0];
+                                    // for(int i=0;i<3;i++)
+                                    // {
+                                    //     nlm2[i] = nlm_save_k[iat][key_1][row_indexes[iw1]][i+1];
+                                    // }
                                 }
 
                                 assert(nlm1.size()==nlm2[0].size());                                
@@ -265,8 +336,10 @@ void DeePKS_domain::cal_f_delta_gamma(
 		{
 			for(int j=0;j<3;++j)
 			{
-				if(j>i) { svnl_dalpha(j,i) = svnl_dalpha(i,j);
-}
+				if(j>i) 
+                { 
+                    svnl_dalpha(j,i) = svnl_dalpha(i,j);
+                }
 				svnl_dalpha(i,j) *= weight;
 			}
 		}
diff --git a/source/module_hamilt_lcao/module_deepks/deepks_fk.cpp b/source/module_hamilt_lcao/module_deepks/deepks_fk.cpp
index 1d26f0adff..8a724bf098 100644
--- a/source/module_hamilt_lcao/module_deepks/deepks_fk.cpp
+++ b/source/module_hamilt_lcao/module_deepks/deepks_fk.cpp
@@ -47,13 +47,11 @@ void DeePKS_domain::cal_f_delta_k(
             const ModuleBase::Vector3<double> tau0 = atom0->tau[I0];
             GridD.Find_atom(ucell, atom0->tau[I0] ,T0, I0);
 	
-
             for (int ad1=0; ad1<GridD.getAdjacentNum()+1 ; ++ad1)
             {
                 const int T1 = GridD.getType(ad1);
                 const int I1 = GridD.getNatom(ad1);
                 const int ibt1 = ucell.itia2iat(T1,I1);
-                const int start1 = ucell.itiaiw2iwt(T1, I1, 0);
                 const ModuleBase::Vector3<double> tau1 = GridD.getAdjacentTau(ad1);
                 const Atom* atom1 = &ucell.atoms[T1];
                 const int nw1_tot = atom1->nw*PARAM.globalv.npol;
@@ -66,7 +64,6 @@ void DeePKS_domain::cal_f_delta_k(
                     const int T2 = GridD.getType(ad2);
                     const int I2 = GridD.getNatom(ad2);
                     const int ibt2 = ucell.itia2iat(T2,I2);
-                    const int start2 = ucell.itiaiw2iwt(T2, I2, 0);
                     const ModuleBase::Vector3<double> tau2 = GridD.getAdjacentTau(ad2);
                     const Atom* atom2 = &ucell.atoms[T2];
                     const int nw2_tot = atom2->nw*PARAM.globalv.npol;
diff --git a/source/module_hamilt_lcao/module_deepks/deepks_force.h b/source/module_hamilt_lcao/module_deepks/deepks_force.h
index 5ecf7d0f0e..58474b9d56 100644
--- a/source/module_hamilt_lcao/module_deepks/deepks_force.h
+++ b/source/module_hamilt_lcao/module_deepks/deepks_force.h
@@ -32,19 +32,22 @@ namespace DeePKS_domain
     // 2. cal_f_delta_k, which is used for multi-k calculation
     // 3. check_f_delta, which prints F_delta into F_delta.dat for checking
 
-    // for gamma only, pulay and HF terms of force are calculated together
-void cal_f_delta_gamma(const std::vector<std::vector<double>>& dm,
-                       const UnitCell& ucell,
-                       const LCAO_Orbitals& orb,
-                       const Grid_Driver& gd,
-                       const Parallel_Orbitals& pv,
-                       const int lmaxd,
-                       std::vector<std::vector<std::unordered_map<int, std::vector<std::vector<double>>>>>& nlm_save,
-                       double** gedm,
-                       ModuleBase::IntArray* inl_index,
-                       ModuleBase::matrix& f_delta,
-                       const bool isstress,
-                       ModuleBase::matrix& svnl_dalpha);
+// for gamma only, pulay and HF terms of force are calculated together
+void cal_f_delta_gamma(
+    const std::vector<std::vector<double>>& dm,
+    const UnitCell& ucell,
+    const LCAO_Orbitals& orb,
+    const Grid_Driver& GridD,
+    const Parallel_Orbitals& pv,
+    const int lmaxd,
+    const int nks,
+    const std::vector<ModuleBase::Vector3<double>>& kvec_d,
+    std::vector<std::vector<std::unordered_map<int, std::vector<std::vector<double>>>>>& nlm_save,
+    double** gedm,
+    ModuleBase::IntArray* inl_index,
+    ModuleBase::matrix& f_delta,
+    const bool isstress,
+    ModuleBase::matrix& svnl_dalpha);
 
 // for multi-k, pulay and HF terms of force are calculated together
 
diff --git a/source/module_hamilt_lcao/module_deepks/test/LCAO_deepks_test.cpp b/source/module_hamilt_lcao/module_deepks/test/LCAO_deepks_test.cpp
index c79536a72a..ae8384057c 100644
--- a/source/module_hamilt_lcao/module_deepks/test/LCAO_deepks_test.cpp
+++ b/source/module_hamilt_lcao/module_deepks/test/LCAO_deepks_test.cpp
@@ -115,7 +115,7 @@ void test_deepks::check_pdm()
     {
         this->read_dm_k(kv.get_nkstot());
         this->set_dm_k_new();
-        this->ld.cal_projected_DM_k(dm_k_new, ucell, ORB, Test_Deepks::GridD);
+        this->ld.cal_projected_DM(dm_k_new, ucell, ORB, Test_Deepks::GridD);
     }
     this->ld.check_projected_dm();
     this->compare_with_ref("pdm.dat", "pdm_ref.dat");
@@ -221,12 +221,12 @@ void test_deepks::check_e_deltabands()
 {
     if (PARAM.sys.gamma_only_local)
     {
-        this->ld.cal_e_delta_band(dm_new);
+        this->ld.cal_e_delta_band(dm_new, 1); // 1 for gamma-only
     }
     else
     {
         this->folding_nnr(kv);
-        this->ld.cal_e_delta_band_k(dm_k_new, kv.get_nkstot());
+        this->ld.cal_e_delta_band(dm_k_new, kv.get_nkstot());
     }
 
     std::ofstream ofs("E_delta_bands.dat");
@@ -241,7 +241,8 @@ void test_deepks::check_f_delta()
     svnl_dalpha.create(3, 3);
     if (PARAM.sys.gamma_only_local)
     {
-        ld.cal_f_delta_gamma(dm_new, ucell, ORB, Test_Deepks::GridD, 1, svnl_dalpha);
+        const int nks=1;
+        ld.cal_f_delta_gamma(dm_new, ucell, ORB, Test_Deepks::GridD, nks, kv.kvec_d, 1, svnl_dalpha);
     }
     else
     {