deepmodeling
diff --git a/‎CMakeLists.txt‎
Lines changed: 1 addition & 0 deletions b/‎CMakeLists.txt‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎docs/advanced/input_files/input-main.md‎
Lines changed: 8 additions & 7 deletions b/‎docs/advanced/input_files/input-main.md‎
Lines changed: 8 additions & 7 deletions
diff --git a/‎source/module_basis/module_pw/module_fft/fft_base.h‎
Lines changed: 15 additions & 7 deletions b/‎source/module_basis/module_pw/module_fft/fft_base.h‎
Lines changed: 15 additions & 7 deletions
diff --git a/‎source/module_basis/module_pw/module_fft/fft_bundle.cpp‎
Lines changed: 13 additions & 6 deletions b/‎source/module_basis/module_pw/module_fft/fft_bundle.cpp‎
Lines changed: 13 additions & 6 deletions
diff --git a/‎source/module_basis/module_pw/module_fft/fft_bundle.h‎
Lines changed: 13 additions & 0 deletions b/‎source/module_basis/module_pw/module_fft/fft_bundle.h‎
Lines changed: 13 additions & 0 deletions
diff --git a/‎source/module_basis/module_pw/module_fft/fft_dsp.cpp‎
Lines changed: 3 additions & 1 deletion b/‎source/module_basis/module_pw/module_fft/fft_dsp.cpp‎
Lines changed: 3 additions & 1 deletion
diff --git a/‎source/module_basis/module_pw/module_fft/fft_dsp.h‎
Lines changed: 7 additions & 1 deletion b/‎source/module_basis/module_pw/module_fft/fft_dsp.h‎
Lines changed: 7 additions & 1 deletion
diff --git a/‎source/module_basis/module_pw/pw_basis_k.h‎
Lines changed: 1 addition & 1 deletion b/‎source/module_basis/module_pw/pw_basis_k.h‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎source/module_basis/module_pw/pw_transform_k_dsp.cpp‎
Lines changed: 37 additions & 19 deletions b/‎source/module_basis/module_pw/pw_transform_k_dsp.cpp‎
Lines changed: 37 additions & 19 deletions
diff --git a/‎source/module_esolver/esolver_ks_lcao.cpp‎
Lines changed: 0 additions & 1 deletion b/‎source/module_esolver/esolver_ks_lcao.cpp‎
Lines changed: 0 additions & 1 deletion
@@ -269,6 +269,7 @@ if(ENABLE_MPI)
   list(APPEND math_libs MPI::MPI_CXX)
 endif()
 
+
 if (USE_DSP)
   add_compile_definitions(__DSP)
   target_link_libraries(${ABACUS_BIN_NAME} ${OMPI_LIBRARY1})
 
@@ -1648,21 +1648,22 @@ These variables are used to control the output of properties.
 
 - **Type**: Boolean
 - **Availability**: Numerical atomic orbital basis
-- **Description**: Whether to output the density matrix of localized orbitals into files in the folder `OUT.${suffix}`. The files are named as:
+- **Description**: Whether to output the density matrix for each k-point into files in the folder `OUT.${suffix}`. The files are named as:
   - For gamma only case:
-    - nspin = 1: SPIN1_DM;
-    - nspin = 2: SPIN1_DM, and SPIN2_DM.
+    - nspin = 1: `dms1_nao.csr`;
+    - nspin = 2: `dms1_nao.csr` and `dms2_nao.csr` for the two spin channels. 
   - For multi-k points case:
-    - SPIN\*_K\*_DM, where \* stands for index of spin and kpoints;
+    - nspin = 1: `dms1k1_nao.csr`, `dms1k2_nao.csr`, ...;
+    - nspin = 2: `dms1k1_nao.csr`... and `dms2k1_nao.csr`... for the two spin channels. 
 - **Default**: False
 
 ### out_dm1
 
 - **Type**: Boolean
 - **Availability**: Numerical atomic orbital basis (multi-k points)
-- **Description**: Whether to output the density matrix of localized orbitals into files in the folder `OUT.${suffix}`. The density matrices are written in the format of sparse matrices, as mentioned in [out_mat_hs2](#out_mat_hs2). The files are named as:
-  - nspin = 1: data-DMR-sparse_SPIN0.csr;
-  - nspin = 2: data-DMR-sparse_SPIN0.csr, and data-DMR-sparse_SPIN1.csr.
+- **Description**: Whether to output the density matrix with Bravais lattice vector R, labelled as DM(R), into files in the folder `OUT.${suffix}`. The files are named as `dmr{s}{spin index}{g}{geometry index}{_nao} + {".csr"}`. Here, 's' refers to spin, where s1 means the spin-up channel and s2 means the spin-down channel, and the sparse matrix format 'csr' is described in [out_mat_hs2](#out_mat_hs2). Finally, if [out_app_flag](#out_app_flag) is set to false, the file name contains the optional 'g' index for each ionic step that may have different geometries, and if [out_app_flag](#out_app_flag) is set to true, the density matrix with respect to Bravais lattice vector R accumulates during ionic steps:
+  - nspin = 1: `dmrs1_nao.csr`;
+  - nspin = 2: `dmrs1_nao.csr` and `dmrs2_nao.csr` for the two spin channels.
 - **Default**: False
 
 ### out_wfc_pw
 
@@ -12,7 +12,7 @@ class FFT_BASE
     virtual ~FFT_BASE() {};
 
     /**
-     * @brief Initialize the fft parameters As virtual function.
+     * @brief Initialize the fft parameters as virtual function.
      *
      * The function is used to initialize the fft parameters.
      */
@@ -30,32 +30,40 @@ class FFT_BASE
     virtual __attribute__((weak)) void initfft(int nx_in, int ny_in, int nz_in);
 
     /**
-     * @brief Setup the fft Plan and data As pure virtual function.
+     * @brief Setup the fft plan and data as pure virtual function.
      *
      * The function is set as pure virtual function, in order to
      * override the function in the derived class. In the derived
-     * class, the function is used to setup the fft Plan and data.
+     * class, the function is used to setup the fft plan and data.
      */
     virtual void setupFFT() = 0;
 
     /**
-     * @brief Clean the fft Plan  As pure virtual function.
+     * @brief Clean the fft plan as pure virtual function.
      *
      * The function is set as pure virtual function, in order to
      * override the function in the derived class. In the derived
-     * class, the function is used to clean the fft Plan.
+     * class, the function is used to clean the fft plan.
      */
     virtual void cleanFFT() = 0;
 
     /**
-     * @brief Clear the fft data As pure virtual function.
+     * @brief Clear the fft data as pure virtual function.
      *
      * The function is set as pure virtual function, in order to
      * override the function in the derived class. In the derived
      * class, the function is used to clear the fft data.
      */
     virtual void clear() = 0;
-
+    /**
+     * @brief Allocate and destroy the resources used at FFT run time.
+     * Currently it is only used in the DSP mode.
+     *
+     * The function is a virtual function with an empty default
+     * implementation, so that a derived class can override it. In the
+     * derived class, the function is used to allocate and destroy the
+     * resources used at FFT run time.
+     */
     virtual void resource_handler(const int flag) const {};
     /**
      * @brief Get the real space data in cpu-like fft
 
@@ -50,16 +50,22 @@ void FFT_Bundle::initfft(int nx_in,
     if (this->precision == "single" || this->precision == "mixing")
     {
         float_flag = true;
+        if (this->precision == "mixing")
+        {
+            double_flag = true;
+        }
 #if not defined(__ENABLE_FLOAT_FFTW)
         if (this->device == "cpu")
         {
             ModuleBase::WARNING_QUIT("FFT_Bundle", "Please enable float fftw in the cmake to use float fft");
         }
 #endif
     }
-    if (this->precision == "double" || this->precision == "mixing")
+    else if (this->precision == "double")
     {
         double_flag = true;
+    }else{
+        ModuleBase::WARNING_QUIT("FFT_Bundle", "Please set the precision to single or double or mixing");
     }
 #if defined(__DSP)
     if (device == "dsp")
@@ -70,24 +76,23 @@ void FFT_Bundle::initfft(int nx_in,
         }
         fft_double = make_unique<FFT_DSP<double>>();
         fft_double->initfft(nx_in, ny_in, nz_in);
-    }
+    }else
 #endif
     if (device == "cpu")
     {
-        fft_float = make_unique<FFT_CPU<float>>(this->fft_mode);
-        fft_double = make_unique<FFT_CPU<double>>(this->fft_mode);
         if (float_flag)
         {
+            fft_float = make_unique<FFT_CPU<float>>(this->fft_mode);
             fft_float
                 ->initfft(nx_in, ny_in, nz_in, lixy_in, rixy_in, ns_in, nplane_in, nproc_in, gamma_only_in, xprime_in);
         }
         if (double_flag)
         {
+            fft_double = make_unique<FFT_CPU<double>>(this->fft_mode);
             fft_double
                 ->initfft(nx_in, ny_in, nz_in, lixy_in, rixy_in, ns_in, nplane_in, nproc_in, gamma_only_in, xprime_in);
         }
-    }
-    if (device == "gpu")
+    }else if (device == "gpu")
     {
 #if defined(__ROCM)
         fft_float = make_unique<FFT_ROCM<float>>();
@@ -100,6 +105,8 @@ void FFT_Bundle::initfft(int nx_in,
         fft_double = make_unique<FFT_CUDA<double>>();
         fft_double->initfft(nx_in, ny_in, nz_in);
 #endif
+    }else{
+        ModuleBase::WARNING_QUIT("FFT_Bundle", "Please set the device to cpu or gpu or dsp");
     }
 }
 
 
@@ -203,5 +203,18 @@ class FFT_Bundle
     std::string device = "cpu";
     std::string precision = "double";
 };
+/**
+ * @brief Scope guard that uses RAII (Resource Acquisition Is Initialization)
+ * to control the resources used by hthread when setting the DSP.
+ *
+ * The constructor calls `resource_handler(1)` on the given FFT_Bundle and the
+ * destructor calls `resource_handler(0)` on the same object, so every
+ * acquisition is paired with exactly one release when the guard goes out of
+ * scope.
+ *
+ * Only a reference to the bundle is stored: the FFT_Bundle must outlive the
+ * guard.
+ *
+ * The guard is non-copyable. A copy would refer to the same bundle and call
+ * `resource_handler(0)` a second time when it is destroyed.
+ */
+struct FFT_Guard
+{
+    const FFT_Bundle& fft_;
+
+    /// Request the resources from @p fft (flag 1).
+    explicit FFT_Guard(const FFT_Bundle& fft) : fft_(fft)
+    {
+        fft_.resource_handler(1);
+    }
+
+    /// Hand the resources back to the bundle (flag 0).
+    ~FFT_Guard()
+    {
+        fft_.resource_handler(0);
+    }
+
+    // One acquisition must map to exactly one release.
+    FFT_Guard(const FFT_Guard&) = delete;
+    FFT_Guard& operator=(const FFT_Guard&) = delete;
+};
+
 } // namespace ModulePW
 #endif // FFT_H
@@ -63,7 +63,7 @@ void FFT_DSP<double>::setupFFT()
 template <>
 void FFT_DSP<double>::resource_handler(const int flag) const
 {
-    if (flag==0)
+    if (flag == 0)
     {
         hthread_barrier_destroy(b_id);
         hthread_group_destroy(thread_id_for);
@@ -76,6 +76,8 @@ void FFT_DSP<double>::resource_handler(const int flag) const
         b_id = hthread_barrier_create(cluster_id);
         args_for[0] = b_id;
         args_back[0] = b_id;
+    }else{
+        ModuleBase::WARNING_QUIT("FFT_DSP", "Error use of fft resource handle");
     }
 }
 template <>
 
@@ -12,6 +12,7 @@
 
 namespace ModulePW
 {
+    
 template <typename FPTYPE>
 class FFT_DSP : public FFT_BASE<FPTYPE>
 {
@@ -24,7 +25,12 @@ class FFT_DSP : public FFT_BASE<FPTYPE>
         void clear() override;
 
         void cleanFFT() override;
-
+        /**
+         * @brief Control the allocation or deallocation of hthread 
+         * resource 
+         * @param flag  0: deallocate, 1: allocate
+         */
+        void resource_handler(const int flag) const override;
         /** 
         * @brief Initialize the fft parameters
         * @param nx_in  number of grid points in x direction
 
@@ -187,7 +187,7 @@ class PW_Basis_K : public PW_Basis
                        const typename GetTypeReal<TK>::type factor = 1.0) const
     {
       #if defined(__DSP)
-        this->recip2real_dsp(in, out, ik, add, factor);
+        this->real2recip_dsp(in, out, ik, add, factor);
       #else
         this->real2recip(in,out,ik,add,factor);
       #endif
 
@@ -8,12 +8,30 @@
 #if defined (__DSP)
 namespace ModulePW
 {
-template <typename FPTYPE>
-void PW_Basis_K::real2recip_dsp(const std::complex<FPTYPE>* in,
-                                std::complex<FPTYPE>* out,
+// Single-precision specialization of real2recip_dsp.
+// The body is intentionally empty: it performs no transform and does not
+// write to `out`.
+template <>
+void PW_Basis_K::real2recip_dsp(const std::complex<float>* in,
+                                std::complex<float>* out,
+                                const int ik,
+                                const bool add,
+                                const float factor) const
+{
+}
+// Single-precision specialization of recip2real_dsp.
+// The body is intentionally empty: it performs no transform and does not
+// write to `out`.
+template <>
+void PW_Basis_K::recip2real_dsp(const std::complex<float>* in,
+                                std::complex<float>* out,
+                                const int ik,
+                                const bool add,
+                                const float factor) const
+{
+}
+template <>
+void PW_Basis_K::real2recip_dsp(const std::complex<double>* in,
+                                std::complex<double>* out,
+                                const int ik,
+                                const bool add,
+                                const double factor) const
 {
     const base_device::DEVICE_CPU* ctx;
     const base_device::DEVICE_GPU* gpux;
@@ -31,20 +49,20 @@ void PW_Basis_K::real2recip_dsp(const std::complex<FPTYPE>* in,
                                    auxr);
     this->fft_bundle.resource_handler(0);
     // copy the result from the auxr to the out ,while consider the add
-    set_real_to_recip_output_op<FPTYPE, base_device::DEVICE_CPU>()(npw_k,
+    set_real_to_recip_output_op<double, base_device::DEVICE_CPU>()(npw_k,
                                                                    this->nxyz,
                                                                    add,
                                                                    factor,
                                                                    this->ig2ixyz_k_cpu.data() + startig,
                                                                    auxr,
                                                                    out);
 }
-template <typename FPTYPE>
-void PW_Basis_K::recip2real_dsp(const std::complex<FPTYPE>* in,
-                                std::complex<FPTYPE>* out,
+template <>
+void PW_Basis_K::recip2real_dsp(const std::complex<double>* in,
+                                std::complex<double>* out,
                                 const int ik,
                                 const bool add,
-                                const FPTYPE factor) const
+                                const double factor) const
 {
     assert(this->gamma_only == false);
     const base_device::DEVICE_CPU* ctx;
@@ -128,16 +146,16 @@ void PW_Basis_K::convolution(const base_device::DEVICE_CPU* ctx,
     ModuleBase::timer::tick(this->classname, "convolution");
 }
 
-// template void PW_Basis_K::real2recip_dsp<float>(const std::complex<float>* in,
-//                                             std::complex<float>* out,
-//                                             const int ik,
-//                                             const bool add,
-//                                             const float factor) const; // in:(nplane,nx*ny)  ; out(nz, ns)
-// template void PW_Basis_K::recip2real_dsp<float>(const std::complex<float>* in,
-//                                             std::complex<float>* out,
-//                                             const int ik,
-//                                             const bool add,
-//                                             const float factor) const; // in:(nz, ns)  ; out(nplane,nx*ny)
+template void PW_Basis_K::real2recip_dsp<float>(const std::complex<float>* in,
+                                            std::complex<float>* out,
+                                            const int ik,
+                                            const bool add,
+                                            const float factor) const; // in:(nplane,nx*ny)  ; out(nz, ns)
+template void PW_Basis_K::recip2real_dsp<float>(const std::complex<float>* in,
+                                            std::complex<float>* out,
+                                            const int ik,
+                                            const bool add,
+                                            const float factor) const; // in:(nz, ns)  ; out(nplane,nx*ny)
 
 template void PW_Basis_K::real2recip_dsp<double>(const std::complex<double>* in,
                                                  std::complex<double>* out,
 
@@ -26,7 +26,6 @@
 #include "module_io/to_wannier90_lcao.h"
 #include "module_io/to_wannier90_lcao_in_pw.h"
 #include "module_io/write_HS.h"
-#include "module_io/write_dmr.h"
 #include "module_io/write_elecstat_pot.h"
 #include "module_io/write_istate_info.h"
 #include "module_io/write_proj_band_lcao.h"
Original file line number	Diff line number	Diff line change
`@@ -50,16 +50,22 @@ void FFT_Bundle::initfft(int nx_in,`
`50`	`50`	`if (this->precision == "single" \|\| this->precision == "mixing")`
`51`	`51`	`{`
`52`	`52`	`float_flag = true;`
	`53`	`+ if (this->precision == "mixing")`
	`54`	`+ {`
	`55`	`+ double_flag = true;`
	`56`	`+ }`
`53`	`57`	`#if not defined(__ENABLE_FLOAT_FFTW)`
`54`	`58`	`if (this->device == "cpu")`
`55`	`59`	`{`
`56`	`60`	`ModuleBase::WARNING_QUIT("FFT_Bundle", "Please enable float fftw in the cmake to use float fft");`
`57`	`61`	`}`
`58`	`62`	`#endif`
`59`	`63`	`}`
`60`		`- if (this->precision == "double" \|\| this->precision == "mixing")`
	`64`	`+ else if (this->precision == "double")`
`61`	`65`	`{`
`62`	`66`	`double_flag = true;`
	`67`	`+ }else{`
	`68`	`+ ModuleBase::WARNING_QUIT("FFT_Bundle", "Please set the precision to single or double or mixing");`
`63`	`69`	`}`
`64`	`70`	`#if defined(__DSP)`
`65`	`71`	`if (device == "dsp")`
`@@ -70,24 +76,23 @@ void FFT_Bundle::initfft(int nx_in,`
`70`	`76`	`}`
`71`	`77`	`fft_double = make_unique<FFT_DSP<double>>();`
`72`	`78`	`fft_double->initfft(nx_in, ny_in, nz_in);`
`73`		`- }`
	`79`	`+ }else`
`74`	`80`	`#endif`
`75`	`81`	`if (device == "cpu")`
`76`	`82`	`{`
`77`		`- fft_float = make_unique<FFT_CPU<float>>(this->fft_mode);`
`78`		`- fft_double = make_unique<FFT_CPU<double>>(this->fft_mode);`
`79`	`83`	`if (float_flag)`
`80`	`84`	`{`
	`85`	`+ fft_float = make_unique<FFT_CPU<float>>(this->fft_mode);`
`81`	`86`	`fft_float`
`82`	`87`	`->initfft(nx_in, ny_in, nz_in, lixy_in, rixy_in, ns_in, nplane_in, nproc_in, gamma_only_in, xprime_in);`
`83`	`88`	`}`
`84`	`89`	`if (double_flag)`
`85`	`90`	`{`
	`91`	`+ fft_double = make_unique<FFT_CPU<double>>(this->fft_mode);`
`86`	`92`	`fft_double`
`87`	`93`	`->initfft(nx_in, ny_in, nz_in, lixy_in, rixy_in, ns_in, nplane_in, nproc_in, gamma_only_in, xprime_in);`
`88`	`94`	`}`
`89`		`- }`
`90`		`- if (device == "gpu")`
	`95`	`+ }else if (device == "gpu")`
`91`	`96`	`{`
`92`	`97`	`#if defined(__ROCM)`
`93`	`98`	`fft_float = make_unique<FFT_ROCM<float>>();`
`@@ -100,6 +105,8 @@ void FFT_Bundle::initfft(int nx_in,`
`100`	`105`	`fft_double = make_unique<FFT_CUDA<double>>();`
`101`	`106`	`fft_double->initfft(nx_in, ny_in, nz_in);`
`102`	`107`	`#endif`
	`108`	`+ }else{`
	`109`	`+ ModuleBase::WARNING_QUIT("FFT_Bundle", "Please set the device to cpu or gpu or dsp");`
`103`	`110`	`}`
`104`	`111`	`}`
`105`	`112`
Original file line number	Diff line number	Diff line change
`@@ -63,7 +63,7 @@ void FFT_DSP<double>::setupFFT()`
`63`	`63`	`template <>`
`64`	`64`	`void FFT_DSP<double>::resource_handler(const int flag) const`
`65`	`65`	`{`
`66`		`- if (flag==0)`
	`66`	`+ if (flag == 0)`
`67`	`67`	`{`
`68`	`68`	`hthread_barrier_destroy(b_id);`
`69`	`69`	`hthread_group_destroy(thread_id_for);`
`@@ -76,6 +76,8 @@ void FFT_DSP<double>::resource_handler(const int flag) const`
`76`	`76`	`b_id = hthread_barrier_create(cluster_id);`
`77`	`77`	`args_for[0] = b_id;`
`78`	`78`	`args_back[0] = b_id;`
	`79`	`+ }else{`
	`80`	`+ ModuleBase::WARNING_QUIT("FFT_DSP", "Error use of fft resource handle");`
`79`	`81`	`}`
`80`	`82`	`}`
`81`	`83`	`template <>`
Original file line number	Diff line number	Diff line change
`@@ -187,7 +187,7 @@ class PW_Basis_K : public PW_Basis`
`187`	`187`	`const typename GetTypeReal<TK>::type factor = 1.0) const`
`188`	`188`	`{`
`189`	`189`	`#if defined(__DSP)`
`190`		`- this->recip2real_dsp(in, out, ik, add, factor);`
	`190`	`+ this->real2recip_dsp(in, out, ik, add, factor);`
`191`	`191`	`#else`
`192`	`192`	`this->real2recip(in,out,ik,add,factor);`
`193`	`193`	`#endif`