deepmodeling
diff --git a/‎docs/advanced/acceleration/cuda.md‎
Lines changed: 1 addition & 1 deletion b/‎docs/advanced/acceleration/cuda.md‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎docs/advanced/input_files/input-main.md‎
Lines changed: 28 additions & 2 deletions b/‎docs/advanced/input_files/input-main.md‎
Lines changed: 28 additions & 2 deletions
diff --git a/‎python/pyabacus/src/py_diago_dav_subspace.hpp‎
Lines changed: 5 additions & 7 deletions b/‎python/pyabacus/src/py_diago_dav_subspace.hpp‎
Lines changed: 5 additions & 7 deletions
diff --git a/‎python/pyabacus/src/py_diago_david.hpp‎
Lines changed: 5 additions & 7 deletions b/‎python/pyabacus/src/py_diago_david.hpp‎
Lines changed: 5 additions & 7 deletions
diff --git a/‎source/Makefile.Objects‎
Lines changed: 9 additions & 1 deletion b/‎source/Makefile.Objects‎
Lines changed: 9 additions & 1 deletion
diff --git a/‎source/module_base/global_variable.cpp‎
Lines changed: 0 additions & 1 deletion b/‎source/module_base/global_variable.cpp‎
Lines changed: 0 additions & 1 deletion
diff --git a/‎source/module_base/global_variable.h‎
Lines changed: 0 additions & 2 deletions b/‎source/module_base/global_variable.h‎
Lines changed: 0 additions & 2 deletions
diff --git a/‎source/module_elecstate/CMakeLists.txt‎
Lines changed: 2 additions & 0 deletions b/‎source/module_elecstate/CMakeLists.txt‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎source/module_elecstate/elecstate.h‎
Lines changed: 8 additions & 0 deletions b/‎source/module_elecstate/elecstate.h‎
Lines changed: 8 additions & 0 deletions
diff --git a/‎source/module_elecstate/elecstate_lcao.cpp‎
Lines changed: 2 additions & 12 deletions b/‎source/module_elecstate/elecstate_lcao.cpp‎
Lines changed: 2 additions & 12 deletions
@@ -36,7 +36,7 @@ The ABACUS program will automatically determine whether the current ELPA support
 ## Run with the GPU support by editing the INPUT script:
 
 In `INPUT` file we need to set the input parameter [device](../input_files/input-main.md#device) to `gpu`. If this parameter is not set, ABACUS will try to determine if there are available GPUs.
-- Set `ks_solver`: For the PW basis, CG, BPCG and Davidson methods are supported on GPU; set the input parameter [ks_solver](../input_files/input-main.md#ks_solver) to `cg`, `bpcg` or `dav`. For the LCAO basis, `cusolver` and `elpa` is supported on GPU.
+- Set `ks_solver`: For the PW basis, CG, BPCG and Davidson methods are supported on GPU; set the input parameter [ks_solver](../input_files/input-main.md#ks_solver) to `cg`, `bpcg` or `dav`. For the LCAO basis, `cusolver`, `cusolvermp` and `elpa` are supported on GPU.
 - **multi-card**: ABACUS allows for multi-GPU acceleration. If you have multiple GPU cards, you can run ABACUS with several MPI processes, and each process will utilize one GPU card. For example, the command `mpirun -n 2 abacus` will by default launch two GPUs for computation. If you only have one card, this command will only start one GPU. 
 
 ## Examples
 
@@ -161,6 +161,7 @@
     - [nbands\_istate](#nbands_istate)
     - [bands\_to\_print](#bands_to_print)
     - [if\_separate\_k](#if_separate_k)
+    - [out\_elf](#out_elf)
   - [Density of states](#density-of-states)
     - [dos\_edelta\_ev](#dos_edelta_ev)
     - [dos\_sigma](#dos_sigma)
@@ -932,6 +933,8 @@ calculations.
   - **genelpa**: This method should be used if you choose localized orbitals.
   - **scalapack_gvx**: Scalapack can also be used for localized orbitals.
   - **cusolver**: This method requires ABACUS to be built with CUDA and at least one GPU to be available.
+  - **cusolvermp**: This method supports multi-GPU acceleration and needs building with CUDA. Note that when using cusolvermp, you should set the number of MPI processes to be equal to the number of GPUs.
+  - **elpa**: The ELPA solver supports both CPU and GPU. By setting `device` to `gpu`, you can launch the ELPA solver with GPU acceleration (provided that you have installed a GPU-supported version of ELPA, which requires you to manually compile and install ELPA, and that ABACUS has been compiled with -DUSE_ELPA=ON and -DUSE_CUDA=ON). The ELPA solver also supports multi-GPU acceleration.
 
   If you set ks_solver=`genelpa` for basis_type=`pw`, the program will be stopped with an error message:
 
@@ -940,7 +943,13 @@ calculations.
   ```
 
   Then the user has to correct the input file and restart the calculation.
-- **Default**: cg (plane-wave basis), or genelpa (localized atomic orbital basis, if compiling option `USE_ELPA` has been set),lapack (localized atomic orbital basis, if compiling option `ENABLE_MPI` has not been set), scalapack_gvx, (localized atomic orbital basis, if compiling option `USE_ELPA` has not been set and if compiling option `ENABLE_MPI` has been set)
+- **Default**: 
+  - **PW basis**: cg.
+  - **LCAO basis**:
+    - genelpa (if compiling option `USE_ELPA` has been set)
+    - lapack (if compiling option `ENABLE_MPI` has not been set)
+    - scalapack_gvx (if compiling option `USE_ELPA` has not been set and compiling option `ENABLE_MPI` has been set)
+    - cusolver (if compiling option `USE_CUDA` has been set)
 
 ### nbands
 
@@ -1521,7 +1530,7 @@ These variables are used to control the output of properties.
 - **Type**: Integer \[Integer\](optional)
 - **Description**: 
   The first integer controls whether to output the charge density on real space grids:
-  - 1. Output the charge density (in Bohr^-3) on real space grids into the density files in the folder `OUT.${suffix}`. The files are named as:
+  - 1: Output the charge density (in Bohr^-3) on real space grids into the density files in the folder `OUT.${suffix}`. The files are named as:
     - nspin = 1: SPIN1_CHG.cube;
     - nspin = 2: SPIN1_CHG.cube, and SPIN2_CHG.cube;
     - nspin = 4: SPIN1_CHG.cube, SPIN2_CHG.cube, SPIN3_CHG.cube, and SPIN4_CHG.cube.
@@ -1801,6 +1810,23 @@ The band (KS orbital) energy for each (k-point, spin, band) will be printed in t
 - **Description**: Specifies whether to write the partial charge densities for all k-points to individual files or merge them. **Warning**: Enabling symmetry may produce incorrect results due to incorrect k-point weights. Therefore, when calculating partial charge densities, it is strongly recommended to set `symmetry = -1`.
 - **Default**: false
 
+### out_elf
+
+- **Type**: Integer \[Integer\](optional)
+- **Availability**: Only for Kohn-Sham DFT and Orbital Free DFT.
+- **Description**: Whether to output the electron localization function (ELF) in the folder `OUT.${suffix}`. The files are named as follows:
+    - nspin = 1:
+      - ELF.cube: ${\rm{ELF}} = \frac{1}{1+\chi^2}$, $\chi = \frac{\frac{1}{2}\sum_{i}{f_i |\nabla\psi_{i}|^2} - \frac{|\nabla\rho|^2}{8\rho}}{\frac{3}{10}(3\pi^2)^{2/3}\rho^{5/3}}$;
+    - nspin = 2:
+      - ELF_SPIN1.cube, ELF_SPIN2.cube: ${\rm{ELF}}_\sigma = \frac{1}{1+\chi_\sigma^2}$, $\chi_\sigma = \frac{\frac{1}{2}\sum_{i}{f_i |\nabla\psi_{i,\sigma}|^2} - \frac{|\nabla\rho_\sigma|^2}{8\rho_\sigma}}{\frac{3}{10}(6\pi^2)^{2/3}\rho_\sigma^{5/3}}$;
+      - ELF.cube: ${\rm{ELF}} = \frac{1}{1+\chi^2}$, $\chi = \frac{\frac{1}{2}\sum_{i,\sigma}{f_i |\nabla\psi_{i,\sigma}|^2} - \sum_{\sigma}{\frac{|\nabla\rho_\sigma|^2}{8\rho_\sigma}}}{\sum_{\sigma}{\frac{3}{10}(6\pi^2)^{2/3}\rho_\sigma^{5/3}}}$;
+
+  The second integer controls the precision of the kinetic energy density output; if it is not given, `3` is used as the default. For restarting from this file, or for other calculations that require high precision, `10` is recommended.
+
+  ---
+  In molecular dynamics calculations, the output frequency is controlled by [out_interval](#out_interval).
+- **Default**: 0 3
+
 [back to top](#full-list-of-input-keywords)
 
 ## Density of states
 
@@ -113,23 +113,21 @@ class PyDiagoDavSubspace
         auto hpsi_func = [mm_op] (
             std::complex<double> *psi_in,
             std::complex<double> *hpsi_out, 
-            const int nband_in,
-            const int nbasis_in, 
-            const int band_index1,
-            const int band_index2
+            const int ld_psi,
+            const int nvec
         ) {
             // Note: numpy's py::array_t is row-major, but
             //       our raw pointer-array is column-major
-            py::array_t<std::complex<double>, py::array::f_style> psi({nbasis_in, band_index2 - band_index1 + 1});
+            py::array_t<std::complex<double>, py::array::f_style> psi({ld_psi, nvec});
             py::buffer_info psi_buf = psi.request();
             std::complex<double>* psi_ptr = static_cast<std::complex<double>*>(psi_buf.ptr);
-            std::copy(psi_in + band_index1 * nbasis_in, psi_in + (band_index2 + 1) * nbasis_in, psi_ptr);
+            std::copy(psi_in, psi_in + nvec * ld_psi, psi_ptr);
 
             py::array_t<std::complex<double>, py::array::f_style> hpsi = mm_op(psi);
 
             py::buffer_info hpsi_buf = hpsi.request();
             std::complex<double>* hpsi_ptr = static_cast<std::complex<double>*>(hpsi_buf.ptr);
-            std::copy(hpsi_ptr, hpsi_ptr + (band_index2 - band_index1 + 1) * nbasis_in, hpsi_out);
+            std::copy(hpsi_ptr, hpsi_ptr + nvec * ld_psi, hpsi_out);
         };
 
         obj = std::make_unique<hsolver::Diago_DavSubspace<std::complex<double>, base_device::DEVICE_CPU>>(
 
@@ -111,23 +111,21 @@ class PyDiagoDavid
         auto hpsi_func = [mm_op] (
             std::complex<double> *psi_in,
             std::complex<double> *hpsi_out, 
-            const int nband_in, 
-            const int nbasis_in, 
-            const int band_index1, 
-            const int band_index2
+            const int ld_psi, 
+            const int nvec
         ) {
             // Note: numpy's py::array_t is row-major, but
             //       our raw pointer-array is column-major
-            py::array_t<std::complex<double>, py::array::f_style> psi({nbasis_in, band_index2 - band_index1 + 1});
+            py::array_t<std::complex<double>, py::array::f_style> psi({ld_psi, nvec});
             py::buffer_info psi_buf = psi.request();
             std::complex<double>* psi_ptr = static_cast<std::complex<double>*>(psi_buf.ptr);
-            std::copy(psi_in + band_index1 * nbasis_in, psi_in + (band_index2 + 1) * nbasis_in, psi_ptr);
+            std::copy(psi_in, psi_in + nvec * ld_psi, psi_ptr);
 
             py::array_t<std::complex<double>, py::array::f_style> hpsi = mm_op(psi);
 
             py::buffer_info hpsi_buf = hpsi.request();
             std::complex<double>* hpsi_ptr = static_cast<std::complex<double>*>(hpsi_buf.ptr);
-            std::copy(hpsi_ptr, hpsi_ptr + (band_index2 - band_index1 + 1) * nbasis_in, hpsi_out);
+            std::copy(hpsi_ptr, hpsi_ptr + nvec * ld_psi, hpsi_out);
         };
 
         auto spsi_func = [this] (
 
@@ -213,6 +213,7 @@ OBJS_ELECSTAT=elecstate.o\
     elecstate_print.o\
     elecstate_pw.o\
     elecstate_pw_sdft.o\
+    elecstate_pw_cal_tau.o\
     elecstate_op.o\
     efield.o\
     gatefield.o\
@@ -226,6 +227,7 @@ OBJS_ELECSTAT=elecstate.o\
 
 OBJS_ELECSTAT_LCAO=elecstate_lcao.o\
       elecstate_lcao_tddft.o\
+      elecstate_lcao_cal_tau.o\
       density_matrix.o\
       cal_dm_psi.o\
 
@@ -454,7 +456,12 @@ OBJS_XC=xc_functional.o\
     xc_functional_gradcorr.o\
     xc_functional_wrapper_xc.o\
     xc_functional_wrapper_gcxc.o\
-    xc_functional_wrapper_tauxc.o\
+    xc_functional_libxc.o\
+    xc_functional_libxc_tools.o\
+    xc_functional_libxc_vxc.o\
+    xc_functional_libxc_wrapper_xc.o\
+    xc_functional_libxc_wrapper_gcxc.o\
+    xc_functional_libxc_wrapper_tauxc.o\
     xc_funct_exch_lda.o\
     xc_funct_corr_lda.o\
     xc_funct_exch_gga.o\
@@ -496,6 +503,7 @@ OBJS_IO=input_conv.o\
     winput.o\
     write_cube.o\
     write_elecstat_pot.o\
+    write_elf.o\
     write_dipole.o\
     td_current_io.o\
     write_wfc_r.o\
 
@@ -21,7 +21,6 @@ namespace GlobalV
 int NBANDS = 0;
 int NLOCAL = 0;        // total number of local basis.
 
-int NSPIN = 1;       // LDA
 double nupdown = 0.0;
 
 bool use_uspp = false;
 
@@ -20,8 +20,6 @@ namespace GlobalV
 extern int NBANDS;
 extern int NLOCAL;        // 1.1 // mohan add 2009-05-29
 
-
-extern int NSPIN;       // 7
 extern double nupdown;
 extern bool use_uspp;
 
 
@@ -7,6 +7,7 @@ list(APPEND objects
     elecstate_print.cpp
     elecstate_pw.cpp
     elecstate_pw_sdft.cpp
+    elecstate_pw_cal_tau.cpp
     potentials/gatefield.cpp
     potentials/efield.cpp
     potentials/H_Hartree_pw.cpp
@@ -31,6 +32,7 @@ if(ENABLE_LCAO)
   list(APPEND objects
       elecstate_lcao.cpp
       elecstate_lcao_tddft.cpp
+      elecstate_lcao_cal_tau.cpp
       potentials/H_TDDFT_pw.cpp
       module_dm/density_matrix.cpp
       module_dm/cal_dm_psi.cpp
 
@@ -53,6 +53,14 @@ class ElecState
     }
     // virtual void updateRhoK(const psi::Psi<std::complex<double>> &psi) = 0;
     // virtual void updateRhoK(const psi::Psi<double> &psi)=0
+    virtual void cal_tau(const psi::Psi<std::complex<double>>& psi)
+    {
+        return;
+    }
+    virtual void cal_tau(const psi::Psi<double>& psi)
+    {
+        return;
+    }
 
     // update charge density for next scf step
     // in this function, 1. input rho for construct Hamilt and 2. calculated rho from Psi will mix to 3. new charge
 
@@ -67,12 +67,7 @@ void ElecStateLCAO<std::complex<double>>::psiToRho(const psi::Psi<std::complex<d
 
     if (XC_Functional::get_func_type() == 3 || XC_Functional::get_func_type() == 5)
     {
-        for (int is = 0; is < PARAM.inp.nspin; is++)
-        {
-            ModuleBase::GlobalFunc::ZEROS(this->charge->kin_r[is], this->charge->nrxx);
-        }
-        Gint_inout inout1(this->charge->kin_r, Gint_Tools::job_type::tau);
-        this->gint_k->cal_gint(&inout1);
+        this->cal_tau(psi);
     }
 
     this->charge->renormalize_rho();
@@ -124,12 +119,7 @@ void ElecStateLCAO<double>::psiToRho(const psi::Psi<double>& psi)
 
     if (XC_Functional::get_func_type() == 3 || XC_Functional::get_func_type() == 5)
     {
-        for (int is = 0; is < PARAM.inp.nspin; is++)
-        {
-            ModuleBase::GlobalFunc::ZEROS(this->charge->kin_r[is], this->charge->nrxx);
-        }
-        Gint_inout inout1(this->charge->kin_r, Gint_Tools::job_type::tau);
-        this->gint_gamma->cal_gint(&inout1);
+        this->cal_tau(psi);
     }
 
     this->charge->renormalize_rho();
Original file line number	Diff line number	Diff line change
`@@ -67,12 +67,7 @@ void ElecStateLCAO<std::complex<double>>::psiToRho(const psi::Psi<std::complex<d`
`67`	`67`
`68`	`68`	`if (XC_Functional::get_func_type() == 3 \|\| XC_Functional::get_func_type() == 5)`
`69`	`69`	`{`
`70`		`- for (int is = 0; is < PARAM.inp.nspin; is++)`
`71`		`- {`
`72`		`- ModuleBase::GlobalFunc::ZEROS(this->charge->kin_r[is], this->charge->nrxx);`
`73`		`- }`
`74`		`- Gint_inout inout1(this->charge->kin_r, Gint_Tools::job_type::tau);`
`75`		`- this->gint_k->cal_gint(&inout1);`
	`70`	`+ this->cal_tau(psi);`
`76`	`71`	`}`
`77`	`72`
`78`	`73`	`this->charge->renormalize_rho();`
`@@ -124,12 +119,7 @@ void ElecStateLCAO<double>::psiToRho(const psi::Psi<double>& psi)`
`124`	`119`
`125`	`120`	`if (XC_Functional::get_func_type() == 3 \|\| XC_Functional::get_func_type() == 5)`
`126`	`121`	`{`
`127`		`- for (int is = 0; is < PARAM.inp.nspin; is++)`
`128`		`- {`
`129`		`- ModuleBase::GlobalFunc::ZEROS(this->charge->kin_r[is], this->charge->nrxx);`
`130`		`- }`
`131`		`- Gint_inout inout1(this->charge->kin_r, Gint_Tools::job_type::tau);`
`132`		`- this->gint_gamma->cal_gint(&inout1);`
	`122`	`+ this->cal_tau(psi);`
`133`	`123`	`}`
`134`	`124`
`135`	`125`	`this->charge->renormalize_rho();`