deepmodeling
diff --git a/‎CMakeLists.txt‎
Lines changed: 10 additions & 7 deletions b/‎CMakeLists.txt‎
Lines changed: 10 additions & 7 deletions
diff --git a/‎docs/advanced/input_files/input-main.md‎
Lines changed: 20 additions & 4 deletions b/‎docs/advanced/input_files/input-main.md‎
Lines changed: 20 additions & 4 deletions
diff --git a/‎docs/advanced/interface/Wannier90.md‎
Lines changed: 10 additions & 5 deletions b/‎docs/advanced/interface/Wannier90.md‎
Lines changed: 10 additions & 5 deletions
diff --git a/‎docs/advanced/opt.md‎
Lines changed: 3 additions & 1 deletion b/‎docs/advanced/opt.md‎
Lines changed: 3 additions & 1 deletion
diff --git a/‎docs/quick_start/easy_install.md‎
Lines changed: 26 additions & 19 deletions b/‎docs/quick_start/easy_install.md‎
Lines changed: 26 additions & 19 deletions
diff --git a/‎examples/interface_wannier90/ABACUS_towannier90_lcao/INPUT-nscf‎
Lines changed: 2 additions & 2 deletions b/‎examples/interface_wannier90/ABACUS_towannier90_lcao/INPUT-nscf‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎examples/interface_wannier90/ABACUS_towannier90_lcao_in_pw/INPUT-nscf‎
Lines changed: 1 addition & 1 deletion b/‎examples/interface_wannier90/ABACUS_towannier90_lcao_in_pw/INPUT-nscf‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎examples/interface_wannier90/ABACUS_towannier90_pw/INPUT-nscf‎
Lines changed: 2 additions & 2 deletions b/‎examples/interface_wannier90/ABACUS_towannier90_pw/INPUT-nscf‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎examples/lr-tddft/lcao_H2O/INPUT‎
Lines changed: 3 additions & 0 deletions b/‎examples/lr-tddft/lcao_H2O/INPUT‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎examples/lr-tddft/lcao_Si2/INPUT‎
Lines changed: 4 additions & 1 deletion b/‎examples/lr-tddft/lcao_Si2/INPUT‎
Lines changed: 4 additions & 1 deletion
@@ -40,6 +40,7 @@ option(ENABLE_CNPY "Enable cnpy usage." OFF)
 option(ENABLE_PEXSI "Enable support for PEXSI." OFF)
 option(ENABLE_CUSOLVERMP "Enable cusolvermp." OFF)
 option(USE_DSP "Enable DSP usage." OFF)
+option(USE_CUDA_ON_DCU "Enable CUDA on DCU" OFF)
 
 # enable json support
 if(ENABLE_RAPIDJSON)
@@ -126,6 +127,10 @@ if (USE_DSP)
   set(ABACUS_BIN_NAME abacus_dsp)
 endif()
 
+if (USE_CUDA_ON_DCU)
+  add_compile_definitions(__CUDA_ON_DCU)
+endif()
+
 list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake)
 
 if(ENABLE_COVERAGE)
@@ -421,10 +426,8 @@ else()
   include_directories(${FFTW3_INCLUDE_DIRS})
   list(APPEND math_libs FFTW3::FFTW3 LAPACK::LAPACK BLAS::BLAS)
 
-  if(ENABLE_LCAO)
-    find_package(ScaLAPACK REQUIRED)
-    list(APPEND math_libs ScaLAPACK::ScaLAPACK)
-  endif()
+  find_package(ScaLAPACK REQUIRED)
+  list(APPEND math_libs ScaLAPACK::ScaLAPACK)
 
   if(USE_OPENMP)
     list(APPEND math_libs FFTW3::FFTW3_OMP)
@@ -713,17 +716,17 @@ target_link_libraries(
   esolver
   vdw
   device
-  container)
+  container
+  dftu
+  deltaspin)
 if(ENABLE_LCAO)
   target_link_libraries(
     ${ABACUS_BIN_NAME}
     hamilt_lcao
     tddft
     orb
     gint
-    dftu
     hcontainer
-    deltaspin
     numerical_atomic_orbitals
     lr
     rdmft)
 
@@ -37,6 +37,7 @@
     - [ndx, ndy, ndz](#ndx-ndy-ndz)
     - [pw\_seed](#pw_seed)
     - [pw\_diag\_thr](#pw_diag_thr)
+    - [diago\_smooth\_ethr](#diago_smooth_ethr)
     - [pw\_diag\_nmax](#pw_diag_nmax)
     - [pw\_diag\_ndim](#pw_diag_ndim)
     - [erf\_ecut](#erf_ecut)
@@ -777,6 +778,12 @@ These variables are used to control the plane wave related parameters.
 - **Description**: Only used when you use `ks_solver = cg/dav/dav_subspace/bpcg`. It indicates the threshold for the first electronic iteration, from the second iteration the pw_diag_thr will be updated automatically. **For nscf calculations with planewave basis set, pw_diag_thr should be <= 1e-3.**
 - **Default**: 0.01
 
+### diago_smooth_ethr
+
+- **Type**: bool
+- **Description**: If `TRUE`, the smooth threshold strategy, which applies a larger threshold (10e-5) for the empty states, will be implemented in the diagonalization methods. (This strategy should not affect total energy, forces, and other ground-state properties, but computational efficiency will be improved.) If `FALSE`, the smooth threshold strategy will not be applied.
+- **Default**: false
+
 ### pw_diag_nmax
 
 - **Type**: Integer
@@ -1235,6 +1242,12 @@ Note: In new angle mixing, you should set `mixing_beta_mag >> mixing_beta`. The
 - **Description**: To determine the number of old iterations' `drho` used in slope calculations.
 - **Default**: `mixing_ndim`
 
+### sc_os_ndim
+
+- **Type**: int
+- **Description**: The number of previous iterations used to judge whether oscillation occurs. If oscillation is detected, a more accurate lambda is calculated with the DeltaSpin method. Only for the PW basis.
+- **Default**: 5
+
 ### chg_extrap
 
 - **Type**: String
@@ -1369,6 +1382,7 @@ These variables are used to control the geometry relaxation.
 - **Description**: The methods to do geometry optimization.
   - cg: using the conjugate gradient (CG) algorithm. Note that there are two implementations of the conjugate gradient (CG) method, see [relax_new](#relax_new).
   - bfgs: using the Broyden–Fletcher–Goldfarb–Shanno (BFGS) algorithm.
+  - bfgs_trad: using the traditional Broyden–Fletcher–Goldfarb–Shanno (BFGS) algorithm. 
   - cg_bfgs: using the CG method for the initial steps, and switching to BFGS method when the force convergence is smaller than [relax_cg_thr](#relax_cg_thr).
   - sd: using the steepest descent (SD) algorithm.
   - fire: the Fast Inertial Relaxation Engine method (FIRE), a kind of molecular-dynamics-based relaxation algorithm, is implemented in the molecular dynamics (MD) module. The algorithm can be used by setting [calculation](#calculation) to `md` and [md_type](#md_type) to `fire`. Also ionic velocities should be set in this case. See [fire](../md.md#fire) for more details.
@@ -2046,7 +2060,7 @@ Warning: this function is not robust enough for the current version. Please try
 - **Type**: int
 - **Availability**: numerical atomic orbital basis
 - **Description**: Include V_delta label for DeePKS training. When `deepks_out_labels` is true and `deepks_v_delta` > 0, ABACUS will output h_base.npy, v_delta.npy and h_tot.npy(h_tot=h_base+v_delta). 
-  Meanwhile, when `deepks_v_delta` equals 1, ABACUS will also output v_delta_precalc.npy, which is used to calculate V_delta during DeePKS training. However, when the number of atoms grows, the size of v_delta_precalc.npy will be very large. In this case, it's recommended to set `deepks_v_delta` as 2, and ABACUS will output psialpha.npy and grad_evdm.npy but not v_delta_precalc.npy. These two files are small and can be used to calculate v_delta_precalc in the procedure of training DeePKS.
+  Meanwhile, when `deepks_v_delta` equals 1, ABACUS will also output v_delta_precalc.npy, which is used to calculate V_delta during DeePKS training. However, when the number of atoms grows, the size of v_delta_precalc.npy will be very large. In this case, it's recommended to set `deepks_v_delta` as 2, and ABACUS will output phialpha.npy and grad_evdm.npy but not v_delta_precalc.npy. These two files are small and can be used to calculate v_delta_precalc in the procedure of training DeePKS.
 - **Default**: 0
 
 ### deepks_out_unittest
@@ -2432,10 +2446,11 @@ These variables are relevant when using hybrid functionals.
 ### exx_ccp_rmesh_times
 
 - **Type**: Real
-- **Description**: This parameter determines how many times larger the radial mesh required for calculating Columb potential is to that of atomic orbitals. For HSE, setting it to 1 is enough. But for PBE0, a much larger number must be used.
+- **Description**: This parameter determines how many times larger the radial mesh required for calculating the Coulomb potential is than that of the atomic orbitals. The value should be at least 1. Reducing this value can effectively increase the speed of self-consistent calculations using hybrid functionals.
 - **Default**:
-  - 1.5: if *[dft_functional](#dft_functional)==hse*
-  - 5: else
+  - 5: if *[dft_functional](#dft_functional)==hf/pbe0/scan0/muller/power/wp22*
+  - 1.5: if *[dft_functional](#dft_functional)==hse/cwp22*
+  - 1: else
 
 ### exx_distribute_type
 
@@ -2474,6 +2489,7 @@ These variables are relevant when using hybrid functionals.
 - **Description**:
   - True: Enforce LibRI to use `double` data type.
   - False: Enforce LibRI to use `complex` data type.
+  Setting it to True can effectively improve the speed of self-consistent calculations with hybrid functionals.
 - **Default**: depends on the [gamma_only](#gamma_only) option
   - True: if gamma_only
   - False: else
 
@@ -2,11 +2,11 @@
 
 [Wannier90](http://www.wannier.org/) is a useful package to generating the maximally-localized Wannier functions (MLWFs), which can be used to compute advanced electronic properties. Some post-processing tools (such as WannierTools, etc.) will use MLWFs for further analysis and calculations. 
 
-Currently ABACUS provides an interface to Wannier90 package. The users are assumed to be familiar with the use of Wannier90. The ABACUS-Wannier90 interface is only suitable for nspin=1 or 2, not for nspin=4 or spin-orbit coupling (SOC). 
+Currently ABACUS provides an interface to Wannier90 package. The users are assumed to be familiar with the use of Wannier90. The ABACUS-Wannier90 interface is suitable for nspin=1, 2, 4 (including lspinorb=1).
 
 To construct the MLWFs using the wave functions of ABACUS generally requires four steps. Here we use the diamond as an example which can be found in [examples/interface_wannier90/](https://github.com/abacusmodeling/abacus-develop/tree/develop/examples/interface_wannier90).
 
-1. Enter the `ABACUS_towannier90/` folder, prepare a Wannier90 input file `diamond.win`, which is the main input file for Wannier90. Then To generate `diamond.nnkp` file by running Wannier90,  which ABACUS will read later: 
+1. Enter the `ABACUS_towannier90_pw/` folder and prepare a Wannier90 input file `diamond.win`, which is the main input file for Wannier90. Then generate the `diamond.nnkp` file, which ABACUS will read later, by running Wannier90:
 
     ```
     wannier90 -pp diamond.win
@@ -73,17 +73,22 @@ To construct the MLWFs using the wave functions of ABACUS generally requires fou
     ```
     INPUT_PARAMETERS
 
+    pseudo_dir              ../../../tests/PP_ORB
+    orbital_dir             ../../../tests/PP_ORB
     ntype                   1
     ecutwfc                 50
     nbands                  4
+    smearing_method         fixed
     calculation             nscf
     scf_nmax                50
     pw_diag_thr             1.0e-12
-    scf_thr                 1.0e-15
+    scf_thr                 1.0e-13
     init_chg                file
-    symmetry                0
+    symmetry                -1
     towannier90             1
     nnkpfile                diamond.nnkp
+    basis_type              pw
+    out_wannier_unk         0
     ```
 
     There are seven interface-related parameters in the `INPUT` file:
@@ -97,7 +102,7 @@ To construct the MLWFs using the wave functions of ABACUS generally requires fou
     - [out_wannier_unk](../input_files/input-main.md#out_wannier_unk): control whether to output the "UNK.*" file.
     - [out_wannier_wvfn_formatted](../input_files/input-main.md#out_wannier_wvfn_formatted): control what format of the Wannier function file to output, `true`: output the formatted text file; `false`: output the binary file. Note that the `wvfn_formatted` option in `*.win` file (input file of Wannier90) has to be set accordingly with this option.
 
-    Note: You need to turn off the symmetry during the entire nscf calculation.
+    **Note: You need to turn off the symmetry during the entire nscf calculation.**
 
     To setup the `KPT` file according to the `diamond.win` file, which is similar to "begin kpoints ..." in the `diamond.win` file: 
 
 
@@ -22,7 +22,9 @@ In the nested procedure mentioned above, we used CG method to perform cell relax
 
 The [BFGS method](https://en.wikipedia.org/wiki/Broyden%E2%80%93Fletcher%E2%80%93Goldfarb%E2%80%93Shanno_algorithm) is a quasi-Newton method for solving nonlinear optimization problem. It belongs to the class of quasi-Newton method where the Hessian matrix is approximated during the optimization process. If the initial point is not far from the extrema, BFGS tends to work better than gradient-based methods.
 
-In ABACUS, we implemented the BFGS method for doing fixed-cell structural relaxation.
+There is an alternative traditional BFGS method, which can be selected with the keyword 'bfgs_trad'. The bfgs_trad method is a quasi-Newton method that substitutes an approximate matrix B for the Hessian matrix. The main difference between 'bfgs' and 'bfgs_trad' is that 'bfgs' updates the inverse of matrix B directly, while 'bfgs_trad' updates matrix B and obtains the inverse of B by computing the eigenvalues of B and taking their reciprocals. Both methods are mathematically equivalent, but in some cases, 'bfgs_trad' performs better.
+
+In ABACUS, we implemented the BFGS method for doing fixed-cell structural relaxation. Users can choose which implementation of BFGS to call by adding the 'bfgs_trad' or 'bfgs' parameter.
 
 ### SD method
 
 
@@ -1,6 +1,17 @@
 # Easy Installation
 
-This guide helps you install ABACUS with basic features. **For DeePKS, DeePMD and Libxc support, or building with `make`, please refer to [the advanced installation guide](../advanced/install.md)** after going through this page. We recommend building ABACUS with `cmake` to avoid dependency issues. We recommend compiling ABACUS(and possibly its requirements) from the source code using the latest compiler for the best performace. You can also deploy ABACUS **without building** by [Docker](#container-deployment) or [conda](#install-by-conda). Please note that ABACUS only supports Linux; for Windows users, please consider using [WSL](https://learn.microsoft.com/en-us/windows/wsl/) or docker.
+This guide helps you install ABACUS with basic features. **For DeePKS, DeePMD and Libxc support, or building with `make`, please refer to [the advanced installation guide](../advanced/install.md)** after going through this page. We recommend building ABACUS with `cmake` to avoid dependency issues. We recommend compiling ABACUS (and possibly its requirements) from the source code using the latest compiler for the best performance. You can try the [toolchain](#install-requirements-by-toolchain) to conveniently install ABACUS and its dependencies by compiling them from source. You can also deploy ABACUS **without building** by [Docker](#container-deployment) or [conda](#install-by-conda). Please note that ABACUS only supports Linux; for Windows users, please consider using [WSL](https://learn.microsoft.com/en-us/windows/wsl/) or docker.
+
+## Get ABACUS source code
+
+ABACUS source code can be obtained via one of the following choices:
+
+- Clone the whole repo with git: `git clone https://github.com/deepmodeling/abacus-develop.git`
+- Clone the minimum required part of repo: `git clone https://github.com/deepmodeling/abacus-develop.git --depth=1`
+- Download the latest source code without git: `wget https://github.com/deepmodeling/abacus-develop/archive/refs/heads/develop.zip`
+- Get the source code of a stable version [here](https://github.com/deepmodeling/abacus-develop/releases)
+- If you have connection issues accessing GitHub, please try out our official [Gitee repo](https://gitee.com/deepmodeling/abacus-develop/): e.g. `git clone https://gitee.com/deepmodeling/abacus-develop.git`. This Gitee repo is updated synchronously with GitHub.
+
 
 ## Prerequisites
 
@@ -43,32 +54,22 @@ Please refer to our [guide](https://github.com/deepmodeling/abacus-develop/wiki/
 
 We offer a set of [toolchain](https://github.com/deepmodeling/abacus-develop/tree/develop/toolchain)
 scripts to compile and install all the requirements
-automatically and suitable for machine characteristic in an online or offline way.
-The toolchain can be downloaded with ABACUS repo, which is easily used and can
-have a convenient installation under HPC environment in both `GNU` or `Intel-oneAPI` toolchain.
-Sometimes, ABACUS by toolchain installation may have highly efficient performance.
-A Tutorial for using this toolchain can be accessed in [bohrium-notebook](https://nb.bohrium.dp.tech/detail/5215742477)
-
-> Notice: the toolchain is under development, please let me know if you encounter any problem in using this toolchain.
+automatically and suitable for machine characteristic in an online or offline way. 
+The toolchain can be downloaded with ABACUS repo, and users can easily compile the requirements by running *toolchain_[gnu,intel].sh* and ABACUS itself by running *build_abacus_[gnu,intel].sh* script in the toolchain directory in both `GNU` and `Intel-oneAPI` toolchain.
+Sometimes, ABACUS installed via the toolchain may achieve better performance because its dependencies are compiled to suit the machine.
 
+Users should read the README in toolchain directory for most of the information before use, and a tutorial for using this toolchain can be accessed in [bohrium-notebook](https://nb.bohrium.dp.tech/detail/5215742477) as reference.
 
-## Get ABACUS source code
-
-Of course a copy of ABACUS source code is required, which can be obtained via one of the following choices:
+> Notice: the toolchain is under development. Please let us know if you encounter any problem in using this toolchain by raising an issue or contacting us.
 
-- Clone the whole repo with git: `git clone https://github.com/deepmodeling/abacus-develop.git`
-- Clone the minimum required part of repo: `git clone https://github.com/deepmodeling/abacus-develop.git --depth=1`
-- Download the latest source code without git: `wget https://github.com/deepmodeling/abacus-develop/archive/refs/heads/develop.zip`
-- Get the source code of a stable version [here](https://github.com/deepmodeling/abacus-develop/releases)
-- If you have connection issues accessing GitHub, please try out our official [Gitee repo](https://gitee.com/deepmodeling/abacus-develop/): e.g. `git clone https://gitee.com/deepmodeling/abacus-develop.git`
 
-### Update to latest release
+## Update to latest release by git
 
 Please check the [release page](https://github.com/deepmodeling/abacus-develop/releases) for the release note of a new version.
 
 It is OK to download the new source code from beginning following the previous step.
 
-To update your cloned git repo in-place:
+You can update your cloned git repo (from GitHub or Gitee) in-place with the following commands:
 
 ```bash
 git remote -v
@@ -78,7 +79,7 @@ git remote -v
 
 # Replace "origin" with "upstream" or the remote name corresponding to deepmodeling/abacus-develop if necessary
 git fetch origin
-git checkout v3.2.0 # Replace the tag with the latest version
+git checkout v3.8.4 # Replace the tag with the latest version
 git describe --tags # Verify if the tag has been successfully checked out
 ```
 
@@ -158,6 +159,12 @@ If ABACUS is installed into a custom directory using `CMAKE_INSTALL_PREFIX`, ple
 export PATH=/my-install-dir/:$PATH
 ```
 
+If ABACUS is installed by toolchain, there will be an environment script in the toolchain directory named as *abacus_env.sh*. You can source it to set the environment variables.
+
+```bash
+source /path/to/abacus/toolchain/abacus_env.sh
+```
+
 Please set OpenMP threads by setting environment variable:
 
 ```bash
 
@@ -10,9 +10,9 @@ scf_nmax                50
 pw_diag_thr             1.0e-12
 scf_thr                 1.0e-13
 init_chg                file
-symmetry                0
+symmetry                -1
 towannier90             1
 nnkpfile                diamond.nnkp
-basis_type		        lcao
+basis_type              lcao
 wannier_method          2
 out_wannier_unk         0
@@ -10,7 +10,7 @@ scf_nmax                50
 pw_diag_thr             1.0e-12
 scf_thr                 1.0e-13
 init_chg                file
-symmetry                0
+symmetry                -1
 towannier90             1
 nnkpfile                diamond.nnkp
 basis_type              lcao
 
@@ -11,8 +11,8 @@ scf_nmax                50
 pw_diag_thr             1.0e-12
 scf_thr                 1.0e-13
 init_chg                file
-symmetry                0
+symmetry                -1
 towannier90             1
 nnkpfile                diamond.nnkp
-basis_type		        pw
+basis_type              pw
 out_wannier_unk         0
@@ -6,6 +6,7 @@ orbital_dir                 ../../../tests/PP_ORB
 calculation             scf
 nbands                23
 symmetry               	-1
+nspin                   2 
 
 #Parameters (2.Iteration)
 ecutwfc                  60 ###Energy cutoff needs to be tested to ensure your calculation is reliable.[1]
@@ -30,6 +31,7 @@ xc_kernel lda
 lr_solver dav
 lr_thr 1e-2
 pw_diag_ndim 2
+# lr_unrestricted 1  ### use this to do TDUKS calculation for closed-shell systems (open-shell systems will force TDUKS)
 
 esolver_type ks-lr
 out_alllog	1
@@ -39,6 +41,7 @@ out_alllog	1
 nvirt 19
 abs_wavelen_range  40 180
 abs_broadening 0.01
+abs_gauge length
 
 ### [1] Energy cutoff determines the quality of numerical quadratures in your calculations.
 ###     So it is strongly recommended to test whether your result (such as converged SCF energies) is
 
@@ -5,7 +5,8 @@ pseudo_dir              ../../../tests/PP_ORB
 orbital_dir                 ../../../tests/PP_ORB
 calculation             scf
 nbands                23
-symmetry               	0
+symmetry               	-1
+nspin                    2
 
 #Parameters (2.Iteration)
 ecutwfc                  60 ###Energy cutoff needs to be tested to ensure your calculation is reliable.[1]
@@ -37,6 +38,8 @@ out_alllog	1
 
 nvirt 19
 abs_wavelen_range  100 175
+abs_broadening 0.01 # in Ry
+abs_gauge velocity ### velocity gauge is recommended for periodic systems
 
 
 ### [1] Energy cutoff determines the quality of numerical quadratures in your calculations.