deepmodeling
diff --git a/‎CMakeLists.txt‎
Lines changed: 37 additions & 28 deletions b/‎CMakeLists.txt‎
Lines changed: 37 additions & 28 deletions
diff --git a/‎docs/advanced/acceleration/cuda.md‎
Lines changed: 1 addition & 0 deletions b/‎docs/advanced/acceleration/cuda.md‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎docs/advanced/input_files/input-main.md‎
Lines changed: 50 additions & 21 deletions b/‎docs/advanced/input_files/input-main.md‎
Lines changed: 50 additions & 21 deletions
diff --git a/‎docs/quick_start/output.md‎
Lines changed: 7 additions & 1 deletion b/‎docs/quick_start/output.md‎
Lines changed: 7 additions & 1 deletion
diff --git a/‎source/Makefile.Objects‎
Lines changed: 6 additions & 2 deletions b/‎source/Makefile.Objects‎
Lines changed: 6 additions & 2 deletions
@@ -10,39 +10,45 @@ project(
   HOMEPAGE_URL "https://github.com/deepmodeling/abacus-develop"
   LANGUAGES CXX)
 
-option(ENABLE_LCAO "Enable LCAO calculation." ON)
-option(ENABLE_DEEPKS "Enable DeePKS functionality" OFF)
-option(ENABLE_MLKEDF "Enable Machine Learning based KEDF for OFDFT" OFF)
-option(ENABLE_LIBXC "Enable LibXC functionality" OFF)
-option(USE_CUDA "Enable support to CUDA for ABACUS." OFF)
-option(ENABLE_FLOAT_FFTW "Enable support to single precision FFTW library." OFF)
-option(USE_ROCM "Enable support to ROCm." OFF)
-option(USE_OPENMP "Enable OpenMP in ABACUS." ON)
+option(ENABLE_MPI "Enable MPI" ON)
+option(USE_OPENMP "Enable OpenMP" ON)
+option(USE_CUDA "Enable CUDA" OFF)
+option(USE_CUDA_MPI "Enable CUDA-aware MPI" OFF)
+option(USE_CUDA_ON_DCU "Enable CUDA on DCU" OFF)
+option(USE_ROCM "Enable ROCm" OFF)
+option(USE_DSP "Enable DSP" OFF)
+
+option(USE_ABACUS_LIBM "Build libmath from source to speed up" OFF)
+option(ENABLE_LIBXC "Enable using the LibXC package" OFF)
+option(ENABLE_FLOAT_FFTW "Enable using single-precision FFTW library." OFF)
+option(ENABLE_DEEPKS "Enable the DeePKS algorithm" OFF)
+option(ENABLE_MLKEDF "Enable the Machine-Learning-based KEDF for OFDFT" OFF)
+
+option(ENABLE_LCAO "Enable LCAO algorithm" ON)
+option(USE_ELPA "Enable ELPA for LCAO" ON)
+option(ENABLE_LIBRI "Enable LibRI for hybrid functional" OFF)
+option(ENABLE_LIBCOMM "Enable LibComm" OFF)
+option(ENABLE_PEXSI "Enable PEXSI for LCAO" OFF)
+
+option(BUILD_TESTING "Build unittests" OFF)
+option(DEBUG_INFO "Print message to debug" OFF)
 option(ENABLE_ASAN "Enable AddressSanitizer" OFF)
-option(BUILD_TESTING "Build ABACUS unit tests" OFF)
-option(INFO "Enable gathering of math library information" OFF)
-option(ENABLE_COVERAGE "Enable coverage build." OFF)
-option(ENABLE_LIBRI "Enable EXX with LibRI." OFF)
-option(ENABLE_LIBCOMM "Enable communicate with LibComm." OFF)
-option(ENABLE_PAW "Enable PAW calculation" OFF)
-option(ENABLE_MPI "Enable compilation with or without MPI." ON)
-option(USE_ELPA "Enable ELPA" ON)
-option(USE_ABACUS_LIBM "Build libmath from source to speed up." OFF)
+option(INFO "Enable gathering math library information" OFF)
+option(ENABLE_COVERAGE "Enable coverage build" OFF)
 option(GIT_SUBMODULE "Check submodules during build" ON)
-option(DEBUG_INFO "Print message for developers to debug." OFF)
+
+option(ENABLE_PAW "Enable PAW method" OFF)
+
 # Do not enable it if generated code will run on different CPUs
 option(ENABLE_NATIVE_OPTIMIZATION
        "Enable compilation optimization for the native machine's CPU type" OFF)
+
 option(COMMIT_INFO "Print commit information in log" ON)
-option(ENABLE_FFT_TWO_CENTER "Enable FFT-based two-center integral method." ON)
-option(ENABLE_GOOGLEBENCH "Enable GOOGLE-benchmark usage." OFF)
-option(ENABLE_RAPIDJSON "Enable rapid-json usage." OFF)
-option(ENABLE_CNPY "Enable cnpy usage." OFF)
-option(ENABLE_PEXSI "Enable support for PEXSI." OFF)
-option(ENABLE_CUSOLVERMP "Enable cusolvermp." OFF)
-option(USE_DSP "Enable DSP usage." OFF)
-option(USE_CUDA_ON_DCU "Enable CUDA on DCU" OFF)
-option(USE_CUDA_MPI "Enable CUDA-aware MPI" OFF)
+option(ENABLE_FFT_TWO_CENTER "Enable FFT-based two-center integral method" ON)
+option(ENABLE_GOOGLEBENCH "Enable GOOGLE-benchmark usage" OFF)
+option(ENABLE_RAPIDJSON "Enable rapid-json usage" OFF)
+option(ENABLE_CNPY "Enable cnpy usage" OFF)
+option(ENABLE_CUSOLVERMP "Enable cusolvermp" OFF)
 
 # enable json support
 if(ENABLE_RAPIDJSON)
@@ -108,11 +114,13 @@ You can install Git first and reinstall abacus.")
   endif()
 endif()
 
+# Serial (non-MPI) builds of ABACUS use neither ELPA nor DeePKS
 if(NOT ENABLE_MPI)
   set(USE_ELPA OFF)
   set(ENABLE_DEEPKS OFF)
 endif()
 
+# Different exe files of ABACUS
 if(ENABLE_LCAO AND ENABLE_MPI)
   set(ABACUS_BIN_NAME abacus)
 elseif(NOT ENABLE_LCAO AND ENABLE_MPI)
@@ -123,6 +131,7 @@ elseif(ENABLE_LCAO AND NOT ENABLE_MPI)
   set(ABACUS_BIN_NAME abacus_serial)
 endif()
 
+# Use DSP hardware
 if (USE_DSP)
   set(USE_ELPA OFF)
   set(ENABLE_LCAO OFF)
@@ -206,7 +215,7 @@ if(CMAKE_CXX_COMPILER_ID MATCHES Intel)
   add_compile_options(-fp-model=strict)
   set(USE_ABACUS_LIBM OFF) # Force turn off USE_ABACUS_LIBM on Intel Compiler
   set(CMAKE_CXX_FLAGS
-      "${CMAKE_CXX_FLAGS} -Wno-write-strings -Wno-tautological-constant-compare"
+      "${CMAKE_CXX_FLAGS} -Wno-write-strings "
   )
 endif()
 
 
@@ -46,6 +46,7 @@ We provides [examples](https://github.com/deepmodeling/abacus-develop/tree/devel
 PW basis:
 - Only k point parallelization is supported, so the input keyword `kpar` will be set to match the number of MPI tasks automatically.
 - By default, CUDA architectures 60, 70, 75, 80, 86, and 89 are compiled (if supported). It can be overriden using the CMake variable [`CMAKE_CUDA_ARCHITECTURES`](https://cmake.org/cmake/help/latest/variable/CMAKE_CUDA_ARCHITECTURES.html) or the environmental variable [`CUDAARCHS`](https://cmake.org/cmake/help/latest/envvar/CUDAARCHS.html).
+
 LCAO basis:
 - Unless there is a specific reason, avoid using multiple GPUs, as it can be slower than using a single GPU. This is because the generalized eigenvalue solution of the LCAO basis set will incur additional communication overhead when calculated on multiple cards. When the memory limit of a GPU card makes it insufficient to complete the task, it is recommended to use multiple cards for calculation.
 - When using elpa on GPUs, some ELPA internal logs will be output.
@@ -52,7 +52,6 @@
     - [lcao\_dr](#lcao_dr)
     - [lcao\_rmax](#lcao_rmax)
     - [search\_radius](#search_radius)
-    - [search\_pbc](#search_pbc)
     - [bx, by, bz](#bx-by-bz)
     - [elpa\_num\_thread](#elpa_num_thread)
     - [num\_stream](#num_stream)
@@ -140,6 +139,7 @@
     - [out\_wfc\_r](#out_wfc_r)
     - [out\_wfc\_lcao](#out_wfc_lcao)
     - [out\_dos](#out_dos)
+    - [out\_ldos](#out_ldos)
     - [out\_band](#out_band)
     - [out\_proj\_band](#out_proj_band)
     - [out\_stru](#out_stru)
@@ -152,8 +152,10 @@
     - [out\_mat\_hs2](#out_mat_hs2)
     - [out\_mat\_t](#out_mat_t)
     - [out\_mat\_dh](#out_mat_dh)
+    - [out\_mat\_ds](#out_mat_ds)
     - [out\_mat\_xc](#out_mat_xc)
     - [out\_mat\_xc2](#out_mat_xc2)
+    - [out\_mat\_l](#out_mat_l)
     - [out\_eband\_terms](#out_eband_terms)
     - [out\_hr\_npz/out\_dm\_npz](#out_hr_npzout_dm_npz)
     - [dm\_to\_rho](#dm_to_rho)
@@ -174,6 +176,7 @@
     - [dos\_emin\_ev](#dos_emin_ev)
     - [dos\_emax\_ev](#dos_emax_ev)
     - [dos\_nche](#dos_nche)
+    - [stm\_bias](#stm_bias)
   - [NAOs](#naos)
     - [bessel\_nao\_ecut](#bessel_nao_ecut)
     - [bessel\_nao\_tolerence](#bessel_nao_tolerence)
@@ -920,12 +923,6 @@ These variables are used to control the numerical atomic orbitals related parame
 - **Default**: -1
 - **Unit**: Bohr
 
-### search_pbc
-
-- **Type**: Boolean
-- **Description**: If True, periodic images will be included in searching for the neighbouring atoms. If False, periodic images will be ignored.
-- **Default**: True
-
 ### bx, by, bz
 
 - **Type**: Integer
@@ -1702,10 +1699,16 @@ These variables are used to control the output of properties.
 - **Description**: Whether to output the density of states (DOS). For more information, refer to the [dos.md](../elec_properties/dos.md).
   - 0: no output
   - 1: output the density of states (DOS)
-  - 2: 
-    - lcao-only: output the density of states (DOS) and the projected density of states (PDOS)
+  - 2: (LCAO) output the density of states (DOS) and the projected density of states (PDOS)
+  - 3: output the Fermi surface file (fermi.bxsf) in BXSF format that can be visualized by XCrySDen 
 - **Default**: 0
 
+### out_ldos
+
+- **Type**: Boolean
+- **Description**: Whether to output the local density of states (LDOS) for a given bias in cube file format. The bias is controlled by [stm_bias](#stm_bias).
+- **Default**: False
+
 ### out_band
 
 - **Type**: Boolean \[Integer\](optional)
@@ -1792,6 +1795,13 @@ These variables are used to control the output of properties.
 - **Description**: Whether to print files containing the derivatives of the Hamiltonian matrix (in Ry/Bohr). The format will be the same as the Hamiltonian matrix $H(R)$ and overlap matrix $S(R)$ as mentioned in [out_mat_hs2](#out_mat_hs2). The name of the files will be `data-dHRx-sparse_SPIN0.csr` and so on. Also controled by [out_interval](#out_interval) and [out_app_flag](#out_app_flag).
 - **Default**: False
 
+### out_mat_ds
+
+- **Type**: Boolean
+- **Availability**: Numerical atomic orbital basis (not gamma-only algorithm)
+- **Description**: Whether to print files containing the derivatives of the overlap matrix (in Ry/Bohr). The format will be the same as that of the derivatives of the Hamiltonian matrix, $dH(R)$, as described in [out_mat_dh](#out_mat_dh). The name of the files will be `data-dSRx-sparse_SPIN0.csr` and so on. Also controlled by [out_interval](#out_interval) and [out_app_flag](#out_app_flag). This feature can be used with `calculation get_S`.
+- **Default**: False
+
 ### out_mat_xc
 
 - **Type**: Boolean
@@ -1807,6 +1817,13 @@ The band (KS orbital) energy for each (k-point, spin, band) will be printed in t
 - **Description**: Whether to print the exchange-correlation matrices in **numerical orbital representation** (unit: Ry): $\braket{\phi_i|V_\text{xc}^\text{(semi-)local}+V_\text{exx}+V_\text{DFTU}|\phi_j}(\mathbf{R})$ in CSR format (the same format as [out_mat_hs2](../elec_properties/hs_matrix.md#out_mat_hs2)) in the directory `OUT.${suffix}`. (Note that currently DeePKS term is not included. ) The files are named `Vxc_R_spin$s`.
 - **Default**: False
 
+### out_mat_l
+
+- **Type**: Boolean \[Integer\](optional)
+- **Availability**: Numerical atomic orbital (NAO) basis
+- **Description**: Whether to print the expectation value of the angular momentum operator $\hat{L}_x$, $\hat{L}_y$, and $\hat{L}_z$ in the basis of the localized atomic orbitals. The files are named `OUT.${suffix}/${suffix}_Lx.dat`, `OUT.${suffix}/${suffix}_Ly.dat`, and `OUT.${suffix}/${suffix}_Lz.dat`. The second integer controls the precision of the output.
+- **Default**: False 8
+
 ### out_eband_terms
 
 - **Type**: Boolean
@@ -1955,9 +1972,20 @@ These variables are used to control the calculation of DOS. [Detailed introducti
 ### dos_nche
 
 - **Type**: Integer
-The order of Chebyshev expansions when using Stochastic Density Functional Theory (SDFT) to calculate DOS.
+- **Description**: The order of Chebyshev expansions when using Stochastic Density Functional Theory (SDFT) to calculate DOS.
 - **Default**: 100
 
+### stm_bias
+
+- **Type**: Real Real(optional) Integer(optional)
+- **Description**: The bias voltage used to calculate local density of states to simulate scanning tunneling microscope, see details in [out_ldos](#out_ldos). When using three parameters:
+
+  - The first parameter specifies the initial bias voltage value. 
+  - The second parameter defines the voltage increment (step size between consecutive bias values). 
+  - The third parameter determines the total number of voltage points.
+- **Default**: 1.0
+- **Unit**: V
+
 [back to top](#full-list-of-input-keywords)
 
 ## NAOs
@@ -2004,10 +2032,13 @@ Warning: this function is not robust enough for the current version. Please try
 
 ### deepks_out_labels
 
-- **Type**: Boolean
+- **Type**: Integer
 - **Availability**: numerical atomic orbital basis
-- **Description**: Print labels and descriptors for DeePKS training in OUT.${suffix}. The names of these files start with "deepks".
-- **Note**: In `LCAO` calculation, the path of a numerical descriptor (an `orb` file) is needed to be specified under the `NUMERICAL_DESCRIPTOR` tag in the `STRU` file. For example:
+- **Description**: Print labels and descriptors for DeePKS in OUT.${suffix}. The names of these files start with "deepks".
+  - 0 : No output.
+  - 1 : Output intermediate files needed during DeePKS training.
+  - 2 : Output target labels for label preparation. The label files are named as `deepks_<property>.npy`, where the units and formats are the same as those of the label files `<property>.npy` required for training, except that the first dimension (`nframes`) is excluded. System structure files are also given in `deepks_atom.npy` and `deepks_box.npy` in the unit of *Bohr*, which means `lattice_constant` should be set to 1 when training.
+- **Note**: When `deepks_out_labels` equals **1**, the path of a numerical descriptor (an `orb` file) is needed to be specified under the `NUMERICAL_DESCRIPTOR` tag in the `STRU` file. For example:
 
   ```text
   NUMERICAL_ORBITAL
@@ -2017,8 +2048,8 @@ Warning: this function is not robust enough for the current version. Please try
   NUMERICAL_DESCRIPTOR
   jle.orb
   ```
-
-- **Default**: False
+  This is not needed when `deepks_out_labels` equals 2. 
+- **Default**: 0
 
 ### deepks_scf
 
@@ -3548,19 +3579,23 @@ These variables are used to control berry phase and wannier90 interface paramete
 - **Type**: Real
 - **Description**:
   `td_lcut1` is the lower bound of the interval in the length gauge RT-TDDFT, where $x$ is the fractional coordinate:
+
   $$
     E(x)=\begin{cases}E_0, & \mathtt{cut1}\leqslant x \leqslant \mathtt{cut2} \\-E_0\left(\dfrac{1}{\mathtt{cut1}+1-\mathtt{cut2}}-1\right), & 0 < x < \mathtt{cut1~~or~~cut2} < x < 1 \end{cases}
   $$
+
 - **Default**: 0.05
 
 ### td_lcut2
 
 - **Type**: Real
 - **Description**:
   `td_lcut2` is the upper bound of the interval in the length gauge RT-TDDFT, where $x$ is the fractional coordinate:
+
   $$
     E(x)=\begin{cases}E_0, & \mathtt{cut1}\leqslant x \leqslant \mathtt{cut2} \\-E_0\left(\dfrac{1}{\mathtt{cut1}+1-\mathtt{cut2}}-1\right), & 0 < x < \mathtt{cut1~~or~~cut2} < x < 1 \end{cases}
   $$
+
 - **Default**: 0.95
 
 ### td_gauss_freq
@@ -3858,12 +3893,6 @@ These variables are used to control berry phase and wannier90 interface paramete
   - 1: Yes.
 - **Default**: 0
 
-- **Type**: Boolean
-- **Description**: Specify whether to set the colorful output in terminal.
-  - 0: No.
-  - 1: Yes.
-- **Default**: 0
-
 ### test_skip_ewald
 
 - **Type**: Boolean
 
@@ -6,7 +6,13 @@ The following files are the central output files for ABACUS. After executing the
 
 Different from `INPUT` given by the users, `OUT.suffix/INPUT` contains all parameters in ABACUS.
 
-> **Note:** `OUT.suffix/INPUT` contain the initial default of ABACUS instead of the real parameters used in calculations. This file is stored for reproduction in case the default value is changed during development. If you want to figure out the real parameters used in calculations, you can open `OUT.suffix/runing_scf.log` and research corresponding parameter you are interested.
+> **Note:** `OUT.suffix/INPUT` contains the **actual parameters used in the calculation**, including:
+> 1. **User-specified parameters** (explicitly defined in your input file or command-line arguments, overriding default parameters).
+> 2. **System default parameters** (automatically applied when not explicitly provided by the user).
+
+
+This file ensures calculations can be fully reproduced, even if default values change in future ABACUS versions.
+Note also that, in rare cases, a small number of parameters may be dynamically reset to appropriate values during runtime.
 
 For a complete list of input parameters, please consult this [instruction](../advanced/input_files/input-main.md).
 
 
@@ -259,6 +259,7 @@ OBJS_ESOLVER=esolver.o\
     esolver_of_tool.o\
     esolver_of_interface.o\
     pw_others.o\
+    pw_setup.o\
 
 OBJS_ESOLVER_LCAO=esolver_ks_lcao.o\
       esolver_ks_lcao_tddft.o\
@@ -495,10 +496,13 @@ OBJS_IO=input_conv.o\
     bessel_basis.o\
     cal_test.o\
     write_dos_pw.o\
+    nscf_fermi_surf.o\
     nscf_band.o\
     cal_dos.o\
+    cal_pdos_gamma.o\
+    cal_pdos_multik.o\
+    cal_ldos.o\
     cif_io.o\
-    dos_nao.o\
     numerical_descriptor.o\
     numerical_basis.o\
     numerical_basis_jyjy.o\
@@ -554,13 +558,13 @@ OBJS_IO=input_conv.o\
     read_input_item_output.o\
     read_set_globalv.o\
     orb_io.o\
+    cal_pLpR.o\
 
 OBJS_IO_LCAO=cal_r_overlap_R.o\
       write_orb_info.o\
       write_dos_lcao.o\
       write_proj_band_lcao.o\
       write_istate_info.o\
-      nscf_fermi_surf.o\
       get_pchg_lcao.o\
       get_wf_lcao.o\
       io_dmk.o\