deepmodeling
diff --git a/‎.github/workflows/test.yml‎
Lines changed: 10 additions & 7 deletions b/‎.github/workflows/test.yml‎
Lines changed: 10 additions & 7 deletions
diff --git a/‎CMakeLists.txt‎
Lines changed: 10 additions & 3 deletions b/‎CMakeLists.txt‎
Lines changed: 10 additions & 3 deletions
diff --git a/‎cmake/FindMKL.cmake‎
Lines changed: 1 addition & 1 deletion b/‎cmake/FindMKL.cmake‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎docs/advanced/input_files/input-main.md‎
Lines changed: 29 additions & 17 deletions b/‎docs/advanced/input_files/input-main.md‎
Lines changed: 29 additions & 17 deletions
diff --git a/‎docs/advanced/install.md‎
Lines changed: 2 additions & 0 deletions b/‎docs/advanced/install.md‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎docs/advanced/interface/ase.md‎
Lines changed: 30 additions & 4 deletions b/‎docs/advanced/interface/ase.md‎
Lines changed: 30 additions & 4 deletions
diff --git a/‎docs/quick_start/input.md‎
Lines changed: 16 additions & 17 deletions b/‎docs/quick_start/input.md‎
Lines changed: 16 additions & 17 deletions
diff --git a/‎source/CMakeLists.txt‎
Lines changed: 7 additions & 0 deletions b/‎source/CMakeLists.txt‎
Lines changed: 7 additions & 0 deletions
diff --git a/‎source/Makefile.Objects‎
Lines changed: 2 additions & 0 deletions b/‎source/Makefile.Objects‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎source/module_base/CMakeLists.txt‎
Lines changed: 4 additions & 1 deletion b/‎source/module_base/CMakeLists.txt‎
Lines changed: 4 additions & 1 deletion
@@ -33,13 +33,16 @@ jobs:
         run: |
           cmake -B build -DBUILD_TESTING=ON -DENABLE_DEEPKS=ON -DENABLE_MLKEDF=ON -DENABLE_LIBXC=ON -DENABLE_LIBRI=ON -DENABLE_PAW=ON -DENABLE_GOOGLEBENCH=ON -DENABLE_RAPIDJSON=ON  -DCMAKE_EXPORT_COMPILE_COMMANDS=1
 
-      - uses: pre-commit/[email protected]
-        with:
-          extra_args:
-            --from-ref ${{ github.event.pull_request.base.sha }}
-              --to-ref ${{ github.event.pull_request.head.sha }}
-        continue-on-error: true
-      - uses: pre-commit-ci/[email protected]
+# The pre-commit steps below are temporarily disabled: they are currently
+# unmaintained and cause the CI test workflow to fail.
+
+#      - uses: pre-commit/[email protected]
+#        with:
+#          extra_args:
+#            --from-ref ${{ github.event.pull_request.base.sha }}
+#              --to-ref ${{ github.event.pull_request.head.sha }}
+#        continue-on-error: true
+#      - uses: pre-commit-ci/[email protected]
 
       - name: Build
         run: |
 
@@ -42,6 +42,7 @@ option(ENABLE_PEXSI "Enable support for PEXSI." OFF)
 option(ENABLE_CUSOLVERMP "Enable cusolvermp." OFF)
 option(USE_DSP "Enable DSP usage." OFF)
 option(USE_CUDA_ON_DCU "Enable CUDA on DCU" OFF)
+option(USE_CUDA_MPI "Enable CUDA-aware MPI" OFF)
 
 # enable json support
 if(ENABLE_RAPIDJSON)
@@ -132,6 +133,10 @@ if (USE_CUDA_ON_DCU)
   add_compile_definitions(__CUDA_ON_DCU)
 endif()
 
+if (USE_CUDA_MPI)
+  add_compile_definitions(__CUDA_MPI)
+endif()
+
 list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake)
 
 if(ENABLE_COVERAGE)
@@ -257,8 +262,12 @@ if(ENABLE_MPI)
 endif()
 
 if (USE_DSP)
-  target_link_libraries(${ABACUS_BIN_NAME} ${DIR_MTBLAS_LIBRARY})
   add_compile_definitions(__DSP)
+  target_link_libraries(${ABACUS_BIN_NAME} ${OMPI_LIBRARY1})
+  include_directories(${MTBLAS_FFT_DIR}/libmtblas/include)
+  include_directories(${MT_HOST_DIR}/include)
+  target_link_libraries(${ABACUS_BIN_NAME} ${MT_HOST_DIR}/hthreads/lib/libhthread_device.a)
+  target_link_libraries(${ABACUS_BIN_NAME} ${MT_HOST_DIR}/hthreads/lib/libhthread_host.a)
 endif()
 
 find_package(Threads REQUIRED)
@@ -429,10 +438,8 @@ else()
   find_package(Lapack REQUIRED)
   include_directories(${FFTW3_INCLUDE_DIRS})
   list(APPEND math_libs FFTW3::FFTW3 LAPACK::LAPACK BLAS::BLAS)
-
   find_package(ScaLAPACK REQUIRED)
   list(APPEND math_libs ScaLAPACK::ScaLAPACK)
-
   if(USE_OPENMP)
     list(APPEND math_libs FFTW3::FFTW3_OMP)
   endif()
 
@@ -83,7 +83,7 @@ endif()
 endif() # MKL::MKL
 
 # For compatibility with legacy libpaw_interface CMakeLists.txt
-if(TARGET MKL::MKL)
+if(TARGET MKL::MKL AND NOT TARGET IntelMKL::MKL)
   add_library(IntelMKL::MKL ALIAS MKL::MKL)
 endif()
 
 
@@ -20,8 +20,8 @@
     - [kspacing](#kspacing)
     - [min\_dist\_coef](#min_dist_coef)
     - [device](#device)
-    - [nb2d](#nb2d)
     - [precision](#precision)
+    - [nb2d](#nb2d)
   - [Variables related to input files](#variables-related-to-input-files)
     - [stru\_file](#stru_file)
     - [kpoint\_file](#kpoint_file)
@@ -224,6 +224,8 @@
     - [of\_ml\_q](#of_ml_q)
     - [of\_ml\_tanhp](#of_ml_tanhp)
     - [of\_ml\_tanhq](#of_ml_tanhq)
+    - [of\_ml\_chi\_p](#of_ml_chi_p)
+    - [of\_ml\_chi\_q](#of_ml_chi_q)
     - [of\_ml\_gammanl](#of_ml_gammanl)
     - [of\_ml\_pnl](#of_ml_pnl)
     - [of\_ml\_qnl](#of_ml_qnl)
@@ -234,8 +236,6 @@
     - [of\_ml\_tanh\_qnl](#of_ml_tanh_qnl)
     - [of\_ml\_tanhp\_nl](#of_ml_tanhp_nl)
     - [of\_ml\_tanhq\_nl](#of_ml_tanhq_nl)
-    - [of\_ml\_chi\_p](#of_ml_chi_p)
-    - [of\_ml\_chi\_q](#of_ml_chi_q)
     - [of\_ml\_chi\_xi](#of_ml_chi_xi)
     - [of\_ml\_chi\_pnl](#of_ml_chi_pnl)
     - [of\_ml\_chi\_qnl](#of_ml_chi_qnl)
@@ -583,7 +583,7 @@ These variables are used to control general system parameters.
 ### init_wfc
 
 - **Type**: String
-- **Description**: Only useful for plane wave basis only now. It is the name of the starting wave functions. In the future. we should also make this variable available for localized orbitals set.
+- **Description**: The type of the starting wave functions.
 
   Available options are:
 
@@ -593,6 +593,8 @@ These variables are used to control general system parameters.
   - random: random numbers
   - nao: from numerical atomic orbitals. If they are not enough, other wave functions are initialized with random numbers.
   - nao+random: add small random numbers on numerical atomic orbitals
+  
+  > Only the `file` option is useful for the lcao basis set, which is mostly used when [calculation](#calculation) is set to `set_wf` and `get_pchg`. See more details in [out_wfc_lcao](#out_wfc_lcao).
 - **Default**: atomic
 
 ### init_chg
@@ -1234,6 +1236,7 @@ Note: In new angle mixing, you should set `mixing_beta_mag >> mixing_beta`. The
 - **Type**: Real
 - **Description**: It's the density threshold for electronic iteration. It represents the charge density error between two sequential densities from electronic iterations. For local orbitals, 1e-6 is usually accurate enough.
 - **Default**: 1.0e-9 (plane-wave basis), or 1.0e-7 (localized atomic orbital basis).
+- **Unit**: Ry if `scf_thr_type=1`, **dimensionless** if `scf_thr_type=2`
 
 ### scf_ene_thr
 
@@ -1246,10 +1249,8 @@ Note: In new angle mixing, you should set `mixing_beta_mag >> mixing_beta`. The
 
 - **Type**: Integer
 - **Description**: Choose the calculation method of convergence criterion.
-  - **1**: the criterion is defined as $\Delta\rho_G = \frac{1}{2}\iint{\frac{\Delta\rho(r)\Delta\rho(r')}{|r-r'|}d^3r d^3r'}$.
-  - **2**: the criterion is defined as $\Delta\rho_R = \frac{1}{N_e}\int{|\Delta\rho(r)|d^3r}$, where $N_e$ is the number of electron.
-
-  Note: This parameter is still under testing and the default setting is usually sufficient.
+  - **1**: the criterion is defined as $\Delta\rho_G = \frac{1}{2}\iint{\frac{\Delta\rho(r)\Delta\rho(r')}{|r-r'|}d^3r d^3r'}$. It is used in SCF with the PW basis and has the unit Ry.
+  - **2**: the criterion is defined as $\Delta\rho_R = \frac{1}{N_e}\int{|\Delta\rho(r)|d^3r}$, where $N_e$ is the number of electrons. It is used in SCF with the LCAO basis and is **dimensionless**.
 
 - **Default**: 1 (plane-wave basis), or 2 (localized atomic orbital basis).
 
@@ -2488,7 +2489,7 @@ These variables are relevant to electric field and dipole correction
   - True：A dipole correction is also added to the bare ionic potential.
   - False: A dipole correction is not added to the bare ionic potential.
 
-> Note: If you want no electric field, parameter efield_amp  should be zero. Must be used ONLY in a slab geometry for surface alculations, with the discontinuity FALLING IN THE EMPTY SPACE.
+> Note: If you do not want any electric field, the parameter `efield_amp` should be set to zero. This should ONLY be used in a slab geometry for surface calculations, with the discontinuity FALLING IN THE EMPTY SPACE.
 
 - **Default**: False
 
@@ -3475,9 +3476,10 @@ These variables are used to control berry phase and wannier90 interface paramete
 - **Type**: Integer
 - **Description**:
   method of propagator
-  - 0: Crank-Nicolson.
+  - 0: Crank-Nicolson, based on matrix inversion.
   - 1: 4th Taylor expansions of exponential.
   - 2: enforced time-reversal symmetry (ETRS).
+  - 3: Crank-Nicolson, based on solving linear equation.
 - **Default**: 0
 
 ### td_vext
@@ -3537,19 +3539,29 @@ These variables are used to control berry phase and wannier90 interface paramete
 
 - **Type**: Real
 - **Description**:
-  cut1 of interval in length gauge\
-  E = E0 , cut1<x<cut2\
-  E = -E0/(cut1+1-cut2) , x<cut1 or cut2<x<1
+  `td_lcut1` is the lower bound of the interval in the length gauge RT-TDDFT, where $x$ is the fractional coordinate:
+  $$
+    E(x)=
+    \begin{cases}
+        E_0, & \mathtt{cut1}\leqslant x \leqslant \mathtt{cut2} \\
+        -E_0\left(\dfrac{1}{\mathtt{cut1}+1-\mathtt{cut2}}-1\right), & \text{$0<x<\mathtt{cut1}$ or $\mathtt{cut2}<x<1$}
+    \end{cases}
+  $$
 - **Default**: 0.05
 
 ### td_lcut2
 
 - **Type**: Real
 - **Description**:
-  cut2 of interval in length gauge\
-  E = E0 , cut1<x<cut2\
-  E = -E0/(cut1+1-cut2) , x<cut1 or cut2<x<1
-- **Default**: 0.05
+  `td_lcut2` is the upper bound of the interval in the length gauge RT-TDDFT, where $x$ is the fractional coordinate:
+  $$
+    E(x)=
+    \begin{cases}
+        E_0, & \mathtt{cut1}\leqslant x \leqslant \mathtt{cut2} \\
+        -E_0\left(\dfrac{1}{\mathtt{cut1}+1-\mathtt{cut2}}-1\right), & \text{$0<x<\mathtt{cut1}$ or $\mathtt{cut2}<x<1$}
+    \end{cases}
+  $$
+- **Default**: 0.95
 
 ### td_gauss_freq
 
 
@@ -115,6 +115,8 @@ To build NVIDIA GPU support for ABACUS, define `USE_CUDA` flag. You can also spe
 cmake -B build -DUSE_CUDA=1 -DCMAKE_CUDA_COMPILER=${path to cuda toolkit}/bin/nvcc
 ```
 
+If you are sure that your MPI library is CUDA-aware, you can add `-DUSE_CUDA_MPI=ON`. In this case, the program will communicate data directly with the CUDA hardware, rather than transferring it to the CPU first before communication. Note that if your MPI library is not CUDA-aware, adding `-DUSE_CUDA_MPI=ON` will cause the program to throw an error.
+
 ## Build math library from source
 
 > Note: We recommend using the latest available compiler sets, since they offer faster implementations of math functions.
 
@@ -9,7 +9,12 @@
 ```bash
 git clone https://gitlab.com/1041176461/ase-abacus.git
 cd ase-abacus
-python3 setup.py install
+pip install .
+```
+
+Another direct way:
+```bash
+pip install git+https://gitlab.com/1041176461/ase-abacus.git
 ```
 
 ## Environment variables
@@ -25,6 +30,8 @@ python3 setup.py install
 
 For PW calculations, only `ABACUS_PP_PATH` is needed. For LCAO calculations, both `ABACUS_PP_PATH` and `ABACUS_ORBITAL_PATH` should be set.
 
+Also, one can manually set the paths of PP and ORB when using the ABACUS calculator in ASE.
+
 ## ABACUS Calculator
 
 The default initialization command for the ABACUS calculator is
@@ -49,22 +56,41 @@ For more information on pseudopotentials and numerical orbitals, please visit [A
 
 The input parameters can be set like::
 ```python
-  calc = Abacus(profile=profile, ntype=1, ecutwfc=50, scf_nmax=50, smearing_method='gaussian', smearing_sigma=0.01, basis_type='pw', ks_solver='cg', calculation='scf' pp=pp, basis=basis, kpts=kpts)
+  # for ABACUS calculator
+  calc = Abacus(profile=profile, 
+                ecutwfc=100, 
+                scf_nmax=100, 
+                smearing_method='gaussian', 
+                smearing_sigma=0.01, 
+                basis_type='pw', 
+                ks_solver='dav', 
+                calculation='scf', 
+                pp=pp, 
+                basis=basis, 
+                kpts=kpts)
 ```
 
 The command to run jobs can be set by specifying `AbacusProfile`::
 
 ```python
   from ase.calculators.abacus import AbacusProfile
-  abacus = '/usr/local/bin/abacus'
-  profile = AbacusProfile(argv=['mpirun','-n','2',abacus])
+  # for OpenMP setting inside python env
+  import os
+  os.environ["OMP_NUM_THREADS"] = "1"
+  # for MPI setting used in abacus
+  mpi_num = 4
+  # for ABACUS Profile
+  abacus = '/usr/local/bin/abacus' # specify abacus exec
+  profile = AbacusProfile(command=f'mpirun -n {mpi_num} {abacus}')  # directly the command for running ABACUS
 ```
 
 in which `abacus` sets the absolute path of the `abacus` executable.
 
 ## MD Analysis
 After molecular dynamics calculations, the log file `running_md.log` can be read. If the 'STRU_MD_*' files are not continuous (e.g. 'STRU_MD_0', 'STRU_MD_5', 'STRU_MD_10'...), the index parameter of read should be as a slice object. For example, when using the command `read('running_md.log', index=slice(0, 15, 5), format='abacus-out')` to parse 'running_md.log', 'STRU_MD_0', 'STRU_MD_5' and 'STRU_MD_10' will be read.
 
+The `MD_dump` file can also be read with `read('MD_dump', format='abacus-md')`.
+
 
 ## SPAP Analysis
 
 
@@ -10,15 +10,14 @@ Below is an example `INPUT` file with some of the most important parameters that
 
 ```plaintext
 INPUT_PARAMETERS
-suffix                  MgO
-ntype                   2
-pseudo_dir              ./
-orbital_dir             ./
+suffix                  MgO  # the output files will be in OUT.{suffix} directory 
+pseudo_dir              ./   # where the pseudopotential for each element is
+orbital_dir             ./   # where the orbital file for each element is
 ecutwfc                 100  # in Rydberg
-scf_thr                 1e-4 # Rydberg
-basis_type              lcao
+scf_thr                 1e-6 # dimensionless for LCAO, Rydberg for PW. See documents for details.
+basis_type              lcao # lcao or pw
 calculation             scf  # this is the key parameter telling abacus to do a scf calculation
-out_chg                 True
+out_chg                 0    # only output binary charge file for restart
 ```
 
 The parameter list always starts with key word `INPUT_PARAMETERS`. Any content before `INPUT_PARAMETERS` will be ignored.
@@ -36,19 +35,18 @@ Furthermore, if a given parameter name appeared more than once in the input file
 
 In the above example, the meanings of the parameters are:
 
-- `suffix` : the name of the system, default `ABACUS`
-- `ntype` : how many types of elements in the unit cell
-- `pseudo_dir` : the directory where pseudopotential files are provided
-- `orbital_dir` : the directory where orbital files are provided
-- `ecutwfc` : the plane-wave energy cutoff for the wave function expansion (UNIT: Rydberg)
-- `scf_thr` : the threshold for the convergence of charge density (UNIT: Rydberg)
-- `basis_type` : the type of basis set for expanding the electronic wave functions
+- `suffix` : the name of the system, default `ABACUS`, and output files will be in OUT.{suffix} directory. 
+- `pseudo_dir` : the directory where pseudopotential files are provided.
+- `orbital_dir` : the directory where orbital files are provided.
+- `ecutwfc` : the plane-wave energy cutoff for the wave function expansion (UNIT: Rydberg).
+- `scf_thr` : the threshold for the convergence of charge density (UNIT: Rydberg for PW, dimensionless for LCAO). We recommend `1e-7` for the LCAO basis and `1e-9` for the PW basis.
+- `basis_type` : the type of basis set for expanding the electronic wave functions, one can set lcao or pw.
 - `calculation` : the type of calculation to be performed by ABACUS
-- `out_chg` : if true, output the charge density on real space grid
+- `out_chg` : controls the output of the charge density on the real-space grid: -1 for no output, 0 for binary output, 1 for binary and cube output.
 
 For a complete list of input parameters, please consult this [instruction](../advanced/input_files/input-main.md).
 
-> **Note:** Users cannot change the filename “INPUT” to other names. Boolean paramerters such as `out_chg` can be set by using `True` and `False`, `1` and `0`, or `T` and `F`. It is case insensitive so that other preferences such as `true` and `false`, `TRUE` and `FALSE`, and `t` and `f` for setting boolean values are also supported. Specifically for the `out_chg`, `-1` option is also available, which means turn off the checkpoint of charge density in binary (always dumped in `OUT.{suffix}`, whose name ends with `CHARGE-DENSITY.restart`). Some parameters controlling the output also support a second option to control the output precision, e.g., `out_chg True 8` will output the charge density on realspace grid with 8 digits after the decimal point.
+> **Note:** Users cannot change the filename “INPUT” to other names. Boolean parameters such as `out_chg` can be set by using `True` and `False`, `1` and `0`, or `T` and `F`. It is case insensitive so that other preferences such as `true` and `false`, `TRUE` and `FALSE`, and `t` and `f` for setting boolean values are also supported. Specifically for the `out_chg`, `-1` option is also available, which means turn off the checkpoint of charge density in binary (always dumped in `OUT.{suffix}`, whose name ends with `CHARGE-DENSITY.restart`). Some parameters controlling the output also support a second option to control the output precision, e.g., `out_chg 1 8` will output the charge density on realspace grid with 8 digits after the decimal point.
 
 ## *STRU*
 
@@ -69,7 +67,8 @@ Mg_gga_8au_100Ry_4s2p1d.orb
 O_gga_8au_100Ry_2s2p1d.orb
 
 LATTICE_CONSTANT
-1.8897259886 # 1.8897259886 Bohr =  1.0 Angstrom
+1.889726126 # 1.0 Angstrom = 1/0.529177210544 Bohr (Bohr radius a_0 = 0.529177210544 Angstrom)
+# Bohr radius ref: https://physics.nist.gov/cgi-bin/cuu/Value?bohrrada0
 
 LATTICE_VECTORS
 4.25648 0.00000 0.00000  
 
@@ -104,6 +104,13 @@ if(USE_ROCM)
   )
 endif()
 
+if(USE_DSP)
+  list(APPEND device_srcs
+  module_base/kernels/dsp/dsp_connector.cpp
+  )
+endif()
+
+
 add_library(device OBJECT ${device_srcs})
 
 if(USE_CUDA)
 
@@ -260,6 +260,7 @@ OBJS_ESOLVER=esolver.o\
 OBJS_ESOLVER_LCAO=esolver_ks_lcao.o\
       esolver_ks_lcao_tddft.o\
       lcao_before_scf.o\
+      lcao_after_scf.o\
       esolver_gets.o\
       lcao_others.o\
 
@@ -572,6 +573,7 @@ OBJS_LCAO=evolve_elec.o\
       td_velocity.o\
       td_current.o\
       snap_psibeta_half_tddft.o\
+      solve_propagation.o\
       upsi.o\
       FORCE_STRESS.o\
 	  FORCE_gamma.o\
 
@@ -65,7 +65,10 @@ add_library(
 )
 
 target_link_libraries(base PUBLIC container)
-
+if (USE_DSP)
+  target_link_libraries(base PUBLIC ${MTBLAS_FFT_DIR}/libmtblas/lib/libmtblas.a)
+  target_link_libraries(base PUBLIC ${MTBLAS_FFT_DIR}/libmtblas/lib/libmtblasdev.a)
+endif()
 add_subdirectory(module_container)
 
 if(ENABLE_COVERAGE)