deepmodeling
diff --git a/‎.github/workflows/coverage.yml‎
Lines changed: 1 addition & 1 deletion b/‎.github/workflows/coverage.yml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎docs/advanced/acceleration/cuda.md‎
Lines changed: 1 addition & 1 deletion b/‎docs/advanced/acceleration/cuda.md‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎docs/advanced/input_files/input-main.md‎
Lines changed: 9 additions & 1 deletion b/‎docs/advanced/input_files/input-main.md‎
Lines changed: 9 additions & 1 deletion
diff --git a/‎docs/advanced/install.md‎
Lines changed: 2 additions & 2 deletions b/‎docs/advanced/install.md‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎python/pyabacus/src/py_diago_dav_subspace.hpp‎
Lines changed: 5 additions & 7 deletions b/‎python/pyabacus/src/py_diago_dav_subspace.hpp‎
Lines changed: 5 additions & 7 deletions
diff --git a/‎python/pyabacus/src/py_diago_david.hpp‎
Lines changed: 5 additions & 8 deletions b/‎python/pyabacus/src/py_diago_david.hpp‎
Lines changed: 5 additions & 8 deletions
diff --git a/‎source/driver.cpp‎
Lines changed: 1 addition & 1 deletion b/‎source/driver.cpp‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎source/module_base/global_variable.cpp‎
Lines changed: 1 addition & 29 deletions b/‎source/module_base/global_variable.cpp‎
Lines changed: 1 addition & 29 deletions
diff --git a/‎source/module_base/global_variable.h‎
Lines changed: 0 additions & 44 deletions b/‎source/module_base/global_variable.h‎
Lines changed: 0 additions & 44 deletions
diff --git a/‎source/module_base/module_device/device.cpp‎
Lines changed: 1 addition & 2 deletions b/‎source/module_base/module_device/device.cpp‎
Lines changed: 1 addition & 2 deletions
@@ -18,7 +18,7 @@ jobs:
           apt update && apt install -y lcov
       - name: Building
         run: |
-          cmake -B build -DENABLE_DEEPKS=ON -DENABLE_LIBXC=ON -DBUILD_TESTING=ON -DENABLE_COVERAGE=ON
+          cmake -B build -DBUILD_TESTING=ON -DENABLE_DEEPKS=ON -DENABLE_LIBXC=ON -DENABLE_LIBRI=ON -DENABLE_PAW=ON -DENABLE_GOOGLEBENCH=ON -DENABLE_RAPIDJSON=ON
           cmake --build build -j`nproc`
           cmake --install build
       - name: Testing
 
@@ -36,7 +36,7 @@ The ABACUS program will automatically determine whether the current ELPA support
 ## Run with the GPU support by editing the INPUT script:
 
 In `INPUT` file we need to set the input parameter [device](../input_files/input-main.md#device) to `gpu`. If this parameter is not set, ABACUS will try to determine if there are available GPUs.
-- Set `ks_solver`: For the PW basis, CG, BPCG and Davidson methods are supported on GPU; set the input parameter [ks_solver](../input_files/input-main.md#ks_solver) to `cg`, `bpcg` or `dav`. For the LCAO basis, `cusolver` and `elpa` is supported on GPU.
+- Set `ks_solver`: For the PW basis, CG, BPCG and Davidson methods are supported on GPU; set the input parameter [ks_solver](../input_files/input-main.md#ks_solver) to `cg`, `bpcg` or `dav`. For the LCAO basis, `cusolver`, `cusolvermp` and `elpa` are supported on GPU.
 - **multi-card**: ABACUS allows for multi-GPU acceleration. If you have multiple GPU cards, you can run ABACUS with several MPI processes, and each process will utilize one GPU card. For example, the command `mpirun -n 2 abacus` will by default launch two GPUs for computation. If you only have one card, this command will only start one GPU. 
 
 ## Examples
 
@@ -933,6 +933,8 @@ calculations.
   - **genelpa**: This method should be used if you choose localized orbitals.
   - **scalapack_gvx**: Scalapack can also be used for localized orbitals.
   - **cusolver**: This method needs building with CUDA and at least one gpu is available.
+  - **cusolvermp**: This method supports multi-GPU acceleration and needs building with CUDA. Note that when using cusolvermp, you should set the number of MPI processes to be equal to the number of GPUs.
+  - **elpa**: The ELPA solver supports both CPU and GPU. By setting `device` to `gpu`, you can launch the ELPA solver with GPU acceleration (provided that you have installed a GPU-supported version of ELPA, which requires you to manually compile and install ELPA, and that ABACUS is compiled with `-DUSE_ELPA=ON` and `-DUSE_CUDA=ON`). The ELPA solver also supports multi-GPU acceleration.
 
   If you set ks_solver=`genelpa` for basis_type=`pw`, the program will be stopped with an error message:
 
@@ -941,7 +943,13 @@ calculations.
   ```
 
   Then the user has to correct the input file and restart the calculation.
-- **Default**: cg (plane-wave basis), or genelpa (localized atomic orbital basis, if compiling option `USE_ELPA` has been set),lapack (localized atomic orbital basis, if compiling option `ENABLE_MPI` has not been set), scalapack_gvx, (localized atomic orbital basis, if compiling option `USE_ELPA` has not been set and if compiling option `ENABLE_MPI` has been set)
+- **Default**: 
+  - **PW basis**: cg.
+  - **LCAO basis**:
+    - genelpa (if compiling option `USE_ELPA` has been set)
+    - lapack (if compiling option `ENABLE_MPI` has not been set)
+    - scalapack_gvx (if compiling option `USE_ELPA` has not been set and compiling option `ENABLE_MPI` has been set)
+    - cusolver (if compiling option `USE_CUDA` has been set)
 
 ### nbands
 
 
@@ -93,9 +93,9 @@ cmake -B build -DUSE_CUDA=1 -DCMAKE_CUDA_COMPILER=${path to cuda toolkit}/bin/nv
 
 ## Build math library from source
 
-> Note: This flag is **enabled by default**. It will get better performance than the standard implementation on `gcc` and `clang`. But it **will be disabled** when using `Intel Compiler` since the math functions will get wrong results and the performance is also unexpectly poor.
+> Note: We recommend using the latest available compiler sets, since they offer faster implementations of math functions.
 
-To build math functions from source code, instead of using c++ standard implementation, define `USE_ABACUS_LIBM` flag.
+This flag is disabled by default. To build math functions from source code, define the `USE_ABACUS_LIBM` flag. Doing so is expected to give better performance on legacy versions of `gcc` and `clang`.
 
 Currently supported math functions:
  `sin`, `cos`, `sincos`, `exp`, `cexp`
 
@@ -113,23 +113,21 @@ class PyDiagoDavSubspace
         auto hpsi_func = [mm_op] (
             std::complex<double> *psi_in,
             std::complex<double> *hpsi_out, 
-            const int nband_in,
-            const int nbasis_in, 
-            const int band_index1,
-            const int band_index2
+            const int ld_psi,
+            const int nvec
         ) {
             // Note: numpy's py::array_t is row-major, but
             //       our raw pointer-array is column-major
-            py::array_t<std::complex<double>, py::array::f_style> psi({nbasis_in, band_index2 - band_index1 + 1});
+            py::array_t<std::complex<double>, py::array::f_style> psi({ld_psi, nvec});
             py::buffer_info psi_buf = psi.request();
             std::complex<double>* psi_ptr = static_cast<std::complex<double>*>(psi_buf.ptr);
-            std::copy(psi_in + band_index1 * nbasis_in, psi_in + (band_index2 + 1) * nbasis_in, psi_ptr);
+            std::copy(psi_in, psi_in + nvec * ld_psi, psi_ptr);
 
             py::array_t<std::complex<double>, py::array::f_style> hpsi = mm_op(psi);
 
             py::buffer_info hpsi_buf = hpsi.request();
             std::complex<double>* hpsi_ptr = static_cast<std::complex<double>*>(hpsi_buf.ptr);
-            std::copy(hpsi_ptr, hpsi_ptr + (band_index2 - band_index1 + 1) * nbasis_in, hpsi_out);
+            std::copy(hpsi_ptr, hpsi_ptr + nvec * ld_psi, hpsi_out);
         };
 
         obj = std::make_unique<hsolver::Diago_DavSubspace<std::complex<double>, base_device::DEVICE_CPU>>(
 
@@ -111,30 +111,27 @@ class PyDiagoDavid
         auto hpsi_func = [mm_op] (
             std::complex<double> *psi_in,
             std::complex<double> *hpsi_out, 
-            const int nband_in, 
-            const int nbasis_in, 
-            const int band_index1, 
-            const int band_index2
+            const int ld_psi, 
+            const int nvec
         ) {
             // Note: numpy's py::array_t is row-major, but
             //       our raw pointer-array is column-major
-            py::array_t<std::complex<double>, py::array::f_style> psi({nbasis_in, band_index2 - band_index1 + 1});
+            py::array_t<std::complex<double>, py::array::f_style> psi({ld_psi, nvec});
             py::buffer_info psi_buf = psi.request();
             std::complex<double>* psi_ptr = static_cast<std::complex<double>*>(psi_buf.ptr);
-            std::copy(psi_in + band_index1 * nbasis_in, psi_in + (band_index2 + 1) * nbasis_in, psi_ptr);
+            std::copy(psi_in, psi_in + nvec * ld_psi, psi_ptr);
 
             py::array_t<std::complex<double>, py::array::f_style> hpsi = mm_op(psi);
 
             py::buffer_info hpsi_buf = hpsi.request();
             std::complex<double>* hpsi_ptr = static_cast<std::complex<double>*>(hpsi_buf.ptr);
-            std::copy(hpsi_ptr, hpsi_ptr + (band_index2 - band_index1 + 1) * nbasis_in, hpsi_out);
+            std::copy(hpsi_ptr, hpsi_ptr + nvec * ld_psi, hpsi_out);
         };
 
         auto spsi_func = [this] (
             const std::complex<double> *psi_in, 
             std::complex<double> *spsi_out, 
             const int nrow, 
-            const int npw, 
             const int nbands
         ) {
             syncmem_op()(this->ctx, this->ctx, spsi_out, psi_in, static_cast<size_t>(nbands * nrow));
 
@@ -41,7 +41,7 @@ void Driver::init()
 
     // (3) output information
     time_t time_finish = std::time(nullptr);
-    Print_Info::print_time(time_start, time_finish);
+    ModuleIO::print_time(time_start, time_finish);
 
     // (4) close all of the running logs
     ModuleBase::Global_File::close_all_log(GlobalV::MY_RANK, PARAM.inp.out_alllog,PARAM.inp.calculation);
 
@@ -12,24 +12,6 @@
 #include <vector>
 namespace GlobalV
 {
-
-//----------------------------------------------------------
-// EXPLAIN : Basic Global Variables
-// In practice calculation, these values are set in
-// input.cpp.
-//----------------------------------------------------------
-int NBANDS = 0;
-int NLOCAL = 0;        // total number of local basis.
-
-double nupdown = 0.0;
-
-bool use_uspp = false;
-std::string KS_SOLVER = "cg";  // xiaohui add 2013-09-01
-double SEARCH_RADIUS = -1.0;
-
-int NB2D = 1;
-
-
 //----------------------------------------------------------
 // EXPLAIN : Parallel information
 //----------------------------------------------------------
@@ -52,21 +34,11 @@ int GRANK = MY_RANK;
 int GSIZE = DSIZE;
 
 //----------------------------------------------------------
-// EXPLAIN : The input file name and directory
+// EXPLAIN : ofstream for output
 //----------------------------------------------------------
 std::ofstream ofs_running;
 std::ofstream ofs_warning;
 std::ofstream ofs_info;   // output math lib info
 std::ofstream ofs_device; // output device info
 
-
-//==========================================================
-// device flags added by denghui
-//==========================================================
-std::string device_flag = "unknown";
-
-double nelec = 0;
-
-
-// on-site orbitals
 } // namespace GlobalV
@@ -13,25 +13,6 @@
 
 namespace GlobalV
 {
-//==========================================================
-// EXPLAIN : Basic Global Variables
-//==========================================================
-
-extern int NBANDS;
-extern int NLOCAL;        // 1.1 // mohan add 2009-05-29
-
-extern double nupdown;
-extern bool use_uspp;
-
-extern std::string KS_SOLVER;  // xiaohui add 2013-09-01
-extern double SEARCH_RADIUS;   // 11.1 // mohan add 2011-03-10
-
-
-extern int NB2D;           // 16.5 dividsion of 2D_matrix.
-
-                         // pw, 2: real drho for lcao
-
-
 //========================================================================
 // EXPLAIN : Parallel information
 // GLOBAL VARIABLES :
@@ -84,30 +65,5 @@ extern std::ofstream ofs_running;
 extern std::ofstream ofs_warning;
 extern std::ofstream ofs_info;
 extern std::ofstream ofs_device;
-
-
-// mixing parameters
-
-//==========================================================
-// device flags added by denghui
-//==========================================================
-extern std::string device_flag;
-//==========================================================
-// precision flags added by denghui
-//==========================================================
-
-                             //  "out_chg" elec step.
-/// @brief method to initialize wavefunction
-/// @author kirk0830, 20230920
-/// @brief whether use the new psi initializer to initialize psi
-/// @author ykhuang, 20230920
-
-extern double nelec;
-
-// Deltaspin related
-
-// Quasiatomic orbital related
-
-// radius of on-site orbitals
 } // namespace GlobalV
 #endif
@@ -148,13 +148,12 @@ int set_device_by_rank(const MPI_Comm mpi_comm) {
 #endif
 
 std::string get_device_flag(const std::string &device,
-                            const std::string &ks_solver,
                             const std::string &basis_type) {
 if (device == "cpu") {
   return "cpu"; // no extra checks required
 }
 std::string error_message;
-if (device != "" and device != "gpu")
+if (device != "auto" and device != "gpu")
 {
   error_message += "Parameter \"device\" can only be set to \"cpu\" or \"gpu\"!";
   ModuleBase::WARNING_QUIT("device", error_message);
Original file line number	Diff line number	Diff line change
`@@ -148,13 +148,12 @@ int set_device_by_rank(const MPI_Comm mpi_comm) {`
`148`	`148`	`#endif`
`149`	`149`
`150`	`150`	`std::string get_device_flag(const std::string &device,`
`151`		`- const std::string &ks_solver,`
`152`	`151`	`const std::string &basis_type) {`
`153`	`152`	`if (device == "cpu") {`
`154`	`153`	`return "cpu"; // no extra checks required`
`155`	`154`	`}`
`156`	`155`	`std::string error_message;`
`157`		`-if (device != "" and device != "gpu")`
	`156`	`+if (device != "auto" and device != "gpu")`
`158`	`157`	`{`
`159`	`158`	`error_message += "Parameter \"device\" can only be set to \"cpu\" or \"gpu\"!";`
`160`	`159`	`ModuleBase::WARNING_QUIT("device", error_message);`