deepmodeling
diff --git a/‎docs/advanced/input_files/input-main.md‎
Lines changed: 14 additions & 18 deletions b/‎docs/advanced/input_files/input-main.md‎
Lines changed: 14 additions & 18 deletions
diff --git a/‎source/module_base/blas_connector.cpp‎
Lines changed: 4 additions & 4 deletions b/‎source/module_base/blas_connector.cpp‎
Lines changed: 4 additions & 4 deletions
diff --git a/‎source/module_base/kernels/dsp/dsp_connector.h‎
Lines changed: 65 additions & 0 deletions b/‎source/module_base/kernels/dsp/dsp_connector.h‎
Lines changed: 65 additions & 0 deletions
diff --git a/‎source/module_base/module_device/memory_op.cpp‎
Lines changed: 52 additions & 0 deletions b/‎source/module_base/module_device/memory_op.cpp‎
Lines changed: 52 additions & 0 deletions
diff --git a/‎source/module_base/module_device/memory_op.h‎
Lines changed: 30 additions & 1 deletion b/‎source/module_base/module_device/memory_op.h‎
Lines changed: 30 additions & 1 deletion
diff --git a/‎source/module_base/module_device/types.h‎
Lines changed: 1 addition & 0 deletions b/‎source/module_base/module_device/types.h‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎source/module_esolver/esolver.cpp‎
Lines changed: 0 additions & 5 deletions b/‎source/module_esolver/esolver.cpp‎
Lines changed: 0 additions & 5 deletions
diff --git a/‎source/module_esolver/esolver.h‎
Lines changed: 0 additions & 17 deletions b/‎source/module_esolver/esolver.h‎
Lines changed: 0 additions & 17 deletions
diff --git a/‎source/module_esolver/esolver_fp.cpp‎
Lines changed: 10 additions & 0 deletions b/‎source/module_esolver/esolver_fp.cpp‎
Lines changed: 10 additions & 0 deletions
@@ -243,8 +243,8 @@
     - [exx\_opt\_orb\_ecut](#exx_opt_orb_ecut)
     - [exx\_opt\_orb\_tolerence](#exx_opt_orb_tolerence)
     - [exx\_real\_number](#exx_real_number)
-    - [exx\_symmetry\_realspace](#exx_symmetry_realspace)
     - [rpa\_ccp\_rmesh\_times](#rpa_ccp_rmesh_times)
+    - [exx\_symmetry\_realspace](#exx_symmetry_realspace)
     - [out\_ri\_cv](#out_ri_cv)
   - [Molecular dynamics](#molecular-dynamics)
     - [md\_type](#md_type)
@@ -273,6 +273,9 @@
     - [lj\_epsilon](#lj_epsilon)
     - [lj\_sigma](#lj_sigma)
     - [pot\_file](#pot_file)
+    - [dp\_rescaling](#dp_rescaling)
+    - [dp\_fparam](#dp_fparam)
+    - [dp\_aparam](#dp_aparam)
     - [msst\_direction](#msst_direction)
     - [msst\_vel](#msst_vel)
     - [msst\_vis](#msst_vis)
@@ -422,11 +425,12 @@
     - [nocc](#nocc)
     - [nvirt](#nvirt)
     - [lr\_nstates](#lr_nstates)
+    - [lr\_unrestricted](#lr_unrestricted)
     - [abs\_wavelen\_range](#abs_wavelen_range)
     - [out\_wfc\_lr](#out_wfc_lr)
     - [abs\_broadening](#abs_broadening)
     - [ri\_hartree\_benchmark](#ri_hartree_benchmark)
-    - [aims_nbasis](#aims_nbasis)
+    - [aims\_nbasis](#aims_nbasis)
 
 [back to top](#full-list-of-input-keywords)
 ## System variables
@@ -2908,46 +2912,38 @@ These variables are used to control vdW-corrected related parameters.
 - **Type**: String
 - **Description**: Specifies the method used for Van der Waals (VdW) correction. Available options are:
   - `d2`: [Grimme's D2](https://onlinelibrary.wiley.com/doi/abs/10.1002/jcc.20495) dispersion correction method
-  - `d3_0`: [Grimme's DFT-D3(0)](https://aip.scitation.org/doi/10.1063/1.3382344) dispersion correction method
-  - `d3_bj`: [Grimme's DFTD3(BJ)](https://onlinelibrary.wiley.com/doi/abs/10.1002/jcc.21759) dispersion correction method
+  - `d3_0`: [Grimme's DFT-D3(0)](https://aip.scitation.org/doi/10.1063/1.3382344) dispersion correction method (zero-damping)
+  - `d3_bj`: [Grimme's DFTD3(BJ)](https://onlinelibrary.wiley.com/doi/abs/10.1002/jcc.21759) dispersion correction method (BJ-damping)
   - `none`: no vdW correction
 - **Default**: none
+- **Note**: Since version 3.8.3 (and in several earlier development versions), ABACUS can automatically set the DFT-D3 parameters for common functionals. To benefit from this feature, specify the parameter `dft_functional` explicitly (for more details on this parameter, see [dft_functional](#dft_functional)); otherwise the automatic setup will fail with an error message like `cannot find DFT-D3 parameter for XC(***)`. If the built-in parameters are not satisfactory, any manually set values of `vdw_s6`, `vdw_s8`, `vdw_a1` and `vdw_a2` will override them.
+- **Special**: The wB97 (Omega-B97) functional family needs special settings: to use the functional wB97X-D3BJ, specify `dft_functional` as `HYB_GGA_WB97X_V` and `vdw_method` as `d3_bj`; to use the functional wB97X-D3, specify `dft_functional` as `HYB_GGA_WB97X_D3` and `vdw_method` as `d3_0`.
 
 ### vdw_s6
 
 - **Type**: Real
 - **Availability**: `vdw_method` is set to `d2`, `d3_0`, or `d3_bj`
-- **Description**: This scale factor is used to optimize the interaction energy deviations in van der Waals (vdW) corrected calculations. The recommended values of this parameter are dependent on the chosen vdW correction method and the DFT functional being used. For DFT-D2, the recommended values are 0.75 (PBE), 1.2 (BLYP), 1.05 (B-P86), 1.0 (TPSS), and 1.05 (B3LYP). For DFT-D3, recommended values with different DFT functionals can be found on the [here](https://www.chemiebn.uni-bonn.de/pctc/mulliken-center/software/dft-d3/dft-d3). The default value of this parameter in ABACUS is set to be the recommended value for PBE.
+- **Description**: This scale factor is used to optimize the interaction energy deviations in van der Waals (vdW) corrected calculations. The recommended values of this parameter depend on the chosen vdW correction method and the DFT functional being used. For DFT-D2, the recommended values are 0.75 (PBE), 1.2 (BLYP), 1.05 (B-P86), 1.0 (TPSS), and 1.05 (B3LYP). If not set, the value for the PBE functional is used. For DFT-D3, recommended values for different DFT functionals can be found [here](https://github.com/dftd3/simple-dftd3/blob/main/assets/parameters.toml). If not set, the value is looked up in the ABACUS built-in dataset according to the `dft_functional` keyword. A user-set value overrides the looked-up value.
 - **Default**:
   - 0.75: if `vdw_method` is set to `d2`
-  - 1.0: if `vdw_method` is set to `d3_0` or `d3_bj`
 
 ### vdw_s8
 
 - **Type**: Real
 - **Availability**: `vdw_method` is set to `d3_0` or `d3_bj`
-- **Description**: This scale factor is relevant for D3(0) and D3(BJ) van der Waals (vdW) correction methods. The recommended values of this parameter with different DFT functionals can be found on the [webpage](https://www.chemiebn.uni-bonn.de/pctc/mulliken-center/software/dft-d3/dft-d3). The default value of this parameter in ABACUS is set to be the recommended value for PBE.
-- **Default**:
-  - 0.722: if `vdw_method` is set to `d3_0`
-  - 0.7875: if `vdw_method` is set to `d3_bj`
+- **Description**: This scale factor is relevant for D3(0) and D3(BJ) van der Waals (vdW) correction methods. The recommended values of this parameter for different DFT functionals can be found on this [webpage](https://github.com/dftd3/simple-dftd3/blob/main/assets/parameters.toml). If not set, the value is looked up in the ABACUS built-in dataset according to the `dft_functional` keyword. A user-set value overrides the looked-up value.
 
 ### vdw_a1
 
 - **Type**: Real
 - **Availability**: `vdw_method` is set to `d3_0` or `d3_bj`
-- **Description**: This damping function parameter is relevant for D3(0) and D3(BJ) van der Waals (vdW) correction methods. The recommended values of this parameter with different DFT functionals can be found on the [webpage](https://www.chemiebn.uni-bonn.de/pctc/mulliken-center/software/dft-d3/dft-d3). The default value of this parameter in ABACUS is set to be the recommended value for PBE.
-- **Default**:
-  - 1.217: if `vdw_method` is set to `d3_0`
-  - 0.4289: if `vdw_method` is set to `d3_bj`
+- **Description**: This damping function parameter is relevant for D3(0) and D3(BJ) van der Waals (vdW) correction methods. The recommended values of this parameter for different DFT functionals can be found on this [webpage](https://github.com/dftd3/simple-dftd3/blob/main/assets/parameters.toml). If not set, the value is looked up in the ABACUS built-in dataset according to the `dft_functional` keyword. A user-set value overrides the looked-up value.
 
 ### vdw_a2
 
 - **Type**: Real
 - **Availability**: `vdw_method` is set to `d3_0` or `d3_bj`
-- **Description**: This damping function parameter is only relevant for D3(0) and D3(BJ) van der Waals (vdW) correction methods. The recommended values of this parameter with different DFT functionals can be found on the [webpage](https://www.chemiebn.uni-bonn.de/pctc/mulliken-center/software/dft-d3/dft-d3). The default value of this parameter in ABACUS is set to be the recommended value for PBE.
-- **Default**:
-  - 1.0: if `vdw_method` is set to `d3_0`
-  - 4.4407: if `vdw_method` is set to `d3_bj`
+- **Description**: This damping function parameter is only relevant for D3(0) and D3(BJ) van der Waals (vdW) correction methods. The recommended values of this parameter for different DFT functionals can be found on this [webpage](https://github.com/dftd3/simple-dftd3/blob/main/assets/parameters.toml). If not set, the value is looked up in the ABACUS built-in dataset according to the `dft_functional` keyword. A user-set value overrides the looked-up value.
 
 ### vdw_d
 
 
@@ -93,7 +93,7 @@ void BlasConnector::gemm(const char transa, const char transb, const int m, cons
 	}
 	#ifdef __DSP
 	else if (device_type == base_device::AbacusDevice_t::DspDevice){
-		sgemm_mt_(&transb, &transa, &n, &m, &k,
+		sgemm_mth_(&transb, &transa, &n, &m, &k,
 		&alpha, b, &ldb, a, &lda,
 		&beta, c, &ldc, GlobalV::MY_RANK);
 	}
@@ -111,7 +111,7 @@ void BlasConnector::gemm(const char transa, const char transb, const int m, cons
 	}
 	#ifdef __DSP
 	else if (device_type == base_device::AbacusDevice_t::DspDevice){
-		dgemm_mt_(&transb, &transa, &n, &m, &k,
+		dgemm_mth_(&transb, &transa, &n, &m, &k,
 		&alpha, b, &ldb, a, &lda,
 		&beta, c, &ldc, GlobalV::MY_RANK);
 	}
@@ -129,7 +129,7 @@ void BlasConnector::gemm(const char transa, const char transb, const int m, cons
 	}
 	#ifdef __DSP
 	else if (device_type == base_device::AbacusDevice_t::DspDevice) {
-    	cgemm_mt_(&transb, &transa, &n, &m, &k,
+    	cgemm_mth_(&transb, &transa, &n, &m, &k,
         &alpha, b, &ldb, a, &lda,
         &beta, c, &ldc, GlobalV::MY_RANK);
 	}
@@ -147,7 +147,7 @@ void BlasConnector::gemm(const char transa, const char transb, const int m, cons
 	}
 	#ifdef __DSP
 	else if (device_type == base_device::AbacusDevice_t::DspDevice) {
-    	zgemm_mt_(&transb, &transa, &n, &m, &k,
+    	zgemm_mth_(&transb, &transa, &n, &m, &k,
         &alpha, b, &ldb, a, &lda,
         &beta, c, &ldc, GlobalV::MY_RANK);
 	}
 
@@ -2,6 +2,10 @@
 #define DSP_CONNECTOR_H
 #ifdef __DSP
 
+#include "module_base/module_device/device.h"
+#include "module_base/module_device/memory_op.h"
+#include "module_hsolver/diag_comm_info.h"
+
 // Base dsp functions
 void dspInitHandle(int id);
 void dspDestoryHandle(int id);
@@ -62,5 +66,66 @@ void cgemm_mth_(const char *transa, const char *transb,
 
 //#define zgemm_ zgemm_mt
 
+// DSP utilities follow. They may be moved to other files if this file gets too large.
+
+/// @brief Sum one block of the subspace matrices `hcc` and `scc` over `diag_comm`.
+///
+/// For each matrix, the `notconv * nbase_x` elements starting at element offset
+/// `nbase * nbase_x` are copied into a scratch send buffer, reduced with MPI_SUM
+/// onto rank 0 of `diag_comm`, and the receive buffer is copied back over the
+/// same region of the matrix.
+///
+/// \param hcc : first matrix; its block is overwritten with the receive buffer
+/// \param scc : second matrix; handled exactly like `hcc`, after `hcc`
+/// \param nbase : multiplied by `nbase_x` to obtain the element offset of the block
+/// \param nbase_x : multiplied by `notconv` to obtain the block length
+/// \param notconv : see `nbase_x`
+/// \param diag_comm : communicator used for both reductions (root is rank 0)
+///
+/// @note MPI_Reduce fills the receive buffer only on the root. On every other rank
+///       the block copied back into `hcc`/`scc` is the value-initialized scratch
+///       buffer, so only rank 0 holds the reduced data afterwards.
+/// @note Only MPI_COMPLEX and MPI_DOUBLE_COMPLEX are ever selected, so T is presumably
+///       std::complex<float> or std::complex<double> -- TODO confirm against callers.
+template <typename T>
+void dsp_dav_subspace_reduce(T* hcc, T* scc, int nbase, int nbase_x, int notconv, MPI_Comm diag_comm){
+
+    using syncmem_complex_op = base_device::memory::synchronize_memory_op<T, base_device::DEVICE_CPU, base_device::DEVICE_CPU>;
+
+    const int count = notconv * nbase_x;
+    const int offset = nbase * nbase_x;
+
+    // Value-initialize both buffers: `target` is never written by MPI_Reduce on
+    // non-root ranks but is still copied back below, so it must not be indeterminate.
+    auto* swap = new T[count]();
+    auto* target = new T[count]();
+
+    // The element precision is fixed by T, so the MPI datatype is chosen once.
+    const MPI_Datatype mpi_type
+        = (base_device::get_current_precision(swap) == "single") ? MPI_COMPLEX : MPI_DOUBLE_COMPLEX;
+
+    // Stage the block in `swap`, reduce it into `target`, then write `target` back.
+    auto reduce_block = [&](T* mat) {
+        syncmem_complex_op()(cpu_ctx, cpu_ctx, swap, mat + offset, count);
+        MPI_Reduce(swap, target, count, mpi_type, MPI_SUM, 0, diag_comm);
+        syncmem_complex_op()(cpu_ctx, cpu_ctx, mat + offset, target, count);
+    };
+
+    reduce_block(hcc);
+    reduce_block(scc);
+
+    delete[] swap;
+    delete[] target;
+}
+
+
 #endif
 #endif
@@ -346,5 +346,57 @@ template struct delete_memory_op<std::complex<float>, base_device::DEVICE_GPU>;
 template struct delete_memory_op<std::complex<double>, base_device::DEVICE_GPU>;
 #endif
 
+#ifdef __DSP
+
+/// CPU specialization of resize_memory_op_mt (compiled only under __DSP).
+/// Frees a non-null `arr` with free_ht, then allocates `size` elements of FPTYPE
+/// with malloc_ht, passing GlobalV::MY_RANK as its second argument.
+/// The allocation is recorded through ModuleBase::Memory::record unless
+/// `record_in` is null or equal to "no_record".
+template <typename FPTYPE>
+struct resize_memory_op_mt<FPTYPE, base_device::DEVICE_CPU>
+{
+    void operator()(const base_device::DEVICE_CPU* dev, FPTYPE*& arr, const size_t size, const char* record_in)
+    {
+        // Release the previous buffer, if any, before requesting a new one.
+        if (arr != nullptr)
+        {
+            free_ht(arr);
+        }
+        arr = (FPTYPE*)malloc_ht(sizeof(FPTYPE) * size, GlobalV::MY_RANK);
+
+        // A null label is treated exactly like the literal "no_record".
+        std::string label = (record_in == nullptr) ? "no_record" : record_in;
+        if (label == "no_record")
+        {
+            return;
+        }
+        ModuleBase::Memory::record(label, sizeof(FPTYPE) * size);
+    }
+};
+
+template <typename FPTYPE>
+struct delete_memory_op_mt<FPTYPE, base_device::DEVICE_CPU> // CPU specialization, compiled only under __DSP
+{
+    void operator()(const base_device::DEVICE_CPU* dev, FPTYPE* arr) // `dev` is not read in the body
+    {
+        free_ht(arr); // no null check here; presumably free_ht tolerates nullptr -- TODO confirm
+    }
+};
+
+
+template struct resize_memory_op_mt<int, base_device::DEVICE_CPU>; // explicit instantiations of the CPU resize op, one per element type listed
+template struct resize_memory_op_mt<float, base_device::DEVICE_CPU>;
+template struct resize_memory_op_mt<double, base_device::DEVICE_CPU>;
+template struct resize_memory_op_mt<std::complex<float>, base_device::DEVICE_CPU>;
+template struct resize_memory_op_mt<std::complex<double>, base_device::DEVICE_CPU>;
+
+template struct delete_memory_op_mt<int, base_device::DEVICE_CPU>; // explicit instantiations of the CPU delete op for the same element types
+template struct delete_memory_op_mt<float, base_device::DEVICE_CPU>;
+template struct delete_memory_op_mt<double, base_device::DEVICE_CPU>;
+template struct delete_memory_op_mt<std::complex<float>, base_device::DEVICE_CPU>;
+template struct delete_memory_op_mt<std::complex<double>, base_device::DEVICE_CPU>;
+#endif
+
 } // namespace memory
 } // namespace base_device
@@ -146,6 +146,36 @@ struct delete_memory_op<FPTYPE, base_device::DEVICE_GPU>
 };
 #endif // __CUDA || __UT_USE_CUDA || __ROCM || __UT_USE_ROCM
 
+#ifdef __DSP
+
+template <typename FPTYPE, typename Device>
+struct resize_memory_op_mt
+{
+    /// @brief Allocate memory for a given pointer. Note that this op frees the existing pointer first.
+    ///
+    /// Input Parameters
+    /// \param dev : the type of computing device
+    /// \param size : array size, counted in elements of FPTYPE
+    /// \param record_in : label for the memory record (defaults to nullptr)
+    ///
+    /// Output Parameters
+    /// \param arr : allocated array (the pointer is passed by reference and reassigned)
+    void operator()(const Device* dev, FPTYPE*& arr, const size_t size, const char* record_in = nullptr);
+};
+
+template <typename FPTYPE, typename Device>
+struct delete_memory_op_mt
+{
+    /// @brief Free the memory held by an array (declared only when __DSP is defined).
+    ///
+    /// Input Parameters
+    /// \param dev : the type of computing device
+    /// \param arr : the array to be freed
+    void operator()(const Device* dev, FPTYPE* arr);
+};
+
+#endif // __DSP
+
 } // end of namespace memory
 } // end of namespace base_device
 
@@ -233,5 +263,4 @@ using castmem_z2c_d2h_op = base_device::memory::
 
 static base_device::DEVICE_CPU* cpu_ctx = {};
 static base_device::DEVICE_GPU* gpu_ctx = {};
-
 #endif // MODULE_DEVICE_MEMORY_H_
@@ -6,6 +6,7 @@ namespace base_device
 
 struct DEVICE_CPU;
 struct DEVICE_GPU;
+struct DEVICE_DSP;
 
 enum AbacusDevice_t
 {
 
@@ -23,11 +23,6 @@ extern "C"
 namespace ModuleESolver
 {
 
-void ESolver::printname()
-{
-	std::cout << classname << std::endl;
-}
-
 std::string determine_type()
 {
     std::string esolver_type = "none";
 
@@ -41,23 +41,6 @@ class ESolver
     //! calcualte stress of given cell
     virtual void cal_stress(ModuleBase::matrix& stress) = 0;
 
-
-    // Print current classname.
-    void printname();
-
-    // temporarily
-    // get iterstep used in current scf
-    virtual int get_niter()
-    {
-        return 0;
-    }
-
-    // get maxniter used in current scf
-    virtual int get_maxniter()
-    {
-        return 0;
-    }
-
     bool conv_esolver = true; // whether esolver is converged
 
     std::string classname;
 
@@ -5,6 +5,8 @@
 #include "module_hamilt_pw/hamilt_pwdft/global.h"
 #include "module_io/cif_io.h"
 #include "module_io/cube_io.h"
+#include "module_io/json_output/init_info.h"
+#include "module_io/json_output/output_info.h"
 #include "module_io/output_log.h"
 #include "module_io/print_info.h"
 #include "module_io/rhog_io.h"
@@ -260,6 +262,14 @@ void ESolver_FP::after_scf(const int istep)
                 PARAM.inp.out_elf[1]);
         }
     }
+
+    // #ifdef __RAPIDJSON
+    //     // add Json of efermi energy converge
+    //     Json::add_output_efermi_converge(this->pelec->eferm.ef * ModuleBase::Ry_to_eV, this->conv_esolver);
+    //     // add nkstot,nkstot_ibz to output json
+    //     int Jnkstot = this->pelec->klist->get_nkstot();
+    //     Json::add_nkstot(Jnkstot);
+    // #endif //__RAPIDJSON
 }
 
 void ESolver_FP::init_after_vc(const Input_para& inp, UnitCell& cell)
Original file line number	Diff line number	Diff line change
`@@ -93,7 +93,7 @@ void BlasConnector::gemm(const char transa, const char transb, const int m, cons`
`93`	`93`	`}`
`94`	`94`	`#ifdef __DSP`
`95`	`95`	`else if (device_type == base_device::AbacusDevice_t::DspDevice){`
`96`		`- sgemm_mt_(&transb, &transa, &n, &m, &k,`
	`96`	`+ sgemm_mth_(&transb, &transa, &n, &m, &k,`
`97`	`97`	`&alpha, b, &ldb, a, &lda,`
`98`	`98`	`&beta, c, &ldc, GlobalV::MY_RANK);`
`99`	`99`	`}`
`@@ -111,7 +111,7 @@ void BlasConnector::gemm(const char transa, const char transb, const int m, cons`
`111`	`111`	`}`
`112`	`112`	`#ifdef __DSP`
`113`	`113`	`else if (device_type == base_device::AbacusDevice_t::DspDevice){`
`114`		`- dgemm_mt_(&transb, &transa, &n, &m, &k,`
	`114`	`+ dgemm_mth_(&transb, &transa, &n, &m, &k,`
`115`	`115`	`&alpha, b, &ldb, a, &lda,`
`116`	`116`	`&beta, c, &ldc, GlobalV::MY_RANK);`
`117`	`117`	`}`
`@@ -129,7 +129,7 @@ void BlasConnector::gemm(const char transa, const char transb, const int m, cons`
`129`	`129`	`}`
`130`	`130`	`#ifdef __DSP`
`131`	`131`	`else if (device_type == base_device::AbacusDevice_t::DspDevice) {`
`132`		`- cgemm_mt_(&transb, &transa, &n, &m, &k,`
	`132`	`+ cgemm_mth_(&transb, &transa, &n, &m, &k,`
`133`	`133`	`&alpha, b, &ldb, a, &lda,`
`134`	`134`	`&beta, c, &ldc, GlobalV::MY_RANK);`
`135`	`135`	`}`
`@@ -147,7 +147,7 @@ void BlasConnector::gemm(const char transa, const char transb, const int m, cons`
`147`	`147`	`}`
`148`	`148`	`#ifdef __DSP`
`149`	`149`	`else if (device_type == base_device::AbacusDevice_t::DspDevice) {`
`150`		`- zgemm_mt_(&transb, &transa, &n, &m, &k,`
	`150`	`+ zgemm_mth_(&transb, &transa, &n, &m, &k,`
`151`	`151`	`&alpha, b, &ldb, a, &lda,`
`152`	`152`	`&beta, c, &ldc, GlobalV::MY_RANK);`
`153`	`153`	`}`
Original file line number	Diff line number	Diff line change
`@@ -6,6 +6,7 @@ namespace base_device`
`6`	`6`
`7`	`7`	`struct DEVICE_CPU;`
`8`	`8`	`struct DEVICE_GPU;`
	`9`	`+struct DEVICE_DSP;`
`9`	`10`
`10`	`11`	`enum AbacusDevice_t`
`11`	`12`	`{`
Original file line number	Diff line number	Diff line change
`@@ -23,11 +23,6 @@ extern "C"`
`23`	`23`	`namespace ModuleESolver`
`24`	`24`	`{`
`25`	`25`
`26`		`-void ESolver::printname()`
`27`		`-{`
`28`		`- std::cout << classname << std::endl;`
`29`		`-}`
`30`		`-`
`31`	`26`	`std::string determine_type()`
`32`	`27`	`{`
`33`	`28`	`std::string esolver_type = "none";`