deepmodeling
diff --git a/‎source/CMakeLists.txt‎
Lines changed: 8 additions & 0 deletions b/‎source/CMakeLists.txt‎
Lines changed: 8 additions & 0 deletions
diff --git a/‎source/Makefile.Objects‎
Lines changed: 4 additions & 0 deletions b/‎source/Makefile.Objects‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎source/source_base/module_container/ATen/kernels/cuda/lapack.cu‎
Lines changed: 0 additions & 2 deletions b/‎source/source_base/module_container/ATen/kernels/cuda/lapack.cu‎
Lines changed: 0 additions & 2 deletions
diff --git a/‎source/source_base/module_container/base/third_party/cusolver.h‎
Lines changed: 18 additions & 6 deletions b/‎source/source_base/module_container/base/third_party/cusolver.h‎
Lines changed: 18 additions & 6 deletions
diff --git a/‎source/source_basis/module_pw/pw_basis.h‎
Lines changed: 3 additions & 0 deletions b/‎source/source_basis/module_pw/pw_basis.h‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎source/source_esolver/esolver_ks_pw.cpp‎
Lines changed: 1 addition & 0 deletions b/‎source/source_esolver/esolver_ks_pw.cpp‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎source/source_io/input_conv.cpp‎
Lines changed: 2 additions & 6 deletions b/‎source/source_io/input_conv.cpp‎
Lines changed: 2 additions & 6 deletions
diff --git a/‎source/source_pw/module_pwdft/CMakeLists.txt‎
Lines changed: 1 addition & 1 deletion b/‎source/source_pw/module_pwdft/CMakeLists.txt‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎source/source_pw/module_pwdft/kernels/cal_density_real_op.cpp‎
Lines changed: 25 additions & 0 deletions b/‎source/source_pw/module_pwdft/kernels/cal_density_real_op.cpp‎
Lines changed: 25 additions & 0 deletions
diff --git a/‎source/source_pw/module_pwdft/kernels/cal_density_real_op.h‎
Lines changed: 14 additions & 0 deletions b/‎source/source_pw/module_pwdft/kernels/cal_density_real_op.h‎
Lines changed: 14 additions & 0 deletions
@@ -54,6 +54,10 @@ list(APPEND device_srcs
   source_pw/module_pwdft/kernels/vnl_op.cpp
   source_base/kernels/math_ylm_op.cpp
   source_hamilt/module_xc/kernels/xc_functional_op.cpp
+  source_pw/module_pwdft/kernels/cal_density_real_op.cpp
+  source_pw/module_pwdft/kernels/mul_potential_op.cpp
+  source_pw/module_pwdft/kernels/vec_mul_vec_complex_op.cpp
+  source_pw/module_pwdft/kernels/exx_cal_energy_op.cpp
 )
 
 if(USE_CUDA)
@@ -80,6 +84,10 @@ if(USE_CUDA)
     source_base/kernels/cuda/math_kernel_op.cu
     source_base/kernels/cuda/math_kernel_op_vec.cu
     source_hamilt/module_xc/kernels/cuda/xc_functional_op.cu
+    source_pw/module_pwdft/kernels/cuda/cal_density_real_op.cu
+    source_pw/module_pwdft/kernels/cuda/mul_potential_op.cu
+    source_pw/module_pwdft/kernels/cuda/vec_mul_vec_complex.cu
+    source_pw/module_pwdft/kernels/cuda/exx_cal_energy_op.cu
   )
 endif()
 
 
@@ -342,6 +342,10 @@ OBJS_HAMILT=hamilt_pw.o\
     velocity_pw.o\
     radial_proj.o\
     exx_helper.o\
+    vec_mul_vec_complex_op.o\
+    exx_cal_energy_op.o\
+    cal_density_real_op.o\
+    mul_potential_op.o\
 
 OBJS_HAMILT_OF=kedf_tf.o\
     kedf_vw.o\
 
@@ -70,8 +70,6 @@ struct lapack_trtri<T, DEVICE_GPU> {
     {
         // TODO: trtri is not implemented in this method yet
         // Cause the trtri in cuSolver is not stable for ABACUS!
-        // But why?! trtri and potri are different routines for different job! 
-        // How can BPCG work without using a proper routine? 
         cuSolverConnector::trtri(cusolver_handle, uplo, diag, dim, Mat, lda);
         // cuSolverConnector::potri(cusolver_handle, uplo, diag, dim, Mat, lda);
     }
 
@@ -87,45 +87,79 @@ static inline
 void potrf (cusolverDnHandle_t& cusolver_handle, const char& uplo, const int& n, float * A, const int& lda)
 {
+    // Single-precision Cholesky factorization of the n-by-n matrix A (leading
+    // dimension lda) via cusolverDnSpotrf. `uplo` is passed through
+    // cublas_fill_mode() to select the fill mode.
     int lwork;
+    int *info = nullptr;
+    cudaErrcheck(cudaMalloc((void**)&info, 1 * sizeof(int)));
-    cusolverErrcheck(cusolverDnSpotrf_bufferSize(cusolver_handle, cublas_fill_mode(uplo), n, A, n, &lwork));
+    // Honor the caller's lda instead of assuming a packed matrix (lda == n).
+    cusolverErrcheck(cusolverDnSpotrf_bufferSize(cusolver_handle, cublas_fill_mode(uplo), n, A, lda, &lwork));
     float* work;
     cudaErrcheck(cudaMalloc((void**)&work, lwork * sizeof(float)));
     // Perform Cholesky decomposition
-    cusolverErrcheck(cusolverDnSpotrf(cusolver_handle, cublas_fill_mode(uplo), n, A, n, work, lwork, nullptr));
+    // NOTE(review): `info` is freed without being copied back to the host, so a
+    // non-zero factorization status is not reported to the caller.
+    cusolverErrcheck(cusolverDnSpotrf(cusolver_handle, cublas_fill_mode(uplo), n, A, lda, work, lwork, info));
     cudaErrcheck(cudaFree(work));
+    cudaErrcheck(cudaFree(info));
 }
 static inline
 void potrf (cusolverDnHandle_t& cusolver_handle, const char& uplo, const int& n, double * A, const int& lda)
 {
+    // Double-precision Cholesky factorization of the n-by-n matrix A (leading
+    // dimension lda) via cusolverDnDpotrf. `uplo` is passed through
+    // cublas_fill_mode() to select the fill mode.
     int lwork;
+    int *info = nullptr;
+    cudaErrcheck(cudaMalloc((void**)&info, 1 * sizeof(int)));
-    cusolverErrcheck(cusolverDnDpotrf_bufferSize(cusolver_handle, cublas_fill_mode(uplo), n, A, n, &lwork));
+    // Honor the caller's lda instead of assuming a packed matrix (lda == n).
+    cusolverErrcheck(cusolverDnDpotrf_bufferSize(cusolver_handle, cublas_fill_mode(uplo), n, A, lda, &lwork));
     double* work;
     cudaErrcheck(cudaMalloc((void**)&work, lwork * sizeof(double)));
     // Perform Cholesky decomposition
-    cusolverErrcheck(cusolverDnDpotrf(cusolver_handle, cublas_fill_mode(uplo), n, A, n, work, lwork, nullptr));
+    // NOTE(review): `info` is freed without being copied back to the host, so a
+    // non-zero factorization status is not reported to the caller.
+    cusolverErrcheck(cusolverDnDpotrf(cusolver_handle, cublas_fill_mode(uplo), n, A, lda, work, lwork, info));
     cudaErrcheck(cudaFree(work));
+    cudaErrcheck(cudaFree(info));
 }
 static inline
 void potrf (cusolverDnHandle_t& cusolver_handle, const char& uplo, const int& n, std::complex<float> * A, const int& lda)
 {
+    // Single-precision complex Cholesky factorization of the n-by-n matrix A
+    // (leading dimension lda) via cusolverDnCpotrf; A is reinterpreted as
+    // cuComplex for the cuSOLVER calls.
     int lwork;
-    cusolverErrcheck(cusolverDnCpotrf_bufferSize(cusolver_handle, cublas_fill_mode(uplo), n, reinterpret_cast<cuComplex*>(A), n, &lwork));
+    int *info = nullptr;
+    cudaErrcheck(cudaMalloc((void**)&info, 1 * sizeof(int)));
+    cusolverErrcheck(cusolverDnCpotrf_bufferSize(cusolver_handle, cublas_fill_mode(uplo), n, reinterpret_cast<cuComplex*>(A), lda, &lwork));
     cuComplex* work;
     cudaErrcheck(cudaMalloc((void**)&work, lwork * sizeof(cuComplex)));
     // Perform Cholesky decomposition
-    cusolverErrcheck(cusolverDnCpotrf(cusolver_handle, cublas_fill_mode(uplo), n, reinterpret_cast<cuComplex*>(A), n, work, lwork, nullptr));
+    // NOTE(review): `info` is freed without being copied back to the host, so a
+    // non-zero factorization status is not reported to the caller.
+    cusolverErrcheck(cusolverDnCpotrf(cusolver_handle, cublas_fill_mode(uplo), n, reinterpret_cast<cuComplex*>(A), lda, work, lwork, info));
     cudaErrcheck(cudaFree(work));
+    cudaErrcheck(cudaFree(info));
 }
 static inline
 void potrf (cusolverDnHandle_t& cusolver_handle, const char& uplo, const int& n, std::complex<double> * A, const int& lda)
 {
+    // Double-precision complex Cholesky factorization of the n-by-n matrix A
+    // (leading dimension lda) via cusolverDnZpotrf; A is reinterpreted as
+    // cuDoubleComplex for the cuSOLVER calls.
     int lwork;
-    cusolverErrcheck(cusolverDnZpotrf_bufferSize(cusolver_handle, cublas_fill_mode(uplo), n, reinterpret_cast<cuDoubleComplex*>(A), n, &lwork));
+    int *info = nullptr;
+    cudaErrcheck(cudaMalloc((void**)&info, 1 * sizeof(int)));
+    cusolverErrcheck(cusolverDnZpotrf_bufferSize(cusolver_handle, cublas_fill_mode(uplo), n, reinterpret_cast<cuDoubleComplex*>(A), lda, &lwork));
     cuDoubleComplex* work;
     cudaErrcheck(cudaMalloc((void**)&work, lwork * sizeof(cuDoubleComplex)));
     // Perform Cholesky decomposition
-    cusolverErrcheck(cusolverDnZpotrf(cusolver_handle, cublas_fill_mode(uplo), n, reinterpret_cast<cuDoubleComplex*>(A), n, work, lwork, nullptr));
+    // NOTE(review): `info` is freed without being copied back to the host, so a
+    // non-zero factorization status is not reported to the caller.
+    cusolverErrcheck(cusolverDnZpotrf(cusolver_handle, cublas_fill_mode(uplo), n, reinterpret_cast<cuDoubleComplex*>(A), lda, work, lwork, info));
     cudaErrcheck(cudaFree(work));
+    cudaErrcheck(cudaFree(info));
 }
 
 
 
@@ -432,6 +432,9 @@ class PW_Basis
     void set_device(std::string device_);
     void set_precision(std::string precision_);
 
+    std::string get_device() const { return device; }       ///< returns a copy of `device` ("cpu" unless changed)
+    std::string get_precision() const { return precision; } ///< returns a copy of `precision`
+
 protected:
 
   std::string device = "cpu";       ///< cpu or gpu
 
@@ -619,6 +619,7 @@ void ESolver_KS_PW<T, Device>::iter_finish(UnitCell& ucell, const int istep, int
             {
                 auto start = std::chrono::high_resolution_clock::now();
                 exx_helper.set_firstiter(false);
+                exx_helper.op_exx->first_iter = false;
                 exx_helper.set_psi(this->kspw_psi);
 
                 conv_esolver = exx_helper.exx_after_converge(iter);
 
@@ -488,19 +488,15 @@ void Input_Conv::Convert()
     {
         if (ModuleSymmetry::Symmetry::symm_flag != -1)
         {
-            ModuleBase::WARNING("Input_Conv", "EXX PW works only with symmetry=-1");
-            ModuleSymmetry::Symmetry::symm_flag = -1;
+            ModuleBase::WARNING_QUIT("Input_Conv", "EXX PW works only with symmetry=-1");
+            // ModuleSymmetry::Symmetry::symm_flag = -1;
         }
 
         if (PARAM.inp.nspin != 1 && PARAM.inp.nspin != 2)
         {
             ModuleBase::WARNING_QUIT("Input_Conv", "EXX PW works only with nspin=1 and 2");
         }
 
-        if (PARAM.inp.device != "cpu")
-        {
-            ModuleBase::WARNING_QUIT("Input_Conv", "EXX PW works only with device=cpu");
-        }
     }
 
     //----------------------------------------------------------
 
@@ -26,6 +26,7 @@ list(APPEND objects
     stress_func_nl.cpp
     stress_func_us.cpp
     stress_func_onsite.cpp
+    stress_func_exx.cpp
     stress_pw.cpp
     VL_in_pw.cpp
     VNL_in_pw.cpp
@@ -47,7 +48,6 @@ add_library(
     module_pwdft
     OBJECT
     ${objects}
-        stress_func_exx.cpp
 )
 
 if(ENABLE_COVERAGE)
 
@@ -0,0 +1,25 @@
+#include "source_pw/module_pwdft/kernels/cal_density_real_op.h"
+#include "source_psi/psi.h"
+namespace hamilt
+{
+/// CPU specialization: out[i] = in1[i] * conj(in2[i]) / omega for 0 <= i < nrxx.
+template <typename T>
+struct cal_density_real_op<T, base_device::DEVICE_CPU>
+{
+    void operator()(const T *in1, const T *in2, T *out, double omega, int nrxx)
+    {
+        const T divisor = static_cast<T>(omega); // loop-invariant, converted once
+#ifdef _OPENMP
+#pragma omp parallel for schedule(static)
+#endif
+        for (int idx = 0; idx < nrxx; ++idx)
+        {
+            const T pair_product = in1[idx] * std::conj(in2[idx]);
+            out[idx] = pair_product / divisor; // Phase e^(i(q-k)r)
+        }
+    }
+};
+// Explicit instantiations for single- and double-precision complex data.
+template struct cal_density_real_op<std::complex<float>, base_device::DEVICE_CPU>;
+template struct cal_density_real_op<std::complex<double>, base_device::DEVICE_CPU>;
+}
@@ -0,0 +1,14 @@
+#include "source_base/macros.h"
+
+#ifndef CAL_DENSITY_REAL_OP_H
+#define CAL_DENSITY_REAL_OP_H
+namespace hamilt {
+/// Functor template selected by value type T and a Device tag; operator() is
+/// only declared here and is expected to be defined by device specializations.
+template <typename T, typename Device>
+struct cal_density_real_op {
+    using Real = typename GetTypeReal<T>::type;
+    void operator()(const T* psi1, const T* psi2, T* out, double omega, int nrxx);
+};
+} // namespace hamilt
+#endif // CAL_DENSITY_REAL_OP_H
Original file line number	Diff line number	Diff line change
`@@ -70,8 +70,6 @@ struct lapack_trtri<T, DEVICE_GPU> {`
`70`	`70`	`{`
`71`	`71`	`// TODO: trtri is not implemented in this method yet`
`72`	`72`	`// Cause the trtri in cuSolver is not stable for ABACUS!`
`73`		`- // But why?! trtri and potri are different routines for different job!`
`74`		`- // How can BPCG work without using a proper routine?`
`75`	`73`	`cuSolverConnector::trtri(cusolver_handle, uplo, diag, dim, Mat, lda);`
`76`	`74`	`// cuSolverConnector::potri(cusolver_handle, uplo, diag, dim, Mat, lda);`
`77`	`75`	`}`
Original file line number	Diff line number	Diff line change
`@@ -619,6 +619,7 @@ void ESolver_KS_PW<T, Device>::iter_finish(UnitCell& ucell, const int istep, int`
`619`	`619`	`{`
`620`	`620`	`auto start = std::chrono::high_resolution_clock::now();`
`621`	`621`	`exx_helper.set_firstiter(false);`
	`622`	`+ exx_helper.op_exx->first_iter = false;`
`622`	`623`	`exx_helper.set_psi(this->kspw_psi);`
`623`	`624`
`624`	`625`	`conv_esolver = exx_helper.exx_after_converge(iter);`
Original file line number	Diff line number	Diff line change
`@@ -488,19 +488,15 @@ void Input_Conv::Convert()`
`488`	`488`	`{`
`489`	`489`	`if (ModuleSymmetry::Symmetry::symm_flag != -1)`
`490`	`490`	`{`
`491`		`- ModuleBase::WARNING("Input_Conv", "EXX PW works only with symmetry=-1");`
`492`		`- ModuleSymmetry::Symmetry::symm_flag = -1;`
	`491`	`+ ModuleBase::WARNING_QUIT("Input_Conv", "EXX PW works only with symmetry=-1");`
	`492`	`+ // ModuleSymmetry::Symmetry::symm_flag = -1;`
`493`	`493`	`}`
`494`	`494`
`495`	`495`	`if (PARAM.inp.nspin != 1 && PARAM.inp.nspin != 2)`
`496`	`496`	`{`
`497`	`497`	`ModuleBase::WARNING_QUIT("Input_Conv", "EXX PW works only with nspin=1 and 2");`
`498`	`498`	`}`
`499`	`499`
`500`		`- if (PARAM.inp.device != "cpu")`
`501`		`- {`
`502`		`- ModuleBase::WARNING_QUIT("Input_Conv", "EXX PW works only with device=cpu");`
`503`		`- }`
`504`	`500`	`}`
`505`	`501`
`506`	`502`	`//----------------------------------------------------------`