deepmodeling
diff --git a/‎.gitignore‎
Lines changed: 3 additions & 1 deletion b/‎.gitignore‎
Lines changed: 3 additions & 1 deletion
diff --git a/‎CMakeLists.txt‎
Lines changed: 12 additions & 2 deletions b/‎CMakeLists.txt‎
Lines changed: 12 additions & 2 deletions
diff --git a/‎docs/quick_start/easy_install.md‎
Lines changed: 2 additions & 2 deletions b/‎docs/quick_start/easy_install.md‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎source/CMakeLists.txt‎
Lines changed: 1 addition & 1 deletion b/‎source/CMakeLists.txt‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎source/Makefile.Objects‎
Lines changed: 3 additions & 1 deletion b/‎source/Makefile.Objects‎
Lines changed: 3 additions & 1 deletion
diff --git a/‎source/module_base/blas_connector.cpp‎
Lines changed: 4 additions & 4 deletions b/‎source/module_base/blas_connector.cpp‎
Lines changed: 4 additions & 4 deletions
diff --git a/‎source/module_base/kernels/cuda/math_kernel_op_vec.cu‎
Lines changed: 5 additions & 5 deletions b/‎source/module_base/kernels/cuda/math_kernel_op_vec.cu‎
Lines changed: 5 additions & 5 deletions
diff --git a/‎source/module_base/kernels/math_kernel_op.h‎
Lines changed: 2 additions & 2 deletions b/‎source/module_base/kernels/math_kernel_op.h‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎source/module_base/kernels/math_kernel_op_vec.cpp‎
Lines changed: 4 additions & 4 deletions b/‎source/module_base/kernels/math_kernel_op_vec.cpp‎
Lines changed: 4 additions & 4 deletions
diff --git a/‎source/module_base/kernels/rocm/math_kernel_op_vec.hip.cu‎
Lines changed: 6 additions & 6 deletions b/‎source/module_base/kernels/rocm/math_kernel_op_vec.hip.cu‎
Lines changed: 6 additions & 6 deletions
@@ -21,4 +21,6 @@ time.json
 *.pyc
 __pycache__
 abacus.json
-*.npy
+*.npy
+toolchain/install/
+toolchain/abacus_env.sh
@@ -352,9 +352,19 @@ if(USE_CUDA)
     endif()
     if (ENABLE_CUSOLVERMP)
       add_compile_definitions(__CUSOLVERMP)
+      find_library(CAL_LIBRARY
+          NAMES cal
+          PATHS ${CAL_CUSOLVERMP_PATH}
+          NO_DEFAULT_PATH
+      )
+      find_library(CUSOLVERMP_LIBRARY
+          NAMES cusolverMp
+          PATHS ${CAL_CUSOLVERMP_PATH}
+          NO_DEFAULT_PATH
+      )
       target_link_libraries(${ABACUS_BIN_NAME}
-          cal
-          cusolverMp
+          ${CAL_LIBRARY}
+          ${CUSOLVERMP_LIBRARY}
       )
     endif()
   endif()
 
@@ -106,7 +106,7 @@ Here, 'build' is the path for building ABACUS; and '-D' is used for setting up s
 - `CMAKE_INSTALL_PREFIX`: the path of ABACUS binary to install; `/usr/local/bin/abacus` by default
 - Compilers
   - `CMAKE_CXX_COMPILER`: C++ compiler; usually `g++`(GNU C++ compiler) or `icpx`(Intel C++ compiler). Can also set from environment variable `CXX`. It is OK to use MPI compiler here.
-  - `MPI_CXX_COMPILER`: MPI wrapper for C++ compiler; usually `mpicxx` or `mpiicpc`(for Intel MPI).
+  - `MPI_CXX_COMPILER`: MPI wrapper for C++ compiler; usually `mpicxx`, `mpiicpx` (for Intel toolkits), or `mpiicpc` (the Intel MPI wrapper for the classic Intel compiler, before 2024.0).
 - Requirements: Unless indicated, CMake will try to find under default paths.
   - `MKLROOT`: If environment variable `MKLROOT` exists, `cmake` will take MKL as a preference, i.e. not using `LAPACK`, `ScaLAPACK` and `FFTW`. To disable MKL, unset environment variable `MKLROOT`, or pass `-DMKLROOT=OFF` to `cmake`.
   - `LAPACK_DIR`: Path to OpenBLAS library `libopenblas.so`(including BLAS and LAPACK)
@@ -136,7 +136,7 @@ Here, 'build' is the path for building ABACUS; and '-D' is used for setting up s
 Here is an example:
 
 ```bash
-CXX=mpiicpc cmake -B build -DCMAKE_INSTALL_PREFIX=~/abacus -DELPA_DIR=~/elpa-2016.05.004/build -DCEREAL_INCLUDE_DIR=~/cereal/include
+CXX=mpiicpx cmake -B build -DCMAKE_INSTALL_PREFIX=~/abacus -DELPA_DIR=~/elpa-2025.01.001/build -DCEREAL_INCLUDE_DIR=~/cereal/include
 ```
 
 ## Build and Install
 
@@ -104,7 +104,7 @@ if(USE_ROCM)
     module_hamilt_pw/hamilt_pwdft/kernels/rocm/wf_op.hip.cu
     module_hamilt_pw/hamilt_pwdft/kernels/rocm/vnl_op.hip.cu
     module_base/kernels/rocm/math_kernel_op.hip.cu
-    module_base/kernels/rocm/math_kernel_op.hip_vec.cu
+    module_base/kernels/rocm/math_kernel_op_vec.hip.cu
     module_base/kernels/rocm/math_ylm_op.hip.cu
     module_hamilt_general/module_xc/kernels/rocm/xc_functional_op.hip.cu
   )
 
@@ -443,8 +443,10 @@ OBJS_RELAXATION=bfgs_basic.o\
     lattice_change_methods.o\
     relax_old.o\
     relax.o\
-    line_search.o\
     bfgs.o\
+    lbfgs.o\
+    matrix_methods.o\
+    line_search.o\
 
 
 OBJS_SURCHEM=surchem.o\
 
@@ -820,7 +820,7 @@ void vector_add_vector(const int& dim, float *result, const float *vector1, cons
 	}
 	else if (device_type == base_device::GpuDevice){
 #ifdef __CUDA
-		ModuleBase::constantvector_addORsub_constantVector_op<float, base_device::DEVICE_GPU>()(dim, result, vector1, constant1, vector2, constant2);
+		ModuleBase::vector_add_vector_op<float, base_device::DEVICE_GPU>()(dim, result, vector1, constant1, vector2, constant2);
 #endif
 	}
 }
@@ -838,7 +838,7 @@ void vector_add_vector(const int& dim, double *result, const double *vector1, co
 	}
 	else if (device_type == base_device::GpuDevice){
 #ifdef __CUDA
-		ModuleBase::constantvector_addORsub_constantVector_op<double, base_device::DEVICE_GPU>()(dim, result, vector1, constant1, vector2, constant2);
+		ModuleBase::vector_add_vector_op<double, base_device::DEVICE_GPU>()(dim, result, vector1, constant1, vector2, constant2);
 #endif
 	}
 }
@@ -856,7 +856,7 @@ void vector_add_vector(const int& dim, std::complex<float> *result, const std::c
 	}
 	else if (device_type == base_device::GpuDevice){
 #ifdef __CUDA
-		ModuleBase::constantvector_addORsub_constantVector_op<std::complex<float>, base_device::DEVICE_GPU>()(dim, result, vector1, constant1, vector2, constant2);
+		ModuleBase::vector_add_vector_op<std::complex<float>, base_device::DEVICE_GPU>()(dim, result, vector1, constant1, vector2, constant2);
 #endif
 	}
 }
@@ -874,7 +874,7 @@ void vector_add_vector(const int& dim, std::complex<double> *result, const std::
 	}
 	else if (device_type == base_device::GpuDevice){
 #ifdef __CUDA
-		ModuleBase::constantvector_addORsub_constantVector_op<std::complex<double>, base_device::DEVICE_GPU>()(dim, result, vector1, constant1, vector2, constant2);
+		ModuleBase::vector_add_vector_op<std::complex<double>, base_device::DEVICE_GPU>()(dim, result, vector1, constant1, vector2, constant2);
 #endif
 	}
 }
@@ -225,7 +225,7 @@ void vector_div_vector_op<std::complex<double>, base_device::DEVICE_GPU>::operat
 
 // vector operator: result[i] = vector1[i] * constant1 + vector2[i] * constant2
 template <typename T>
-void constantvector_addORsub_constantVector_op<T, base_device::DEVICE_GPU>::operator()(const int& dim,
+void vector_add_vector_op<T, base_device::DEVICE_GPU>::operator()(const int& dim,
                                                                                        T* result,
                                                                                        const T* vector1,
                                                                                        const Real constant1,
@@ -314,10 +314,10 @@ template struct vector_div_vector_op<std::complex<float>, base_device::DEVICE_GP
 template struct vector_div_vector_op<double, base_device::DEVICE_GPU>;
 template struct vector_div_vector_op<std::complex<double>, base_device::DEVICE_GPU>;
 
-template struct constantvector_addORsub_constantVector_op<float, base_device::DEVICE_GPU>;
-template struct constantvector_addORsub_constantVector_op<std::complex<float>, base_device::DEVICE_GPU>;
-template struct constantvector_addORsub_constantVector_op<double, base_device::DEVICE_GPU>;
-template struct constantvector_addORsub_constantVector_op<std::complex<double>, base_device::DEVICE_GPU>;
+template struct vector_add_vector_op<float, base_device::DEVICE_GPU>;
+template struct vector_add_vector_op<std::complex<float>, base_device::DEVICE_GPU>;
+template struct vector_add_vector_op<double, base_device::DEVICE_GPU>;
+template struct vector_add_vector_op<std::complex<double>, base_device::DEVICE_GPU>;
 
 template struct dot_real_op<std::complex<float>, base_device::DEVICE_GPU>;
 template struct dot_real_op<double, base_device::DEVICE_GPU>;
 
@@ -134,7 +134,7 @@ template <typename T, typename Device> struct axpy_op {
 
 // vector operator: result[i] = vector1[i] * constant1 + vector2[i] * constant2
 template <typename T, typename Device>
-struct constantvector_addORsub_constantVector_op {
+struct vector_add_vector_op {
   using Real = typename GetTypeReal<T>::type;
   /// @brief result[i] = vector1[i] * constant1 + vector2[i] * constant2
   ///
@@ -315,7 +315,7 @@ template <typename T> struct vector_div_vector_op<T, base_device::DEVICE_GPU> {
 
 // vector operator: result[i] = vector1[i] * constant1 + vector2[i] * constant2
 template <typename T>
-struct constantvector_addORsub_constantVector_op<T, base_device::DEVICE_GPU> {
+struct vector_add_vector_op<T, base_device::DEVICE_GPU> {
   using Real = typename GetTypeReal<T>::type;
   void operator()(const int &dim, T *result,
                   const T *vector1, const Real constant1, const T *vector2,
 
@@ -92,7 +92,7 @@ struct axpy_op<T, base_device::DEVICE_CPU>
 
 
 template <typename T>
-struct constantvector_addORsub_constantVector_op<T, base_device::DEVICE_CPU>
+struct vector_add_vector_op<T, base_device::DEVICE_CPU>
 {
     using Real = typename GetTypeReal<T>::type;
     void operator()(const int& dim,
@@ -167,9 +167,9 @@ template struct axpy_op<std::complex<float>, base_device::DEVICE_CPU>;
 template struct axpy_op<std::complex<double>, base_device::DEVICE_CPU>;
 template struct axpy_op<double, base_device::DEVICE_CPU>;
 
-template struct constantvector_addORsub_constantVector_op<std::complex<float>, base_device::DEVICE_CPU>;
-template struct constantvector_addORsub_constantVector_op<double, base_device::DEVICE_CPU>;
-template struct constantvector_addORsub_constantVector_op<std::complex<double>, base_device::DEVICE_CPU>;
+template struct vector_add_vector_op<std::complex<float>, base_device::DEVICE_CPU>;
+template struct vector_add_vector_op<double, base_device::DEVICE_CPU>;
+template struct vector_add_vector_op<std::complex<double>, base_device::DEVICE_CPU>;
 
 template struct dot_real_op<std::complex<float>, base_device::DEVICE_CPU>;
 template struct dot_real_op<std::complex<double>, base_device::DEVICE_CPU>;
 
@@ -87,7 +87,7 @@ void vector_mul_real_op<double, base_device::DEVICE_GPU>::operator()(const int d
 {
     int thread = 1024;
     int block = (dim + thread - 1) / thread;
-    hipLaunchKernelGGL(HIP_KERNEL_NAME(vector_div_constant_kernel<double>),
+    hipLaunchKernelGGL(HIP_KERNEL_NAME(vector_mul_real_kernel<double>),
                        dim3(block),
                        dim3(thread),
                        0,
@@ -275,7 +275,7 @@ void vector_div_vector_op<std::complex<double>, base_device::DEVICE_GPU>::operat
 
 // vector operator: result[i] = vector1[i] * constant1 + vector2[i] * constant2
 template <typename T>
-void constantvector_addORsub_constantVector_op<T, base_device::DEVICE_GPU>::operator()(const int& dim,
+void vector_add_vector_op<T, base_device::DEVICE_GPU>::operator()(const int& dim,
                                                                                        T* result,
                                                                                        const T* vector1,
                                                                                        const Real constant1,
@@ -365,10 +365,10 @@ template struct vector_div_vector_op<std::complex<float>, base_device::DEVICE_GP
 template struct vector_div_vector_op<double, base_device::DEVICE_GPU>;
 template struct vector_div_vector_op<std::complex<double>, base_device::DEVICE_GPU>;
 
-template struct constantvector_addORsub_constantVector_op<float, base_device::DEVICE_GPU>;
-template struct constantvector_addORsub_constantVector_op<std::complex<float>, base_device::DEVICE_GPU>;
-template struct constantvector_addORsub_constantVector_op<double, base_device::DEVICE_GPU>;
-template struct constantvector_addORsub_constantVector_op<std::complex<double>, base_device::DEVICE_GPU>;
+template struct vector_add_vector_op<float, base_device::DEVICE_GPU>;
+template struct vector_add_vector_op<std::complex<float>, base_device::DEVICE_GPU>;
+template struct vector_add_vector_op<double, base_device::DEVICE_GPU>;
+template struct vector_add_vector_op<std::complex<double>, base_device::DEVICE_GPU>;
 
 template struct dot_real_op<std::complex<float>, base_device::DEVICE_GPU>;
 template struct dot_real_op<double, base_device::DEVICE_GPU>;
Original file line number	Diff line number	Diff line change
`@@ -104,7 +104,7 @@ if(USE_ROCM)`
`104`	`104`	`module_hamilt_pw/hamilt_pwdft/kernels/rocm/wf_op.hip.cu`
`105`	`105`	`module_hamilt_pw/hamilt_pwdft/kernels/rocm/vnl_op.hip.cu`
`106`	`106`	`module_base/kernels/rocm/math_kernel_op.hip.cu`
`107`		`- module_base/kernels/rocm/math_kernel_op.hip_vec.cu`
	`107`	`+ module_base/kernels/rocm/math_kernel_op_vec.hip.cu`
`108`	`108`	`module_base/kernels/rocm/math_ylm_op.hip.cu`
`109`	`109`	`module_hamilt_general/module_xc/kernels/rocm/xc_functional_op.hip.cu`
`110`	`110`	`)`
Original file line number	Diff line number	Diff line change
`@@ -820,7 +820,7 @@ void vector_add_vector(const int& dim, float *result, const float *vector1, cons`
`820`	`820`	`}`
`821`	`821`	`else if (device_type == base_device::GpuDevice){`
`822`	`822`	`#ifdef __CUDA`
`823`		`- ModuleBase::constantvector_addORsub_constantVector_op<float, base_device::DEVICE_GPU>()(dim, result, vector1, constant1, vector2, constant2);`
	`823`	`+ ModuleBase::vector_add_vector_op<float, base_device::DEVICE_GPU>()(dim, result, vector1, constant1, vector2, constant2);`
`824`	`824`	`#endif`
`825`	`825`	`}`
`826`	`826`	`}`
`@@ -838,7 +838,7 @@ void vector_add_vector(const int& dim, double *result, const double *vector1, co`
`838`	`838`	`}`
`839`	`839`	`else if (device_type == base_device::GpuDevice){`
`840`	`840`	`#ifdef __CUDA`
`841`		`- ModuleBase::constantvector_addORsub_constantVector_op<double, base_device::DEVICE_GPU>()(dim, result, vector1, constant1, vector2, constant2);`
	`841`	`+ ModuleBase::vector_add_vector_op<double, base_device::DEVICE_GPU>()(dim, result, vector1, constant1, vector2, constant2);`
`842`	`842`	`#endif`
`843`	`843`	`}`
`844`	`844`	`}`
`@@ -856,7 +856,7 @@ void vector_add_vector(const int& dim, std::complex<float> *result, const std::c`
`856`	`856`	`}`
`857`	`857`	`else if (device_type == base_device::GpuDevice){`
`858`	`858`	`#ifdef __CUDA`
`859`		`- ModuleBase::constantvector_addORsub_constantVector_op<std::complex<float>, base_device::DEVICE_GPU>()(dim, result, vector1, constant1, vector2, constant2);`
	`859`	`+ ModuleBase::vector_add_vector_op<std::complex<float>, base_device::DEVICE_GPU>()(dim, result, vector1, constant1, vector2, constant2);`
`860`	`860`	`#endif`
`861`	`861`	`}`
`862`	`862`	`}`
`@@ -874,7 +874,7 @@ void vector_add_vector(const int& dim, std::complex<double> *result, const std::`
`874`	`874`	`}`
`875`	`875`	`else if (device_type == base_device::GpuDevice){`
`876`	`876`	`#ifdef __CUDA`
`877`		`- ModuleBase::constantvector_addORsub_constantVector_op<std::complex<double>, base_device::DEVICE_GPU>()(dim, result, vector1, constant1, vector2, constant2);`
	`877`	`+ ModuleBase::vector_add_vector_op<std::complex<double>, base_device::DEVICE_GPU>()(dim, result, vector1, constant1, vector2, constant2);`
`878`	`878`	`#endif`
`879`	`879`	`}`
`880`	`880`	`}`