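Fix bugs: GPU vector_div_vector called the multiply kernel; route GPU vector_mul_vector/vector_div_vector through hsolver::vector_mul_vector_op/vector_div_vector_op, stop including math_op.cu from blas_connector.cpp, and remove the duplicated vector_*_vector_gpu kernels from math_op.cu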

Critsium-xy · Critsium-xy · commit 91e8dc21bfb6 · 2025-01-14T01:57:10.000+08:00
diff --git a/source/module_base/blas_connector.cpp b/source/module_base/blas_connector.cpp
@@ -9,13 +9,10 @@
 #ifdef __CUDA
 #include <base/macros/macros.h>
 #include <cuda_runtime.h>
-#include <thrust/complex.h>
-#include <thrust/execution_policy.h>
-#include <thrust/inner_product.h>
-#include "module_base/tool_quit.h"
-#include "module_base/kernels/cuda/math_op.cu"
-
 #include "cublas_v2.h"
+#include "module_hsolver/kernels/math_kernel_op.h"
+#include "module_base/module_device/memory_op.h"
+
 
 namespace BlasUtils{
 
@@ -671,7 +668,7 @@ void vector_mul_vector(const int& dim, T* result, const T* vector1, const T* vec
 	}
 	else if (device_type == base_device::AbacusDevice_t::GpuDevice){
 #ifdef __CUDA
-		vector_mul_vector_gpu(dim, result, vector1, vector2);
+		hsolver::vector_mul_vector_op<T, base_device::DEVICE_GPU>()(gpu_ctx, dim, result, vector1, vector2);
 #endif
 	}
 }
@@ -691,7 +688,7 @@ void vector_div_vector(const int& dim, T* result, const T* vector1, const T* vec
 	}
 	else if (device_type == base_device::AbacusDevice_t::GpuDevice){
 #ifdef __CUDA
-		vector_mul_vector_gpu(dim, result, vector1, vector2);
+		hsolver::vector_div_vector_op<T, base_device::DEVICE_GPU>()(gpu_ctx, dim, result, vector1, vector2);
 #endif
 	}
 }
diff --git a/source/module_base/kernels/cuda/math_op.cu b/source/module_base/kernels/cuda/math_op.cu
@@ -1,12 +1,7 @@
-#include "cuda_runtime.h"
+#include <cuda_runtime.h>
 #include "module_base/kernels/math_op.h"
-#include "module_base/macros.h"
 
 #include <base/macros/macros.h>
-#include <cuda_runtime.h>
-#include <thrust/complex.h>
-#include <thrust/execution_policy.h>
-#include <thrust/inner_product.h>
 
 namespace ModuleBase {
 
@@ -159,134 +154,4 @@ void cal_ylm_real_op<FPTYPE, base_device::DEVICE_GPU>::operator()(const base_dev
 template struct cal_ylm_real_op<float, base_device::DEVICE_GPU>;
 template struct cal_ylm_real_op<double, base_device::DEVICE_GPU>;
 
-
-// The next are kernels for new blas_connector
-
-
-template <typename T>
-__global__ void vector_mul_vector_kernel(
-    const int size,
-    T* result,
-    const T* vector1,
-    const typename GetTypeReal<T>::type* vector2)
-{
-    int i = blockIdx.x * blockDim.x + threadIdx.x;
-    if (i < size)
-    {
-        result[i] = vector1[i] * vector2[i];
-    }
-}
-
-template <typename T>
-__global__ void vector_div_vector_kernel(
-    const int size,
-    T* result,
-    const T* vector1,
-    const typename GetTypeReal<T>::type* vector2)
-{
-    int i = blockIdx.x * blockDim.x + threadIdx.x;
-    if (i < size)
-    {
-        result[i] = vector1[i] / vector2[i];
-    }
-}
-
-template <typename FPTYPE>
-inline void vector_div_vector_complex_wrapper(const int dim,
-                                                std::complex<FPTYPE>* result,
-                                                const std::complex<FPTYPE>* vector,
-                                                const FPTYPE constant)
-{
-    thrust::complex<FPTYPE>* result_tmp = reinterpret_cast<thrust::complex<FPTYPE>*>(result);
-    const thrust::complex<FPTYPE>* vector1_tmp = reinterpret_cast<const thrust::complex<FPTYPE>*>(vector1);
-    int thread = THREADS_PER_BLOCK;
-    int block = (dim + thread - 1) / thread;
-    vector_div_vector_kernel<thrust::complex<FPTYPE>> <<<block, thread >>> (dim, result_tmp, vector1_tmp, vector2);
-
-    cudaCheckOnDebug();
-}
-
-template <typename FPTYPE>
-inline void vector_mul_vector_complex_wrapper(const int& dim,
-                                              std::complex<FPTYPE>* result,
-                                              const std::complex<FPTYPE>* vector1,
-                                              const FPTYPE* vector2)
-{
-    thrust::complex<FPTYPE>* result_tmp = reinterpret_cast<thrust::complex<FPTYPE>*>(result);
-    const thrust::complex<FPTYPE>* vector1_tmp = reinterpret_cast<const thrust::complex<FPTYPE>*>(vector1);
-    int thread = THREADS_PER_BLOCK;
-    int block = (dim + thread - 1) / thread;
-    vector_mul_vector_kernel<thrust::complex<FPTYPE>> <<<block, thread >>> (dim, result_tmp, vector1_tmp, vector2);
-
-    cudaCheckOnDebug();
-}
-
-void vector_div_vector_gpu(const int& dim,
-                            double* result,
-                            const double* vector1,
-                            const double* vector2)
-{
-    int thread = THREADS_PER_BLOCK;
-    int block = (dim + thread - 1) / thread;
-    vector_div_vector_kernel<double> <<<block, thread >>> (dim, result, vector1, vector2);
-
-    cudaCheckOnDebug();
-}
-
-void vector_div_vector_gpu(const int& dim,
-                            float* result,
-                            const float* vector1,
-                            const float* vector2)
-{
-    int thread = THREADS_PER_BLOCK;
-    int block = (dim + thread - 1) / thread;
-    vector_div_vector_kernel<float> <<<block, thread >>> (dim, result, vector1, vector2);
-
-    cudaCheckOnDebug();
-}
-
-void vector_div_vector_gpu(const int& dim, std::complex<float>* result, const std::complex<float>* vector1, const float* vector2)
-{
-    vector_div_vector_complex_wrapper(dim, result, vector1, vector2);
-}
-
-void vector_div_vector_gpu(const int& dim, std::complex<double>* result, const std::complex<double>* vector1, const double* vector2)
-{
-    vector_div_vector_complex_wrapper(dim, result, vector1, vector2);
-}
-
-void vector_mul_vector_gpu(const int& dim,
-                            double* result,
-                            const double* vector1,
-                            const double* vector2)
-{
-    int thread = THREADS_PER_BLOCK;
-    int block = (dim + thread - 1) / thread;
-    vector_mul_vector_kernel<double> <<<block, thread >>> (dim, result, vector1, vector2);
-
-    cudaCheckOnDebug();
-}
-
-void vector_mul_vector_gpu(const int& dim,
-                            float* result,
-                            const float* vector1,
-                            const float* vector2)
-{
-    int thread = THREADS_PER_BLOCK;
-    int block = (dim + thread - 1) / thread;
-    vector_mul_vector_kernel<float> <<<block, thread >>> (dim, result, vector1, vector2);
-
-    cudaCheckOnDebug();
-}
-
-void vector_mul_vector_gpu(const int& dim, std::complex<float>* result, const std::complex<float>* vector1, const float* vector2)
-{
-    vector_mul_vector_complex_wrapper(dim, result, vector1, vector2);
-}
-
-void vector_mul_vector_gpu(const int& dim, std::complex<double>* result, const std::complex<double>* vector1, const double* vector2)
-{
-    vector_mul_vector_complex_wrapper(dim, result, vector1, vector2);
-}
-
 }  // namespace ModuleBase

Original file line number	Diff line number	Diff line change
`@@ -9,13 +9,10 @@`
`9`	`9`	`#ifdef __CUDA`
`10`	`10`	`#include <base/macros/macros.h>`
`11`	`11`	`#include <cuda_runtime.h>`
`12`		`-#include <thrust/complex.h>`
`13`		`-#include <thrust/execution_policy.h>`
`14`		`-#include <thrust/inner_product.h>`
`15`		`-#include "module_base/tool_quit.h"`
`16`		`-#include "module_base/kernels/cuda/math_op.cu"`
`17`		`-`
`18`	`12`	`#include "cublas_v2.h"`
	`13`	`+#include "module_hsolver/kernels/math_kernel_op.h"`
	`14`	`+#include "module_base/module_device/memory_op.h"`
	`15`	`+`
`19`	`16`
`20`	`17`	`namespace BlasUtils{`
`21`	`18`
`@@ -671,7 +668,7 @@ void vector_mul_vector(const int& dim, T* result, const T* vector1, const T* vec`
`671`	`668`	`}`
`672`	`669`	`else if (device_type == base_device::AbacusDevice_t::GpuDevice){`
`673`	`670`	`#ifdef __CUDA`
`674`		`- vector_mul_vector_gpu(dim, result, vector1, vector2);`
	`671`	`+ hsolver::vector_mul_vector_op<T, base_device::DEVICE_GPU>()(gpu_ctx, dim, result, vector1, vector2);`
`675`	`672`	`#endif`
`676`	`673`	`}`
`677`	`674`	`}`
`@@ -691,7 +688,7 @@ void vector_div_vector(const int& dim, T* result, const T* vector1, const T* vec`
`691`	`688`	`}`
`692`	`689`	`else if (device_type == base_device::AbacusDevice_t::GpuDevice){`
`693`	`690`	`#ifdef __CUDA`
`694`		`- vector_mul_vector_gpu(dim, result, vector1, vector2);`
	`691`	`+ hsolver::vector_div_vector_op<T, base_device::DEVICE_GPU>()(gpu_ctx, dim, result, vector1, vector2);`
`695`	`692`	`#endif`
`696`	`693`	`}`
`697`	`694`	`}`