Remove the device-context (ctx) parameter from axpy_op::operator() and update all call sites

Critsium-xy · Critsium-xy · commit 3540ea29b297 · 2025-02-12T13:05:19.000+08:00
diff --git a/source/module_base/kernels/cuda/math_kernel_op.cu b/source/module_base/kernels/cuda/math_kernel_op.cu
@@ -665,8 +665,7 @@ void constantvector_addORsub_constantVector_op<T, base_device::DEVICE_GPU>::oper
 }
 
 template <>
-void axpy_op<double, base_device::DEVICE_GPU>::operator()(const base_device::DEVICE_GPU* d,
-                                                          const int& N,
+void axpy_op<double, base_device::DEVICE_GPU>::operator()(const int& N,
                                                           const double* alpha,
                                                           const double* X,
                                                           const int& incX,
@@ -677,8 +676,7 @@ void axpy_op<double, base_device::DEVICE_GPU>::operator()(const base_device::DEV
 }
 
 template <>
-void axpy_op<std::complex<float>, base_device::DEVICE_GPU>::operator()(const base_device::DEVICE_GPU* d,
-                                                                       const int& N,
+void axpy_op<std::complex<float>, base_device::DEVICE_GPU>::operator()(const int& N,
                                                                        const std::complex<float>* alpha,
                                                                        const std::complex<float>* X,
                                                                        const int& incX,
@@ -689,8 +687,7 @@ void axpy_op<std::complex<float>, base_device::DEVICE_GPU>::operator()(const bas
 }
 
 template <>
-void axpy_op<std::complex<double>, base_device::DEVICE_GPU>::operator()(const base_device::DEVICE_GPU* d,
-                                                                        const int& N,
+void axpy_op<std::complex<double>, base_device::DEVICE_GPU>::operator()(const int& N,
                                                                         const std::complex<double>* alpha,
                                                                         const std::complex<double>* X,
                                                                         const int& incX,
diff --git a/source/module_base/kernels/math_kernel_op.cpp b/source/module_base/kernels/math_kernel_op.cpp
@@ -251,8 +251,7 @@ struct gemv_op<T, base_device::DEVICE_CPU>
 template <typename T>
 struct axpy_op<T, base_device::DEVICE_CPU>
 {
-    void operator()(const base_device::DEVICE_CPU* /*ctx*/,
-                    const int& dim,
+    void operator()(const int& dim,
                     const T* alpha,
                     const T* X,
                     const int& incX,
diff --git a/source/module_base/kernels/math_kernel_op.h b/source/module_base/kernels/math_kernel_op.h
@@ -190,7 +190,6 @@ template <typename T, typename Device> struct axpy_op {
   /// @brief Y = alpha * X + Y
   ///
   /// Input Parameters
-  /// \param d : the type of computing device
   /// \param N : array size
   /// \param alpha : input constant alpha
   /// \param X : input array X
@@ -200,7 +199,7 @@ template <typename T, typename Device> struct axpy_op {
   ///
   /// Output Parameters
   /// \param Y : output array Y
-  void operator()(const Device *d, const int &N, const T *alpha, const T *X,
+  void operator()(const int &N, const T *alpha, const T *X,
                   const int &incX, T *Y, const int &incY);
 };
 
diff --git a/source/module_base/kernels/rocm/math_kernel_op.hip.cu b/source/module_base/kernels/rocm/math_kernel_op.hip.cu
@@ -589,8 +589,7 @@ void constantvector_addORsub_constantVector_op<T, base_device::DEVICE_GPU>::oper
 }
 
 template <>
-void axpy_op<double, base_device::DEVICE_GPU>::operator()(const base_device::DEVICE_GPU* d,
-                                                          const int& N,
+void axpy_op<double, base_device::DEVICE_GPU>::operator()(const int& N,
                                                           const double* alpha,
                                                           const double* X,
                                                           const int& incX,
@@ -601,8 +600,7 @@ void axpy_op<double, base_device::DEVICE_GPU>::operator()(const base_device::DEV
 }
 
 template <>
-void axpy_op<std::complex<float>, base_device::DEVICE_GPU>::operator()(const base_device::DEVICE_GPU* d,
-                                                                       const int& N,
+void axpy_op<std::complex<float>, base_device::DEVICE_GPU>::operator()(const int& N,
                                                                        const std::complex<float>* alpha,
                                                                        const std::complex<float>* X,
                                                                        const int& incX,
@@ -613,8 +611,7 @@ void axpy_op<std::complex<float>, base_device::DEVICE_GPU>::operator()(const bas
 }
 
 template <>
-void axpy_op<std::complex<double>, base_device::DEVICE_GPU>::operator()(const base_device::DEVICE_GPU* d,
-                                                                        const int& N,
+void axpy_op<std::complex<double>, base_device::DEVICE_GPU>::operator()(const int& N,
                                                                         const std::complex<double>* alpha,
                                                                         const std::complex<double>* X,
                                                                         const int& incX,
diff --git a/source/module_base/kernels/test/math_kernel_test.cpp b/source/module_base/kernels/test/math_kernel_test.cpp
@@ -312,7 +312,7 @@ TEST_F(TestModuleHsolverMathKernel, constantvector_addORsub_constantVector_op_cp
 
 TEST_F(TestModuleHsolverMathKernel, axpy_op_cpu)
 {
-    axpy_op_cpu()(cpu_ctx, dim, &alpha_axpy, X_axpy.data(), 1, Y_axpy.data(), 1);
+    axpy_op_cpu()(dim, &alpha_axpy, X_axpy.data(), 1, Y_axpy.data(), 1);
     for (int i = 0; i < input.size(); i++)
     {
         EXPECT_LT(fabs(Y_axpy[i].imag() - output_axpy_op[i].imag()), 1e-8);
@@ -536,7 +536,7 @@ TEST_F(TestModuleHsolverMathKernel, axpy_op_gpu)
 
     // run
     ModuleBase::createGpuBlasHandle();
-    axpy_op_gpu()(gpu_ctx, dim, &alpha_axpy, X_axpy_dev, 1, Y_axpy_dev, 1);
+    axpy_op_gpu()(dim, &alpha_axpy, X_axpy_dev, 1, Y_axpy_dev, 1);
     ModuleBase::destoryBLAShandle();
 
     // syn the output data in GPU to CPU
diff --git a/source/module_hsolver/diago_cg.cpp b/source/module_hsolver/diago_cg.cpp
@@ -386,7 +386,7 @@ void DiagoCG<T, Device>::calc_gamma_cg(const int& iter,
         {
             pcg[i] -= norma * pphi_m[i];
         }*/
-        ModuleBase::axpy_op<T, Device>()(ctx_, this->n_basis_, &znorma, phi_m.data<T>(), 1, cg.data<T>(), 1);
+        ModuleBase::axpy_op<T, Device>()(this->n_basis_, &znorma, phi_m.data<T>(), 1, cg.data<T>(), 1);
     }
 }
 
diff --git a/source/module_hsolver/kernels/test/perf_math_kernel.cpp b/source/module_hsolver/kernels/test/perf_math_kernel.cpp
@@ -193,7 +193,7 @@ BENCHMARK_DEFINE_F(PerfModuleHsolverMathKernel, BM_constantvector_addORsub_const
 
 BENCHMARK_DEFINE_F(PerfModuleHsolverMathKernel, BM_axpy_op_cpu)(benchmark::State& state) {
     for (auto _ : state) {
-        axpy_op_cpu()(cpu_ctx, dim_vector, &zconstant_a, test_zvector_a, 1 ,test_zvector_b, 1);
+        axpy_op_cpu()(dim_vector, &zconstant_a, test_zvector_a, 1 ,test_zvector_b, 1);
     }
 }
 
@@ -262,7 +262,7 @@ BENCHMARK_DEFINE_F(PerfModuleHsolverMathKernel, BM_constantvector_addORsub_const
 
 BENCHMARK_DEFINE_F(PerfModuleHsolverMathKernel, BM_axpy_op_gpu)(benchmark::State& state) {
     for (auto _ : state) {
-        axpy_op_gpu()(gpu_ctx, dim_vector, &zconstant_a, test_zvector_a_gpu, 1 ,test_zvector_b_gpu, 1);
+        axpy_op_gpu()(dim_vector, &zconstant_a, test_zvector_a_gpu, 1 ,test_zvector_b_gpu, 1);
     }
 }
 
diff --git a/source/module_hsolver/para_linear_transform.cpp b/source/module_hsolver/para_linear_transform.cpp
@@ -141,7 +141,7 @@ void PLinearTransform<T, Device>::act(const T alpha, const T* A, const T* U, con
             }
             // sum all the results
             T one = 1.0;
-            ModuleBase::axpy_op<T, Device>()(ctx, ncolB * LDA, &one, B_tmp, 1, B, 1);
+            ModuleBase::axpy_op<T, Device>()(ncolB * LDA, &one, B_tmp, 1, B, 1);
         }
         delmem_dev_op()(U_tmp);
         delmem_dev_op()(B_tmp);

Original file line number	Diff line number	Diff line change
`@@ -312,7 +312,7 @@ TEST_F(TestModuleHsolverMathKernel, constantvector_addORsub_constantVector_op_cp`
`312`	`312`
`313`	`313`	`TEST_F(TestModuleHsolverMathKernel, axpy_op_cpu)`
`314`	`314`	`{`
`315`		`- axpy_op_cpu()(cpu_ctx, dim, &alpha_axpy, X_axpy.data(), 1, Y_axpy.data(), 1);`
	`315`	`+ axpy_op_cpu()(dim, &alpha_axpy, X_axpy.data(), 1, Y_axpy.data(), 1);`
`316`	`316`	`for (int i = 0; i < input.size(); i++)`
`317`	`317`	`{`
`318`	`318`	`EXPECT_LT(fabs(Y_axpy[i].imag() - output_axpy_op[i].imag()), 1e-8);`
`@@ -536,7 +536,7 @@ TEST_F(TestModuleHsolverMathKernel, axpy_op_gpu)`
`536`	`536`
`537`	`537`	`// run`
`538`	`538`	`ModuleBase::createGpuBlasHandle();`
`539`		`- axpy_op_gpu()(gpu_ctx, dim, &alpha_axpy, X_axpy_dev, 1, Y_axpy_dev, 1);`
	`539`	`+ axpy_op_gpu()(dim, &alpha_axpy, X_axpy_dev, 1, Y_axpy_dev, 1);`
`540`	`540`	`ModuleBase::destoryBLAShandle();`
`541`	`541`
`542`	`542`	`// syn the output data in GPU to CPU`
Original file line number	Diff line number	Diff line change
`@@ -386,7 +386,7 @@ void DiagoCG<T, Device>::calc_gamma_cg(const int& iter,`
`386`	`386`	`{`
`387`	`387`	`pcg[i] -= norma * pphi_m[i];`
`388`	`388`	`}*/`
`389`		`- ModuleBase::axpy_op<T, Device>()(ctx_, this->n_basis_, &znorma, phi_m.data<T>(), 1, cg.data<T>(), 1);`
	`389`	`+ ModuleBase::axpy_op<T, Device>()(this->n_basis_, &znorma, phi_m.data<T>(), 1, cg.data<T>(), 1);`
`390`	`390`	`}`
`391`	`391`	`}`
`392`	`392`
Original file line number	Diff line number	Diff line change
`@@ -193,7 +193,7 @@ BENCHMARK_DEFINE_F(PerfModuleHsolverMathKernel, BM_constantvector_addORsub_const`
`193`	`193`
`194`	`194`	`BENCHMARK_DEFINE_F(PerfModuleHsolverMathKernel, BM_axpy_op_cpu)(benchmark::State& state) {`
`195`	`195`	`for (auto _ : state) {`
`196`		`- axpy_op_cpu()(cpu_ctx, dim_vector, &zconstant_a, test_zvector_a, 1 ,test_zvector_b, 1);`
	`196`	`+ axpy_op_cpu()(dim_vector, &zconstant_a, test_zvector_a, 1 ,test_zvector_b, 1);`
`197`	`197`	`}`
`198`	`198`	`}`
`199`	`199`
`@@ -262,7 +262,7 @@ BENCHMARK_DEFINE_F(PerfModuleHsolverMathKernel, BM_constantvector_addORsub_const`
`262`	`262`
`263`	`263`	`BENCHMARK_DEFINE_F(PerfModuleHsolverMathKernel, BM_axpy_op_gpu)(benchmark::State& state) {`
`264`	`264`	`for (auto _ : state) {`
`265`		`- axpy_op_gpu()(gpu_ctx, dim_vector, &zconstant_a, test_zvector_a_gpu, 1 ,test_zvector_b_gpu, 1);`
	`265`	`+ axpy_op_gpu()(dim_vector, &zconstant_a, test_zvector_a_gpu, 1 ,test_zvector_b_gpu, 1);`
`266`	`266`	`}`
`267`	`267`	`}`
`268`	`268`
Original file line number	Diff line number	Diff line change
`@@ -141,7 +141,7 @@ void PLinearTransform<T, Device>::act(const T alpha, const T* A, const T* U, con`
`141`	`141`	`}`
`142`	`142`	`// sum all the results`
`143`	`143`	`T one = 1.0;`
`144`		`- ModuleBase::axpy_op<T, Device>()(ctx, ncolB * LDA, &one, B_tmp, 1, B, 1);`
	`144`	`+ ModuleBase::axpy_op<T, Device>()(ncolB * LDA, &one, B_tmp, 1, B, 1);`
`145`	`145`	`}`
`146`	`146`	`delmem_dev_op()(U_tmp);`
`147`	`147`	`delmem_dev_op()(B_tmp);`