Remove the device-context (ctx) argument from vector_mul_vector_op and update its callers

Critsium-xy · Critsium-xy · commit a7766bf769ec · 2025-02-11T17:29:34.000+08:00
diff --git a/source/module_base/blas_connector.cpp b/source/module_base/blas_connector.cpp
@@ -668,7 +668,7 @@ void vector_mul_vector(const int& dim, T* result, const T* vector1, const T* vec
 	}
 	else if (device_type == base_device::AbacusDevice_t::GpuDevice){
 #ifdef __CUDA
-		ModuleBase::vector_mul_vector_op<T, base_device::DEVICE_GPU>()(gpu_ctx, dim, result, vector1, vector2);
+		ModuleBase::vector_mul_vector_op<T, base_device::DEVICE_GPU>()(dim, result, vector1, vector2);
 #endif
 	}
 }
diff --git a/source/module_base/kernels/cuda/math_kernel_op.cu b/source/module_base/kernels/cuda/math_kernel_op.cu
@@ -552,8 +552,7 @@ void vector_div_constant_op<std::complex<double>, base_device::DEVICE_GPU>::oper
 }
 // vector operator: result[i] = vector1[i](not complex) * vector2[i](not complex)
 template <>
-void vector_mul_vector_op<double, base_device::DEVICE_GPU>::operator()(const base_device::DEVICE_GPU* d,
-                                                                       const int& dim,
+void vector_mul_vector_op<double, base_device::DEVICE_GPU>::operator()(const int& dim,
                                                                        double* result,
                                                                        const double* vector1,
                                                                        const double* vector2)
@@ -566,8 +565,7 @@ void vector_mul_vector_op<double, base_device::DEVICE_GPU>::operator()(const bas
 }
 // vector operator: result[i] = vector1[i](complex) * vector2[i](not complex)
 template <typename FPTYPE>
-inline void vector_mul_vector_complex_wrapper(const base_device::DEVICE_GPU* d,
-                                              const int& dim,
+inline void vector_mul_vector_complex_wrapper(const int& dim,
                                               std::complex<FPTYPE>* result,
                                               const std::complex<FPTYPE>* vector1,
                                               const FPTYPE* vector2)
@@ -581,23 +579,21 @@ inline void vector_mul_vector_complex_wrapper(const base_device::DEVICE_GPU* d,
     cudaCheckOnDebug();
 }
 template <>
-void vector_mul_vector_op<std::complex<float>, base_device::DEVICE_GPU>::operator()(const base_device::DEVICE_GPU* d,
-                                                                                    const int& dim,
+void vector_mul_vector_op<std::complex<float>, base_device::DEVICE_GPU>::operator()(const int& dim,
                                                                                     std::complex<float>* result,
                                                                                     const std::complex<float>* vector1,
                                                                                     const float* vector2)
 {
-    vector_mul_vector_complex_wrapper(d, dim, result, vector1, vector2);
+    vector_mul_vector_complex_wrapper(dim, result, vector1, vector2);
 }
 template <>
 void vector_mul_vector_op<std::complex<double>, base_device::DEVICE_GPU>::operator()(
-    const base_device::DEVICE_GPU* d,
     const int& dim,
     std::complex<double>* result,
     const std::complex<double>* vector1,
     const double* vector2)
 {
-    vector_mul_vector_complex_wrapper(d, dim, result, vector1, vector2);
+    vector_mul_vector_complex_wrapper(dim, result, vector1, vector2);
 }
 
 // vector operator: result[i] = vector1[i](not complex) / vector2[i](not complex)
diff --git a/source/module_base/kernels/math_kernel_op.cpp b/source/module_base/kernels/math_kernel_op.cpp
@@ -167,7 +167,7 @@ template <typename T>
 struct vector_mul_vector_op<T, base_device::DEVICE_CPU>
 {
     using Real = typename GetTypeReal<T>::type;
-    void operator()(const base_device::DEVICE_CPU* d, const int& dim, T* result, const T* vector1, const Real* vector2)
+    void operator()(const int& dim, T* result, const T* vector1, const Real* vector2)
     {
 #ifdef _OPENMP
 #pragma omp parallel for schedule(static, 4096 / sizeof(Real))
diff --git a/source/module_base/kernels/math_kernel_op.h b/source/module_base/kernels/math_kernel_op.h
@@ -140,14 +140,13 @@ template <typename T, typename Device> struct vector_mul_vector_op {
   /// @brief result[i] = vector1[i](complex) * vector2[i](not complex)
   ///
   /// Input Parameters
-  /// \param d : the type of computing device
   /// \param dim : array size
   /// \param vector1 : input array A
   /// \param vector2 : input array B
   ///
   /// Output Parameters
   /// \param result : output array
-  void operator()(const Device *d, const int &dim, T *result, const T *vector1,
+  void operator()(const int &dim, T *result, const T *vector1,
                   const Real *vector2);
 };
 
@@ -359,7 +358,7 @@ struct vector_div_constant_op<T, base_device::DEVICE_GPU> {
 // vector operator: result[i] = vector1[i](complex) * vector2[i](not complex)
 template <typename T> struct vector_mul_vector_op<T, base_device::DEVICE_GPU> {
   using Real = typename GetTypeReal<T>::type;
-  void operator()(const base_device::DEVICE_GPU *d, const int &dim, T *result,
+  void operator()(const int &dim, T *result,
                   const T *vector1, const Real *vector2);
 };
 
diff --git a/source/module_base/kernels/rocm/math_kernel_op.hip.cu b/source/module_base/kernels/rocm/math_kernel_op.hip.cu
@@ -475,8 +475,7 @@ void vector_div_constant_op<std::complex<double>, base_device::DEVICE_GPU>::oper
 }
 // vector operator: result[i] = vector1[i](not complex) * vector2[i](not complex)
 template <>
-void vector_mul_vector_op<double, base_device::DEVICE_GPU>::operator()(const base_device::DEVICE_GPU* d,
-                                                                       const int& dim,
+void vector_mul_vector_op<double, base_device::DEVICE_GPU>::operator()(const int& dim,
                                                                        double* result,
                                                                        const double* vector1,
                                                                        const double* vector2)
@@ -490,8 +489,7 @@ void vector_mul_vector_op<double, base_device::DEVICE_GPU>::operator()(const bas
 
 // vector operator: result[i] = vector1[i](complex) * vector2[i](not complex)
 template <typename FPTYPE>
-inline void vector_mul_vector_complex_wrapper(const base_device::DEVICE_GPU* d,
-                                              const int& dim,
+inline void vector_mul_vector_complex_wrapper(const int& dim,
                                               std::complex<FPTYPE>* result,
                                               const std::complex<FPTYPE>* vector1,
                                               const FPTYPE* vector2)
@@ -505,23 +503,21 @@ inline void vector_mul_vector_complex_wrapper(const base_device::DEVICE_GPU* d,
     hipCheckOnDebug();
 }
 template <>
-void vector_mul_vector_op<std::complex<float>, base_device::DEVICE_GPU>::operator()(const base_device::DEVICE_GPU* d,
-                                                                                    const int& dim,
+void vector_mul_vector_op<std::complex<float>, base_device::DEVICE_GPU>::operator()(const int& dim,
                                                                                     std::complex<float>* result,
                                                                                     const std::complex<float>* vector1,
                                                                                     const float* vector2)
 {
-    vector_mul_vector_complex_wrapper(d, dim, result, vector1, vector2);
+    vector_mul_vector_complex_wrapper(dim, result, vector1, vector2);
 }
 template <>
 void vector_mul_vector_op<std::complex<double>, base_device::DEVICE_GPU>::operator()(
-    const base_device::DEVICE_GPU* d,
     const int& dim,
     std::complex<double>* result,
     const std::complex<double>* vector1,
     const double* vector2)
 {
-    vector_mul_vector_complex_wrapper(d, dim, result, vector1, vector2);
+    vector_mul_vector_complex_wrapper(dim, result, vector1, vector2);
 }
 // vector operator: result[i] = vector1[i](complex) / vector2[i](not complex)
 template <>
diff --git a/source/module_base/kernels/test/math_kernel_test.cpp b/source/module_base/kernels/test/math_kernel_test.cpp
@@ -275,7 +275,7 @@ TEST_F(TestModuleHsolverMathKernel, vector_div_constant_op_cpu)
 TEST_F(TestModuleHsolverMathKernel, vector_mul_vector_op_cpu)
 {
     std::vector<std::complex<double>> output(input.size());
-    vector_mul_vector_op_cpu()(cpu_ctx, dim, output.data(), input.data(), input_double.data());
+    vector_mul_vector_op_cpu()(dim, output.data(), input.data(), input_double.data());
     for (int i = 0; i < input.size(); i++)
     {
         EXPECT_LT(fabs(output[i].imag() - output_vector_mul_vector_op[i].imag()), 1e-8);
@@ -428,7 +428,7 @@ TEST_F(TestModuleHsolverMathKernel, vector_mul_vector_op_gpu)
     synchronize_memory_op_double()(input_double_dev, input_double.data(), input.size());
 
     // run
-    vector_mul_vector_op_gpu()(gpu_ctx, dim, output_dev, input_dev, input_double_dev);
+    vector_mul_vector_op_gpu()(dim, output_dev, input_dev, input_double_dev);
 
     // syn the output data in GPU to CPU
     synchronize_memory_op_gpu()(output.data(), output_dev, output.size());
diff --git a/source/module_hamilt_pw/hamilt_pwdft/operator_pw/meta_pw.cpp b/source/module_hamilt_pw/hamilt_pwdft/operator_pw/meta_pw.cpp
@@ -70,7 +70,7 @@ void Meta<OperatorPW<T, Device>>::act(
             wfcpw->recip_to_real(this->ctx, this->porter, this->porter, this->ik);
 
             if(this->vk_col != 0) {
-                vector_mul_vector_op()(this->ctx, this->vk_col, this->porter, this->porter, this->vk + current_spin * this->vk_col);
+                vector_mul_vector_op()(this->vk_col, this->porter, this->porter, this->vk + current_spin * this->vk_col);
             }
 
             wfcpw->real_to_recip(this->ctx, this->porter, this->porter, this->ik);
diff --git a/source/module_hsolver/diago_cg.cpp b/source/module_hsolver/diago_cg.cpp
@@ -342,7 +342,7 @@ void DiagoCG<T, Device>::calc_gamma_cg(const int& iter,
     // }
     // denghui replace this 20221106
     // TODO: use GPU precondition instead
-    ModuleBase::vector_mul_vector_op<T, Device>()(ctx_, this->n_basis_, g0.data<T>(), scg.data<T>(), prec.data<Real>());
+    ModuleBase::vector_mul_vector_op<T, Device>()(this->n_basis_, g0.data<T>(), scg.data<T>(), prec.data<Real>());
 
     // (3) Update gg_now!
     // gg_now = < g|P|scg > = < g|g0 >
diff --git a/source/module_hsolver/diago_dav_subspace.cpp b/source/module_hsolver/diago_dav_subspace.cpp
@@ -309,8 +309,7 @@ void Diago_DavSubspace<T, Device>::cal_grad(const HPsiFunc& hpsi_func,
         {
             syncmem_var_h2d_op()(e_temp_hd, e_temp_cpu.data(), nbase);
         }
-        ModuleBase::vector_mul_vector_op<T, Device>()(this->ctx,
-                                                      nbase,
+        ModuleBase::vector_mul_vector_op<T, Device>()(nbase,
                                                       vcc + m * this->nbase_x,
                                                       vcc + m * this->nbase_x,
                                                       e_temp_hd);
diff --git a/source/module_hsolver/diago_david.cpp b/source/module_hsolver/diago_david.cpp
@@ -416,8 +416,7 @@ void DiagoDavid<T, Device>::cal_grad(const HPsiFunc& hpsi_func,
             Real* e_temp_gpu = nullptr;
             resmem_var_op()(e_temp_gpu, nbase);
             syncmem_var_h2d_op()(e_temp_gpu, e_temp_cpu.data(), nbase);
-            ModuleBase::vector_mul_vector_op<T, Device>()(this->ctx,
-                                                          nbase,
+            ModuleBase::vector_mul_vector_op<T, Device>()(nbase,
                                                           vc_ev_vector + m * nbase,
                                                           vc_ev_vector + m * nbase,
                                                           e_temp_gpu);
@@ -426,8 +425,7 @@ void DiagoDavid<T, Device>::cal_grad(const HPsiFunc& hpsi_func,
         }
         else
         {
-            ModuleBase::vector_mul_vector_op<T, Device>()(this->ctx,
-                                                          nbase,
+            ModuleBase::vector_mul_vector_op<T, Device>()(nbase,
                                                           vc_ev_vector + m * nbase,
                                                           vc_ev_vector + m * nbase,
                                                           e_temp_cpu.data());
diff --git a/source/module_hsolver/kernels/test/perf_math_kernel.cpp b/source/module_hsolver/kernels/test/perf_math_kernel.cpp
@@ -175,7 +175,7 @@ BENCHMARK_DEFINE_F(PerfModuleHsolverMathKernel, BM_vector_div_constant_op_cpu)(b
 
 BENCHMARK_DEFINE_F(PerfModuleHsolverMathKernel, BM_vector_mul_vector_op_cpu)(benchmark::State& state) {
     for (auto _ : state) {
-        vector_mul_vector_op_cpu()(cpu_ctx, dim_vector, result_zvector, test_zvector_a, test_dvector_a);
+        vector_mul_vector_op_cpu()(dim_vector, result_zvector, test_zvector_a, test_dvector_a);
     }
 }
 
@@ -244,7 +244,7 @@ BENCHMARK_DEFINE_F(PerfModuleHsolverMathKernel, BM_vector_div_constant_op_gpu)(b
 
 BENCHMARK_DEFINE_F(PerfModuleHsolverMathKernel, BM_vector_mul_vector_op_gpu)(benchmark::State& state) {
     for (auto _ : state) {
-        vector_mul_vector_op_gpu()(gpu_ctx, dim_vector, result_zvector_gpu, test_zvector_a_gpu, test_dvector_a_gpu);
+        vector_mul_vector_op_gpu()(dim_vector, result_zvector_gpu, test_zvector_a_gpu, test_dvector_a_gpu);
     }
 }
 
diff --git a/source/module_lr/operator_casida/operator_lr_diag.h b/source/module_lr/operator_casida/operator_lr_diag.h
@@ -46,8 +46,7 @@ namespace LR
             const bool is_first_node = false)const override
         {
             ModuleBase::TITLE("OperatorLRDiag", "act");
-            ModuleBase::vector_mul_vector_op<T, Device>()(this->ctx,
-                nk * pX.get_local_size(),   // local size of particle-hole basis
+            ModuleBase::vector_mul_vector_op<T, Device>()(nk * pX.get_local_size(),   // local size of particle-hole basis
                 hpsi,
                 psi_in,
                 this->eig_ks_diff.c);

Original file line number	Diff line number	Diff line change
`@@ -668,7 +668,7 @@ void vector_mul_vector(const int& dim, T* result, const T* vector1, const T* vec`
`668`	`668`	`}`
`669`	`669`	`else if (device_type == base_device::AbacusDevice_t::GpuDevice){`
`670`	`670`	`#ifdef __CUDA`
`671`		`- ModuleBase::vector_mul_vector_op<T, base_device::DEVICE_GPU>()(gpu_ctx, dim, result, vector1, vector2);`
	`671`	`+ ModuleBase::vector_mul_vector_op<T, base_device::DEVICE_GPU>()(dim, result, vector1, vector2);`
`672`	`672`	`#endif`
`673`	`673`	`}`
`674`	`674`	`}`
Original file line number	Diff line number	Diff line change
`@@ -167,7 +167,7 @@ template <typename T>`
`167`	`167`	`struct vector_mul_vector_op<T, base_device::DEVICE_CPU>`
`168`	`168`	`{`
`169`	`169`	`using Real = typename GetTypeReal<T>::type;`
`170`		`- void operator()(const base_device::DEVICE_CPU* d, const int& dim, T* result, const T* vector1, const Real* vector2)`
	`170`	`+ void operator()(const int& dim, T* result, const T* vector1, const Real* vector2)`
`171`	`171`	`{`
`172`	`172`	`#ifdef _OPENMP`
`173`	`173`	`#pragma omp parallel for schedule(static, 4096 / sizeof(Real))`
Original file line number	Diff line number	Diff line change
`@@ -70,7 +70,7 @@ void Meta<OperatorPW<T, Device>>::act(`
`70`	`70`	`wfcpw->recip_to_real(this->ctx, this->porter, this->porter, this->ik);`
`71`	`71`
`72`	`72`	`if(this->vk_col != 0) {`
`73`		`- vector_mul_vector_op()(this->ctx, this->vk_col, this->porter, this->porter, this->vk + current_spin * this->vk_col);`
	`73`	`+ vector_mul_vector_op()(this->vk_col, this->porter, this->porter, this->vk + current_spin * this->vk_col);`
`74`	`74`	`}`
`75`	`75`
`76`	`76`	`wfcpw->real_to_recip(this->ctx, this->porter, this->porter, this->ik);`
Original file line number	Diff line number	Diff line change
`@@ -309,8 +309,7 @@ void Diago_DavSubspace<T, Device>::cal_grad(const HPsiFunc& hpsi_func,`
`309`	`309`	`{`
`310`	`310`	`syncmem_var_h2d_op()(e_temp_hd, e_temp_cpu.data(), nbase);`
`311`	`311`	`}`
`312`		`- ModuleBase::vector_mul_vector_op<T, Device>()(this->ctx,`
`313`		`- nbase,`
	`312`	`+ ModuleBase::vector_mul_vector_op<T, Device>()(nbase,`
`314`	`313`	`vcc + m * this->nbase_x,`
`315`	`314`	`vcc + m * this->nbase_x,`
`316`	`315`	`e_temp_hd);`