Remove ctx in gemm_op

Critsium-xy · Critsium-xy · commit 52fc451d40ee · 2025-02-12T13:20:06.000+08:00
diff --git a/source/module_base/kernels/cuda/math_kernel_op.cu b/source/module_base/kernels/cuda/math_kernel_op.cu
@@ -793,8 +793,7 @@ void scal_op<double, base_device::DEVICE_GPU>::operator()(const int& N,
 }
 
 template <>
-void gemm_op<float, base_device::DEVICE_GPU>::operator()(const base_device::DEVICE_GPU* d,
-                                                         const char& transa,
+void gemm_op<float, base_device::DEVICE_GPU>::operator()(const char& transa,
                                                          const char& transb,
                                                          const int& m,
                                                          const int& n,
@@ -814,8 +813,7 @@ void gemm_op<float, base_device::DEVICE_GPU>::operator()(const base_device::DEVI
 }
 
 template <>
-void gemm_op<double, base_device::DEVICE_GPU>::operator()(const base_device::DEVICE_GPU* d,
-                                                          const char& transa,
+void gemm_op<double, base_device::DEVICE_GPU>::operator()(const char& transa,
                                                           const char& transb,
                                                           const int& m,
                                                           const int& n,
@@ -834,8 +832,7 @@ void gemm_op<double, base_device::DEVICE_GPU>::operator()(const base_device::DEV
     cublasErrcheck(cublasDgemm(cublas_handle, cutransA, cutransB, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc));
 }
 template <>
-void gemm_op<std::complex<float>, base_device::DEVICE_GPU>::operator()(const base_device::DEVICE_GPU* d,
-                                                                       const char& transa,
+void gemm_op<std::complex<float>, base_device::DEVICE_GPU>::operator()(const char& transa,
                                                                        const char& transb,
                                                                        const int& m,
                                                                        const int& n,
@@ -855,8 +852,7 @@ void gemm_op<std::complex<float>, base_device::DEVICE_GPU>::operator()(const bas
 }
 
 template <>
-void gemm_op<std::complex<double>, base_device::DEVICE_GPU>::operator()(const base_device::DEVICE_GPU* d,
-                                                                        const char& transa,
+void gemm_op<std::complex<double>, base_device::DEVICE_GPU>::operator()(const char& transa,
                                                                         const char& transb,
                                                                         const int& m,
                                                                         const int& n,
diff --git a/source/module_base/kernels/math_kernel_op.cpp b/source/module_base/kernels/math_kernel_op.cpp
@@ -264,8 +264,7 @@ struct axpy_op<T, base_device::DEVICE_CPU>
 template <typename T>
 struct gemm_op<T, base_device::DEVICE_CPU>
 {
-    void operator()(const base_device::DEVICE_CPU* /*ctx*/,
-                    const char& transa,
+    void operator()(const char& transa,
                     const char& transb,
                     const int& m,
                     const int& n,
@@ -287,8 +286,7 @@ struct gemm_op<T, base_device::DEVICE_CPU>
 template <typename T>
 struct gemm_op_mt<T, base_device::DEVICE_CPU>
 {
-    void operator()(const base_device::DEVICE_CPU* /*ctx*/,
-                    const char& transa,
+    void operator()(const char& transa,
                     const char& transb,
                     const int& m,
                     const int& n,
diff --git a/source/module_base/kernels/math_kernel_op.h b/source/module_base/kernels/math_kernel_op.h
@@ -233,7 +233,6 @@ template <typename T, typename Device> struct gemm_op {
   /// @brief C = alpha * op(A) * op(B) + beta * C
   ///
   /// Input Parameters
-  /// \param d : the type of computing device
   /// \param transa : whether to transpose matrix A
   /// \param transb : whether to transpose matrix B
   /// \param m : first dimension of matrix multiplication
@@ -250,7 +249,7 @@ template <typename T, typename Device> struct gemm_op {
   ///
   /// Output Parameters
   /// \param c : output matrix C
-  void operator()(const Device *d, const char &transa, const char &transb,
+  void operator()(const char &transa, const char &transb,
                   const int &m, const int &n, const int &k, const T *alpha,
                   const T *a, const int &lda, const T *b, const int &ldb,
                   const T *beta, T *c, const int &ldc);
@@ -262,7 +261,6 @@ template <typename T, typename Device> struct gemm_op_mt {
   /// @brief C = alpha * op(A) * op(B) + beta * C
   ///
   /// Input Parameters
-  /// \param d : the type of computing device
   /// \param transa : whether to transpose matrix A
   /// \param transb : whether to transpose matrix B
   /// \param m : first dimension of matrix multiplication
@@ -279,7 +277,7 @@ template <typename T, typename Device> struct gemm_op_mt {
   ///
   /// Output Parameters
   /// \param c : output matrix C
-  void operator()(const Device *d, const char &transa, const char &transb,
+  void operator()(const char &transa, const char &transb,
                   const int &m, const int &n, const int &k, const T *alpha,
                   const T *a, const int &lda, const T *b, const int &ldb,
                   const T *beta, T *c, const int &ldc);
diff --git a/source/module_base/kernels/rocm/math_kernel_op.hip.cu b/source/module_base/kernels/rocm/math_kernel_op.hip.cu
@@ -711,8 +711,7 @@ void scal_op<double, base_device::DEVICE_GPU>::operator()(const int& N,
 }
 
 template <>
-void gemm_op<float, base_device::DEVICE_GPU>::operator()(const base_device::DEVICE_GPU* d,
-                                                         const char& transa,
+void gemm_op<float, base_device::DEVICE_GPU>::operator()(const char& transa,
                                                          const char& transb,
                                                          const int& m,
                                                          const int& n,
@@ -732,8 +731,7 @@ void gemm_op<float, base_device::DEVICE_GPU>::operator()(const base_device::DEVI
 }
 
 template <>
-void gemm_op<double, base_device::DEVICE_GPU>::operator()(const base_device::DEVICE_GPU* d,
-                                                          const char& transa,
+void gemm_op<double, base_device::DEVICE_GPU>::operator()(const char& transa,
                                                           const char& transb,
                                                           const int& m,
                                                           const int& n,
@@ -753,8 +751,7 @@ void gemm_op<double, base_device::DEVICE_GPU>::operator()(const base_device::DEV
 }
 
 template <>
-void gemm_op<std::complex<float>, base_device::DEVICE_GPU>::operator()(const base_device::DEVICE_GPU* d,
-                                                                       const char& transa,
+void gemm_op<std::complex<float>, base_device::DEVICE_GPU>::operator()(const char& transa,
                                                                        const char& transb,
                                                                        const int& m,
                                                                        const int& n,
@@ -774,8 +771,7 @@ void gemm_op<std::complex<float>, base_device::DEVICE_GPU>::operator()(const bas
 }
 
 template <>
-void gemm_op<std::complex<double>, base_device::DEVICE_GPU>::operator()(const base_device::DEVICE_GPU* d,
-                                                                        const char& transa,
+void gemm_op<std::complex<double>, base_device::DEVICE_GPU>::operator()(const char& transa,
                                                                         const char& transb,
                                                                         const int& m,
                                                                         const int& n,
diff --git a/source/module_base/para_gemm.cpp b/source/module_base/para_gemm.cpp
@@ -137,7 +137,7 @@ void PGemmCN<T, Device>::multiply_single(const T alpha, const T* A, const T* B,
 #else
     T real_beta = beta;
 #endif
-    ModuleBase::gemm_op<T, Device>()(ctx, 'C', 'N', ncolA, ncolB, nrow, &alpha, A, LDA, B, LDB, &real_beta, C, LDC);
+    ModuleBase::gemm_op<T, Device>()('C', 'N', ncolA, ncolB, nrow, &alpha, A, LDA, B, LDB, &real_beta, C, LDC);
 #ifdef __MPI
     if (this->row_nproc > 1)
     {
@@ -201,8 +201,7 @@ void PGemmCN<T, Device>::multiply_col(const T alpha, const T* A, const T* B, con
         T* C_start = C_local + shift;
         if (col_rank == ip)
         {
-            ModuleBase::gemm_op<T, Device>()(ctx,
-                                             'C',
+            ModuleBase::gemm_op<T, Device>()('C',
                                              'N',
                                              ncolA,
                                              ncolB,
@@ -224,8 +223,7 @@ void PGemmCN<T, Device>::multiply_col(const T alpha, const T* A, const T* B, con
             MPI_Status status;
             Parallel_Common::recv_dev<T, Device>(Atmp_device, size, ip, 0, col_world, &status, B_tmp.data());
             MPI_Wait(&requests[ip], &status);
-            ModuleBase::gemm_op<T, Device>()(ctx,
-                                             'C',
+            ModuleBase::gemm_op<T, Device>()('C',
                                              'N',
                                              m,
                                              ncolB,
@@ -321,8 +319,7 @@ void PGemmCN<T, Device>::multiply_row(const T alpha, const T* A, const T* B, con
         T* C_start = C + shift;
         if (col_rank == ip)
         {
-            ModuleBase::gemm_op<T, Device>()(ctx,
-                                             'C',
+            ModuleBase::gemm_op<T, Device>()('C',
                                              'N',
                                              ncolA,
                                              ncolB,
@@ -344,8 +341,7 @@ void PGemmCN<T, Device>::multiply_row(const T alpha, const T* A, const T* B, con
             MPI_Status status;
             Parallel_Common::recv_dev<T, Device>(Btmp_device, size, ip, 0, col_world, &status, B_tmp.data());
             MPI_Wait(&requests[ip], &status);
-            ModuleBase::gemm_op<T, Device>()(ctx,
-                                             'C',
+            ModuleBase::gemm_op<T, Device>()('C',
                                              'N',
                                              ncolA,
                                              m,
diff --git a/source/module_base/test_parallel/test_para_gemm.cpp b/source/module_base/test_parallel/test_para_gemm.cpp
@@ -141,8 +141,7 @@ class PgemmTest : public ::testing::Test
             const base_device::DEVICE_CPU* ctx = {};
             char transC = 'C';
             char transN = 'N';
-            ModuleBase::gemm_op<T, base_device::DEVICE_CPU>()(ctx,
-                                                              transC,
+            ModuleBase::gemm_op<T, base_device::DEVICE_CPU>()(transC,
                                                               transN,
                                                               ncolA_global,
                                                               ncolB_global,
diff --git a/source/module_elecstate/elecstate_pw.cpp b/source/module_elecstate/elecstate_pw.cpp
@@ -316,8 +316,7 @@ void ElecStatePW<T, Device>::cal_becsum(const psi::Psi<T, Device>& psi)
         }
         else
         {
-            gemm_op()(this->ctx,
-                      transa,
+            gemm_op()(transa,
                       transb,
                       this->ppcell->nkb,
                       nbands,
@@ -367,8 +366,7 @@ void ElecStatePW<T, Device>::cal_becsum(const psi::Psi<T, Device>& psi)
 
                         char transa = 'C';
                         char transb = 'N';
-                        gemm_op()(this->ctx,
-                                  transa,
+                        gemm_op()(transa,
                                   transb,
                                   atom->ncpp.nh,
                                   atom->ncpp.nh,
@@ -517,8 +515,7 @@ void ElecStatePW<T, Device>::addusdens_g(const Real* becsum, T** rhog)
                 // sum over atoms
                 char transa = 'N';
                 char transb = 'T';
-                gemm_op()(this->ctx,
-                          transa,
+                gemm_op()(transa,
                           transb,
                           npw,
                           nij,
diff --git a/source/module_hamilt_lcao/module_deltaspin/cal_mw_from_lambda.cpp b/source/module_hamilt_lcao/module_deltaspin/cal_mw_from_lambda.cpp
@@ -85,7 +85,6 @@ void spinconstrain::SpinConstrain<std::complex<double>>::calculate_delta_hcc(std
 #if ((defined __CUDA) || (defined __ROCM))
         base_device::DEVICE_GPU* ctx = {};
         ModuleBase::gemm_op<std::complex<double>, base_device::DEVICE_GPU>()(
-            ctx,
             transa,
             transb,
             nbands,
@@ -109,7 +108,6 @@ void spinconstrain::SpinConstrain<std::complex<double>>::calculate_delta_hcc(std
     {
         base_device::DEVICE_CPU* ctx = {};
         ModuleBase::gemm_op<std::complex<double>, base_device::DEVICE_CPU>()(
-            ctx,
             transa,
             transb,
             nbands,
diff --git a/source/module_hamilt_pw/hamilt_pwdft/fs_nonlocal_tools.cpp b/source/module_hamilt_pw/hamilt_pwdft/fs_nonlocal_tools.cpp
@@ -269,8 +269,7 @@ void FS_Nonlocal_tools<FPTYPE, Device>::cal_becp(const int& ik,
     const char transb = 'N';
     const int npm_npol = npm * npol;
     const int index0 = nbd0 * npol * nkb;
-    gemm_op()(this->ctx,
-              transa,
+    gemm_op()(transa,
               transb,
               this->nkb,
               npm_npol,
@@ -433,8 +432,7 @@ void FS_Nonlocal_tools<FPTYPE, Device>::cal_dbecp_s(const int& ik,
     // 2.b calculate dbecp = dbecp_noevc * psi
     const char transa = 'C';
     const char transb = 'N';
-    gemm_op()(this->ctx,
-              transa,
+    gemm_op()(transa,
               transb,
               this->nkb,
               npm_npol,
@@ -587,8 +585,7 @@ void FS_Nonlocal_tools<FPTYPE, Device>::cal_dbecp_f(const int& ik,
     // do gemm to get dbecp and revert the ppcell_vkb for next ipol
     const char transa = 'C';
     const char transb = 'N';
-    gemm_op()(this->ctx,
-              transa,
+    gemm_op()(transa,
               transb,
               this->nkb,
               npm_npol,
diff --git a/source/module_hamilt_pw/hamilt_pwdft/hamilt_pw.cpp b/source/module_hamilt_pw/hamilt_pwdft/hamilt_pw.cpp
@@ -274,8 +274,7 @@ void HamiltPW<T, Device>::sPsi(const T* psi_in, // psi
             }
             else
             {
-                gemm_op()(this->ctx,
-                          transa,
+                gemm_op()(transa,
                           transb,
                           this->ppcell->nkb,
                           nbands,
@@ -328,8 +327,7 @@ void HamiltPW<T, Device>::sPsi(const T* psi_in, // psi
                     for (int ia = 0; ia < atoms->na; ia++)
                     {
                         const int iat = ucell->itia2iat(it, ia);
-                        gemm_op()(this->ctx,
-                                  transa,
+                        gemm_op()(transa,
                                   transb,
                                   nh,
                                   nbands,
@@ -364,8 +362,7 @@ void HamiltPW<T, Device>::sPsi(const T* psi_in, // psi
             }
             else
             {
-                gemm_op()(this->ctx,
-                          transa,
+                gemm_op()(transa,
                           transb,
                           npw,
                           nbands,
diff --git a/source/module_hamilt_pw/hamilt_pwdft/onsite_proj_tools.cpp b/source/module_hamilt_pw/hamilt_pwdft/onsite_proj_tools.cpp
@@ -425,8 +425,7 @@ void Onsite_Proj_tools<FPTYPE, Device>::cal_becp(int ik,
     const char transa = 'C';
     const char transb = 'N';
     int npm_npol = npm * npol;
-    gemm_op()(this->ctx,
-              transa,
+    gemm_op()(transa,
               transb,
               this->nkb,
               npm_npol, // nbands(occ)*npol
@@ -576,8 +575,7 @@ void Onsite_Proj_tools<FPTYPE, Device>::cal_dbecp_s(int ik, int npm, int ipol, i
     const char transa = 'C';
     const char transb = 'N';
 
-    gemm_op()(this->ctx,
-              transa,
+    gemm_op()(transa,
               transb,
               nkb,
               npm_npol,
@@ -655,8 +653,7 @@ void Onsite_Proj_tools<FPTYPE, Device>::cal_dbecp_f(int ik, int npm, int ipol)
     std::complex<FPTYPE>* dbecp_ptr = this->dbecp + ipol * size_becp; // [out]
     const char transa = 'C';
     const char transb = 'N';
-    gemm_op()(this->ctx,
-              transa,
+    gemm_op()(transa,
               transb,
               this->nkb,
               npm_npol,
diff --git a/source/module_hamilt_pw/hamilt_pwdft/operator_pw/nonlocal_pw.cpp b/source/module_hamilt_pw/hamilt_pwdft/operator_pw/nonlocal_pw.cpp
@@ -188,7 +188,6 @@ void Nonlocal<OperatorPW<T, Device>>::add_nonlocal_pp(T *hpsi_in, const T *becp,
         //<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
         // denghui replace 2022-10-20
         gemm_op()(
-            this->ctx,
             transa,
             transb,
             this->npw,
@@ -263,7 +262,6 @@ void Nonlocal<OperatorPW<T, Device>>::act(
                 //<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
                 // denghui replace 2022-10-20
                 gemm_op()(
-                    this->ctx,
                     transa,
                     transb,
                     nkb,
diff --git a/source/module_hamilt_pw/hamilt_pwdft/operator_pw/onsite_proj_pw.cpp b/source/module_hamilt_pw/hamilt_pwdft/operator_pw/onsite_proj_pw.cpp
@@ -85,7 +85,6 @@ void OnsiteProj<OperatorPW<T, Device>>::add_onsite_proj(T *hpsi_in, const int np
     char transb = 'T';
     int npm = m;
     gemm_op()(
-        this->ctx,
         transa,
         transb,
         npw,
diff --git a/source/module_hamilt_pw/hamilt_stodft/sto_iter.cpp b/source/module_hamilt_pw/hamilt_stodft/sto_iter.cpp
@@ -98,8 +98,7 @@ void Stochastic_Iter<T, Device>::orthog(const int& ik, psi::Psi<T, Device>& psi,
         else
         {
             // sum(b<NBANDS, a<nchi) = < psi_b | chi_a >
-            ModuleBase::gemm_op<T, Device>()(ctx,
-                                             'C',
+            ModuleBase::gemm_op<T, Device>()('C',
                                              'N',
                                              nbands,
                                              nchipk,
@@ -115,8 +114,7 @@ void Stochastic_Iter<T, Device>::orthog(const int& ik, psi::Psi<T, Device>& psi,
             Parallel_Reduce::reduce_pool(sum, nbands * nchipk);
 
             // psi -= psi * sum
-            ModuleBase::gemm_op<T, Device>()(ctx,
-                                             'N',
+            ModuleBase::gemm_op<T, Device>()('N',
                                              'N',
                                              npw,
                                              nchipk,
@@ -428,7 +426,7 @@ void Stochastic_Iter<T, Device>::calPn(const int& ik, Stochastic_WF<T, Device>&
         const int N = norder;
         const Real kweight = this->pkv->wk[ik];
         
-        ModuleBase::gemm_op<Real, Device>()(this->ctx, trans, normal, N, N, M, &kweight, vec_all, LDA, vec_all, LDA, &one, spolyv, N);
+        ModuleBase::gemm_op<Real, Device>()(trans, normal, N, N, M, &kweight, vec_all, LDA, vec_all, LDA, &one, spolyv, N);
         // dgemm_(&trans, &normal, &N, &N, &M, &kweight, vec_all, &LDA, vec_all, &LDA, &one, spolyv, &N);
     }
     ModuleBase::timer::tick("Stochastic_Iter", "calPn");
diff --git a/source/module_hsolver/diago_dav_subspace.cpp b/source/module_hsolver/diago_dav_subspace.cpp
diff --git a/source/module_hsolver/diago_david.cpp b/source/module_hsolver/diago_david.cpp
diff --git a/source/module_hsolver/diago_iter_assist.cpp b/source/module_hsolver/diago_iter_assist.cpp
diff --git a/source/module_hsolver/para_linear_transform.cpp b/source/module_hsolver/para_linear_transform.cpp
diff --git a/source/module_hsolver/test/diago_bpcg_test.cpp b/source/module_hsolver/test/diago_bpcg_test.cpp
diff --git a/source/module_hsolver/test/test_para_linear_trans.cpp b/source/module_hsolver/test/test_para_linear_trans.cpp

Original file line number	Diff line number	Diff line change
`@@ -274,8 +274,7 @@ void HamiltPW<T, Device>::sPsi(const T* psi_in, // psi`
`274`	`274`	`}`
`275`	`275`	`else`
`276`	`276`	`{`
`277`		`- gemm_op()(this->ctx,`
`278`		`- transa,`
	`277`	`+ gemm_op()(transa,`
`279`	`278`	`transb,`
`280`	`279`	`this->ppcell->nkb,`
`281`	`280`	`nbands,`
`@@ -328,8 +327,7 @@ void HamiltPW<T, Device>::sPsi(const T* psi_in, // psi`
`328`	`327`	`for (int ia = 0; ia < atoms->na; ia++)`
`329`	`328`	`{`
`330`	`329`	`const int iat = ucell->itia2iat(it, ia);`
`331`		`- gemm_op()(this->ctx,`
`332`		`- transa,`
	`330`	`+ gemm_op()(transa,`
`333`	`331`	`transb,`
`334`	`332`	`nh,`
`335`	`333`	`nbands,`
`@@ -364,8 +362,7 @@ void HamiltPW<T, Device>::sPsi(const T* psi_in, // psi`
`364`	`362`	`}`
`365`	`363`	`else`
`366`	`364`	`{`
`367`		`- gemm_op()(this->ctx,`
`368`		`- transa,`
	`365`	`+ gemm_op()(transa,`
`369`	`366`	`transb,`
`370`	`367`	`npw,`
`371`	`368`	`nbands,`