revert convolution

A-006 · A-006 · commit 848a52a36354 · 2025-08-03T21:45:10.000+08:00
diff --git a/source/source_basis/module_pw/pw_basis_k.h b/source/source_basis/module_pw/pw_basis_k.h
@@ -135,18 +135,9 @@ class PW_Basis_K : public PW_Basis
                     const int ik,
                     const bool add = false,
                     const FPTYPE factor = 1.0) const; // in:(nz, ns)  ; out(nplane,nx*ny)
-    template <typename FPTYPE, typename Device>
-    void convolution(const Device* ctx,
-                      const int ik,
-                      const int size,
-                      const std::complex<FPTYPE>* input,
-                      const FPTYPE*               input1,
-                      std::complex<FPTYPE>*       output,
-                      const bool add = false,
-                      const FPTYPE factor =1.0) const ;
     #if defined(__DSP)
     template <typename FPTYPE, typename Device>
-    void convolution_dsp(const Device* ctx,
+    void convolution(const Device* ctx,
                       const int ik,
                       const int size,
                       const std::complex<FPTYPE>* input,
diff --git a/source/source_basis/module_pw/pw_transform_k.cpp b/source/source_basis/module_pw/pw_transform_k.cpp
@@ -337,79 +337,6 @@ void PW_Basis_K::recip_to_real(const base_device::DEVICE_CPU* /*dev*/,
         this->recip2real(in, out, ik, add, factor);
     #endif
 }
-template <>
-void PW_Basis_K::convolution(const base_device::DEVICE_CPU* ctx,
-                             const int ik,
-                             const int size,
-                             const std::complex<float>* input,
-                             const float* input1,
-                             std::complex<float>* output,
-                             const bool add,
-                             const float factor) const
-{
-}
-
-template <>
-void PW_Basis_K::convolution(const base_device::DEVICE_CPU* ctx,
-                             const int ik,
-                             const int size,
-                             const std::complex<double>* input,
-                             const double* input1,
-                             std::complex<double>* output,
-                             const bool add,
-                             const double factor) const
-{
-    ModuleBase::timer::tick(this->classname, "convolution");
-    assert(this->gamma_only == false);
-    // ModuleBase::GlobalFunc::ZEROS(fft_bundle.get_auxg_data<double>(), this->nst * this->nz);
-    // memset the auxr of 0 in the auxr,here the len of the auxr is nxyz
-    auto* auxg = this->fft_bundle.get_auxg_data<double>();
-    auto* auxr=this->fft_bundle.get_auxr_data<double>();
-
-    memset(auxg, 0, this->nst * this->nz * 2 * 8);
-    const int startig = ik * this->npwk_max;
-    const int npwk = this->npwk[ik];
-
-    // copy the mapping form the type of stick to the 3dfft
-    #ifdef _OPENMP
-    #pragma omp parallel for schedule(static, 4096 / sizeof(double))
-    #endif
-    for (int igl = 0; igl < npwk; ++igl)
-    {
-        auxg[this->igl2isz_k[igl + startig]] = input[igl];
-    }
-
-    // use 3d fft backward
-    this->fft_bundle.fftzbac(auxg, auxg);
-
-    this->gathers_scatterp(auxg, auxr);
-
-    this->fft_bundle.fftxybac(auxr, auxr);
-
-    #ifdef _OPENMP
-    #pragma omp parallel for simd schedule(static) aligned(auxr, input1: 64)
-    #endif
-    for (int ir = 0; ir < size; ir++)
-    {
-        auxr[ir] *= input1[ir];
-    }
-    // 3d fft
-    this->fft_bundle.fftxyfor(auxr, auxr);
-
-    this->gatherp_scatters(auxr, auxg);
-
-    this->fft_bundle.fftzfor(auxg, auxg);
-    // copy the result from the auxr to the out ,while consider the add
-    double tmpfac = factor / double(this->nxyz);
-#ifdef _OPENMP
-#pragma omp parallel for schedule(static, 4096 / sizeof(double))
-#endif
-    for (int igl = 0; igl < npwk; ++igl)
-    {
-        output[igl] += tmpfac * auxg[this->igl2isz_k[igl + startig]];
-    }
-    ModuleBase::timer::tick(this->classname, "convolution");
-}
 
 #if (defined(__CUDA) || defined(__ROCM))
 template <>
diff --git a/source/source_basis/module_pw/pw_transform_k_dsp.cpp b/source/source_basis/module_pw/pw_transform_k_dsp.cpp
@@ -91,7 +91,7 @@ void PW_Basis_K::recip2real_dsp(const std::complex<double>* in,
     }
 }
 template <>
-void PW_Basis_K::convolution_dsp(const base_device::DEVICE_CPU* ctx,
+void PW_Basis_K::convolution(const base_device::DEVICE_CPU* ctx,
                              const int ik,
                              const int size,
                              const std::complex<float>* input,
@@ -103,7 +103,7 @@ void PW_Basis_K::convolution_dsp(const base_device::DEVICE_CPU* ctx,
 }
 
 template <>
-void PW_Basis_K::convolution_dsp(const base_device::DEVICE_CPU* ctx,
+void PW_Basis_K::convolution(const base_device::DEVICE_CPU* ctx,
                              const int ik,
                              const int size,
                              const std::complex<double>* input,
diff --git a/source/source_pw/module_pwdft/operator_pw/veff_pw.cpp b/source/source_pw/module_pwdft/operator_pw/veff_pw.cpp
@@ -61,7 +61,7 @@ void Veff<OperatorPW<T, Device>>::act(
         ModulePW::FFT_Guard guard(wfcpw->fft_bundle);
         for (int ib = 0; ib < nbands; ib += npol)
         {
-            wfcpw->convolution_dsp(this->ctx,
+            wfcpw->convolution(this->ctx,
                                this->ik,
                                this->veff_col,
                                tmpsi_in,
@@ -96,19 +96,12 @@ void Veff<OperatorPW<T, Device>>::act(
     {
         for (int ib = 0; ib < nbands; ib += npol)
         {
-            wfcpw->convolution(this->ctx,
-                    this->ik,
-                    this->veff_col,
-                    tmpsi_in,
-                    this->veff + current_spin * this->veff_col,
-                    tmhpsi,
-                    true);
-            // wfcpw->recip_to_real<T, Device>(tmpsi_in, this->porter, this->ik);
-            // // NOTICE: when MPI threads are larger than the number of Z grids
-            // // veff would contain nothing, and nothing should be done in real space
-            // // but the 3DFFT can not be skipped, it will cause hanging
-            // veff_op()(this->ctx, this->veff_col, this->porter, this->veff + current_spin * this->veff_col);
-            // wfcpw->real_to_recip<T, Device>(this->porter, tmhpsi, this->ik, true);
+            wfcpw->recip_to_real<T, Device>(tmpsi_in, this->porter, this->ik);
+            // NOTICE: when MPI threads are larger than the number of Z grids
+            // veff would contain nothing, and nothing should be done in real space
+            // but the 3DFFT can not be skipped, it will cause hanging
+            veff_op()(this->ctx, this->veff_col, this->porter, this->veff + current_spin * this->veff_col);
+            wfcpw->real_to_recip<T, Device>(this->porter, tmhpsi, this->ik, true);
             tmhpsi   += psi_offset;
             tmpsi_in += psi_offset;
         }

Original file line number	Diff line number	Diff line change
`@@ -91,7 +91,7 @@ void PW_Basis_K::recip2real_dsp(const std::complex<double>* in,`
`91`	`91`	`}`
`92`	`92`	`}`
`93`	`93`	`template <>`
`94`		`-void PW_Basis_K::convolution_dsp(const base_device::DEVICE_CPU* ctx,`
	`94`	`+void PW_Basis_K::convolution(const base_device::DEVICE_CPU* ctx,`
`95`	`95`	`const int ik,`
`96`	`96`	`const int size,`
`97`	`97`	`const std::complex<float>* input,`
`@@ -103,7 +103,7 @@ void PW_Basis_K::convolution_dsp(const base_device::DEVICE_CPU* ctx,`
`103`	`103`	`}`
`104`	`104`
`105`	`105`	`template <>`
`106`		`-void PW_Basis_K::convolution_dsp(const base_device::DEVICE_CPU* ctx,`
	`106`	`+void PW_Basis_K::convolution(const base_device::DEVICE_CPU* ctx,`
`107`	`107`	`const int ik,`
`108`	`108`	`const int size,`
`109`	`109`	`const std::complex<double>* input,`