diff --git a/source/Makefile.Objects b/source/Makefile.Objects index 9a5243f1e3..cdde558dc3 100644 --- a/source/Makefile.Objects +++ b/source/Makefile.Objects @@ -401,9 +401,7 @@ OBJS_PSI_INITIALIZER=psi_initializer.o\ psi_initializer_nao.o\ psi_initializer_nao_random.o\ -OBJS_PW=fft.o\ - fft_bundle.o\ - fft_base.o\ +OBJS_PW=fft_bundle.o\ fft_cpu.o\ pw_basis.o\ pw_basis_k.o\ diff --git a/source/module_basis/module_pw/CMakeLists.txt b/source/module_basis/module_pw/CMakeLists.txt index a95eca4917..549e41c93c 100644 --- a/source/module_basis/module_pw/CMakeLists.txt +++ b/source/module_basis/module_pw/CMakeLists.txt @@ -15,7 +15,6 @@ if (USE_ROCM) endif() list(APPEND objects - fft.cpp pw_basis.cpp pw_basis_k.cpp pw_basis_sup.cpp @@ -26,7 +25,6 @@ list(APPEND objects pw_init.cpp pw_transform.cpp pw_transform_k.cpp - module_fft/fft_base.cpp module_fft/fft_bundle.cpp module_fft/fft_cpu.cpp ${FFT_SRC} diff --git a/source/module_basis/module_pw/fft.cpp b/source/module_basis/module_pw/fft.cpp deleted file mode 100644 index fa94bd6442..0000000000 --- a/source/module_basis/module_pw/fft.cpp +++ /dev/null @@ -1,881 +0,0 @@ -#include "fft.h" - -#include "module_base/memory.h" -#include "module_base/tool_quit.h" -#include "module_hamilt_pw/hamilt_pwdft/global.h" - -namespace ModulePW -{ - -FFT::FFT() -{ -} - -FFT::~FFT() -{ - this->clear(); -} -void FFT::clear() -{ - this->cleanFFT(); - if (z_auxg != nullptr) - { - fftw_free(z_auxg); - z_auxg = nullptr; - } - if (z_auxr != nullptr) - { - fftw_free(z_auxr); - z_auxr = nullptr; - } - d_rspace = nullptr; -#if defined(__CUDA) || defined(__ROCM) - if (this->device == "gpu") - { - if (c_auxr_3d != nullptr) - { - delmem_cd_op()(gpu_ctx, c_auxr_3d); - c_auxr_3d = nullptr; - } - if (z_auxr_3d != nullptr) - { - delmem_zd_op()(gpu_ctx, z_auxr_3d); - z_auxr_3d = nullptr; - } - } -#endif // defined(__CUDA) || defined(__ROCM) -#if defined(__ENABLE_FLOAT_FFTW) - if (this->precision == "single") - { - this->cleanfFFT(); - if (c_auxg != nullptr) - { - fftw_free(c_auxg); - c_auxg = nullptr; - } - if (c_auxr != nullptr) - { - fftw_free(c_auxr); - c_auxr = nullptr; - } - s_rspace = nullptr; - } -#endif // defined(__ENABLE_FLOAT_FFTW) -} - -void FFT::initfft(int nx_in, int ny_in, int nz_in, int lixy_in, int rixy_in, int ns_in, int nplane_in, int nproc_in, - bool gamma_only_in, bool xprime_in, bool mpifft_in) -{ - this->gamma_only = gamma_only_in; - this->xprime = xprime_in; - this->fftnx = this->nx = nx_in; - this->fftny = this->ny = ny_in; - if (this->gamma_only) - { - if (xprime) { - this->fftnx = int(nx / 2) + 1; - } else { - this->fftny = int(ny / 2) + 1; -} - } - this->nz = nz_in; - this->ns = ns_in; - this->lixy = lixy_in; - this->rixy = rixy_in; - this->nplane = nplane_in; - this->nproc = nproc_in; - this->mpifft = mpifft_in; - this->nxy = this->nx * this->ny; - this->fftnxy = this->fftnx * this->fftny; - // this->maxgrids = (this->nz * this->ns > this->nxy * nplane) ? this->nz * this->ns : this->nxy * nplane; - const int nrxx = this->nxy * this->nplane; - const int nsz = this->nz * this->ns; - int maxgrids = (nsz > nrxx) ? nsz : nrxx; - if (!this->mpifft) - { - // z_auxg = (std::complex*)fftw_malloc(sizeof(fftw_complex) * maxgrids); - // z_auxr = (std::complex*)fftw_malloc(sizeof(fftw_complex) * maxgrids); - // ModuleBase::Memory::record("FFT::grid", 2 * sizeof(fftw_complex) * maxgrids); - // d_rspace = (double*)z_auxg; - // auxr_3d = static_cast *>( - // fftw_malloc(sizeof(fftw_complex) * (this->nx * this->ny * this->nz))); -#if defined(__CUDA) || defined(__ROCM) - if (this->device == "gpu") - { - resmem_cd_op()(gpu_ctx, this->c_auxr_3d, this->nx * this->ny * this->nz); - resmem_zd_op()(gpu_ctx, this->z_auxr_3d, this->nx * this->ny * this->nz); - } -#endif // defined(__CUDA) || defined(__ROCM) -// #if defined(__ENABLE_FLOAT_FFTW) -// if (this->precision == "single") -// { -// c_auxg = (std::complex*)fftw_malloc(sizeof(fftwf_complex) * maxgrids); -// c_auxr = (std::complex*)fftw_malloc(sizeof(fftwf_complex) * maxgrids); -// ModuleBase::Memory::record("FFT::grid_s", 2 * sizeof(fftwf_complex) * maxgrids); -// s_rspace = (float*)c_auxg; -// } -// #endif // defined(__ENABLE_FLOAT_FFTW) - } - else - { - } -} - -void FFT::setupFFT() -{ - unsigned int flag = FFTW_ESTIMATE; - switch (this->fft_mode) - { - case 0: - flag = FFTW_ESTIMATE; - break; - case 1: - flag = FFTW_MEASURE; - break; - case 2: - flag = FFTW_PATIENT; - break; - case 3: - flag = FFTW_EXHAUSTIVE; - break; - default: - break; - } - if (!this->mpifft) - { - this->initplan(flag); -#if defined(__ENABLE_FLOAT_FFTW) - if (this->precision == "single") - { - this->initplanf(flag); - } -#endif // defined(__ENABLE_FLOAT_FFTW) - } -#if defined(__FFTW3_MPI) && defined(__MPI) - else - { - // this->initplan_mpi(); - // if (this->precision == "single") { - // this->initplanf_mpi(); - // } - } -#endif - return; -} - -void FFT ::initplan(const unsigned int& flag) -{ - //--------------------------------------------------------- - // 1 D - Z - //--------------------------------------------------------- - - // fftw_plan_many_dft(int rank, const int *n, int howmany, - // fftw_complex *in, const int *inembed, int istride, int idist, - // fftw_complex *out, const int *onembed, int ostride, int odist, int sign, unsigned - //flags); - - this->planzfor = fftw_plan_many_dft(1, &this->nz, this->ns, (fftw_complex*)z_auxg, &this->nz, 1, this->nz, - (fftw_complex*)z_auxg, &this->nz, 1, this->nz, FFTW_FORWARD, flag); - - this->planzbac = fftw_plan_many_dft(1, &this->nz, this->ns, (fftw_complex*)z_auxg, &this->nz, 1, this->nz, - (fftw_complex*)z_auxg, &this->nz, 1, this->nz, FFTW_BACKWARD, flag); - - //--------------------------------------------------------- - // 2 D - XY - //--------------------------------------------------------- - // 1D+1D is much faster than 2D FFT! - // in-place fft is better for c2c and out-of-place fft is better for c2r - int* embed = nullptr; - int npy = this->nplane * this->ny; - if (this->xprime) - { - this->planyfor = fftw_plan_many_dft(1, &this->ny, this->nplane, (fftw_complex*)z_auxr, embed, nplane, 1, - (fftw_complex*)z_auxr, embed, nplane, 1, FFTW_FORWARD, flag); - this->planybac = fftw_plan_many_dft(1, &this->ny, this->nplane, (fftw_complex*)z_auxr, embed, nplane, 1, - (fftw_complex*)z_auxr, embed, nplane, 1, FFTW_BACKWARD, flag); - if (this->gamma_only) - { - this->planxr2c = fftw_plan_many_dft_r2c(1, &this->nx, npy, d_rspace, embed, npy, 1, (fftw_complex*)z_auxr, - embed, npy, 1, flag); - this->planxc2r = fftw_plan_many_dft_c2r(1, &this->nx, npy, (fftw_complex*)z_auxr, embed, npy, 1, d_rspace, - embed, npy, 1, flag); - } - else - { - this->planxfor1 = fftw_plan_many_dft(1, &this->nx, npy, (fftw_complex*)z_auxr, embed, npy, 1, - (fftw_complex*)z_auxr, embed, npy, 1, FFTW_FORWARD, flag); - this->planxbac1 = fftw_plan_many_dft(1, &this->nx, npy, (fftw_complex*)z_auxr, embed, npy, 1, - (fftw_complex*)z_auxr, embed, npy, 1, FFTW_BACKWARD, flag); - } - } - else - { - this->planxfor1 = fftw_plan_many_dft(1, &this->nx, this->nplane * (lixy + 1), (fftw_complex*)z_auxr, embed, npy, - 1, (fftw_complex*)z_auxr, embed, npy, 1, FFTW_FORWARD, flag); - this->planxbac1 = fftw_plan_many_dft(1, &this->nx, this->nplane * (lixy + 1), (fftw_complex*)z_auxr, embed, npy, - 1, (fftw_complex*)z_auxr, embed, npy, 1, FFTW_BACKWARD, flag); - if (this->gamma_only) - { - this->planyr2c = fftw_plan_many_dft_r2c(1, &this->ny, this->nplane, d_rspace, embed, this->nplane, 1, - (fftw_complex*)z_auxr, embed, this->nplane, 1, flag); - this->planyc2r = fftw_plan_many_dft_c2r(1, &this->ny, this->nplane, (fftw_complex*)z_auxr, embed, - this->nplane, 1, d_rspace, embed, this->nplane, 1, flag); - } - else - { - - this->planxfor2 = fftw_plan_many_dft(1, &this->nx, this->nplane * (ny - rixy), (fftw_complex*)z_auxr, embed, - npy, 1, (fftw_complex*)z_auxr, embed, npy, 1, FFTW_FORWARD, flag); - this->planxbac2 = fftw_plan_many_dft(1, &this->nx, this->nplane * (ny - rixy), (fftw_complex*)z_auxr, embed, - npy, 1, (fftw_complex*)z_auxr, embed, npy, 1, FFTW_BACKWARD, flag); - this->planyfor = fftw_plan_many_dft(1, &this->ny, this->nplane, (fftw_complex*)z_auxr, embed, this->nplane, - 1, (fftw_complex*)z_auxr, embed, this->nplane, 1, FFTW_FORWARD, flag); - this->planybac = fftw_plan_many_dft(1, &this->ny, this->nplane, (fftw_complex*)z_auxr, embed, this->nplane, - 1, (fftw_complex*)z_auxr, embed, this->nplane, 1, FFTW_BACKWARD, flag); - } - } - - //--------------------------------------------------------- - // 3 D - XYZ - //--------------------------------------------------------- - // in-place fft test - // this->plan3dforward = fftw_plan_dft_3d( - // this->nx, this->ny, this->nz, - // reinterpret_cast(auxr_3d), - // reinterpret_cast(auxr_3d), - // FFTW_FORWARD, flag); - // this->plan3dbackward = fftw_plan_dft_3d( - // this->nx, this->ny, this->nz, - // reinterpret_cast(auxr_3d), - // reinterpret_cast(auxr_3d), - // FFTW_BACKWARD, flag); - -#if defined(__CUDA) || defined(__ROCM) - if (this->device == "gpu") - { -#if defined(__CUDA) - cufftPlan3d(&c_handle, this->nx, this->ny, this->nz, CUFFT_C2C); - cufftPlan3d(&z_handle, this->nx, this->ny, this->nz, CUFFT_Z2Z); -#elif defined(__ROCM) - hipfftPlan3d(&c_handle, this->nx, this->ny, this->nz, HIPFFT_C2C); - hipfftPlan3d(&z_handle, this->nx, this->ny, this->nz, HIPFFT_Z2Z); -#endif - } -#endif -} - -#if defined(__ENABLE_FLOAT_FFTW) -void FFT ::initplanf(const unsigned int& flag) -{ - //--------------------------------------------------------- - // 1 D - //--------------------------------------------------------- - - // fftw_plan_many_dft(int rank, const int *n, int howmany, - // fftw_complex *in, const int *inembed, int istride, int idist, - // fftw_complex *out, const int *onembed, int ostride, int odist, int sign, unsigned - //flags); - - this->planfzfor = fftwf_plan_many_dft(1, &this->nz, this->ns, (fftwf_complex*)c_auxg, &this->nz, 1, this->nz, - (fftwf_complex*)c_auxg, &this->nz, 1, this->nz, FFTW_FORWARD, flag); - - this->planfzbac = fftwf_plan_many_dft(1, &this->nz, this->ns, (fftwf_complex*)c_auxg, &this->nz, 1, this->nz, - (fftwf_complex*)c_auxg, &this->nz, 1, this->nz, FFTW_BACKWARD, flag); - //--------------------------------------------------------- - // 2 D - //--------------------------------------------------------- - - int* embed = nullptr; - int npy = this->nplane * this->ny; - if (this->xprime) - { - this->planfyfor = fftwf_plan_many_dft(1, &this->ny, this->nplane, (fftwf_complex*)c_auxr, embed, nplane, 1, - (fftwf_complex*)c_auxr, embed, nplane, 1, FFTW_FORWARD, flag); - this->planfybac = fftwf_plan_many_dft(1, &this->ny, this->nplane, (fftwf_complex*)c_auxr, embed, nplane, 1, - (fftwf_complex*)c_auxr, embed, nplane, 1, FFTW_BACKWARD, flag); - if (this->gamma_only) - { - this->planfxr2c = fftwf_plan_many_dft_r2c(1, &this->nx, npy, s_rspace, embed, npy, 1, - (fftwf_complex*)c_auxr, embed, npy, 1, flag); - this->planfxc2r = fftwf_plan_many_dft_c2r(1, &this->nx, npy, (fftwf_complex*)c_auxr, embed, npy, 1, - s_rspace, embed, npy, 1, flag); - } - else - { - this->planfxfor1 = fftwf_plan_many_dft(1, &this->nx, npy, (fftwf_complex*)c_auxr, embed, npy, 1, - (fftwf_complex*)c_auxr, embed, npy, 1, FFTW_FORWARD, flag); - this->planfxbac1 = fftwf_plan_many_dft(1, &this->nx, npy, (fftwf_complex*)c_auxr, embed, npy, 1, - (fftwf_complex*)c_auxr, embed, npy, 1, FFTW_BACKWARD, flag); - } - } - else - { - this->planfxfor1 = fftwf_plan_many_dft(1, &this->nx, this->nplane * (lixy + 1), (fftwf_complex*)c_auxr, embed, - npy, 1, (fftwf_complex*)c_auxr, embed, npy, 1, FFTW_FORWARD, flag); - this->planfxbac1 = fftwf_plan_many_dft(1, &this->nx, this->nplane * (lixy + 1), (fftwf_complex*)c_auxr, embed, - npy, 1, (fftwf_complex*)c_auxr, embed, npy, 1, FFTW_BACKWARD, flag); - if (this->gamma_only) - { - this->planfyr2c = fftwf_plan_many_dft_r2c(1, &this->ny, this->nplane, s_rspace, embed, this->nplane, 1, - (fftwf_complex*)c_auxr, embed, this->nplane, 1, flag); - this->planfyc2r = fftwf_plan_many_dft_c2r(1, &this->ny, this->nplane, (fftwf_complex*)c_auxr, embed, - this->nplane, 1, s_rspace, embed, this->nplane, 1, flag); - } - else - { - this->planfxfor2 - = fftwf_plan_many_dft(1, &this->nx, this->nplane * (this->ny - rixy), (fftwf_complex*)c_auxr, embed, - npy, 1, (fftwf_complex*)c_auxr, embed, npy, 1, FFTW_FORWARD, flag); - this->planfxbac2 - = fftwf_plan_many_dft(1, &this->nx, this->nplane * (this->ny - rixy), (fftwf_complex*)c_auxr, embed, - npy, 1, (fftwf_complex*)c_auxr, embed, npy, 1, FFTW_BACKWARD, flag); - this->planfyfor - = fftwf_plan_many_dft(1, &this->ny, this->nplane, (fftwf_complex*)c_auxr, embed, this->nplane, 1, - (fftwf_complex*)c_auxr, embed, this->nplane, 1, FFTW_FORWARD, flag); - this->planfybac - = fftwf_plan_many_dft(1, &this->ny, this->nplane, (fftwf_complex*)c_auxr, embed, this->nplane, 1, - (fftwf_complex*)c_auxr, embed, this->nplane, 1, FFTW_BACKWARD, flag); - } - } -} -#endif // defined(__ENABLE_FLOAT_FFTW) -// void FFT :: initplan_mpi() -// { - -// } - -// void FFT :: initplanf_mpi() -// { - -// } - -void FFT::cleanFFT() -{ - if (planzfor) - { - fftw_destroy_plan(planzfor); - planzfor = nullptr; - } - if (planzbac) - { - fftw_destroy_plan(planzbac); - planzbac = nullptr; - } - if (planxfor1) - { - fftw_destroy_plan(planxfor1); - planxfor1 = nullptr; - } - if (planxbac1) - { - fftw_destroy_plan(planxbac1); - planxbac1 = nullptr; - } - if (planxfor2) - { - fftw_destroy_plan(planxfor2); - planxfor2 = nullptr; - } - if (planxbac2) - { - fftw_destroy_plan(planxbac2); - planxbac2 = nullptr; - } - if (planyfor) - { - fftw_destroy_plan(planyfor); - planyfor = nullptr; - } - if (planybac) - { - fftw_destroy_plan(planybac); - planybac = nullptr; - } - if (planxr2c) - { - fftw_destroy_plan(planxr2c); - planxr2c = nullptr; - } - if (planxc2r) - { - fftw_destroy_plan(planxc2r); - planxc2r = nullptr; - } - if (planyr2c) - { - fftw_destroy_plan(planyr2c); - planyr2c = nullptr; - } - if (planyc2r) - { - fftw_destroy_plan(planyc2r); - planyc2r = nullptr; - } - - // fftw_destroy_plan(this->plan3dforward); - // fftw_destroy_plan(this->plan3dbackward); -#if defined(__CUDA) || defined(__ROCM) - if (this->device == "gpu") - { -#if defined(__CUDA) - if (c_handle) - { - cufftDestroy(c_handle); - c_handle = {}; - } - if (z_handle) - { - cufftDestroy(z_handle); - z_handle = {}; - } -#elif defined(__ROCM) - if (c_handle) - { - hipfftDestroy(c_handle); - c_handle = {}; - } - if (z_handle) - { - hipfftDestroy(z_handle); - z_handle = {}; - } -#endif - } -#endif -} - -#if defined(__ENABLE_FLOAT_FFTW) -void FFT::cleanfFFT() -{ - if (planfzfor) - { - fftwf_destroy_plan(planfzfor); - planfzfor = NULL; - } - if (planfzbac) - { - fftwf_destroy_plan(planfzbac); - planfzbac = NULL; - } - if (planfxfor1) - { - fftwf_destroy_plan(planfxfor1); - planfxfor1 = NULL; - } - if (planfxbac1) - { - fftwf_destroy_plan(planfxbac1); - planfxbac1 = NULL; - } - if (planfxfor2) - { - fftwf_destroy_plan(planfxfor2); - planfxfor2 = NULL; - } - if (planfxbac2) - { - fftwf_destroy_plan(planfxbac2); - planfxbac2 = NULL; - } - if (planfyfor) - { - fftwf_destroy_plan(planfyfor); - planfyfor = NULL; - } - if (planfybac) - { - fftwf_destroy_plan(planfybac); - planfybac = NULL; - } - if (planfxr2c) - { - fftwf_destroy_plan(planfxr2c); - planfxr2c = NULL; - } - if (planfxc2r) - { - fftwf_destroy_plan(planfxc2r); - planfxc2r = NULL; - } - if (planfyr2c) - { - fftwf_destroy_plan(planfyr2c); - planfyr2c = NULL; - } - if (planfyc2r) - { - fftwf_destroy_plan(planfyc2r); - planfyc2r = NULL; - } - return; -} -#endif // defined(__ENABLE_FLOAT_FFTW) - -template <> -void FFT::fftzfor(std::complex* in, std::complex* out) const -{ -#if defined(__ENABLE_FLOAT_FFTW) - fftwf_execute_dft(this->planfzfor, (fftwf_complex*)in, (fftwf_complex*)out); -#else - ModuleBase::WARNING_QUIT("fft", "Please compile ABACUS using the ENABLE_FLOAT_FFTW flag!"); -#endif // defined(__ENABLE_FLOAT_FFTW) -} - -template <> -void FFT::fftzfor(std::complex* in, std::complex* out) const -{ - fftw_execute_dft(this->planzfor, (fftw_complex*)in, (fftw_complex*)out); -} - -template <> -void FFT::fftzbac(std::complex* in, std::complex* out) const -{ -#if defined(__ENABLE_FLOAT_FFTW) - fftwf_execute_dft(this->planfzbac, (fftwf_complex*)in, (fftwf_complex*)out); -#else - ModuleBase::WARNING_QUIT("fft", "Please compile ABACUS using the ENABLE_FLOAT_FFTW flag!"); -#endif // defined(__ENABLE_FLOAT_FFTW) -} - -template <> -void FFT::fftzbac(std::complex* in, std::complex* out) const -{ - fftw_execute_dft(this->planzbac, (fftw_complex*)in, (fftw_complex*)out); -} - -template <> -void FFT::fftxyfor(std::complex* in, std::complex* out) const -{ -#if defined(__ENABLE_FLOAT_FFTW) - int npy = this->nplane * this->ny; - if (this->xprime) - { - fftwf_execute_dft(this->planfxfor1, (fftwf_complex*)in, (fftwf_complex*)out); - - for (int i = 0; i < this->lixy + 1; ++i) - { - fftwf_execute_dft(this->planfyfor, (fftwf_complex*)&in[i * npy], (fftwf_complex*)&out[i * npy]); - } - for (int i = rixy; i < this->nx; ++i) - { - fftwf_execute_dft(this->planfyfor, (fftwf_complex*)&in[i * npy], (fftwf_complex*)&out[i * npy]); - } - } - else - { - for (int i = 0; i < this->nx; ++i) - { - fftwf_execute_dft(this->planfyfor, (fftwf_complex*)&in[i * npy], (fftwf_complex*)&out[i * npy]); - } - - fftwf_execute_dft(this->planfxfor1, (fftwf_complex*)in, (fftwf_complex*)out); - fftwf_execute_dft(this->planfxfor2, (fftwf_complex*)&in[rixy * nplane], (fftwf_complex*)&out[rixy * nplane]); - } -#else - ModuleBase::WARNING_QUIT("fft", "Please compile ABACUS using the ENABLE_FLOAT_FFTW flag!"); -#endif // defined(__ENABLE_FLOAT_FFTW) -} - -template <> -void FFT::fftxyfor(std::complex* in, std::complex* out) const -{ - int npy = this->nplane * this->ny; - if (this->xprime) - { - fftw_execute_dft(this->planxfor1, (fftw_complex*)in, (fftw_complex*)out); - - for (int i = 0; i < this->lixy + 1; ++i) - { - fftw_execute_dft(this->planyfor, (fftw_complex*)&in[i * npy], (fftw_complex*)&out[i * npy]); - } - for (int i = rixy; i < this->nx; ++i) - { - fftw_execute_dft(this->planyfor, (fftw_complex*)&in[i * npy], (fftw_complex*)&out[i * npy]); - } - } - else - { - for (int i = 0; i < this->nx; ++i) - { - fftw_execute_dft(this->planyfor, (fftw_complex*)&in[i * npy], (fftw_complex*)&out[i * npy]); - } - - fftw_execute_dft(this->planxfor1, (fftw_complex*)in, (fftw_complex*)out); - fftw_execute_dft(this->planxfor2, (fftw_complex*)&in[rixy * nplane], (fftw_complex*)&out[rixy * nplane]); - } -} - -template <> -void FFT::fftxybac(std::complex* in, std::complex* out) const -{ -#if defined(__ENABLE_FLOAT_FFTW) - int npy = this->nplane * this->ny; - if (this->xprime) - { - for (int i = 0; i < this->lixy + 1; ++i) - { - fftwf_execute_dft(this->planfybac, (fftwf_complex*)&in[i * npy], (fftwf_complex*)&out[i * npy]); - } - for (int i = rixy; i < this->nx; ++i) - { - fftwf_execute_dft(this->planfybac, (fftwf_complex*)&in[i * npy], (fftwf_complex*)&out[i * npy]); - } - - fftwf_execute_dft(this->planfxbac1, (fftwf_complex*)in, (fftwf_complex*)out); - } - else - { - fftwf_execute_dft(this->planfxbac1, (fftwf_complex*)in, (fftwf_complex*)out); - fftwf_execute_dft(this->planfxbac2, (fftwf_complex*)&in[rixy * nplane], (fftwf_complex*)&out[rixy * nplane]); - - for (int i = 0; i < this->nx; ++i) - { - fftwf_execute_dft(this->planfybac, (fftwf_complex*)&in[i * npy], (fftwf_complex*)&out[i * npy]); - } - } -#else - ModuleBase::WARNING_QUIT("fft", "Please compile ABACUS using the ENABLE_FLOAT_FFTW flag!"); -#endif // defined(__ENABLE_FLOAT_FFTW) -} - -template <> -void FFT::fftxybac(std::complex* in, std::complex* out) const -{ - int npy = this->nplane * this->ny; - if (this->xprime) - { - for (int i = 0; i < this->lixy + 1; ++i) - { - fftw_execute_dft(this->planybac, (fftw_complex*)&in[i * npy], (fftw_complex*)&out[i * npy]); - } - for (int i = rixy; i < this->nx; ++i) - { - fftw_execute_dft(this->planybac, (fftw_complex*)&in[i * npy], (fftw_complex*)&out[i * npy]); - } - - fftw_execute_dft(this->planxbac1, (fftw_complex*)in, (fftw_complex*)out); - } - else - { - fftw_execute_dft(this->planxbac1, (fftw_complex*)in, (fftw_complex*)out); - fftw_execute_dft(this->planxbac2, (fftw_complex*)&in[rixy * nplane], (fftw_complex*)&out[rixy * nplane]); - - for (int i = 0; i < this->nx; ++i) - { - fftw_execute_dft(this->planybac, (fftw_complex*)&in[i * npy], (fftw_complex*)&out[i * npy]); - } - } -} - -template <> -void FFT::fftxyr2c(float* in, std::complex* out) const -{ -#if defined(__ENABLE_FLOAT_FFTW) - int npy = this->nplane * this->ny; - if (this->xprime) - { - fftwf_execute_dft_r2c(this->planfxr2c, in, (fftwf_complex*)out); - - for (int i = 0; i < this->lixy + 1; ++i) - { - fftwf_execute_dft(this->planfyfor, (fftwf_complex*)&out[i * npy], (fftwf_complex*)&out[i * npy]); - } - } - else - { - for (int i = 0; i < this->nx; ++i) - { - fftwf_execute_dft_r2c(this->planfyr2c, &in[i * npy], (fftwf_complex*)&out[i * npy]); - } - - fftwf_execute_dft(this->planfxfor1, (fftwf_complex*)out, (fftwf_complex*)out); - } -#else - ModuleBase::WARNING_QUIT("fft", "Please compile ABACUS using the ENABLE_FLOAT_FFTW flag!"); -#endif // defined(__ENABLE_FLOAT_FFTW) -} - -template <> -void FFT::fftxyr2c(double* in, std::complex* out) const -{ - int npy = this->nplane * this->ny; - if (this->xprime) - { - fftw_execute_dft_r2c(this->planxr2c, in, (fftw_complex*)out); - - for (int i = 0; i < this->lixy + 1; ++i) - { - fftw_execute_dft(this->planyfor, (fftw_complex*)&out[i * npy], (fftw_complex*)&out[i * npy]); - } - } - else - { - for (int i = 0; i < this->nx; ++i) - { - fftw_execute_dft_r2c(this->planyr2c, &in[i * npy], (fftw_complex*)&out[i * npy]); - } - - fftw_execute_dft(this->planxfor1, (fftw_complex*)out, (fftw_complex*)out); - } -} - -template <> -void FFT::fftxyc2r(std::complex* in, float* out) const -{ -#if defined(__ENABLE_FLOAT_FFTW) - int npy = this->nplane * this->ny; - if (this->xprime) - { - for (int i = 0; i < this->lixy + 1; ++i) - { - fftwf_execute_dft(this->planfybac, (fftwf_complex*)&in[i * npy], (fftwf_complex*)&in[i * npy]); - } - - fftwf_execute_dft_c2r(this->planfxc2r, (fftwf_complex*)in, out); - } - else - { - fftwf_execute_dft(this->planfxbac1, (fftwf_complex*)in, (fftwf_complex*)in); - - for (int i = 0; i < this->nx; ++i) - { - fftwf_execute_dft_c2r(this->planfyc2r, (fftwf_complex*)&in[i * npy], &out[i * npy]); - } - } -#else - ModuleBase::WARNING_QUIT("fft", "Please compile ABACUS using the ENABLE_FLOAT_FFTW flag!"); -#endif // defined(__ENABLE_FLOAT_FFTW) -} - -template <> -void FFT::fftxyc2r(std::complex* in, double* out) const -{ - int npy = this->nplane * this->ny; - if (this->xprime) - { - for (int i = 0; i < this->lixy + 1; ++i) - { - fftw_execute_dft(this->planybac, (fftw_complex*)&in[i * npy], (fftw_complex*)&in[i * npy]); - } - - fftw_execute_dft_c2r(this->planxc2r, (fftw_complex*)in, out); - } - else - { - fftw_execute_dft(this->planxbac1, (fftw_complex*)in, (fftw_complex*)in); - - for (int i = 0; i < this->nx; ++i) - { - fftw_execute_dft_c2r(this->planyc2r, (fftw_complex*)&in[i * npy], &out[i * npy]); - } - } -} - -#if defined(__CUDA) || defined(__ROCM) -template <> -void FFT::fft3D_forward(const base_device::DEVICE_GPU* /*ctx*/, std::complex* in, std::complex* out) const -{ -#if defined(__CUDA) - CHECK_CUFFT(cufftExecC2C(this->c_handle, reinterpret_cast(in), reinterpret_cast(out), - CUFFT_FORWARD)); -#elif defined(__ROCM) - CHECK_CUFFT(hipfftExecC2C(this->c_handle, reinterpret_cast(in), - reinterpret_cast(out), HIPFFT_FORWARD)); -#endif -} -template <> -void FFT::fft3D_forward(const base_device::DEVICE_GPU* /*ctx*/, std::complex* in, - std::complex* out) const -{ -#if defined(__CUDA) - CHECK_CUFFT(cufftExecZ2Z(this->z_handle, reinterpret_cast(in), - reinterpret_cast(out), CUFFT_FORWARD)); -#elif defined(__ROCM) - CHECK_CUFFT(hipfftExecZ2Z(this->z_handle, reinterpret_cast(in), - reinterpret_cast(out), HIPFFT_FORWARD)); -#endif -} - -template <> -void FFT::fft3D_backward(const base_device::DEVICE_GPU* /*ctx*/, std::complex* in, - std::complex* out) const -{ -#if defined(__CUDA) - CHECK_CUFFT(cufftExecC2C(this->c_handle, reinterpret_cast(in), reinterpret_cast(out), - CUFFT_INVERSE)); -#elif defined(__ROCM) - CHECK_CUFFT(hipfftExecC2C(this->c_handle, reinterpret_cast(in), - reinterpret_cast(out), HIPFFT_BACKWARD)); -#endif -} -template <> -void FFT::fft3D_backward(const base_device::DEVICE_GPU* /*ctx*/, std::complex* in, - std::complex* out) const -{ -#if defined(__CUDA) - CHECK_CUFFT(cufftExecZ2Z(this->z_handle, reinterpret_cast(in), - reinterpret_cast(out), CUFFT_INVERSE)); -#elif defined(__ROCM) - CHECK_CUFFT(hipfftExecZ2Z(this->z_handle, reinterpret_cast(in), - reinterpret_cast(out), HIPFFT_BACKWARD)); -#endif -} -#endif - -template <> -float* FFT::get_rspace_data() const -{ - return this->s_rspace; -} -template <> -double* FFT::get_rspace_data() const -{ - return this->d_rspace; -} - -template <> -std::complex* FFT::get_auxr_data() const -{ - return this->c_auxr; -} -template <> -std::complex* FFT::get_auxr_data() const -{ - return this->z_auxr; -} - -template <> -std::complex* FFT::get_auxg_data() const -{ - return this->c_auxg; -} -template <> -std::complex* FFT::get_auxg_data() const -{ - return this->z_auxg; -} - -#if defined(__CUDA) || defined(__ROCM) -template <> -std::complex* FFT::get_auxr_3d_data() const -{ - return this->c_auxr_3d; -} -template <> -std::complex* FFT::get_auxr_3d_data() const -{ - return this->z_auxr_3d; -} -#endif - -void FFT::set_device(std::string device_) -{ - this->device = std::move(device_); -} - -void FFT::set_precision(std::string precision_) -{ - this->precision = std::move(precision_); -} - -} // namespace ModulePW diff --git a/source/module_basis/module_pw/fft.h b/source/module_basis/module_pw/fft.h deleted file mode 100644 index 3581d01d18..0000000000 --- a/source/module_basis/module_pw/fft.h +++ /dev/null @@ -1,173 +0,0 @@ -#ifndef FFT_H -#define FFT_H - -#include -#include - -#include "fftw3.h" -#if defined(__FFTW3_MPI) && defined(__MPI) -#include -//#include "fftw3-mpi_mkl.h" -#endif - -#if defined(__CUDA) || defined(__UT_USE_CUDA) -#include "cufft.h" -#include "cuda_runtime.h" -#endif - -#if defined(__ROCM) || defined(__UT_USE_ROCM) -#include -#include -#endif - -//Temporary: we donot need psi. However some GPU ops are defined in psi, which should be moved into module_base or module_gpu -#include "module_psi/psi.h" -// #ifdef __ENABLE_FLOAT_FFTW -// #include "fftw3f.h" -// #if defined(__FFTW3_MPI) && defined(__MPI) -// #include "fftw3f-mpi.h" -// //#include "fftw3-mpi_mkl.h" -// #endif -// #endif - -namespace ModulePW -{ - -class FFT -{ -public: - - FFT(); - ~FFT(); - void clear(); //reset fft - - // init parameters of fft - void initfft(int nx_in, int ny_in, int nz_in, int lixy_in, int rixy_in, int ns_in, int nplane_in, - int nproc_in, bool gamma_only_in, bool xprime_in = true, bool mpifft_in = false); - - //init fftw_plans - void setupFFT(); - - //destroy fftw_plans - void cleanFFT(); - -#if defined(__ENABLE_FLOAT_FFTW) - void cleanfFFT(); -#endif // defined(__ENABLE_FLOAT_FFTW) - - template - void fftzfor(std::complex* in, std::complex* out) const; - template - void fftzbac(std::complex* in, std::complex* out) const; - template - void fftxyfor(std::complex* in, std::complex* out) const; - template - void fftxybac(std::complex* in, std::complex* out) const; - template - void fftxyr2c(FPTYPE* in, std::complex* out) const; - template - void fftxyc2r(std::complex* in, FPTYPE* out) const; - - template - void fft3D_forward(const Device* ctx, std::complex* in, std::complex* out) const; - template - void fft3D_backward(const Device* ctx, std::complex* in, std::complex* out) const; - - public: - //init fftw_plans - void initplan(const unsigned int& flag = 0); - // We have not support mpi fftw yet. - // void initplan_mpi(); - //init fftwf_plans -#if defined(__ENABLE_FLOAT_FFTW) - void initplanf(const unsigned int& flag = 0); -#endif // defined(__ENABLE_FLOAT_FFTW) - // void initplanf_mpi(); - -private: - int fftnx=0, fftny=0; - int fftnxy=0; - int ny=0, nx=0, nz=0; - int nxy=0; -public : - bool xprime = true; // true: when do recip2real, x-fft will be done last and when doing real2recip, x-fft will be done first; false: y-fft - // For gamma_only, true: we use half x; false: we use half y - int lixy=0,rixy=0;// lixy: the left edge of the pw ball in the y direction; rixy: the right edge of the pw ball in the x or y direction - int ns=0; //number of sticks - int nplane=0; //number of x-y planes - int nproc=1; // number of proc. - - template - FPTYPE* get_rspace_data() const; - template - std::complex* get_auxr_data() const; - template - std::complex* get_auxg_data() const; - template - std::complex* get_auxr_3d_data() const; - - int fft_mode = 0; ///< fftw mode 0: estimate, 1: measure, 2: patient, 3: exhaustive - - private: - bool gamma_only = false; - bool mpifft = false; // if use mpi fft, only used when define __FFTW3_MPI -//add by A.s 202406 considering that no all people are familiar with fftw3,some comments should be added. - fftw_plan planzfor = NULL;//create a special pointer pointing to the fftw_plan class as a plan for performing FFT - fftw_plan planzbac = NULL; - fftw_plan planxfor1 = NULL; - fftw_plan planxbac1 = NULL; - fftw_plan planxfor2 = NULL; - fftw_plan planxbac2 = NULL; - fftw_plan planyfor = NULL; - fftw_plan planybac = NULL; - fftw_plan planxr2c = NULL; - fftw_plan planxc2r = NULL; - fftw_plan planyr2c = NULL; - fftw_plan planyc2r = NULL; -// fftw_plan plan3dforward; -// fftw_plan plan3dbackward; - -#if defined(__CUDA) - cufftHandle c_handle = {}; - cufftHandle z_handle = {}; -#elif defined(__ROCM) - hipfftHandle c_handle = {}; - hipfftHandle z_handle = {}; -#endif - -#if defined(__ENABLE_FLOAT_FFTW) - fftwf_plan planfzfor = NULL; - fftwf_plan planfzbac = NULL; - fftwf_plan planfxfor1= NULL; - fftwf_plan planfxbac1= NULL; - fftwf_plan planfxfor2= NULL; - fftwf_plan planfxbac2= NULL; - fftwf_plan planfyfor = NULL; - fftwf_plan planfybac = NULL; - fftwf_plan planfxr2c = NULL; - fftwf_plan planfxc2r = NULL; - fftwf_plan planfyr2c = NULL; - fftwf_plan planfyc2r = NULL; -#endif // defined(__ENABLE_FLOAT_FFTW) - - mutable std::complex* c_auxr_3d = nullptr; // fft space - mutable std::complex* z_auxr_3d = nullptr; // fft space - - mutable std::complex*c_auxg = nullptr, *c_auxr = nullptr; // fft space, - mutable std::complex*z_auxg = nullptr, *z_auxr = nullptr; // fft space - - mutable float* s_rspace = nullptr; // real number space for r, [nplane * nx *ny] - mutable double* d_rspace = nullptr; // real number space for r, [nplane * nx *ny] - - std::string device = "cpu"; - std::string precision = "double"; - -public: - void set_device(std::string device_); - void set_precision(std::string precision_); - -}; -} - -#endif - diff --git a/source/module_basis/module_pw/module_fft/fft_base.cpp b/source/module_basis/module_pw/module_fft/fft_base.cpp deleted file mode 100644 index 4c91d4d7b4..0000000000 --- a/source/module_basis/module_pw/module_fft/fft_base.cpp +++ /dev/null @@ -1,8 +0,0 @@ -#include "fft_base.h" -namespace ModulePW -{ -template FFT_BASE::FFT_BASE(); -template FFT_BASE::FFT_BASE(); -template FFT_BASE::~FFT_BASE(); -template FFT_BASE::~FFT_BASE(); -} \ No newline at end of file diff --git a/source/module_basis/module_pw/module_fft/fft_base.h b/source/module_basis/module_pw/module_fft/fft_base.h index c1b105f1fd..b64b6f4e00 100644 --- a/source/module_basis/module_pw/module_fft/fft_base.h +++ b/source/module_basis/module_pw/module_fft/fft_base.h @@ -1,8 +1,7 @@ -#include -#include -#include "fftw3.h" #ifndef FFT_BASE_H #define FFT_BASE_H + +#include namespace ModulePW { template @@ -164,5 +163,10 @@ class FFT_BASE int ny=0; int nz=0; }; + +template FFT_BASE::FFT_BASE(); +template FFT_BASE::FFT_BASE(); +template FFT_BASE::~FFT_BASE(); +template FFT_BASE::~FFT_BASE(); } #endif // FFT_BASE_H diff --git a/source/module_basis/module_pw/module_fft/fft_bundle.cpp b/source/module_basis/module_pw/module_fft/fft_bundle.cpp index a7be7d988d..204bf8f81b 100644 --- a/source/module_basis/module_pw/module_fft/fft_bundle.cpp +++ b/source/module_basis/module_pw/module_fft/fft_bundle.cpp @@ -1,7 +1,8 @@ #include #include "fft_bundle.h" -#include "fft_cpu.h" + #include "module_base/module_device/device.h" +#include "module_base/module_device/memory_op.h" #if defined(__CUDA) #include "fft_cuda.h" #endif diff --git a/source/module_basis/module_pw/module_fft/fft_bundle.h b/source/module_basis/module_pw/module_fft/fft_bundle.h index 6da2419245..71ce5192f3 100644 --- a/source/module_basis/module_pw/module_fft/fft_bundle.h +++ b/source/module_basis/module_pw/module_fft/fft_bundle.h @@ -1,8 +1,9 @@ -#include "fft_base.h" -#include -// #include "module_psi/psi.h" #ifndef FFT_TEMP_H #define FFT_TEMP_H + +#include +#include "fft_base.h" +#include "fft_cpu.h" namespace ModulePW { class FFT_Bundle diff --git a/source/module_basis/module_pw/module_fft/fft_cpu.h b/source/module_basis/module_pw/module_fft/fft_cpu.h index 27c7e862a2..c0fe9992eb 100644 --- a/source/module_basis/module_pw/module_fft/fft_cpu.h +++ b/source/module_basis/module_pw/module_fft/fft_cpu.h @@ -1,12 +1,8 @@ -#include "fft_base.h" -#include "fftw3.h" - -// #ifdef __ENABLE_FLOAT_FFTW - -// #endif -// #endif #ifndef FFT_CPU_H #define FFT_CPU_H + +#include "fft_base.h" +#include "fftw3.h" namespace ModulePW { template @@ -33,7 +29,6 @@ class FFT_CPU : public FFT_BASE * @param gamma_only_in whether only gamma point is used. * @param xprime_in whether xprime is used. */ - __attribute__((weak)) void initfft(int nx_in, int ny_in, int nz_in, @@ -44,6 +39,7 @@ class FFT_CPU : public FFT_BASE int nproc_in, bool gamma_only_in, bool xprime_in = true) override; + __attribute__((weak)) void setupFFT() override; diff --git a/source/module_basis/module_pw/module_fft/fft_cpu_float.cpp b/source/module_basis/module_pw/module_fft/fft_cpu_float.cpp index c13d47f762..b3e8d7d572 100644 --- a/source/module_basis/module_pw/module_fft/fft_cpu_float.cpp +++ b/source/module_basis/module_pw/module_fft/fft_cpu_float.cpp @@ -267,11 +267,11 @@ void FFT_CPU::setupFFT() } template <> -void FFT_CPU::clearfft(fftw_plan& plan) +void FFT_CPU::clearfft(fftwf_plan& plan) { if (plan) { - fftw_destroy_plan(plan); + fftwf_destroy_plan(plan); plan = nullptr; } } @@ -279,18 +279,18 @@ void FFT_CPU::clearfft(fftw_plan& plan) template <> void FFT_CPU::cleanFFT() { - clearfft(planzfor); - clearfft(planzbac); - clearfft(planxfor1); - clearfft(planxbac1); - clearfft(planxfor2); - clearfft(planxbac2); - clearfft(planyfor); - clearfft(planybac); - clearfft(planxr2c); - clearfft(planxc2r); - clearfft(planyr2c); - clearfft(planyc2r); + clearfft(planfzfor); + clearfft(planfzbac); + clearfft(planfxfor1); + clearfft(planfxbac1); + clearfft(planfxfor2); + clearfft(planfxbac2); + clearfft(planfyfor); + clearfft(planfybac); + clearfft(planfxr2c); + clearfft(planfxc2r); + clearfft(planfyr2c); + clearfft(planfyc2r); } diff --git a/source/module_basis/module_pw/module_fft/fft_cuda.cpp b/source/module_basis/module_pw/module_fft/fft_cuda.cpp index f9fc5df74b..db93fb07fb 100644 --- a/source/module_basis/module_pw/module_fft/fft_cuda.cpp +++ b/source/module_basis/module_pw/module_fft/fft_cuda.cpp @@ -1,6 +1,7 @@ #include "fft_cuda.h" #include "module_base/module_device/memory_op.h" #include "module_hamilt_pw/hamilt_pwdft/global.h" + namespace ModulePW { template @@ -105,4 +106,9 @@ template <> std::complex* FFT_CUDA::get_auxr_3d_data() const {return this->c_auxr_3d;} template <> std::complex* FFT_CUDA::get_auxr_3d_data() const {return this->z_auxr_3d;} + +template FFT_CUDA::FFT_CUDA(); +template FFT_CUDA::~FFT_CUDA(); +template FFT_CUDA::FFT_CUDA(); +template FFT_CUDA::~FFT_CUDA(); }// namespace ModulePW \ No newline at end of file diff --git a/source/module_basis/module_pw/module_fft/fft_cuda.h b/source/module_basis/module_pw/module_fft/fft_cuda.h index 90192d24dc..4942ee33f2 100644 --- a/source/module_basis/module_pw/module_fft/fft_cuda.h +++ b/source/module_basis/module_pw/module_fft/fft_cuda.h @@ -1,9 +1,9 @@ +#ifndef FFT_CUDA_H +#define FFT_CUDA_H + #include "fft_base.h" #include "cufft.h" #include "cuda_runtime.h" - -#ifndef FFT_CUDA_H -#define FFT_CUDA_H namespace ModulePW { template @@ -62,9 +62,6 @@ class FFT_CUDA : public FFT_BASE std::complex* z_auxr_3d = nullptr; // fft space }; -template FFT_CUDA::FFT_CUDA(); -template FFT_CUDA::~FFT_CUDA(); -template FFT_CUDA::FFT_CUDA(); -template FFT_CUDA::~FFT_CUDA(); + } // namespace ModulePW #endif \ No newline at end of file diff --git a/source/module_basis/module_pw/module_fft/fft_rocm.h b/source/module_basis/module_pw/module_fft/fft_rocm.h index 2d2cbd0c21..10e7751da6 100644 --- a/source/module_basis/module_pw/module_fft/fft_rocm.h +++ b/source/module_basis/module_pw/module_fft/fft_rocm.h @@ -1,8 +1,10 @@ + +#ifndef FFT_ROCM_H +#define FFT_ROCM_H + #include "fft_base.h" #include #include -#ifndef FFT_ROCM_H -#define FFT_ROCM_H namespace ModulePW { template diff --git a/source/module_basis/module_pw/pw_basis.h b/source/module_basis/module_pw/pw_basis.h index 00aba50971..9bd48d270f 100644 --- a/source/module_basis/module_pw/pw_basis.h +++ b/source/module_basis/module_pw/pw_basis.h @@ -1,11 +1,11 @@ #ifndef PWBASIS_H #define PWBASIS_H +#include "module_base/module_device/memory_op.h" #include "module_base/matrix.h" #include "module_base/matrix3.h" #include "module_base/vector3.h" #include -#include "fft.h" #include "module_fft/fft_bundle.h" #include #ifdef __MPI diff --git a/source/module_basis/module_pw/pw_transform.cpp b/source/module_basis/module_pw/pw_transform.cpp index d8534c7f0a..8e458b2561 100644 --- a/source/module_basis/module_pw/pw_transform.cpp +++ b/source/module_basis/module_pw/pw_transform.cpp @@ -1,4 +1,3 @@ -#include "fft.h" #include "module_fft/fft_bundle.h" #include #include "pw_basis.h" diff --git a/source/module_basis/module_pw/test/Makefile b/source/module_basis/module_pw/test/Makefile index 884f0f74c0..df91138107 100644 --- a/source/module_basis/module_pw/test/Makefile +++ b/source/module_basis/module_pw/test/Makefile @@ -2,7 +2,7 @@ # Please set # e.g. make CXX=mpiicpc or make CXX=icpc #====================================================================== -CXX = mpiicpx +CXX = mpiicpc # mpiicpc: compile intel parallel version # icpc: compile intel sequential version # mpicxx: compile gnu parallel version @@ -94,7 +94,7 @@ endif ##========================== ## GTEST ##========================== -GTESTOPTS = -I${GTEST_DIR}/include -L${GTEST_DIR}/lib -lgtest -lpthread +GTESTOPTS = -I${GTEST_DIR}/include -L${GTEST_DIR}/lib -lgtest -lpthread -w @@ -106,6 +106,7 @@ VPATH=../../../module_base\ ../../../module_base/module_container/ATen/core\ ../../../module_base/module_container/ATen\ ../../../module_parameter\ +../module_fft\ ../\ MATH_OBJS0=matrix.o\ @@ -120,7 +121,6 @@ pw_transform.o\ pw_distributeg.o\ pw_distributeg_method1.o\ pw_distributeg_method2.o\ -fft.o\ pw_basis_k.o\ pw_basis_sup.o\ pw_transform_k.o\ @@ -128,9 +128,10 @@ memory.o\ memory_op.o\ depend_mock.o\ parameter.o\ -fft_base.o\ -fft_bundle.o\ fft_cpu.o\ +fft_cpu_float.o\ +fft_bundle.o\ + OTHER_OBJS0= diff --git a/source/module_basis/module_pw/test_serial/CMakeLists.txt b/source/module_basis/module_pw/test_serial/CMakeLists.txt index 028d5b3a0e..7e9356d022 100644 --- a/source/module_basis/module_pw/test_serial/CMakeLists.txt +++ b/source/module_basis/module_pw/test_serial/CMakeLists.txt @@ -9,8 +9,6 @@ remove_definitions(-D__DEEPKS) add_library( planewave_serial OBJECT - ../fft.cpp - ../module_fft/fft_base.cpp ../module_fft/fft_bundle.cpp ../module_fft/fft_cpu.cpp ../pw_basis.cpp diff --git a/source/module_basis/module_pw/test_serial/pw_basis_k_test.cpp b/source/module_basis/module_pw/test_serial/pw_basis_k_test.cpp index e5fac0ef4c..75f352f474 100644 --- a/source/module_basis/module_pw/test_serial/pw_basis_k_test.cpp +++ b/source/module_basis/module_pw/test_serial/pw_basis_k_test.cpp @@ -27,7 +27,6 @@ #define private public #include "../pw_basis_k.h" #include "../pw_basis.h" -#include "../fft.h" #undef private #undef protected diff --git a/source/module_basis/module_pw/test_serial/pw_basis_test.cpp b/source/module_basis/module_pw/test_serial/pw_basis_test.cpp index 89a84c43b3..eeff14b8f9 100644 --- a/source/module_basis/module_pw/test_serial/pw_basis_test.cpp +++ b/source/module_basis/module_pw/test_serial/pw_basis_test.cpp @@ -38,7 +38,6 @@ #define protected public #define private public #include "../pw_basis.h" -#include "../fft.h" #undef private #undef protected diff --git a/source/module_elecstate/test/charge_extra_test.cpp b/source/module_elecstate/test/charge_extra_test.cpp index 63478bf724..15673e2a5e 100644 --- a/source/module_elecstate/test/charge_extra_test.cpp +++ b/source/module_elecstate/test/charge_extra_test.cpp @@ -64,12 +64,6 @@ PW_Basis::PW_Basis() PW_Basis::~PW_Basis() { } -FFT::FFT() -{ -} -FFT::~FFT() -{ -} FFT_Bundle::~FFT_Bundle(){}; void PW_Basis::initgrids(const double lat0_in, const ModuleBase::Matrix3 latvec_in, const double gridecut) { diff --git a/source/module_elecstate/test/elecstate_base_test.cpp b/source/module_elecstate/test/elecstate_base_test.cpp index 6115b58a9b..c0da5a82ea 100644 --- a/source/module_elecstate/test/elecstate_base_test.cpp +++ b/source/module_elecstate/test/elecstate_base_test.cpp @@ -50,12 +50,6 @@ ModulePW::PW_Basis::~PW_Basis() ModulePW::PW_Basis_Sup::~PW_Basis_Sup() { } -ModulePW::FFT::FFT() -{ -} -ModulePW::FFT::~FFT() -{ -} ModulePW::FFT_Bundle::~FFT_Bundle(){}; void ModulePW::PW_Basis::initgrids(double, ModuleBase::Matrix3, double) { diff --git a/source/module_hamilt_general/module_xc/test/CMakeLists.txt b/source/module_hamilt_general/module_xc/test/CMakeLists.txt index b93e3a6ddb..0dda934ac6 100644 --- a/source/module_hamilt_general/module_xc/test/CMakeLists.txt +++ b/source/module_hamilt_general/module_xc/test/CMakeLists.txt @@ -43,7 +43,6 @@ AddTest( ../../../module_base/libm/branred.cpp ../../../module_base/libm/sincos.cpp ../../../module_base/blas_connector.cpp - ../../../module_basis/module_pw/module_fft/fft_base.cpp ../../../module_basis/module_pw/module_fft/fft_bundle.cpp ../../../module_basis/module_pw/module_fft/fft_cpu.cpp ${FFT_SRC} @@ -82,7 +81,6 @@ AddTest( ../../../module_base/timer.cpp ../../../module_base/libm/branred.cpp ../../../module_base/libm/sincos.cpp - ../../../module_basis/module_pw/module_fft/fft_base.cpp ../../../module_basis/module_pw/module_fft/fft_bundle.cpp ../../../module_basis/module_pw/module_fft/fft_cpu.cpp ${FFT_SRC} diff --git a/source/module_hamilt_general/module_xc/test/xc3_mock.h b/source/module_hamilt_general/module_xc/test/xc3_mock.h index 8ecccd932d..6f812f52a0 100644 --- a/source/module_hamilt_general/module_xc/test/xc3_mock.h +++ b/source/module_hamilt_general/module_xc/test/xc3_mock.h @@ -132,8 +132,6 @@ namespace ModulePW const double factor) const; #endif - FFT::FFT(){}; - FFT::~FFT(){}; void PW_Basis::initgrids(double, ModuleBase::Matrix3, double){}; void PW_Basis::distribute_r(){}; @@ -165,7 +163,7 @@ namespace ModuleBase namespace GlobalV { std::string BASIS_TYPE = ""; - bool CAL_STRESS = 0; + bool CAL_STRESS = false; int CAL_FORCE = 0; int NSPIN; int NPOL; diff --git a/source/module_hamilt_lcao/module_gint/kernels/cuda/gemm_selector.cuh b/source/module_hamilt_lcao/module_gint/kernels/cuda/gemm_selector.cuh index 380a16c842..f52b7a8643 100644 --- a/source/module_hamilt_lcao/module_gint/kernels/cuda/gemm_selector.cuh +++ b/source/module_hamilt_lcao/module_gint/kernels/cuda/gemm_selector.cuh @@ -2,7 +2,7 @@ #define GEMM_SELECTOR_H #include "module_cell/unitcell.h" - +#include "cuda_runtime.h" typedef std::functiongk2[igl]; } -FFT::FFT() {} - -FFT::~FFT() {} } // namespace ModulePW