deepmodeling
diff --git a/‎source/Makefile.Objects‎
Lines changed: 0 additions & 1 deletion b/‎source/Makefile.Objects‎
Lines changed: 0 additions & 1 deletion
diff --git a/‎source/module_base/test/CMakeLists.txt‎
Lines changed: 5 additions & 0 deletions b/‎source/module_base/test/CMakeLists.txt‎
Lines changed: 5 additions & 0 deletions
diff --git a/‎source/module_base/test/test_lebedev_laikov.cpp‎
Lines changed: 152 additions & 0 deletions b/‎source/module_base/test/test_lebedev_laikov.cpp‎
Lines changed: 152 additions & 0 deletions
diff --git a/‎source/module_basis/module_pw/CMakeLists.txt‎
Lines changed: 11 additions & 0 deletions b/‎source/module_basis/module_pw/CMakeLists.txt‎
Lines changed: 11 additions & 0 deletions
diff --git a/‎source/module_basis/module_pw/module_fft/fft_base.h‎
Lines changed: 5 additions & 0 deletions b/‎source/module_basis/module_pw/module_fft/fft_base.h‎
Lines changed: 5 additions & 0 deletions
diff --git a/‎source/module_basis/module_pw/module_fft/fft_bundle.cpp‎
Lines changed: 22 additions & 13 deletions b/‎source/module_basis/module_pw/module_fft/fft_bundle.cpp‎
Lines changed: 22 additions & 13 deletions
diff --git a/‎source/module_basis/module_pw/module_fft/fft_bundle.h‎
Lines changed: 1 addition & 1 deletion b/‎source/module_basis/module_pw/module_fft/fft_bundle.h‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎source/module_basis/module_pw/module_fft/fft_cpu_float.cpp‎
Lines changed: 1 addition & 1 deletion b/‎source/module_basis/module_pw/module_fft/fft_cpu_float.cpp‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎source/module_basis/module_pw/module_fft/fft_cuda.cpp‎
Lines changed: 108 additions & 0 deletions b/‎source/module_basis/module_pw/module_fft/fft_cuda.cpp‎
Lines changed: 108 additions & 0 deletions
@@ -253,7 +253,6 @@ OBJS_ESOLVER=esolver.o\
 OBJS_ESOLVER_LCAO=esolver_ks_lcao.o\
       esolver_ks_lcao_tddft.o\
       dpks_cal_e_delta_band.o\
-      dftu_cal_occup_m.o\
       set_matrix_grid.o\
       lcao_before_scf.o\
       lcao_gets.o\
 
@@ -230,6 +230,11 @@ AddTest(
   SOURCES formatter_test.cpp
 )
 
+AddTest(
+  TARGET lebedev_laikov  # test target built from test_lebedev_laikov.cpp
+  SOURCES test_lebedev_laikov.cpp ../ylm.cpp ../math_lebedev_laikov.cpp  # the two parent-directory sources are compiled into the test directly
+)
+
 if(ENABLE_GOOGLEBENCH)
   AddTest(
     TARGET perf_sphbes
 
@@ -0,0 +1,152 @@
+#include "module_base/math_lebedev_laikov.h"
+#include "module_base/ylm.h"
+
+#include "gtest/gtest.h"
+#include <cmath>
+#include <random>
+#include <set>
+#include <string>
+#include <utility>
+#include <vector>
+#ifdef __MPI
+#include <mpi.h>
+#endif
+
+using ModuleBase::Lebedev_laikov_grid;
+
+// Stub of ModuleBase::WARNING_QUIT provided by the test itself so that no
+// additional dependency has to be pulled in. It ignores both arguments and
+// simply returns.
+namespace ModuleBase
+{
+void WARNING_QUIT(const std::string&, const std::string&)
+{
+}
+} // namespace ModuleBase
+
+// Fixture shared by the Lebedev-Laikov quadrature tests.
+class LebedevLaikovTest : public ::testing::Test
+{
+  protected:
+    // Largest absolute difference accepted between a quadrature result and
+    // its reference value.
+    const double tol = 1e-12;
+
+    // Resizes coef to (lmax + 1)^2 entries, fills it with random numbers and
+    // rescales them so that the squares of the entries sum to one.
+    void randgen(int lmax, std::vector<double>& coef);
+};
+
+
+// Fills coef with (lmax + 1)^2 pseudo-random coefficients drawn uniformly
+// from [0, 1) and rescales them so that the sum of their squares is one.
+//
+// The engine is seeded with a fixed value so that a failing run can be
+// reproduced exactly. It is kept alive between calls, so successive calls
+// within one process still yield different coefficient sets.
+//
+// @param lmax  largest angular momentum; determines the number of coefficients
+// @param coef  output vector, resized and overwritten
+void LebedevLaikovTest::randgen(int lmax, std::vector<double>& coef) {
+    coef.resize((lmax + 1) * (lmax + 1));
+
+    // fixed seed: seeding from std::random_device makes failures irreproducible
+    static std::mt19937 gen(20240101u);
+    std::uniform_real_distribution<double> dis(0.0, 1.0);
+    for (double& c : coef) {
+        c = dis(gen);
+    }
+
+    // squared 2-norm of the raw coefficients
+    double norm2 = 0.0;
+    for (const double c : coef) {
+        norm2 += c * c;
+    }
+
+    // normalize the coefficients
+    const double fac = 1.0 / std::sqrt(norm2);
+    for (double& c : coef) {
+        c *= fac;
+    }
+}
+
+
+TEST_F(LebedevLaikovTest, Accuracy) {
+    /*
+     * Given
+     *
+     *      f = c[0]*Y00 + c[1]*Y10 + c[2]*Y11 + ...,
+     *
+     * where c[0], c[1], c[2], ... are some random numbers, the integration
+     * of |f|^2 on the unit sphere
+     *
+     *      \int |f|^2 d\Omega = c[0]^2 + c[1]^2 + c[2]^2 + ... .
+     *
+     * This test verifies with the above integral that quadrature with
+     * Lebedev grid is exact up to floating point errors.
+     *
+     * Every failure message names the grid it belongs to, so a broken grid
+     * can be identified directly from the test output.
+     *
+     */
+
+    // (ngrid, lmax)
+    std::set<std::pair<int, int>> supported = {
+        {6, 3},
+        {14, 5},
+        {26, 7},
+        {38, 9},
+        {50, 11},
+        {74, 13},
+        {86, 15},
+        {110, 17},
+        {146, 19},
+        {170, 21},
+        {194, 23},
+        {230, 25},
+        {266, 27},
+        {302, 29},
+        {350, 31},
+        {434, 35},
+        {590, 41},
+        {770, 47},
+        {974, 53},
+        {1202, 59},
+        {1454, 65},
+        {1730, 71},
+        {2030, 77},
+        {2354, 83},
+        {2702, 89},
+        {3074, 95},
+        {3470, 101},
+        {3890, 107},
+        {4334, 113},
+        {4802, 119},
+        {5294, 125},
+        {5810, 131},
+    };
+
+    std::vector<double> coef;
+
+    for (const auto& grid_info : supported) {
+        const int ngrid = grid_info.first;
+        const int grid_lmax = grid_info.second;
+
+        Lebedev_laikov_grid lebgrid(ngrid);
+        lebgrid.generate_grid_points();
+
+        const double* weight = lebgrid.get_weight();
+        const ModuleBase::Vector3<double>* grid = lebgrid.get_grid_coor();
+
+        // |f|^2 has degree at most 2 * func_lmax, which stays within grid_lmax
+        const int func_lmax = grid_lmax / 2;
+        randgen(func_lmax, coef);
+
+        double val = 0.0;
+        std::vector<double> ylm_real;
+        for (int i = 0; i < ngrid; i++) {
+            ModuleBase::Ylm::sph_harm(func_lmax,
+                    grid[i].x, grid[i].y, grid[i].z, ylm_real);
+
+            // guard the indexed reads below against a result that is too short
+            ASSERT_GE(ylm_real.size(), coef.size())
+                << "ngrid = " << ngrid << ", func_lmax = " << func_lmax;
+
+            double tmp = 0.0;
+            for (size_t j = 0; j < coef.size(); ++j) {
+                tmp += coef[j] * ylm_real[j];
+            }
+            val += weight[i] * tmp * tmp;
+        }
+
+        double val_ref = 0.0;
+        for (const double c : coef) {
+            val_ref += c * c;
+        }
+
+        const double abs_diff = std::abs(val - val_ref);
+        EXPECT_LT(abs_diff, tol)
+            << "ngrid = " << ngrid << ", func_lmax = " << func_lmax;
+    }
+}
+
+
+// Test entry point: runs every registered GoogleTest case. When the build
+// defines __MPI the run is bracketed by MPI_Init / MPI_Finalize.
+int main(int argc, char** argv)
+{
+#ifdef __MPI
+    MPI_Init(&argc, &argv);
+#endif
+
+    ::testing::InitGoogleTest(&argc, argv);
+    const int status = RUN_ALL_TESTS();
+
+#ifdef __MPI
+    MPI_Finalize();
+#endif
+
+    // hand the GoogleTest result back as the process exit code
+    return status;
+}
@@ -3,6 +3,17 @@ if (ENABLE_FLOAT_FFTW)
     module_fft/fft_cpu_float.cpp
   )
 endif()
+if (USE_CUDA)  # append the CUDA FFT source only when USE_CUDA is set
+  list (APPEND FFT_SRC
+    module_fft/fft_cuda.cpp
+  )
+endif()
+if (USE_ROCM)  # append the ROCm FFT source only when USE_ROCM is set
+  list (APPEND FFT_SRC
+    module_fft/fft_rcom.cpp  # NOTE(review): spelled "rcom", matching the fft_rcom.h include in fft_bundle.cpp -- confirm the file exists under this name
+  )
+endif()
+
 list(APPEND objects
     fft.cpp
     pw_basis.cpp
 
@@ -30,6 +30,11 @@ class FFT_BASE
                  bool gamma_only_in, 
                  bool xprime_in = true);
 
+    virtual __attribute__((weak)) // GNU-specific attribute marking the symbol weak; NOTE(review): presumably here so classes that never define this overload still link -- confirm
+    void initfft(int nx_in, // overload taking only the three grid sizes
+                 int ny_in, 
+                 int nz_in);
+
     /**
      * @brief Setup the fft Plan and data As pure virtual function.
      * 
 
@@ -2,12 +2,12 @@
 #include "fft_bundle.h"
 #include "fft_cpu.h"
 #include "module_base/module_device/device.h"
-// #if defined(__CUDA)
-// #include "fft_cuda.h"
-// #endif
-// #if defined(__ROCM)
-// #include "fft_rcom.h"
-// #endif
+#if defined(__CUDA)
+#include "fft_cuda.h"
+#endif
+#if defined(__ROCM)
+#include "fft_rcom.h"
+#endif
 
 template<typename FFT_BASE, typename... Args>
 std::unique_ptr<FFT_BASE> make_unique(Args &&... args)
@@ -16,6 +16,11 @@ std::unique_ptr<FFT_BASE> make_unique(Args &&... args)
 }
 namespace ModulePW
 {
+// Destructor: hands all cleanup over to clear().
+FFT_Bundle::~FFT_Bundle()
+{
+    clear();
+}
+
 void FFT_Bundle::setfft(std::string device_in,std::string precision_in)
 {
     this->device = device_in;
@@ -83,13 +88,17 @@ void FFT_Bundle::initfft(int nx_in,
     }
     if (device=="gpu")
     {
-        // #if defined(__ROCM)
-        //     fft_float = new FFT_RCOM<float>();
-        //     fft_double = new FFT_RCOM<double>();
-        // #elif defined(__CUDA)
-        //     fft_float = make_unique<FFT_CUDA<float>>();
-        //     fft_double = make_unique<FFT_CUDA<double>>();
-        // #endif
+        #if defined(__ROCM)
+            fft_float = new FFT_RCOM<float>();
+            fft_float->initfft(nx_in,ny_in,nz_in);
+            fft_double = new FFT_RCOM<double>();
+            fft_double->initfft(nx_in,ny_in,nz_in);
+        #elif defined(__CUDA)
+            fft_float = make_unique<FFT_CUDA<float>>();
+            fft_float->initfft(nx_in,ny_in,nz_in);
+            fft_double = make_unique<FFT_CUDA<double>>();
+            fft_double->initfft(nx_in,ny_in,nz_in);
+        #endif
     }
 
 }
 
@@ -9,7 +9,7 @@ class FFT_Bundle
 {
     public:
         FFT_Bundle(){};
-        ~FFT_Bundle(){};
+        ~FFT_Bundle();
         /**
          * @brief Constructor with device and precision.
          * @param device_in  device type, cpu or gpu.
 
@@ -303,7 +303,7 @@ void FFT_CPU<float>::clear()
         fftw_free(c_auxg);
         c_auxg = nullptr;
     }
-    if (z_auxr != nullptr)
+    if (c_auxr != nullptr)
     {
         fftw_free(c_auxr);
         c_auxr = nullptr;
 
@@ -0,0 +1,108 @@
+#include "fft_cuda.h"
+#include "module_base/module_device/memory_op.h"
+#include "module_hamilt_pw/hamilt_pwdft/global.h"
+namespace ModulePW
+{
+// Records the three grid dimensions on the object; nothing else is done here.
+template <typename FPTYPE>
+void FFT_CUDA<FPTYPE>::initfft(int nx_in, int ny_in, int nz_in)
+{
+    this->nz = nz_in;
+    this->ny = ny_in;
+    this->nx = nx_in;
+}
+// Creates the single-precision 3D complex-to-complex cuFFT plan for the
+// stored nx x ny x nz grid, then requests nx * ny * nz elements for
+// c_auxr_3d through resmem_cd_op.
+template <>
+void FFT_CUDA<float>::setupFFT()
+{
+    // check the plan creation result the same way the fft3D_* functions check
+    // their cuFFT calls, instead of silently keeping an unusable handle
+    CHECK_CUFFT(cufftPlan3d(&c_handle, this->nx, this->ny, this->nz, CUFFT_C2C));
+    resmem_cd_op()(gpu_ctx, this->c_auxr_3d, this->nx * this->ny * this->nz);
+}
+// Creates the double-precision 3D complex-to-complex cuFFT plan for the
+// stored nx x ny x nz grid, then requests nx * ny * nz elements for
+// z_auxr_3d through resmem_zd_op.
+template <>
+void FFT_CUDA<double>::setupFFT()
+{
+    // check the plan creation result the same way the fft3D_* functions check
+    // their cuFFT calls, instead of silently keeping an unusable handle
+    CHECK_CUFFT(cufftPlan3d(&z_handle, this->nx, this->ny, this->nz, CUFFT_Z2Z));
+    resmem_zd_op()(gpu_ctx, this->z_auxr_3d, this->nx * this->ny * this->nz);
+}
+// Destroys the single-precision cuFFT plan when the handle is set, then
+// resets the handle to its value-initialized state.
+template <>
+void FFT_CUDA<float>::cleanFFT()
+{
+    if (!c_handle)
+    {
+        return; // no plan to destroy
+    }
+    cufftDestroy(c_handle);
+    c_handle = {};
+}
+// Destroys the double-precision cuFFT plan when the handle is set, then
+// resets the handle to its value-initialized state.
+template <>
+void FFT_CUDA<double>::cleanFFT()
+{
+    if (!z_handle)
+    {
+        return; // no plan to destroy
+    }
+    cufftDestroy(z_handle);
+    z_handle = {};
+}
+// Tears down the single-precision state: first the plan via cleanFFT(), then
+// the c_auxr_3d buffer (through delmem_cd_op) if it is non-null.
+template <>
+void FFT_CUDA<float>::clear()
+{
+    this->cleanFFT();
+    if (c_auxr_3d == nullptr)
+    {
+        return; // buffer already released or never set
+    }
+    delmem_cd_op()(gpu_ctx, c_auxr_3d);
+    c_auxr_3d = nullptr;
+}
+// Tears down the double-precision state: first the plan via cleanFFT(), then
+// the z_auxr_3d buffer (through delmem_zd_op) if it is non-null.
+template <>
+void FFT_CUDA<double>::clear()
+{
+    this->cleanFFT();
+    if (z_auxr_3d == nullptr)
+    {
+        return; // buffer already released or never set
+    }
+    delmem_zd_op()(gpu_ctx, z_auxr_3d);
+    z_auxr_3d = nullptr;
+}
+
+// Executes the single-precision plan in c_handle in the forward direction,
+// reading from in and writing to out. The cuFFT result goes through
+// CHECK_CUFFT.
+template <>
+void FFT_CUDA<float>::fft3D_forward(std::complex<float>* in, std::complex<float>* out) const
+{
+    // cuFFT expects its own complex type; reinterpret the std::complex pointers
+    auto* const src = reinterpret_cast<cufftComplex*>(in);
+    auto* const dst = reinterpret_cast<cufftComplex*>(out);
+    CHECK_CUFFT(cufftExecC2C(this->c_handle, src, dst, CUFFT_FORWARD));
+}
+// Executes the double-precision plan in z_handle in the forward direction,
+// reading from in and writing to out. The cuFFT result goes through
+// CHECK_CUFFT.
+template <>
+void FFT_CUDA<double>::fft3D_forward(std::complex<double>* in, std::complex<double>* out) const
+{
+    // cuFFT expects its own complex type; reinterpret the std::complex pointers
+    auto* const src = reinterpret_cast<cufftDoubleComplex*>(in);
+    auto* const dst = reinterpret_cast<cufftDoubleComplex*>(out);
+    CHECK_CUFFT(cufftExecZ2Z(this->z_handle, src, dst, CUFFT_FORWARD));
+}
+// Executes the single-precision plan in c_handle in the inverse direction,
+// reading from in and writing to out. The cuFFT result goes through
+// CHECK_CUFFT.
+template <>
+void FFT_CUDA<float>::fft3D_backward(std::complex<float>* in, std::complex<float>* out) const
+{
+    // cuFFT expects its own complex type; reinterpret the std::complex pointers
+    auto* const src = reinterpret_cast<cufftComplex*>(in);
+    auto* const dst = reinterpret_cast<cufftComplex*>(out);
+    CHECK_CUFFT(cufftExecC2C(this->c_handle, src, dst, CUFFT_INVERSE));
+}
+
+// Executes the double-precision plan in z_handle in the inverse direction,
+// reading from in and writing to out. The cuFFT result goes through
+// CHECK_CUFFT.
+template <>
+void FFT_CUDA<double>::fft3D_backward(std::complex<double>* in, std::complex<double>* out) const
+{
+    // cuFFT expects its own complex type; reinterpret the std::complex pointers
+    auto* const src = reinterpret_cast<cufftDoubleComplex*>(in);
+    auto* const dst = reinterpret_cast<cufftDoubleComplex*>(out);
+    CHECK_CUFFT(cufftExecZ2Z(this->z_handle, src, dst, CUFFT_INVERSE));
+}
+// Returns the single-precision auxiliary buffer pointer c_auxr_3d.
+template <>
+std::complex<float>* FFT_CUDA<float>::get_auxr_3d_data() const
+{
+    return c_auxr_3d;
+}
+// Returns the double-precision auxiliary buffer pointer z_auxr_3d.
+template <>
+std::complex<double>* FFT_CUDA<double>::get_auxr_3d_data() const
+{
+    return z_auxr_3d;
+}
+}// namespace ModulePW
Original file line number	Diff line number	Diff line change
`@@ -9,7 +9,7 @@ class FFT_Bundle`
`9`	`9`	`{`
`10`	`10`	`public:`
`11`	`11`	`FFT_Bundle(){};`
`12`		`- ~FFT_Bundle(){};`
	`12`	`+ ~FFT_Bundle();`
`13`	`13`	`/**`
`14`	`14`	`* @brief Constructor with device and precision.`
`15`	`15`	`* @param device_in device type, cpu or gpu.`
Original file line number	Diff line number	Diff line change
`@@ -303,7 +303,7 @@ void FFT_CPU<float>::clear()`
`303`	`303`	`fftw_free(c_auxg);`
`304`	`304`	`c_auxg = nullptr;`
`305`	`305`	`}`
`306`		`- if (z_auxr != nullptr)`
	`306`	`+ if (c_auxr != nullptr)`
`307`	`307`	`{`
`308`	`308`	`fftw_free(c_auxr);`
`309`	`309`	`c_auxr = nullptr;`