deepmodeling
diff --git a/source/Makefile.Objects b/source/Makefile.Objects
Lines changed: 0 additions & 2 deletions
diff --git a/source/module_base/formatter.h b/source/module_base/formatter.h
Lines changed: 14 additions & 0 deletions
diff --git a/source/module_base/module_mixing/test/mixing_test.cpp b/source/module_base/module_mixing/test/mixing_test.cpp
Lines changed: 8 additions & 0 deletions
diff --git a/source/module_base/test/CMakeLists.txt b/source/module_base/test/CMakeLists.txt
Lines changed: 5 additions & 0 deletions
diff --git a/source/module_base/test/test_lebedev_laikov.cpp b/source/module_base/test/test_lebedev_laikov.cpp
Lines changed: 152 additions & 0 deletions
diff --git a/source/module_basis/module_ao/parallel_orbitals.cpp b/source/module_basis/module_ao/parallel_orbitals.cpp
Lines changed: 4 additions & 1 deletion
diff --git a/source/module_basis/module_pw/CMakeLists.txt b/source/module_basis/module_pw/CMakeLists.txt
Lines changed: 11 additions & 0 deletions
diff --git a/source/module_basis/module_pw/module_fft/fft_base.h b/source/module_basis/module_pw/module_fft/fft_base.h
Lines changed: 5 additions & 0 deletions
diff --git a/source/module_basis/module_pw/module_fft/fft_bundle.cpp b/source/module_basis/module_pw/module_fft/fft_bundle.cpp
Lines changed: 22 additions & 13 deletions
diff --git a/source/module_basis/module_pw/module_fft/fft_bundle.h b/source/module_basis/module_pw/module_fft/fft_bundle.h
Lines changed: 1 addition & 1 deletion
@@ -253,7 +253,6 @@ OBJS_ESOLVER=esolver.o\
 OBJS_ESOLVER_LCAO=esolver_ks_lcao.o\
       esolver_ks_lcao_tddft.o\
       dpks_cal_e_delta_band.o\
-      dftu_cal_occup_m.o\
       set_matrix_grid.o\
       lcao_before_scf.o\
       lcao_gets.o\
@@ -582,7 +581,6 @@ OBJS_LCAO=evolve_elec.o\
       LCAO_set_fs.o\
       LCAO_set_st.o\
       LCAO_nl_mu.o\
-      LCAO_nnr.o\
       LCAO_set_zero.o\
       LCAO_allocate.o\
       LCAO_set_mat2d.o\
 
@@ -145,6 +145,20 @@ class FmtCore
             [&delim](const std::string& acc, const std::string& s) { return acc + delim + s; });
     }
 
+    /**
+     * @brief Return a copy of `in` with every character converted to upper case.
+     *
+     * Each character is handed to ::toupper as an unsigned char: calling
+     * ::toupper with a negative plain-char value (any byte >= 0x80 where char
+     * is signed) is undefined behavior.
+     *
+     * @param in  string to convert; it is left unmodified.
+     * @return    the converted copy.
+     */
+    static std::string upper(const std::string& in)
+    {
+        std::string dst = in;
+        std::transform(dst.begin(), dst.end(), dst.begin(),
+                       [](unsigned char c) { return static_cast<char>(::toupper(c)); });
+        return dst;
+    }
+
+    /**
+     * @brief Return a copy of `in` with every character converted to lower case.
+     *
+     * Each character is handed to ::tolower as an unsigned char: calling
+     * ::tolower with a negative plain-char value (any byte >= 0x80 where char
+     * is signed) is undefined behavior.
+     *
+     * @param in  string to convert; it is left unmodified.
+     * @return    the converted copy.
+     */
+    static std::string lower(const std::string& in)
+    {
+        std::string dst = in;
+        std::transform(dst.begin(), dst.end(), dst.begin(),
+                       [](unsigned char c) { return static_cast<char>(::tolower(c)); });
+        return dst;
+    }
+
 private:
     std::string fmt_;
     template<typename T>
 
@@ -1,4 +1,6 @@
+#ifdef _OPENMP
 #include <omp.h>
+#endif
 
 #include "../broyden_mixing.h"
 #include "../plain_mixing.h"
@@ -151,7 +153,9 @@ class Mixing_Test : public testing::Test
 
 TEST_F(Mixing_Test, BroydenSolveLinearEq)
 {
+#ifdef _OPENMP
     omp_set_num_threads(1);
+#endif
     init_method("broyden");
     std::vector<double> x_in = xd_ref;
     std::vector<double> x_out(3);
@@ -196,7 +200,9 @@ TEST_F(Mixing_Test, BroydenSolveLinearEq)
 
 TEST_F(Mixing_Test, PulaySolveLinearEq)
 {
+#ifdef _OPENMP
     omp_set_num_threads(1);
+#endif
     init_method("pulay");
     std::vector<double> x_in = xd_ref;
     std::vector<double> x_out(3);
@@ -242,7 +248,9 @@ TEST_F(Mixing_Test, PulaySolveLinearEq)
 
 TEST_F(Mixing_Test, PlainSolveLinearEq)
 {
+#ifdef _OPENMP
     omp_set_num_threads(1);
+#endif
     init_method("plain");
     std::vector<double> x_in = xd_ref;
     std::vector<double> x_out(3);
 
@@ -230,6 +230,11 @@ AddTest(
   SOURCES formatter_test.cpp
 )
 
+# Registers the lebedev_laikov test target. Besides the test source itself,
+# ylm.cpp and math_lebedev_laikov.cpp are compiled directly into the target.
+AddTest(
+  TARGET lebedev_laikov
+  SOURCES test_lebedev_laikov.cpp ../ylm.cpp ../math_lebedev_laikov.cpp
+)
+
 if(ENABLE_GOOGLEBENCH)
   AddTest(
     TARGET perf_sphbes
 
@@ -0,0 +1,152 @@
+#include "module_base/math_lebedev_laikov.h"
+#include "module_base/ylm.h"
+
+#include "gtest/gtest.h"
+#include <random>
+#ifdef __MPI
+#include <mpi.h>
+#endif
+
+using ModuleBase::Lebedev_laikov_grid;
+
+// Mock of ModuleBase::WARNING_QUIT, defined here to prevent an unnecessary
+// dependency. NOTE: this stub has an empty body, so any code path that calls
+// it simply continues running in this test binary.
+namespace ModuleBase {
+void WARNING_QUIT(const std::string&, const std::string&) {}
+}
+
+// Test fixture shared by the Lebedev-Laikov quadrature tests.
+class LebedevLaikovTest: public ::testing::Test {
+protected:
+    // Resizes coef to (lmax+1)^2 entries and fills it with random numbers
+    // rescaled so that the sum of their squares is one.
+    void randgen(int lmax, std::vector<double>& coef);
+    // Upper bound on the absolute error accepted by the tests.
+    const double tol = 1e-12;
+};
+
+
+// Resize `coef` to (lmax+1)^2 entries, fill it with numbers drawn uniformly
+// from [0, 1), then rescale so that the sum of squares equals one.
+void LebedevLaikovTest::randgen(int lmax, std::vector<double>& coef) {
+    coef.resize((lmax + 1) * (lmax + 1));
+
+    // draw the raw coefficients; the engine is seeded from std::random_device
+    // on every call
+    std::random_device seed_source;
+    std::mt19937 engine(seed_source());
+    std::uniform_real_distribution<double> uniform(0.0, 1.0);
+    for (double& c : coef) {
+        c = uniform(engine);
+    }
+
+    // accumulate the sum of squares
+    double norm_sq = 0.0;
+    for (const double c : coef) {
+        norm_sq += c * c;
+    }
+
+    // rescale to unit norm
+    const double scale = 1.0 / std::sqrt(norm_sq);
+    for (double& c : coef) {
+        c *= scale;
+    }
+}
+
+
+TEST_F(LebedevLaikovTest, Accuracy) {
+    /* 
+     * Given
+     *
+     *      f = c[0]*Y00 + c[1]*Y10 + c[2]*Y11 + ...,
+     *
+     * where c[0], c[1], c[2], ... are some random numbers, the integration
+     * of |f|^2 on the unit sphere
+     *
+     *      \int |f|^2 d\Omega = c[0]^2 + c[1]^2 + c[2]^2 + ... .
+     *
+     * This test verifies with the above integral that quadrature with
+     * Lebedev grid is exact up to floating point errors.
+     *
+     */
+
+    // (ngrid, lmax)
+    std::set<std::pair<int, int>> supported = {
+        {6, 3},
+        {14, 5},
+        {26, 7},
+        {38, 9},
+        {50, 11},
+        {74, 13},
+        {86, 15},
+        {110, 17},
+        {146, 19},
+        {170, 21},
+        {194, 23},
+        {230, 25},
+        {266, 27},
+        {302, 29},
+        {350, 31},
+        {434, 35},
+        {590, 41},
+        {770, 47},
+        {974, 53},
+        {1202, 59},
+        {1454, 65},
+        {1730, 71},
+        {2030, 77},
+        {2354, 83},
+        {2702, 89},
+        {3074, 95},
+        {3470, 101},
+        {3890, 107},
+        {4334, 113},
+        {4802, 119},
+        {5294, 125},
+        {5810, 131},
+    };
+
+    std::vector<double> coef;
+
+    for (auto& grid_info: supported) {
+        int ngrid = grid_info.first;
+        int grid_lmax = grid_info.second;
+
+        Lebedev_laikov_grid lebgrid(ngrid);
+        lebgrid.generate_grid_points();
+        
+        const double* weight = lebgrid.get_weight();
+        const ModuleBase::Vector3<double>* grid = lebgrid.get_grid_coor();
+
+        // |f|^2 has degree 2*func_lmax, which must not exceed grid_lmax
+        int func_lmax = grid_lmax / 2;
+        randgen(func_lmax, coef);
+
+        // quadrature: sum over grid points of weight * |f|^2
+        double val = 0.0;
+        std::vector<double> ylm_real;
+        for (int i = 0; i < ngrid; i++) {
+            ModuleBase::Ylm::sph_harm(func_lmax,
+                    grid[i].x, grid[i].y, grid[i].z, ylm_real);
+            double tmp = 0.0;
+            for (size_t j = 0; j < coef.size(); ++j) {
+                tmp += coef[j] * ylm_real[j];
+            }
+            val += weight[i] * tmp * tmp;
+        }
+
+        // exact value: sum of squared coefficients
+        double val_ref = 0.0;
+        for (size_t i = 0; i < coef.size(); ++i) {
+            val_ref += coef[i] * coef[i];
+        }
+
+        // The coefficients are random and the loop covers many grids, so
+        // report which grid failed and by how much.
+        double abs_diff = std::abs(val - val_ref);
+        EXPECT_LT(abs_diff, tol)
+            << "ngrid = " << ngrid << ", func_lmax = " << func_lmax
+            << ", val = " << val << ", val_ref = " << val_ref;
+    }
+}
+
+
+// Test entry point. When compiled with __MPI, MPI is initialized before and
+// finalized after the GoogleTest run; the value of RUN_ALL_TESTS() is returned
+// as the process exit status.
+int main(int argc, char** argv)
+{
+#ifdef __MPI
+    MPI_Init(&argc, &argv);
+#endif
+
+    testing::InitGoogleTest(&argc, argv);
+    int result = RUN_ALL_TESTS();
+
+#ifdef __MPI
+    MPI_Finalize();
+#endif
+
+    return result;
+}
@@ -247,7 +247,10 @@ int Parallel_Orbitals::set_nloc_wfc_Eij(
         }
         else
         {
-            ModuleBase::WARNING_QUIT("Parallel_Orbitals::set_nloc_wfc_Eij", "some processor has no bands-row-blocks.");
+            ModuleBase::WARNING_QUIT("Parallel_Orbitals::set_nloc_wfc_Eij",
+                "The number of columns of the 2D process grid exceeds the number of bands. "
+                "Try launching the calculation with fewer MPI processes."
+            );
         }
     }
     int col_b_bands = block / dim1;
 
@@ -3,6 +3,17 @@ if (ENABLE_FLOAT_FFTW)
     module_fft/fft_cpu_float.cpp
   )
 endif()
+# When USE_CUDA is set, add the CUDA FFT implementation to FFT_SRC.
+if (USE_CUDA)
+  list (APPEND FFT_SRC
+    module_fft/fft_cuda.cpp
+  )
+endif()
+# When USE_ROCM is set, add the ROCm FFT implementation (fft_rcom.cpp) to FFT_SRC.
+if (USE_ROCM)
+  list (APPEND FFT_SRC
+    module_fft/fft_rcom.cpp
+  )
+endif()
+
 list(APPEND objects
     fft.cpp
     pw_basis.cpp
 
@@ -30,6 +30,11 @@ class FFT_BASE
                  bool gamma_only_in, 
                  bool xprime_in = true);
 
+    /**
+     * @brief Overload of initfft that takes only three integer sizes.
+     *
+     * NOTE(review): declared weak and without a body here; presumably this
+     * lets backends that do not override it still link — confirm.
+     * __attribute__((weak)) is GNU extension syntax, so verify that every
+     * targeted compiler accepts it.
+     *
+     * @param nx_in  presumably the grid size along x — TODO confirm
+     * @param ny_in  presumably the grid size along y — TODO confirm
+     * @param nz_in  presumably the grid size along z — TODO confirm
+     */
+    virtual __attribute__((weak))
+    void initfft(int nx_in, 
+                 int ny_in, 
+                 int nz_in);
+
     /**
      * @brief Setup the fft Plan and data As pure virtual function.
      * 
 
@@ -2,12 +2,12 @@
 #include "fft_bundle.h"
 #include "fft_cpu.h"
 #include "module_base/module_device/device.h"
-// #if defined(__CUDA)
-// #include "fft_cuda.h"
-// #endif
-// #if defined(__ROCM)
-// #include "fft_rcom.h"
-// #endif
+#if defined(__CUDA)
+#include "fft_cuda.h"
+#endif
+#if defined(__ROCM)
+#include "fft_rcom.h"
+#endif
 
 template<typename FFT_BASE, typename... Args>
 std::unique_ptr<FFT_BASE> make_unique(Args &&... args)
@@ -16,6 +16,11 @@ std::unique_ptr<FFT_BASE> make_unique(Args &&... args)
 }
 namespace ModulePW
 {
+// Destructor: delegates all cleanup to clear().
+// NOTE(review): clear() is not visible in this hunk; presumably it releases
+// the FFT backends owned by the bundle — confirm.
+FFT_Bundle::~FFT_Bundle()
+{
+    this->clear();
+}
+
 void FFT_Bundle::setfft(std::string device_in,std::string precision_in)
 {
     this->device = device_in;
@@ -83,13 +88,17 @@ void FFT_Bundle::initfft(int nx_in,
     }
     if (device=="gpu")
     {
-        // #if defined(__ROCM)
-        //     fft_float = new FFT_RCOM<float>();
-        //     fft_double = new FFT_RCOM<double>();
-        // #elif defined(__CUDA)
-        //     fft_float = make_unique<FFT_CUDA<float>>();
-        //     fft_double = make_unique<FFT_CUDA<double>>();
-        // #endif
+        // Select the GPU backend at compile time and initialize it with the
+        // grid dimensions. Both branches build the backend with make_unique:
+        // fft_float / fft_double are assigned a std::unique_ptr in the CUDA
+        // branch, and a smart pointer cannot be assigned from a raw `new`
+        // expression.
+        #if defined(__ROCM)
+            fft_float = make_unique<FFT_RCOM<float>>();
+            fft_float->initfft(nx_in,ny_in,nz_in);
+            fft_double = make_unique<FFT_RCOM<double>>();
+            fft_double->initfft(nx_in,ny_in,nz_in);
+        #elif defined(__CUDA)
+            fft_float = make_unique<FFT_CUDA<float>>();
+            fft_float->initfft(nx_in,ny_in,nz_in);
+            fft_double = make_unique<FFT_CUDA<double>>();
+            fft_double->initfft(nx_in,ny_in,nz_in);
+        #endif
     }
 
 }
 
@@ -9,7 +9,7 @@ class FFT_Bundle
 {
     public:
         FFT_Bundle(){};
-        ~FFT_Bundle(){};
+        ~FFT_Bundle();
         /**
          * @brief Constructor with device and precision.
          * @param device_in  device type, cpu or gpu.
Original file line number	Diff line number	Diff line change
`@@ -247,7 +247,10 @@ int Parallel_Orbitals::set_nloc_wfc_Eij(`
`247`	`247`	`}`
`248`	`248`	`else`
`249`	`249`	`{`
`250`		`- ModuleBase::WARNING_QUIT("Parallel_Orbitals::set_nloc_wfc_Eij", "some processor has no bands-row-blocks.");`
	`250`	`+ ModuleBase::WARNING_QUIT("Parallel_Orbitals::set_nloc_wfc_Eij",`
	`251`	`+ "The number of columns of the 2D process grid exceeds the number of bands. "`
	`252`	`+ "Try launching the calculation with fewer MPI processes."`
	`253`	`+ );`
`251`	`254`	`}`
`252`	`255`	`}`
`253`	`256`	`int col_b_bands = block / dim1;`
Original file line number	Diff line number	Diff line change
`@@ -9,7 +9,7 @@ class FFT_Bundle`
`9`	`9`	`{`
`10`	`10`	`public:`
`11`	`11`	`FFT_Bundle(){};`
`12`		`- ~FFT_Bundle(){};`
	`12`	`+ ~FFT_Bundle();`
`13`	`13`	`/**`
`14`	`14`	`* @brief Constructor with device and precision.`
`15`	`15`	`* @param device_in device type, cpu or gpu.`