deepmodeling
diff --git a/‎docs/advanced/input_files/input-main.md‎
Lines changed: 2 additions & 2 deletions b/‎docs/advanced/input_files/input-main.md‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎source/Makefile.Objects‎
Lines changed: 5 additions & 8 deletions b/‎source/Makefile.Objects‎
Lines changed: 5 additions & 8 deletions
diff --git a/‎source/driver_run.cpp‎
Lines changed: 4 additions & 1 deletion b/‎source/driver_run.cpp‎
Lines changed: 4 additions & 1 deletion
diff --git a/‎source/module_base/test/CMakeLists.txt‎
Lines changed: 5 additions & 0 deletions b/‎source/module_base/test/CMakeLists.txt‎
Lines changed: 5 additions & 0 deletions
diff --git a/‎source/module_base/test/test_lebedev_laikov.cpp‎
Lines changed: 152 additions & 0 deletions b/‎source/module_base/test/test_lebedev_laikov.cpp‎
Lines changed: 152 additions & 0 deletions
diff --git a/‎source/module_basis/module_ao/parallel_orbitals.cpp‎
Lines changed: 4 additions & 1 deletion b/‎source/module_basis/module_ao/parallel_orbitals.cpp‎
Lines changed: 4 additions & 1 deletion
diff --git a/‎source/module_basis/module_nao/two_center_bundle.cpp‎
Lines changed: 13 additions & 15 deletions b/‎source/module_basis/module_nao/two_center_bundle.cpp‎
Lines changed: 13 additions & 15 deletions
diff --git a/‎source/module_basis/module_pw/CMakeLists.txt‎
Lines changed: 11 additions & 0 deletions b/‎source/module_basis/module_pw/CMakeLists.txt‎
Lines changed: 11 additions & 0 deletions
diff --git a/‎source/module_basis/module_pw/module_fft/fft_base.h‎
Lines changed: 5 additions & 0 deletions b/‎source/module_basis/module_pw/module_fft/fft_base.h‎
Lines changed: 5 additions & 0 deletions
@@ -1392,7 +1392,7 @@ These variables are used to control the geometry relaxation.
 
 - **Type**: Integer
 - **Description**: The maximal number of ionic iteration steps, the minimum value is 1.
-- **Default**: 1
+- **Default**: 1 for SCF, 50 for relax and cell-relax calculations
 
 ### relax_cg_thr
 
@@ -2928,7 +2928,7 @@ These variables are used to control DFT+U correlated parameters
 
   - where $\gamma$ is a parameter that adjusts the relative weight of the error function to the derivative error function.
 - **Unit**: Bohr
-- **Default**: 5.0
+- **Default**: 3.0
 
 [back to top](#full-list-of-input-keywords)
 
 
@@ -230,7 +230,6 @@ OBJS_ELECSTAT=elecstate.o\
     pot_xc.o\
 
 OBJS_ELECSTAT_LCAO=elecstate_lcao.o\
-      elecstate_lcao_tddft.o\
       elecstate_lcao_cal_tau.o\
       density_matrix.o\
       density_matrix_io.o\
@@ -255,10 +254,9 @@ OBJS_ESOLVER=esolver.o\
 OBJS_ESOLVER_LCAO=esolver_ks_lcao.o\
       esolver_ks_lcao_tddft.o\
       dpks_cal_e_delta_band.o\
-      dftu_cal_occup_m.o\
       set_matrix_grid.o\
       lcao_before_scf.o\
-      lcao_gets.o\
+      esolver_gets.o\
       lcao_others.o\
       lcao_init_after_vc.o\
 
@@ -321,7 +319,7 @@ OBJS_HAMILT_LCAO=hamilt_lcao.o\
     op_dftu_lcao.o\
     deepks_lcao.o\
     op_exx_lcao.o\
-    sc_lambda_lcao.o\
+    dspin_lcao.o\
     dftu_lcao.o\
 
 OBJS_HCONTAINER=base_matrix.o\
@@ -686,7 +684,9 @@ OBJS_VDW=vdw.o\
     vdwd3_parameters.o\
     vdwd2.o\
     vdwd3.o\
-    vdwd3_parameters_tab.o
+    vdwd3_parameters_tab.o\
+    vdwd3_autoset_xcname.o\
+    vdwd3_autoset_xcparam.o
 
 OBJS_DFTU=dftu.o\
       dftu_force.o\
@@ -698,14 +698,11 @@ OBJS_DFTU=dftu.o\
       dftu_hamilt.o
 
 OBJS_DELTASPIN=basic_funcs.o\
-      cal_h_lambda.o\
       cal_mw_from_lambda.o\
-      cal_mw_helper.o\
       cal_mw.o\
       init_sc.o\
       lambda_loop_helper.o\
       lambda_loop.o\
-      sc_parse_json.o\
       spin_constrain.o\
       template_helpers.o\
 
 
@@ -67,13 +67,16 @@ void Driver::driver_run() {
         Relax_Driver rl_driver;
         rl_driver.relax_driver(p_esolver);
     }
+    else if (cal_type == "get_S")
+    {
+        p_esolver->runner(0, GlobalC::ucell);
+    }
     else
     {
         //! supported "other" functions:
         //! get_pchg(LCAO),
         //! test_memory(PW,LCAO),
         //! test_neighbour(LCAO),
-        //! get_S(LCAO),
         //! gen_bessel(PW), et al.
         const int istep = 0;
         p_esolver->others(istep);
 
@@ -230,6 +230,11 @@ AddTest(
   SOURCES formatter_test.cpp
 )
 
+AddTest(
+  TARGET lebedev_laikov
+  SOURCES test_lebedev_laikov.cpp ../ylm.cpp ../math_lebedev_laikov.cpp
+)
+
 if(ENABLE_GOOGLEBENCH)
   AddTest(
     TARGET perf_sphbes
 
@@ -0,0 +1,152 @@
+#include "module_base/math_lebedev_laikov.h"
+#include "module_base/ylm.h"
+
+#include "gtest/gtest.h"
+#include <random>
+#ifdef __MPI
+#include <mpi.h>
+#endif
+
+using ModuleBase::Lebedev_laikov_grid;
+
+// No-op stand-in for ModuleBase::WARNING_QUIT, defined here to prevent an unnecessary dependency
+namespace ModuleBase {
+void WARNING_QUIT(const std::string&, const std::string&) {} // both arguments are ignored; nothing happens
+}
+
+class LebedevLaikovTest: public ::testing::Test { // fixture for the Lebedev-Laikov grid tests
+protected:
+    void randgen(int lmax, std::vector<double>& coef); // fills coef with (lmax+1)^2 random values scaled to unit 2-norm
+    const double tol = 1e-12; // upper bound accepted for |quadrature - reference|
+};
+
+
+void LebedevLaikovTest::randgen(int lmax, std::vector<double>& coef) { // coef is an output: resized and overwritten
+    coef.resize((lmax + 1) * (lmax + 1)); // (lmax+1)^2 entries, one per Y_lm with l <= lmax
+
+    // draw each coefficient uniformly from [0, 1); the generator is seeded from std::random_device
+    std::random_device rd;
+    std::mt19937 gen(rd());
+    std::uniform_real_distribution<double> dis(0.0, 1.0);
+    for (size_t i = 0; i < coef.size(); ++i) {
+        coef[i] = dis(gen);
+    }
+
+    // rescale so that the sum of squared coefficients equals 1
+    double fac = 0.0;
+    for (size_t i = 0; i < coef.size(); ++i) {
+        fac += coef[i] * coef[i];
+    }
+
+    fac = 1.0 / std::sqrt(fac); // reciprocal of the 2-norm of coef
+    for (size_t i = 0; i < coef.size(); ++i) {
+        coef[i] *= fac;
+    }
+}
+
+
+TEST_F(LebedevLaikovTest, Accuracy) {
+    /* 
+     * Given
+     *
+     *      f = c[0]*Y00 + c[1]*Y10 + c[2]*Y11 + ...,
+     *
+     * where c[0], c[1], c[2], ... are random coefficients (normalized by
+     * randgen), the integral of |f|^2 over the unit sphere is
+     *
+     *      \int |f|^2 d\Omega = c[0]^2 + c[1]^2 + c[2]^2 + ... .
+     *
+     * This test checks, for every grid listed below, that the Lebedev
+     * quadrature of |f|^2 agrees with this sum to within tol.
+     *
+     */
+
+    // (ngrid, lmax): number of grid points and, presumably, the highest degree integrated exactly -- TODO confirm
+    std::set<std::pair<int, int>> supported = {
+        {6, 3},
+        {14, 5},
+        {26, 7},
+        {38, 9},
+        {50, 11},
+        {74, 13},
+        {86, 15},
+        {110, 17},
+        {146, 19},
+        {170, 21},
+        {194, 23},
+        {230, 25},
+        {266, 27},
+        {302, 29},
+        {350, 31},
+        {434, 35},
+        {590, 41},
+        {770, 47},
+        {974, 53},
+        {1202, 59},
+        {1454, 65},
+        {1730, 71},
+        {2030, 77},
+        {2354, 83},
+        {2702, 89},
+        {3074, 95},
+        {3470, 101},
+        {3890, 107},
+        {4334, 113},
+        {4802, 119},
+        {5294, 125},
+        {5810, 131},
+    };
+
+    std::vector<double> coef; // reused across grids; randgen resizes it each time
+
+    for (auto& grid_info: supported) {
+        int ngrid = grid_info.first;
+        int grid_lmax = grid_info.second;
+
+        Lebedev_laikov_grid lebgrid(ngrid);
+        lebgrid.generate_grid_points();
+        
+        const double* weight = lebgrid.get_weight();
+        const ModuleBase::Vector3<double>* grid = lebgrid.get_grid_coor();
+
+        int func_lmax = grid_lmax / 2; // keeps the degree of |f|^2 (at most 2*func_lmax) within grid_lmax
+        randgen(func_lmax, coef);
+
+        double val = 0.0; // quadrature estimate of \int |f|^2 d\Omega
+        std::vector<double> ylm_real;
+        for (int i = 0; i < ngrid; i++) {
+            ModuleBase::Ylm::sph_harm(func_lmax,
+                    grid[i].x, grid[i].y, grid[i].z, ylm_real); // assumes ylm_real receives at least coef.size() values -- TODO confirm
+            double tmp = 0.0; // f evaluated at grid point i
+            for (size_t j = 0; j < coef.size(); ++j) {
+                tmp += coef[j] * ylm_real[j];
+            }
+            val += weight[i] * tmp * tmp;
+        }
+
+        double val_ref = 0.0; // reference value: sum of squared coefficients
+        for (size_t i = 0; i < coef.size(); ++i) {
+            val_ref += coef[i] * coef[i];
+        }
+
+        double abs_diff = std::abs(val - val_ref);
+        EXPECT_LT(abs_diff, tol);
+    }
+}
+
+
+int main(int argc, char** argv) // test entry point; MPI is initialized and finalized only when __MPI is defined
+{
+#ifdef __MPI
+    MPI_Init(&argc, &argv);
+#endif
+
+    testing::InitGoogleTest(&argc, argv);
+    int result = RUN_ALL_TESTS(); // stored so that MPI_Finalize can run before returning
+
+#ifdef __MPI
+    MPI_Finalize();
+#endif
+
+    return result; // value returned by RUN_ALL_TESTS
+}
@@ -247,7 +247,10 @@ int Parallel_Orbitals::set_nloc_wfc_Eij(
         }
         else
         {
-            ModuleBase::WARNING_QUIT("Parallel_Orbitals::set_nloc_wfc_Eij", "some processor has no bands-row-blocks.");
+            ModuleBase::WARNING_QUIT("Parallel_Orbitals::set_nloc_wfc_Eij",
+                "The number of columns of the 2D process grid exceeds the number of bands. "
+                "Try launching the calculation with fewer MPI processes."
+            );
         }
     }
     int col_b_bands = block / dim1;
 
@@ -63,7 +63,7 @@ void TwoCenterBundle::tabulate()
 {
     ModuleBase::SphericalBesselTransformer sbt(true);
     orb_->set_transformer(sbt);
-    beta_->set_transformer(sbt);
+    if (beta_) { beta_->set_transformer(sbt); }
     if (alpha_) {
         alpha_->set_transformer(sbt);
 }
@@ -75,22 +75,17 @@ void TwoCenterBundle::tabulate()
     //              build two-center integration tables
     //================================================================
     // set up a universal radial grid
-    double rmax = std::max(orb_->rcut_max(), beta_->rcut_max());
-    if (alpha_) {
-        rmax = std::max(rmax, alpha_->rcut_max());
-}
+    double rmax = orb_->rcut_max();
+    if (beta_) { rmax = std::max(rmax, beta_->rcut_max()); }
+    if (alpha_) { rmax = std::max(rmax, alpha_->rcut_max()); }
     double dr = 0.01;
     double cutoff = 2.0 * rmax;
     int nr = static_cast<int>(rmax / dr) + 1;
 
     orb_->set_uniform_grid(true, nr, cutoff, 'i', true);
-    beta_->set_uniform_grid(true, nr, cutoff, 'i', true);
-    if (alpha_) {
-        alpha_->set_uniform_grid(true, nr, cutoff, 'i', true);
-}
-    if (orb_onsite_) {
-        orb_onsite_->set_uniform_grid(true, nr, cutoff, 'i', true);
-}
+    if (beta_) { beta_->set_uniform_grid(true, nr, cutoff, 'i', true); }
+    if (alpha_) { alpha_->set_uniform_grid(true, nr, cutoff, 'i', true);}
+    if (orb_onsite_) {  orb_onsite_->set_uniform_grid(true, nr, cutoff, 'i', true);}
 
     // build TwoCenterIntegrator objects
     kinetic_orb = std::unique_ptr<TwoCenterIntegrator>(new TwoCenterIntegrator);
@@ -101,9 +96,12 @@ void TwoCenterBundle::tabulate()
     overlap_orb->tabulate(*orb_, *orb_, 'S', nr, cutoff);
     ModuleBase::Memory::record("TwoCenterTable: Overlap", overlap_orb->table_memory());
 
-    overlap_orb_beta = std::unique_ptr<TwoCenterIntegrator>(new TwoCenterIntegrator);
-    overlap_orb_beta->tabulate(*orb_, *beta_, 'S', nr, cutoff);
-    ModuleBase::Memory::record("TwoCenterTable: Nonlocal", overlap_orb_beta->table_memory());
+    if (beta_)
+    {
+        overlap_orb_beta = std::unique_ptr<TwoCenterIntegrator>(new TwoCenterIntegrator);
+        overlap_orb_beta->tabulate(*orb_, *beta_, 'S', nr, cutoff);
+        ModuleBase::Memory::record("TwoCenterTable: Nonlocal", overlap_orb_beta->table_memory());
+    }
 
     if (alpha_)
     {
 
@@ -3,6 +3,17 @@ if (ENABLE_FLOAT_FFTW)
     module_fft/fft_cpu_float.cpp
   )
 endif()
+if (USE_CUDA)
+  list (APPEND FFT_SRC
+    module_fft/fft_cuda.cpp
+  )
+endif()
+if (USE_ROCM)
+  list (APPEND FFT_SRC
+    module_fft/fft_rcom.cpp
+  )
+endif()
+
 list(APPEND objects
     fft.cpp
     pw_basis.cpp
 
@@ -30,6 +30,11 @@ class FFT_BASE
                  bool gamma_only_in, 
                  bool xprime_in = true);
 
+    virtual __attribute__((weak))
+    void initfft(int nx_in, 
+                 int ny_in, 
+                 int nz_in);
+
     /**
      * @brief Setup the fft Plan and data As pure virtual function.
      *
Original file line number	Diff line number	Diff line change
`@@ -247,7 +247,10 @@ int Parallel_Orbitals::set_nloc_wfc_Eij(`
`247`	`247`	`}`
`248`	`248`	`else`
`249`	`249`	`{`
`250`		`- ModuleBase::WARNING_QUIT("Parallel_Orbitals::set_nloc_wfc_Eij", "some processor has no bands-row-blocks.");`
	`250`	`+ ModuleBase::WARNING_QUIT("Parallel_Orbitals::set_nloc_wfc_Eij",`
	`251`	`+ "The number of columns of the 2D process grid exceeds the number of bands. "`
	`252`	`+ "Try launching the calculation with fewer MPI processes."`
	`253`	`+ );`
`251`	`254`	`}`
`252`	`255`	`}`
`253`	`256`	`int col_b_bands = block / dim1;`