Skip to content

Commit fba0e72

Browse files
authored
Merge branch 'develop' into fix-dftd3-compilation
2 parents 92ae576 + 7a87079 commit fba0e72

File tree

92 files changed

+2065
-1758
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

92 files changed

+2065
-1758
lines changed

source/Makefile.Objects

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -253,7 +253,6 @@ OBJS_ESOLVER=esolver.o\
253253
OBJS_ESOLVER_LCAO=esolver_ks_lcao.o\
254254
esolver_ks_lcao_tddft.o\
255255
dpks_cal_e_delta_band.o\
256-
dftu_cal_occup_m.o\
257256
set_matrix_grid.o\
258257
lcao_before_scf.o\
259258
lcao_gets.o\
@@ -582,7 +581,6 @@ OBJS_LCAO=evolve_elec.o\
582581
LCAO_set_fs.o\
583582
LCAO_set_st.o\
584583
LCAO_nl_mu.o\
585-
LCAO_nnr.o\
586584
LCAO_set_zero.o\
587585
LCAO_allocate.o\
588586
LCAO_set_mat2d.o\

source/module_base/formatter.h

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,20 @@ class FmtCore
145145
[&delim](const std::string& acc, const std::string& s) { return acc + delim + s; });
146146
}
147147

148+
/**
 * @brief Return an upper-cased copy of a string.
 *
 * Every character is mapped with ::toupper (which follows the current C
 * locale); the input string itself is left untouched.
 *
 * @param in string to convert
 * @return copy of `in` with each character converted to upper case
 */
static std::string upper(const std::string& in)
{
    std::string dst = in;
    // ::toupper has undefined behavior for negative arguments other than EOF,
    // which is what a plain (signed) char holding a non-ASCII byte would be.
    // Taking the character as unsigned char keeps the call well defined.
    std::transform(dst.begin(), dst.end(), dst.begin(),
                   [](unsigned char c) { return static_cast<char>(::toupper(c)); });
    return dst;
}
154+
155+
/**
 * @brief Return a lower-cased copy of a string.
 *
 * Every character is mapped with ::tolower (which follows the current C
 * locale); the input string itself is left untouched.
 *
 * @param in string to convert
 * @return copy of `in` with each character converted to lower case
 */
static std::string lower(const std::string& in)
{
    std::string dst = in;
    // ::tolower has undefined behavior for negative arguments other than EOF,
    // which is what a plain (signed) char holding a non-ASCII byte would be.
    // Taking the character as unsigned char keeps the call well defined.
    std::transform(dst.begin(), dst.end(), dst.begin(),
                   [](unsigned char c) { return static_cast<char>(::tolower(c)); });
    return dst;
}
161+
148162
private:
149163
std::string fmt_;
150164
template<typename T>

source/module_base/module_mixing/test/mixing_test.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
1+
#ifdef _OPENMP
12
#include <omp.h>
3+
#endif
24

35
#include "../broyden_mixing.h"
46
#include "../plain_mixing.h"
@@ -151,7 +153,9 @@ class Mixing_Test : public testing::Test
151153

152154
TEST_F(Mixing_Test, BroydenSolveLinearEq)
153155
{
156+
#ifdef _OPENMP
154157
omp_set_num_threads(1);
158+
#endif
155159
init_method("broyden");
156160
std::vector<double> x_in = xd_ref;
157161
std::vector<double> x_out(3);
@@ -196,7 +200,9 @@ TEST_F(Mixing_Test, BroydenSolveLinearEq)
196200

197201
TEST_F(Mixing_Test, PulaySolveLinearEq)
198202
{
203+
#ifdef _OPENMP
199204
omp_set_num_threads(1);
205+
#endif
200206
init_method("pulay");
201207
std::vector<double> x_in = xd_ref;
202208
std::vector<double> x_out(3);
@@ -242,7 +248,9 @@ TEST_F(Mixing_Test, PulaySolveLinearEq)
242248

243249
TEST_F(Mixing_Test, PlainSolveLinearEq)
244250
{
251+
#ifdef _OPENMP
245252
omp_set_num_threads(1);
253+
#endif
246254
init_method("plain");
247255
std::vector<double> x_in = xd_ref;
248256
std::vector<double> x_out(3);

source/module_base/test/CMakeLists.txt

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -230,6 +230,11 @@ AddTest(
230230
SOURCES formatter_test.cpp
231231
)
232232

233+
AddTest(
234+
TARGET lebedev_laikov
235+
SOURCES test_lebedev_laikov.cpp ../ylm.cpp ../math_lebedev_laikov.cpp
236+
)
237+
233238
if(ENABLE_GOOGLEBENCH)
234239
AddTest(
235240
TARGET perf_sphbes
Lines changed: 152 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,152 @@
1+
#include "module_base/math_lebedev_laikov.h"
#include "module_base/ylm.h"

#include "gtest/gtest.h"
#include <cmath>
#include <random>
#include <set>
#include <string>
#include <utility>
#include <vector>
#ifdef __MPI
#include <mpi.h>
#endif
9+
10+
using ModuleBase::Lebedev_laikov_grid;
11+
12+
// Mock of ModuleBase::WARNING_QUIT, defined here to prevent an unnecessary
// dependency on the code that provides the real function. This stand-in
// ignores both string arguments and simply returns.
13+
namespace ModuleBase {
14+
void WARNING_QUIT(const std::string&, const std::string&) {}
15+
}
16+
17+
// GoogleTest fixture shared by the Lebedev-Laikov quadrature tests.
class LebedevLaikovTest: public ::testing::Test {
18+
protected:
19+
// Resizes coef to (lmax+1)^2 entries, fills it with uniformly distributed
// random numbers and rescales them so that the sum of squares is 1.
void randgen(int lmax, std::vector<double>& coef);
20+
// Absolute tolerance used when comparing the quadrature result with the
// reference value.
const double tol = 1e-12;
21+
};
22+
23+
24+
// Produces a random coefficient vector with (lmax+1)^2 entries whose
// Euclidean norm is 1. Entries are first drawn uniformly from [0, 1) and
// then divided by the norm of the whole vector.
void LebedevLaikovTest::randgen(int lmax, std::vector<double>& coef) {
    coef.resize((lmax + 1) * (lmax + 1));

    // draw every coefficient from a uniform distribution
    std::random_device seed_source;
    std::mt19937 engine(seed_source());
    std::uniform_real_distribution<double> uniform(0.0, 1.0);
    for (double& c : coef) {
        c = uniform(engine);
    }

    // squared Euclidean norm of the raw coefficients
    double norm_sq = 0.0;
    for (const double c : coef) {
        norm_sq += c * c;
    }

    // rescale so that the coefficients have unit norm
    const double scale = 1.0 / std::sqrt(norm_sq);
    for (double& c : coef) {
        c *= scale;
    }
}
46+
47+
48+
TEST_F(LebedevLaikovTest, Accuracy) {
    /*
     * Given
     *
     *      f = c[0]*Y00 + c[1]*Y10 + c[2]*Y11 + ...,
     *
     * where c[0], c[1], c[2], ... are some random numbers, the integration
     * of |f|^2 on the unit sphere
     *
     *      \int |f|^2 d\Omega = c[0]^2 + c[1]^2 + c[2]^2 + ... .
     *
     * This test verifies with the above integral that quadrature with
     * Lebedev grid is exact up to floating point errors.
     *
     */

    // (ngrid, lmax): every grid size exercised by this test, paired with the
    // lmax value used below to choose the degree of the test function.
    const std::set<std::pair<int, int>> supported = {
        {6, 3},
        {14, 5},
        {26, 7},
        {38, 9},
        {50, 11},
        {74, 13},
        {86, 15},
        {110, 17},
        {146, 19},
        {170, 21},
        {194, 23},
        {230, 25},
        {266, 27},
        {302, 29},
        {350, 31},
        {434, 35},
        {590, 41},
        {770, 47},
        {974, 53},
        {1202, 59},
        {1454, 65},
        {1730, 71},
        {2030, 77},
        {2354, 83},
        {2702, 89},
        {3074, 95},
        {3470, 101},
        {3890, 107},
        {4334, 113},
        {4802, 119},
        {5294, 125},
        {5810, 131},
    };

    std::vector<double> coef;

    for (const auto& grid_info : supported) {
        const int ngrid = grid_info.first;
        const int grid_lmax = grid_info.second;

        Lebedev_laikov_grid lebgrid(ngrid);
        lebgrid.generate_grid_points();

        const double* weight = lebgrid.get_weight();
        const ModuleBase::Vector3<double>* grid = lebgrid.get_grid_coor();

        // f is squared in the integrand, so f itself only goes up to half
        // of the lmax listed for the grid.
        const int func_lmax = grid_lmax / 2;
        randgen(func_lmax, coef);

        // quadrature: sum over grid points of weight * |f|^2
        double val = 0.0;
        std::vector<double> ylm_real;
        for (int i = 0; i < ngrid; i++) {
            ModuleBase::Ylm::sph_harm(func_lmax,
                    grid[i].x, grid[i].y, grid[i].z, ylm_real);
            double tmp = 0.0;
            for (size_t j = 0; j < coef.size(); ++j) {
                tmp += coef[j] * ylm_real[j];
            }
            val += weight[i] * tmp * tmp;
        }

        // reference value: sum of the squared coefficients
        double val_ref = 0.0;
        for (size_t i = 0; i < coef.size(); ++i) {
            val_ref += coef[i] * coef[i];
        }

        const double abs_diff = std::abs(val - val_ref);
        // The same check runs for every grid, so report which one failed.
        EXPECT_LT(abs_diff, tol)
            << "ngrid = " << ngrid << ", func_lmax = " << func_lmax
            << ", val = " << val << ", val_ref = " << val_ref;
    }
}
136+
137+
138+
int main(int argc, char** argv)
139+
{
140+
#ifdef __MPI
141+
MPI_Init(&argc, &argv);
142+
#endif
143+
144+
testing::InitGoogleTest(&argc, argv);
145+
int result = RUN_ALL_TESTS();
146+
147+
#ifdef __MPI
148+
MPI_Finalize();
149+
#endif
150+
151+
return result;
152+
}

source/module_basis/module_ao/parallel_orbitals.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -247,7 +247,10 @@ int Parallel_Orbitals::set_nloc_wfc_Eij(
247247
}
248248
else
249249
{
250-
ModuleBase::WARNING_QUIT("Parallel_Orbitals::set_nloc_wfc_Eij", "some processor has no bands-row-blocks.");
250+
ModuleBase::WARNING_QUIT("Parallel_Orbitals::set_nloc_wfc_Eij",
251+
"The number of columns of the 2D process grid exceeds the number of bands. "
252+
"Try launching the calculation with fewer MPI processes."
253+
);
251254
}
252255
}
253256
int col_b_bands = block / dim1;

source/module_basis/module_pw/CMakeLists.txt

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,17 @@ if (ENABLE_FLOAT_FFTW)
33
module_fft/fft_cpu_float.cpp
44
)
55
endif()
6+
if (USE_CUDA)
7+
list (APPEND FFT_SRC
8+
module_fft/fft_cuda.cpp
9+
)
10+
endif()
11+
if (USE_ROCM)
12+
list (APPEND FFT_SRC
13+
module_fft/fft_rcom.cpp
14+
)
15+
endif()
16+
617
list(APPEND objects
718
fft.cpp
819
pw_basis.cpp

source/module_basis/module_pw/module_fft/fft_base.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,11 @@ class FFT_BASE
3030
bool gamma_only_in,
3131
bool xprime_in = true);
3232

33+
/**
 * @brief Three-argument overload of initfft that takes only the grid sizes.
 *
 * NOTE(review): the declaration is virtual and carries
 * __attribute__((weak)), a GCC/Clang extension; presumably this lets the
 * base class go without a definition while derived FFT classes supply
 * one -- confirm against the derived classes.
 *
 * @param nx_in grid size along x
 * @param ny_in grid size along y
 * @param nz_in grid size along z
 */
virtual __attribute__((weak))
34+
void initfft(int nx_in,
35+
int ny_in,
36+
int nz_in);
37+
3338
/**
3439
* @brief Setup the fft Plan and data As pure virtual function.
3540
*

source/module_basis/module_pw/module_fft/fft_bundle.cpp

Lines changed: 22 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,12 @@
22
#include "fft_bundle.h"
33
#include "fft_cpu.h"
44
#include "module_base/module_device/device.h"
5-
// #if defined(__CUDA)
6-
// #include "fft_cuda.h"
7-
// #endif
8-
// #if defined(__ROCM)
9-
// #include "fft_rcom.h"
10-
// #endif
5+
#if defined(__CUDA)
6+
#include "fft_cuda.h"
7+
#endif
8+
#if defined(__ROCM)
9+
#include "fft_rcom.h"
10+
#endif
1111

1212
template<typename FFT_BASE, typename... Args>
1313
std::unique_ptr<FFT_BASE> make_unique(Args &&... args)
@@ -16,6 +16,11 @@ std::unique_ptr<FFT_BASE> make_unique(Args &&... args)
1616
}
1717
namespace ModulePW
1818
{
19+
/// Destructor: hands all cleanup over to clear().
FFT_Bundle::~FFT_Bundle()
{
    clear();
}
23+
1924
void FFT_Bundle::setfft(std::string device_in,std::string precision_in)
2025
{
2126
this->device = device_in;
@@ -83,13 +88,17 @@ void FFT_Bundle::initfft(int nx_in,
8388
}
8489
if (device=="gpu")
8590
{
86-
// #if defined(__ROCM)
87-
// fft_float = new FFT_RCOM<float>();
88-
// fft_double = new FFT_RCOM<double>();
89-
// #elif defined(__CUDA)
90-
// fft_float = make_unique<FFT_CUDA<float>>();
91-
// fft_double = make_unique<FFT_CUDA<double>>();
92-
// #endif
91+
// Create the single- and double-precision GPU FFT back-ends and pass each
// one the grid dimensions. The ROCm objects are built with make_unique,
// exactly like the CUDA ones: fft_float / fft_double accept the
// std::unique_ptr returned by make_unique, and a standard smart pointer
// cannot be assigned from the raw pointer returned by `new`.
#if defined(__ROCM)
        fft_float = make_unique<FFT_RCOM<float>>();
        fft_float->initfft(nx_in,ny_in,nz_in);
        fft_double = make_unique<FFT_RCOM<double>>();
        fft_double->initfft(nx_in,ny_in,nz_in);
#elif defined(__CUDA)
        fft_float = make_unique<FFT_CUDA<float>>();
        fft_float->initfft(nx_in,ny_in,nz_in);
        fft_double = make_unique<FFT_CUDA<double>>();
        fft_double->initfft(nx_in,ny_in,nz_in);
#endif
93102
}
94103

95104
}

source/module_basis/module_pw/module_fft/fft_bundle.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ class FFT_Bundle
99
{
1010
public:
1111
FFT_Bundle(){};
12-
~FFT_Bundle(){};
12+
~FFT_Bundle();
1313
/**
1414
* @brief Constructor with device and precision.
1515
* @param device_in device type, cpu or gpu.

0 commit comments

Comments
 (0)