Skip to content

Commit 978dae0

Browse files
authored
Merge branch 'develop' into mybfgs
2 parents 4c75875 + 7a87079 commit 978dae0

36 files changed

+643
-166
lines changed

source/Makefile.Objects

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -253,7 +253,6 @@ OBJS_ESOLVER=esolver.o\
253253
OBJS_ESOLVER_LCAO=esolver_ks_lcao.o\
254254
esolver_ks_lcao_tddft.o\
255255
dpks_cal_e_delta_band.o\
256-
dftu_cal_occup_m.o\
257256
set_matrix_grid.o\
258257
lcao_before_scf.o\
259258
lcao_gets.o\

source/module_base/test/CMakeLists.txt

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -230,6 +230,11 @@ AddTest(
230230
SOURCES formatter_test.cpp
231231
)
232232

233+
AddTest(
234+
TARGET lebedev_laikov
235+
SOURCES test_lebedev_laikov.cpp ../ylm.cpp ../math_lebedev_laikov.cpp
236+
)
237+
233238
if(ENABLE_GOOGLEBENCH)
234239
AddTest(
235240
TARGET perf_sphbes
Lines changed: 152 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,152 @@
1+
#include "module_base/math_lebedev_laikov.h"
2+
#include "module_base/ylm.h"
3+
4+
#include "gtest/gtest.h"
5+
#include <random>
6+
#ifdef __MPI
7+
#include <mpi.h>
8+
#endif
9+
10+
using ModuleBase::Lebedev_laikov_grid;
11+
12+
// mock the function to prevent unnecessary dependency
13+
namespace ModuleBase {
14+
void WARNING_QUIT(const std::string&, const std::string&) {}
15+
}
16+
17+
class LebedevLaikovTest: public ::testing::Test {
18+
protected:
19+
void randgen(int lmax, std::vector<double>& coef);
20+
const double tol = 1e-12;
21+
};
22+
23+
24+
void LebedevLaikovTest::randgen(int lmax, std::vector<double>& coef) {
25+
coef.resize((lmax + 1) * (lmax + 1));
26+
27+
// fill coef with uniformly distributed random numbers
28+
std::random_device rd;
29+
std::mt19937 gen(rd());
30+
std::uniform_real_distribution<double> dis(0.0, 1.0);
31+
for (size_t i = 0; i < coef.size(); ++i) {
32+
coef[i] = dis(gen);
33+
}
34+
35+
// normalize the coefficients
36+
double fac = 0.0;
37+
for (size_t i = 0; i < coef.size(); ++i) {
38+
fac += coef[i] * coef[i];
39+
}
40+
41+
fac = 1.0 / std::sqrt(fac);
42+
for (size_t i = 0; i < coef.size(); ++i) {
43+
coef[i] *= fac;
44+
}
45+
}
46+
47+
48+
TEST_F(LebedevLaikovTest, Accuracy) {
49+
/*
50+
* Given
51+
*
52+
* f = c[0]*Y00 + c[1]*Y10 + c[2]*Y11 + ...,
53+
*
54+
* where c[0], c[1], c[2], ... are some random numbers, the integration
55+
* of |f|^2 on the unit sphere
56+
*
57+
* \int |f|^2 d\Omega = c[0]^2 + c[1]^2 + c[2]^2 + ... .
58+
*
59+
* This test verifies with the above integral that quadrature with
60+
* Lebedev grid is exact up to floating point errors.
61+
*
62+
*/
63+
64+
// (ngrid, lmax)
65+
std::set<std::pair<int, int>> supported = {
66+
{6, 3},
67+
{14, 5},
68+
{26, 7},
69+
{38, 9},
70+
{50, 11},
71+
{74, 13},
72+
{86, 15},
73+
{110, 17},
74+
{146, 19},
75+
{170, 21},
76+
{194, 23},
77+
{230, 25},
78+
{266, 27},
79+
{302, 29},
80+
{350, 31},
81+
{434, 35},
82+
{590, 41},
83+
{770, 47},
84+
{974, 53},
85+
{1202, 59},
86+
{1454, 65},
87+
{1730, 71},
88+
{2030, 77},
89+
{2354, 83},
90+
{2702, 89},
91+
{3074, 95},
92+
{3470, 101},
93+
{3890, 107},
94+
{4334, 113},
95+
{4802, 119},
96+
{5294, 125},
97+
{5810, 131},
98+
};
99+
100+
std::vector<double> coef;
101+
102+
for (auto& grid_info: supported) {
103+
int ngrid = grid_info.first;
104+
int grid_lmax = grid_info.second;
105+
106+
Lebedev_laikov_grid lebgrid(ngrid);
107+
lebgrid.generate_grid_points();
108+
109+
const double* weight = lebgrid.get_weight();
110+
const ModuleBase::Vector3<double>* grid = lebgrid.get_grid_coor();
111+
112+
int func_lmax = grid_lmax / 2;
113+
randgen(func_lmax, coef);
114+
115+
double val = 0.0;
116+
std::vector<double> ylm_real;
117+
for (int i = 0; i < ngrid; i++) {
118+
ModuleBase::Ylm::sph_harm(func_lmax,
119+
grid[i].x, grid[i].y, grid[i].z, ylm_real);
120+
double tmp = 0.0;
121+
for (size_t j = 0; j < coef.size(); ++j) {
122+
tmp += coef[j] * ylm_real[j];
123+
}
124+
val += weight[i] * tmp * tmp;
125+
}
126+
127+
double val_ref = 0.0;
128+
for (size_t i = 0; i < coef.size(); ++i) {
129+
val_ref += coef[i] * coef[i];
130+
}
131+
132+
double abs_diff = std::abs(val - val_ref);
133+
EXPECT_LT(abs_diff, tol);
134+
}
135+
}
136+
137+
138+
int main(int argc, char** argv)
139+
{
140+
#ifdef __MPI
141+
MPI_Init(&argc, &argv);
142+
#endif
143+
144+
testing::InitGoogleTest(&argc, argv);
145+
int result = RUN_ALL_TESTS();
146+
147+
#ifdef __MPI
148+
MPI_Finalize();
149+
#endif
150+
151+
return result;
152+
}

source/module_basis/module_pw/CMakeLists.txt

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,17 @@ if (ENABLE_FLOAT_FFTW)
33
module_fft/fft_cpu_float.cpp
44
)
55
endif()
6+
if (USE_CUDA)
7+
list (APPEND FFT_SRC
8+
module_fft/fft_cuda.cpp
9+
)
10+
endif()
11+
if (USE_ROCM)
12+
list (APPEND FFT_SRC
13+
module_fft/fft_rcom.cpp
14+
)
15+
endif()
16+
617
list(APPEND objects
718
fft.cpp
819
pw_basis.cpp

source/module_basis/module_pw/module_fft/fft_base.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,11 @@ class FFT_BASE
3030
bool gamma_only_in,
3131
bool xprime_in = true);
3232

33+
virtual __attribute__((weak))
34+
void initfft(int nx_in,
35+
int ny_in,
36+
int nz_in);
37+
3338
/**
3439
* @brief Setup the fft Plan and data As pure virtual function.
3540
*

source/module_basis/module_pw/module_fft/fft_bundle.cpp

Lines changed: 22 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,12 @@
22
#include "fft_bundle.h"
33
#include "fft_cpu.h"
44
#include "module_base/module_device/device.h"
5-
// #if defined(__CUDA)
6-
// #include "fft_cuda.h"
7-
// #endif
8-
// #if defined(__ROCM)
9-
// #include "fft_rcom.h"
10-
// #endif
5+
#if defined(__CUDA)
6+
#include "fft_cuda.h"
7+
#endif
8+
#if defined(__ROCM)
9+
#include "fft_rcom.h"
10+
#endif
1111

1212
template<typename FFT_BASE, typename... Args>
1313
std::unique_ptr<FFT_BASE> make_unique(Args &&... args)
@@ -16,6 +16,11 @@ std::unique_ptr<FFT_BASE> make_unique(Args &&... args)
1616
}
1717
namespace ModulePW
1818
{
19+
FFT_Bundle::~FFT_Bundle()
20+
{
21+
this->clear();
22+
}
23+
1924
void FFT_Bundle::setfft(std::string device_in,std::string precision_in)
2025
{
2126
this->device = device_in;
@@ -83,13 +88,17 @@ void FFT_Bundle::initfft(int nx_in,
8388
}
8489
if (device=="gpu")
8590
{
86-
// #if defined(__ROCM)
87-
// fft_float = new FFT_RCOM<float>();
88-
// fft_double = new FFT_RCOM<double>();
89-
// #elif defined(__CUDA)
90-
// fft_float = make_unique<FFT_CUDA<float>>();
91-
// fft_double = make_unique<FFT_CUDA<double>>();
92-
// #endif
91+
#if defined(__ROCM)
92+
fft_float = new FFT_RCOM<float>();
93+
fft_float->initfft(nx_in,ny_in,nz_in);
94+
fft_double = new FFT_RCOM<double>();
95+
fft_double->initfft(nx_in,ny_in,nz_in);
96+
#elif defined(__CUDA)
97+
fft_float = make_unique<FFT_CUDA<float>>();
98+
fft_float->initfft(nx_in,ny_in,nz_in);
99+
fft_double = make_unique<FFT_CUDA<double>>();
100+
fft_double->initfft(nx_in,ny_in,nz_in);
101+
#endif
93102
}
94103

95104
}

source/module_basis/module_pw/module_fft/fft_bundle.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ class FFT_Bundle
99
{
1010
public:
1111
FFT_Bundle(){};
12-
~FFT_Bundle(){};
12+
~FFT_Bundle();
1313
/**
1414
* @brief Constructor with device and precision.
1515
* @param device_in device type, cpu or gpu.

source/module_basis/module_pw/module_fft/fft_cpu_float.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -303,7 +303,7 @@ void FFT_CPU<float>::clear()
303303
fftw_free(c_auxg);
304304
c_auxg = nullptr;
305305
}
306-
if (z_auxr != nullptr)
306+
if (c_auxr != nullptr)
307307
{
308308
fftw_free(c_auxr);
309309
c_auxr = nullptr;
Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
#include "fft_cuda.h"
2+
#include "module_base/module_device/memory_op.h"
3+
#include "module_hamilt_pw/hamilt_pwdft/global.h"
4+
namespace ModulePW
5+
{
6+
template <typename FPTYPE>
7+
void FFT_CUDA<FPTYPE>::initfft(int nx_in,
8+
int ny_in,
9+
int nz_in)
10+
{
11+
this->nx = nx_in;
12+
this->ny = ny_in;
13+
this->nz = nz_in;
14+
}
15+
template <>
16+
void FFT_CUDA<float>::setupFFT()
17+
{
18+
cufftPlan3d(&c_handle, this->nx, this->ny, this->nz, CUFFT_C2C);
19+
resmem_cd_op()(gpu_ctx, this->c_auxr_3d, this->nx * this->ny * this->nz);
20+
21+
}
22+
template <>
23+
void FFT_CUDA<double>::setupFFT()
24+
{
25+
cufftPlan3d(&z_handle, this->nx, this->ny, this->nz, CUFFT_Z2Z);
26+
resmem_zd_op()(gpu_ctx, this->z_auxr_3d, this->nx * this->ny * this->nz);
27+
}
28+
template <>
29+
void FFT_CUDA<float>::cleanFFT()
30+
{
31+
if (c_handle)
32+
{
33+
cufftDestroy(c_handle);
34+
c_handle = {};
35+
}
36+
}
37+
template <>
38+
void FFT_CUDA<double>::cleanFFT()
39+
{
40+
if (z_handle)
41+
{
42+
cufftDestroy(z_handle);
43+
z_handle = {};
44+
}
45+
}
46+
template <>
47+
void FFT_CUDA<float>::clear()
48+
{
49+
this->cleanFFT();
50+
if (c_auxr_3d != nullptr)
51+
{
52+
delmem_cd_op()(gpu_ctx, c_auxr_3d);
53+
c_auxr_3d = nullptr;
54+
}
55+
}
56+
template <>
57+
void FFT_CUDA<double>::clear()
58+
{
59+
this->cleanFFT();
60+
if (z_auxr_3d != nullptr)
61+
{
62+
delmem_zd_op()(gpu_ctx, z_auxr_3d);
63+
z_auxr_3d = nullptr;
64+
}
65+
}
66+
67+
template <>
68+
void FFT_CUDA<float>::fft3D_forward(std::complex<float>* in,
69+
std::complex<float>* out) const
70+
{
71+
CHECK_CUFFT(cufftExecC2C(this->c_handle,
72+
reinterpret_cast<cufftComplex*>(in),
73+
reinterpret_cast<cufftComplex*>(out),
74+
CUFFT_FORWARD));
75+
}
76+
template <>
77+
void FFT_CUDA<double>::fft3D_forward(std::complex<double>* in,
78+
std::complex<double>* out) const
79+
{
80+
CHECK_CUFFT(cufftExecZ2Z(this->z_handle,
81+
reinterpret_cast<cufftDoubleComplex*>(in),
82+
reinterpret_cast<cufftDoubleComplex*>(out),
83+
CUFFT_FORWARD));
84+
}
85+
template <>
86+
void FFT_CUDA<float>::fft3D_backward(std::complex<float>* in,
87+
std::complex<float>* out) const
88+
{
89+
CHECK_CUFFT(cufftExecC2C(this->c_handle,
90+
reinterpret_cast<cufftComplex*>(in),
91+
reinterpret_cast<cufftComplex*>(out),
92+
CUFFT_INVERSE));
93+
}
94+
95+
template <>
96+
void FFT_CUDA<double>::fft3D_backward(std::complex<double>* in,
97+
std::complex<double>* out) const
98+
{
99+
CHECK_CUFFT(cufftExecZ2Z(this->z_handle,
100+
reinterpret_cast<cufftDoubleComplex*>(in),
101+
reinterpret_cast<cufftDoubleComplex*>(out),
102+
CUFFT_INVERSE));
103+
}
104+
template <> std::complex<float>*
105+
FFT_CUDA<float>::get_auxr_3d_data() const {return this->c_auxr_3d;}
106+
template <> std::complex<double>*
107+
FFT_CUDA<double>::get_auxr_3d_data() const {return this->z_auxr_3d;}
108+
}// namespace ModulePW

0 commit comments

Comments
 (0)