Skip to content

Commit e70ea45

Browse files
authored
Test: Add unit test to increase the code coverage rate of module_pw (#1845)
* fix: modify velocity operator
* fix: MPI cores can be larger than z-fft grids; fix the failing case 101_PW_15_lowz; change case 101_PW_15_lowz so that it has a larger total energy and will not be ignored by Autotest.sh
* Test: Increase code coverage of module_pw
1 parent 88bd4e2 commit e70ea45

19 files changed

+478
-54
lines changed

source/module_pw/fft.cpp

Lines changed: 22 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ void FFT::clear()
3535
if(z_auxg!=nullptr) {fftw_free(z_auxg); z_auxg = nullptr;}
3636
if(z_auxr!=nullptr) {fftw_free(z_auxr); z_auxr = nullptr;}
3737
d_rspace = nullptr;
38+
#if defined(__CUDA) || defined(__ROCM)
3839
if (GlobalV::device_flag == "gpu") {
3940
if (GlobalV::precision_flag == "single") {
4041
if (c_auxr_3d != nullptr) {
@@ -49,6 +50,7 @@ void FFT::clear()
4950
}
5051
}
5152
}
53+
#endif
5254
if (GlobalV::precision_flag == "single") {
5355
this->cleanfFFT();
5456
if (c_auxg != nullptr) {
@@ -95,6 +97,7 @@ void FFT:: initfft(int nx_in, int ny_in, int nz_in, int lixy_in, int rixy_in, in
9597
d_rspace = (double *) z_auxg;
9698
// auxr_3d = static_cast<std::complex<double> *>(
9799
// fftw_malloc(sizeof(fftw_complex) * (this->nx * this->ny * this->nz)));
100+
#if defined(__CUDA) || defined(__ROCM)
98101
if (GlobalV::device_flag == "gpu") {
99102
if (GlobalV::precision_flag == "single") {
100103
resmem_cd_op()(gpu_ctx, this->c_auxr_3d, this->nx * this->ny * this->nz);
@@ -103,6 +106,7 @@ void FFT:: initfft(int nx_in, int ny_in, int nz_in, int lixy_in, int rixy_in, in
103106
resmem_zd_op()(gpu_ctx, this->z_auxr_3d, this->nx * this->ny * this->nz);
104107
}
105108
}
109+
#endif
106110
if (GlobalV::precision_flag == "single") {
107111
c_auxg = (std::complex<float> *) fftw_malloc(sizeof(fftwf_complex) * maxgrids);
108112
c_auxr = (std::complex<float> *) fftw_malloc(sizeof(fftwf_complex) * maxgrids);
@@ -129,10 +133,10 @@ void FFT:: setupFFT()
129133
#if defined(__FFTW3_MPI) && defined(__MPI)
130134
else
131135
{
132-
this->initplan_mpi();
133-
if (GlobalV::precision_flag == "single") {
134-
this->initplanf_mpi();
135-
}
136+
// this->initplan_mpi();
137+
// if (GlobalV::precision_flag == "single") {
138+
// this->initplanf_mpi();
139+
// }
136140
}
137141
#endif
138142
return;
@@ -227,6 +231,7 @@ void FFT :: initplan()
227231
// reinterpret_cast<fftw_complex *>(auxr_3d),
228232
// FFTW_BACKWARD, FFTW_MEASURE);
229233

234+
#if defined(__CUDA) || defined(__ROCM)
230235
if (GlobalV::device_flag == "gpu") {
231236
if (GlobalV::precision_flag == "single") {
232237
#if defined(__CUDA)
@@ -243,6 +248,7 @@ void FFT :: initplan()
243248
#endif
244249
}
245250
}
251+
#endif
246252

247253
destroyp = false;
248254
}
@@ -320,15 +326,15 @@ void FFT :: initplanf()
320326
destroypf = false;
321327
}
322328

323-
void FFT :: initplan_mpi()
324-
{
329+
// void FFT :: initplan_mpi()
330+
// {
325331

326-
}
332+
// }
327333

328-
void FFT :: initplanf_mpi()
329-
{
334+
// void FFT :: initplanf_mpi()
335+
// {
330336

331-
}
337+
// }
332338

333339
void FFT:: cleanFFT()
334340
{
@@ -369,6 +375,7 @@ void FFT:: cleanFFT()
369375
}
370376
// fftw_destroy_plan(this->plan3dforward);
371377
// fftw_destroy_plan(this->plan3dbackward);
378+
#if defined(__CUDA) || defined(__ROCM)
372379
if (GlobalV::device_flag == "gpu") {
373380
if (GlobalV::precision_flag == "single") {
374381
#if defined(__CUDA)
@@ -385,6 +392,7 @@ void FFT:: cleanFFT()
385392
#endif
386393
}
387394
}
395+
#endif
388396
destroyp = true;
389397
}
390398

@@ -659,6 +667,7 @@ void FFT::fftxyc2r(std::complex<double> * in, double * out)
659667
}
660668
}
661669

670+
#if defined(__CUDA) || defined(__ROCM)
662671
template <>
663672
void FFT::fft3D_forward(const psi::DEVICE_GPU * /*ctx*/, std::complex<float> * in, std::complex<float> * out)
664673
{
@@ -728,6 +737,7 @@ void FFT::fft3D_backward(const psi::DEVICE_GPU * /*ctx*/, std::complex<double>*
728737
hipDeviceSynchronize();
729738
#endif
730739
}
740+
#endif
731741

732742

733743
template <>
@@ -757,6 +767,7 @@ std::complex<double> * FFT::get_auxg_data() {
757767
return this->z_auxg;
758768
}
759769

770+
#if defined(__CUDA) || defined(__ROCM)
760771
template <>
761772
std::complex<float> * FFT::get_auxr_3d_data() {
762773
return this->c_auxr_3d;
@@ -765,5 +776,6 @@ template <>
765776
std::complex<double> * FFT::get_auxr_3d_data() {
766777
return this->z_auxr_3d;
767778
}
779+
#endif
768780

769781
}

source/module_pw/fft.h

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
#include <hip/hip_runtime.h>
2121
#endif
2222

23+
// Temporary: we do not need psi. However, some GPU ops are defined in psi; they should be moved into module_base or module_gpu.
2324
#include "module_psi/psi.h"
2425
// #ifdef __MIX_PRECISION
2526
// #include "fftw3f.h"
@@ -63,11 +64,12 @@ class FFT
6364

6465
public:
6566
//init fftw_plans
66-
void initplan();
67-
void initplan_mpi();
67+
void initplan();
68+
// MPI FFTW is not supported yet.
69+
// void initplan_mpi();
6870
//init fftwf_plans
69-
void initplanf();
70-
void initplanf_mpi();
71+
void initplanf();
72+
// void initplanf_mpi();
7173

7274
public:
7375
int fftnx=0, fftny=0;

source/module_pw/pw_basis.cpp

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -30,9 +30,11 @@ PW_Basis:: ~PW_Basis()
3030
delete[] startr;
3131
delete[] ig2igg;
3232
delete[] gg_uniq;
33+
#if defined(__CUDA) || defined(__ROCM)
3334
if (GlobalV::device_flag == "gpu") {
3435
delmem_int_op()(gpu_ctx, this->d_is2fftixy);
3536
}
37+
#endif
3638
}
3739

3840
///
@@ -169,7 +171,7 @@ void PW_Basis::collect_uniqgg()
169171
{
170172
if (std::abs(tmpgg[ig] - tmpgg2[igg]) > 1.0e-8)
171173
{
172-
tmpgg2[igg] = avg_gg / double(avg_n) ;
174+
tmpgg2[igg] = avg_gg / double(avg_n);
173175
++igg;
174176
tmpgg2[igg] = tmpgg[ig];
175177
avg_gg = tmpgg2[igg];
@@ -181,11 +183,8 @@ void PW_Basis::collect_uniqgg()
181183
avg_gg += tmpgg[ig];
182184
}
183185
this->ig2igg[sortindex[ig]] = igg;
184-
if(ig == this->npw)
185-
{
186-
tmpgg2[igg] = avg_gg / double(avg_n) ;
187-
}
188186
}
187+
tmpgg2[igg] = avg_gg / double(avg_n);
189188
this->ngg = igg + 1;
190189
delete[] this->gg_uniq; this->gg_uniq = new double [this->ngg];
191190
for(int igg = 0 ; igg < this->ngg ; ++igg)

source/module_pw/pw_basis_k.cpp

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ PW_Basis_K::~PW_Basis_K()
1919
delete[] igl2ig_k;
2020
delete[] gk2;
2121
delete[] ig2ixyz_k_;
22+
#if defined(__CUDA) || defined(__ROCM)
2223
if (GlobalV::device_flag == "gpu") {
2324
if (GlobalV::precision_flag == "single") {
2425
delmem_sd_op()(gpu_ctx, this->s_kvec_c);
@@ -34,13 +35,16 @@ PW_Basis_K::~PW_Basis_K()
3435
delmem_int_op()(gpu_ctx, this->d_igl2isz_k);
3536
}
3637
else {
38+
#endif
3739
if (GlobalV::precision_flag == "single") {
3840
delmem_sh_op()(cpu_ctx, this->s_kvec_c);
3941
delmem_sh_op()(cpu_ctx, this->s_gcar);
4042
delmem_sh_op()(cpu_ctx, this->s_gk2);
4143
}
4244
// There's no need to delete double pointers while in a CPU environment.
45+
#if defined(__CUDA) || defined(__ROCM)
4346
}
47+
#endif
4448
}
4549

4650
void PW_Basis_K:: initparameters(
@@ -86,6 +90,7 @@ void PW_Basis_K:: initparameters(
8690
this->fftnxy = this->fftnx * this->fftny;
8791
this->fftnxyz = this->fftnxy * this->fftnz;
8892
this->distribution_type = distribution_type_in;
93+
#if defined(__CUDA) || defined(__ROCM)
8994
if (GlobalV::device_flag == "gpu") {
9095
if (GlobalV::precision_flag == "single") {
9196
resmem_sd_op()(gpu_ctx, this->s_kvec_c, this->nks * 3);
@@ -97,6 +102,7 @@ void PW_Basis_K:: initparameters(
97102
}
98103
}
99104
else {
105+
#endif
100106
if (GlobalV::precision_flag == "single") {
101107
resmem_sh_op()(cpu_ctx, this->s_kvec_c, this->nks * 3);
102108
castmem_d2s_h2h_op()(cpu_ctx, cpu_ctx, this->s_kvec_c, reinterpret_cast<double *>(&this->kvec_c[0][0]), this->nks * 3);
@@ -105,7 +111,9 @@ void PW_Basis_K:: initparameters(
105111
this->d_kvec_c = reinterpret_cast<double *>(&this->kvec_c[0][0]);
106112
}
107113
// There's no need to allocate double pointers while in a CPU environment.
114+
#if defined(__CUDA) || defined(__ROCM)
108115
}
116+
#endif
109117
}
110118

111119
void PW_Basis_K::setupIndGk()
@@ -151,10 +159,12 @@ void PW_Basis_K::setupIndGk()
151159
}
152160
}
153161
}
162+
#if defined(__CUDA) || defined(__ROCM)
154163
if (GlobalV::device_flag == "gpu") {
155164
resmem_int_op()(gpu_ctx, this->d_igl2isz_k, this->npwk_max * this->nks);
156165
syncmem_int_h2d_op()(gpu_ctx, cpu_ctx, this->d_igl2isz_k, this->igl2isz_k, this->npwk_max * this->nks);
157166
}
167+
#endif
158168
return;
159169
}
160170

@@ -210,6 +220,7 @@ void PW_Basis_K::collect_local_pw()
210220
this->gcar[ik * npwk_max + igl] = f * this->G;
211221
}
212222
}
223+
#if defined(__CUDA) || defined(__ROCM)
213224
if (GlobalV::device_flag == "gpu") {
214225
if (GlobalV::precision_flag == "single") {
215226
resmem_sd_op()(gpu_ctx, this->s_gk2, this->npwk_max * this->nks);
@@ -225,6 +236,7 @@ void PW_Basis_K::collect_local_pw()
225236
}
226237
}
227238
else {
239+
#endif
228240
if (GlobalV::precision_flag == "single") {
229241
resmem_sh_op()(cpu_ctx, this->s_gk2, this->npwk_max * this->nks, "PW_B_K::s_gk2");
230242
resmem_sh_op()(cpu_ctx, this->s_gcar, this->npwk_max * this->nks * 3, "PW_B_K::s_gcar");
@@ -236,7 +248,9 @@ void PW_Basis_K::collect_local_pw()
236248
this->d_gk2 = this->gk2;
237249
}
238250
// There's no need to allocate double pointers while in a CPU environment.
251+
#if defined(__CUDA) || defined(__ROCM)
239252
}
253+
#endif
240254
}
241255

242256
ModuleBase::Vector3<double> PW_Basis_K:: cal_GplusK_cartesian(const int ik, const int ig) const {
@@ -293,7 +307,7 @@ int& PW_Basis_K::getigl2ig(const int ik, const int igl) const
293307

294308
void PW_Basis_K::get_ig2ixyz_k()
295309
{
296-
310+
delete[] this->ig2ixyz_k_;
297311
this->ig2ixyz_k_ = new int [this->npwk_max * this->nks];
298312
ModuleBase::Memory::record("PW_B_K::ig2ixyz", sizeof(int) * this->npwk_max * this->nks);
299313
assert(gamma_only == false); //We only finish non-gamma_only fft on GPU temperarily.
@@ -310,10 +324,12 @@ void PW_Basis_K::get_ig2ixyz_k()
310324
ig2ixyz_k_[igl + ik * npwk_max] = iz + iy * nz + ix * ny * nz;
311325
}
312326
}
327+
#if defined(__CUDA) || defined(__ROCM)
313328
if (GlobalV::device_flag == "gpu") {
314329
resmem_int_op()(gpu_ctx, ig2ixyz_k, this->npwk_max * this->nks);
315330
syncmem_int_h2d_op()(gpu_ctx, cpu_ctx, this->ig2ixyz_k, this->ig2ixyz_k_, this->npwk_max * this->nks);
316331
}
332+
#endif
317333
}
318334

319335
template <>

source/module_pw/pw_basis_k_big.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ class PW_Basis_K_Big: public PW_Basis_K
1616
// combine [bx,by,bz] FFT grids into a big one
1717
// typical values are bx=2, by=2, bz=2
1818
// nbx=nx/bx, nby=ny/by, nbz=nz/bz,
19+
// Note: this class can only use initgrids(lat0_in, latvec_in, PW_Basis_Big::nx, PW_Basis_Big::ny, PW_Basis_Big::nz)!!!
1920
PW_Basis_K_Big(){
2021
bx = 1;
2122
by = 1;

source/module_pw/pw_transform.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,7 @@ void PW_Basis:: real2recip(const FPTYPE * in, std::complex<FPTYPE> * out, const
9191
#endif
9292
for(int ir = 0 ; ir < this->nrxx ; ++ir)
9393
{
94-
this->ft.get_auxr_data<FPTYPE>()[ir] = std::complex<double>(in[ir],0);
94+
this->ft.get_auxr_data<FPTYPE>()[ir] = std::complex<FPTYPE>(in[ir],0);
9595
}
9696
this->ft.fftxyfor(ft.get_auxr_data<FPTYPE>(),ft.get_auxr_data<FPTYPE>());
9797
}
@@ -141,7 +141,7 @@ void PW_Basis:: recip2real(const std::complex<FPTYPE> * in, std::complex<FPTYPE>
141141
#endif
142142
for(int i = 0 ; i < this->nst * this->nz ; ++i)
143143
{
144-
ft.get_auxg_data<FPTYPE>()[i] = std::complex<double>(0, 0);
144+
ft.get_auxg_data<FPTYPE>()[i] = std::complex<FPTYPE>(0, 0);
145145
}
146146

147147
#ifdef _OPENMP
Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
add_definitions(-D__NORMAL)
1+
add_definitions(-D__NORMAL -D__MIX_PRECISION)
22
AddTest(
33
TARGET pw_test
44
LIBS ${math_libs} planewave psi device
@@ -8,16 +8,16 @@ AddTest(
88
../../src_parallel/parallel_global.cpp ../../src_parallel/parallel_reduce.cpp
99
pw_test.cpp test1-1-1.cpp test1-1-2.cpp test1-2.cpp test1-3.cpp test1-4.cpp test1-5.cpp
1010
test2-1-1.cpp test2-1-2.cpp test2-2.cpp test2-3.cpp
11-
test3-1.cpp test3-2.cpp test3-3.cpp
11+
test3-1.cpp test3-2.cpp test3-3.cpp test3-3-2.cpp
1212
test4-1.cpp test4-2.cpp test4-3.cpp test4-4.cpp test4-5.cpp
1313
test5-1-1.cpp test5-1-2.cpp test5-2-1.cpp test5-2-2.cpp test5-3-1.cpp test5-4-1.cpp test5-4-2.cpp
1414
test6-1-1.cpp test6-1-2.cpp test6-2-1.cpp test6-2-2.cpp test6-3-1.cpp test6-4-1.cpp test6-4-2.cpp
1515
test7-1.cpp test6-2-1.cpp test7-3-1.cpp test7-3-2.cpp
1616
test8-1.cpp test8-2-1.cpp test8-3-1.cpp test8-3-2.cpp
17-
test_tool.cpp
17+
test_tool.cpp test-big.cpp test-other.cpp
1818
)
1919

2020
add_test(NAME pw_test_parallel
21-
COMMAND mpirun -np 2 ./pw_test; mpirun -np 3./pw_test
21+
COMMAND mpirun -np 3 ./pw_test; mpirun -np 4 ./pw_test
2222
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
2323
)

0 commit comments

Comments
 (0)