Skip to content

Commit 2fdecfa

Browse files
committed
set C2C and C2R
1 parent 95c1108 commit 2fdecfa

File tree

4 files changed

+191
-568
lines changed

4 files changed

+191
-568
lines changed

source/module_basis/module_pw/test_gpu/recip_to_real_C2R.cpp renamed to source/module_basis/module_pw/test_gpu/pw_basis_C2R.cpp

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,6 @@ TEST_F(PWTEST, recip_to_real_double)
4343
const int nx = pwtest.nx;
4444
const int ny = pwtest.ny;
4545
const int nz = pwtest.nz;
46-
printf("the nx is %d,the ny is %d\n,the nz is %d\n", nx, ny, nz);
4746
const int nplane = pwtest.nplane;
4847

4948
const double tpiba2 = ModuleBase::TWO_PI * ModuleBase::TWO_PI / lat0 / lat0;
@@ -103,7 +102,7 @@ TEST_F(PWTEST, recip_to_real_double)
103102
MPI_Bcast(tmp, 2 * nx * ny * nz, MPI_DOUBLE, 0, POOL_WORLD);
104103
#endif
105104
// const int size = nx * ny * nz;
106-
complex<double>* h_rhog = new complex<double>[npw];
105+
complex<double>* h_rhog = new complex<double>[npw];
107106
complex<double>* h_rhogout = new complex<double>[npw];
108107
complex<double>* d_rhog;
109108
complex<double>* d_rhogr;
@@ -125,10 +124,8 @@ TEST_F(PWTEST, recip_to_real_double)
125124
}
126125
}
127126
cudaMemcpy(d_rhog, h_rhog, npw * sizeof(complex<double>), cudaMemcpyHostToDevice);
128-
cudaMemcpy(d_rhogout, h_rhogout, npw * sizeof(complex<double>), cudaMemcpyHostToDevice);
129127

130128
double* h_rhor = new double[nrxx];
131-
double* h_rhogrout = new double[nrxx];
132129
double* d_rhor;
133130
cudaMalloc((void**)&d_rhor, nrxx * sizeof(double));
134131
pwtest.recip_to_real<std::complex<double>, double, base_device::DEVICE_GPU>(d_rhog, d_rhor);
@@ -142,11 +139,19 @@ TEST_F(PWTEST, recip_to_real_double)
142139
EXPECT_NEAR(tmp[ixy * nz + startiz + iz].real(), h_rhor[ixy * nplane + iz], 1e-6);
143140
}
144141
}
142+
143+
pwtest.real_to_recip<double,std::complex<double>,base_device::DEVICE_GPU>(d_rhor,d_rhog);
144+
cudaMemcpy(h_rhogout,d_rhog,npw * sizeof(complex<double>),cudaMemcpyDeviceToHost);
145+
for (int ig = 0; ig < npw; ++ig)
146+
{
147+
EXPECT_NEAR(h_rhog[ig].real(), h_rhogout[ig].real(), 1e-6);
148+
EXPECT_NEAR(h_rhog[ig].imag(), h_rhogout[ig].imag(), 1e-6);
149+
}
150+
145151
delete[] h_rhog;
146152
delete[] h_rhogout;
147153
delete[] h_rhor;
148154
delete[] tmp;
149-
delete[] h_rhogrout;
150155
cudaFree(d_rhog);
151156
cudaFree(d_rhogr);
152157
cudaFree(d_rhogout);
@@ -274,6 +279,14 @@ TEST_F(PWTEST, recip_to_real_float)
274279
EXPECT_NEAR(tmp[ixy * nz + startiz + iz].real(), h_rhor[ixy * nplane + iz], 1e-6);
275280
}
276281
}
282+
283+
pwtest.real_to_recip<float,std::complex<float>,base_device::DEVICE_GPU>(d_rhor,d_rhog);
284+
cudaMemcpy(h_rhogout,d_rhog,npw * sizeof(complex<float>),cudaMemcpyDeviceToHost);
285+
for (int ig = 0; ig < npw; ++ig)
286+
{
287+
EXPECT_NEAR(h_rhog[ig].real(), h_rhogout[ig].real(), 1e-6);
288+
EXPECT_NEAR(h_rhog[ig].imag(), h_rhogout[ig].imag(), 1e-6);
289+
}
277290
delete[] h_rhog;
278291
delete[] h_rhogout;
279292
delete[] h_rhor;
Lines changed: 173 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,173 @@
1+
//---------------------------------------------
2+
// TEST for FFT
3+
//---------------------------------------------
4+
#include "../pw_basis_k.h"
5+
#ifdef __MPI
6+
#include "test_tool.h"
7+
#include "module_base/parallel_global.h"
8+
#include "mpi.h"
9+
#endif
10+
#include "module_base/constants.h"
11+
#include "module_base/global_function.h"
12+
#include "pw_test.h"
13+
#include "cuda_runtime.h"
14+
using namespace std;
15+
TEST_F(PWTEST,pw_basis_k_C2C_double)
16+
{
17+
cout<<"dividemthd 1, gamma_only: on, xprime: false, gamma kpoint, check fft"<<endl;
18+
ModulePW::PW_Basis_K pwtest("gpu", "double");
19+
ModuleBase::Matrix3 latvec(1, 0.3, 0, 0, 2, 0, 0, 0, 2);
20+
double wfcecut;
21+
double lat0= 2.7;
22+
bool gamma_only;
23+
ModuleBase::Vector3<double> *kvec_d;
24+
int nks;
25+
//--------------------------------------------------
26+
nks = 1;
27+
kvec_d = new ModuleBase::Vector3<double>[nks];
28+
kvec_d[0].set(0,0,0);
29+
wfcecut = 10;
30+
gamma_only = true;
31+
int distribution_type = 1;
32+
bool xprime = false;
33+
//--------------------------------------------------
34+
#ifdef __MPI
35+
pwtest.initmpi(nproc_in_pool, rank_in_pool, POOL_WORLD);
36+
#endif
37+
//init //real parameter
38+
pwtest.initgrids(lat0,latvec,4*wfcecut);
39+
pwtest.initparameters(gamma_only,wfcecut,nks,kvec_d,distribution_type, xprime);
40+
pwtest.setuptransform();
41+
pwtest.collect_local_pw();
42+
43+
const int nrxx = pwtest.nrxx;
44+
const int nmaxgr = pwtest.nmaxgr;
45+
const int nx = pwtest.nx;
46+
const int ny = pwtest.ny;
47+
const int nz = pwtest.nz;
48+
const int nplane = pwtest.nplane;
49+
const double tpiba2 = ModuleBase::TWO_PI * ModuleBase::TWO_PI / lat0 / lat0;
50+
const double ggecut = wfcecut / tpiba2;
51+
ModuleBase::Matrix3 GT,G,GGT;
52+
GT = latvec.Inverse();
53+
G = GT.Transpose();
54+
GGT = G * GT;
55+
complex<double> *tmp = new complex<double> [nx*ny*nz];
56+
complex<double> * rhogr = new complex<double> [nmaxgr];
57+
double * rhor = new double [nrxx];
58+
for(int ik = 0; ik < nks; ++ik)
59+
{
60+
int npwk = pwtest.npwk[ik];
61+
if(rank_in_pool == 0)
62+
{
63+
ModuleBase::Vector3<double> kk = kvec_d[ik];
64+
for(int ix = 0 ; ix < nx ; ++ix)
65+
{
66+
for(int iy = 0 ; iy < ny ; ++iy)
67+
{
68+
for(int iz = 0 ; iz < nz ; ++iz)
69+
{
70+
tmp[ix*ny*nz + iy*nz + iz]=0.0;
71+
double vx = ix - int(nx/2);
72+
double vy = iy - int(ny/2);
73+
double vz = iz - int(nz/2);
74+
ModuleBase::Vector3<double> v(vx,vy,vz);
75+
// double modulus = v * (GGT * v);
76+
double modulusgk = (v+kk) * (GGT * (v+kk));
77+
if (modulusgk <= ggecut)
78+
{
79+
tmp[ix*ny*nz + iy*nz + iz]=1.0/(modulusgk+1);
80+
if(vy > 0) tmp[ix*ny*nz + iy*nz + iz]+=ModuleBase::IMAG_UNIT / (std::abs(v.x+1) + 1);
81+
else if(vy < 0) tmp[ix*ny*nz + iy*nz + iz]-=ModuleBase::IMAG_UNIT / (std::abs(-v.x+1) + 1);
82+
}
83+
}
84+
}
85+
}
86+
fftw_plan pp = fftw_plan_dft_3d(nx,ny,nz,(fftw_complex *) tmp, (fftw_complex *) tmp, FFTW_BACKWARD, FFTW_ESTIMATE);
87+
fftw_execute(pp);
88+
fftw_destroy_plan(pp);
89+
90+
ModuleBase::Vector3<double> delta_g(double(int(nx/2))/nx, double(int(ny/2))/ny, double(int(nz/2))/nz);
91+
for(int ixy = 0 ; ixy < nx * ny ; ++ixy)
92+
{
93+
for(int iz = 0 ; iz < nz ; ++iz)
94+
{
95+
int ix = ixy / ny;
96+
int iy = ixy % ny;
97+
ModuleBase::Vector3<double> real_r(ix, iy, iz);
98+
double phase_im = -delta_g * real_r;
99+
complex<double> phase(0,ModuleBase::TWO_PI * phase_im);
100+
tmp[ixy * nz + iz] *= exp(phase);
101+
}
102+
}
103+
}
104+
#ifdef __MPI
105+
MPI_Bcast(tmp,2*nx*ny*nz,MPI_DOUBLE,0,POOL_WORLD);
106+
#endif
107+
complex<double> * h_rhog = new complex<double> [npwk];
108+
complex<double> * rhogout = new complex<double> [npwk];
109+
for(int ig = 0 ; ig < npwk ; ++ig)
110+
{
111+
h_rhog[ig] = 1.0/(pwtest.getgk2(ik,ig)+1);
112+
rhogr[ig] = 1.0/(pwtest.getgk2(ik,ig)+1);
113+
ModuleBase::Vector3<double> f = pwtest.getgdirect(ik,ig);
114+
if(f.y > 0)
115+
{
116+
h_rhog[ig]+=ModuleBase::IMAG_UNIT / (std::abs(f.x+1) + 1);
117+
rhogr[ig]+=ModuleBase::IMAG_UNIT / (std::abs(f.x+1) + 1);
118+
}
119+
}
120+
121+
complex<double>* h_rhogout = new complex<double>[npwk];
122+
complex<double>* d_rhog;
123+
complex<double>* d_rhor;
124+
complex<double>* d_rhogout;
125+
cudaMalloc((void**)&d_rhog, npwk * sizeof(complex<double>));
126+
cudaMalloc((void**)&d_rhor, npwk * sizeof(complex<double>));
127+
cudaMalloc((void**)&d_rhogout, npwk * sizeof(complex<double>));
128+
pwtest.recip_to_real<std::complex<double>,std::complex<double>,base_device::DEVICE_GPU>(h_rhog,d_rhor,ik); //check out-of-place transform
129+
130+
int startiz = pwtest.startz_current;
131+
for(int ixy = 0 ; ixy < nx * ny ; ++ixy)
132+
{
133+
for(int iz = 0 ; iz < nplane ; ++iz)
134+
{
135+
EXPECT_NEAR(tmp[ixy * nz + startiz + iz].real(),rhor[ixy*nplane+iz],1e-6);
136+
EXPECT_NEAR(tmp[ixy * nz + startiz + iz].real(),((double*)rhogr)[ixy*nplane+iz],1e-6);
137+
}
138+
}
139+
140+
pwtest.real2recip(rhor,rhogout,ik);
141+
142+
pwtest.real2recip((double*)rhogr,rhogr,ik);
143+
144+
145+
for(int ig = 0 ; ig < npwk ; ++ig)
146+
{
147+
EXPECT_NEAR(h_rhog[ig].real(),rhogout[ig].real(),1e-6);
148+
EXPECT_NEAR(h_rhog[ig].imag(),rhogout[ig].imag(),1e-6);
149+
EXPECT_NEAR(h_rhog[ig].real(),rhogr[ig].real(),1e-6);
150+
EXPECT_NEAR(h_rhog[ig].imag(),rhogr[ig].imag(),1e-6);
151+
}
152+
153+
154+
delete [] h_rhog;
155+
delete [] rhogout;
156+
//check igl2ig
157+
for(int igl = 0; igl < npwk ; ++igl)
158+
{
159+
const int isz = pwtest.getigl2isz(ik,igl);
160+
for(int ig = 0 ; ig < pwtest.npwk; ++ig)
161+
{
162+
if(isz == pwtest.ig2isz[ig]){
163+
EXPECT_EQ(ig,pwtest.getigl2ig(ik,igl));}
164+
}
165+
}
166+
167+
}
168+
delete []tmp;
169+
delete [] rhor;
170+
delete[] kvec_d;
171+
delete[] rhogr;
172+
fftw_cleanup();
173+
}

0 commit comments

Comments
 (0)