33// ---------------------------------------------
44#include " ../pw_basis_k.h"
55#ifdef __MPI
6- #include " test_tool.h"
76#include " module_base/parallel_global.h"
87#include " mpi.h"
8+ #include " test_tool.h"
99#endif
10+ #include " cuda_runtime.h"
1011#include " module_base/constants.h"
1112#include " module_base/global_function.h"
1213#include " pw_test.h"
13- #include " cuda_runtime.h"
1414using namespace std ;
15- TEST_F (PWTEST,pw_basis_k_C2C_double)
15+ TEST_F (PWTEST, pw_basis_k_C2C_double)
1616{
17- cout<< " dividemthd 1, gamma_only: on, xprime: false, gamma kpoint, check fft" << endl;
17+ cout << " dividemthd 1, gamma_only: on, xprime: false, gamma kpoint, check fft" << endl;
1818 ModulePW::PW_Basis_K pwtest (" gpu" , " double" );
1919 ModuleBase::Matrix3 latvec (1 , 0.3 , 0 , 0 , 2 , 0 , 0 , 0 , 2 );
20- double wfcecut;
21- double lat0= 2.7 ;
20+ double wfcecut = 10 ;
21+ double lat0 = 2.7 ;
2222 bool gamma_only;
23- ModuleBase::Vector3<double > *kvec_d;
24- int nks;
25- // --------------------------------------------------
26- nks = 1 ;
23+ ModuleBase::Vector3<double >* kvec_d;
24+ int nks = 1 ;
2725 kvec_d = new ModuleBase::Vector3<double >[nks];
28- kvec_d[0 ].set (0 ,0 , 0 );
26+ kvec_d[0 ].set (0 , 0 , 0 );
2927 wfcecut = 10 ;
30- gamma_only = true ;
28+ gamma_only = false ;
3129 int distribution_type = 1 ;
3230 bool xprime = false ;
3331 // --------------------------------------------------
3432#ifdef __MPI
3533 pwtest.initmpi (nproc_in_pool, rank_in_pool, POOL_WORLD);
3634#endif
37- // init //real parameter
38- pwtest.initgrids (lat0,latvec,4 * wfcecut);
39- pwtest.initparameters (gamma_only,wfcecut,nks,kvec_d,distribution_type, xprime);
35+ // init //real parameter
36+ pwtest.initgrids (lat0, latvec, 4 * wfcecut);
37+ pwtest.initparameters (gamma_only, wfcecut, nks, kvec_d, distribution_type, xprime);
4038 pwtest.setuptransform ();
4139 pwtest.collect_local_pw ();
4240
@@ -48,101 +46,252 @@ TEST_F(PWTEST,pw_basis_k_C2C_double)
4846 const int nplane = pwtest.nplane ;
4947 const double tpiba2 = ModuleBase::TWO_PI * ModuleBase::TWO_PI / lat0 / lat0;
5048 const double ggecut = wfcecut / tpiba2;
51- ModuleBase::Matrix3 GT,G, GGT;
49+ ModuleBase::Matrix3 GT, G, GGT;
5250 GT = latvec.Inverse ();
53- G = GT.Transpose ();
54- GGT = G * GT;
55- complex <double > * tmp = new complex <double > [nx*ny* nz];
56- double * rhor = new double [nrxx];
57- for (int ik = 0 ; ik < nks; ++ik)
51+ G = GT.Transpose ();
52+ GGT = G * GT;
53+ complex <double >* tmp = new complex <double >[nx * ny * nz];
54+ double * rhor = new double [nrxx];
55+ for (int ik = 0 ; ik < nks; ++ik)
5856 {
5957 int npwk = pwtest.npwk [ik];
60- if (rank_in_pool == 0 )
58+ if (rank_in_pool == 0 )
6159 {
6260 ModuleBase::Vector3<double > kk = kvec_d[ik];
63- for (int ix = 0 ; ix < nx ; ++ix)
61+ for (int ix = 0 ; ix < nx; ++ix)
6462 {
65- for (int iy = 0 ; iy < ny ; ++iy)
63+ for (int iy = 0 ; iy < ny; ++iy)
6664 {
67- for (int iz = 0 ; iz < nz ; ++iz)
65+ for (int iz = 0 ; iz < nz; ++iz)
6866 {
69- tmp[ix*ny*nz + iy*nz + iz]=0.0 ;
70- double vx = ix - int (nx/2 );
71- double vy = iy - int (ny/2 );
72- double vz = iz - int (nz/2 );
73- ModuleBase::Vector3<double > v (vx,vy,vz);
74- // double modulus = v * (GGT * v);
75- double modulusgk = (v+kk) * (GGT * (v+kk));
67+ tmp[ix * ny * nz + iy * nz + iz] = 0.0 ;
68+ double vx = ix - int (nx / 2 );
69+ double vy = iy - int (ny / 2 );
70+ double vz = iz - int (nz / 2 );
71+ ModuleBase::Vector3<double > v (vx, vy, vz);
72+ double modulusgk = (v + kk) * (GGT * (v + kk));
7673 if (modulusgk <= ggecut)
7774 {
78- tmp[ix*ny*nz + iy*nz + iz]=1.0 /(modulusgk+1 );
79- if (vy > 0 ) tmp[ix*ny*nz + iy*nz + iz]+=ModuleBase::IMAG_UNIT / (std::abs (v.x +1 ) + 1 );
80- else if (vy < 0 ) tmp[ix*ny*nz + iy*nz + iz]-=ModuleBase::IMAG_UNIT / (std::abs (-v.x +1 ) + 1 );
75+ tmp[ix * ny * nz + iy * nz + iz] = 1.0 / (modulusgk + 1 );
76+ if (vy > 0 )
77+ tmp[ix * ny * nz + iy * nz + iz] += ModuleBase::IMAG_UNIT / (std::abs (v.x + 1 ) + 1 );
78+ else if (vy < 0 )
79+ tmp[ix * ny * nz + iy * nz + iz] -= ModuleBase::IMAG_UNIT / (std::abs (-v.x + 1 ) + 1 );
8180 }
8281 }
83- }
82+ }
8483 }
85- fftw_plan pp = fftw_plan_dft_3d (nx,ny,nz,(fftw_complex *) tmp, (fftw_complex *) tmp, FFTW_BACKWARD, FFTW_ESTIMATE);
86- fftw_execute (pp);
87- fftw_destroy_plan (pp);
84+ fftw_plan pp
85+ = fftw_plan_dft_3d (nx, ny, nz, (fftw_complex*)tmp, (fftw_complex*)tmp, FFTW_BACKWARD, FFTW_ESTIMATE);
86+ fftw_execute (pp);
87+ fftw_destroy_plan (pp);
8888
89- ModuleBase::Vector3<double > delta_g (double (int (nx/2 ))/nx, double (int (ny/2 ))/ny, double (int (nz/2 ))/nz);
90- for (int ixy = 0 ; ixy < nx * ny ; ++ixy)
89+ ModuleBase::Vector3<double > delta_g (double (int (nx / 2 )) / nx,
90+ double (int (ny / 2 )) / ny,
91+ double (int (nz / 2 )) / nz);
92+ for (int ixy = 0 ; ixy < nx * ny; ++ixy)
9193 {
92- for (int iz = 0 ; iz < nz ; ++iz)
94+ for (int iz = 0 ; iz < nz; ++iz)
9395 {
9496 int ix = ixy / ny;
9597 int iy = ixy % ny;
9698 ModuleBase::Vector3<double > real_r (ix, iy, iz);
9799 double phase_im = -delta_g * real_r;
98- complex <double > phase (0 ,ModuleBase::TWO_PI * phase_im);
100+ complex <double > phase (0 , ModuleBase::TWO_PI * phase_im);
99101 tmp[ixy * nz + iz] *= exp (phase);
100102 }
101103 }
102104 }
103105#ifdef __MPI
104- MPI_Bcast (tmp,2 *nx*ny* nz,MPI_DOUBLE,0 , POOL_WORLD);
106+ MPI_Bcast (tmp, 2 * nx * ny * nz, MPI_DOUBLE, 0 , POOL_WORLD);
105107#endif
106- complex <double > * h_rhog = new complex <double > [npwk];
107- complex <double > * h_rhor = new complex <double > [nrxx];
108- for (int ig = 0 ; ig < npwk ; ++ig)
109- {
110- h_rhog[ig] = 1.0 /(pwtest.getgk2 (ik,ig)+1 );
111- ModuleBase::Vector3<double > f = pwtest.getgdirect (ik,ig);
112- if (f.y > 0 )
113- {
114- h_rhog[ig]+=ModuleBase::IMAG_UNIT / (std::abs (f.x +1 ) + 1 );
115-
116- }
117- }
118-
108+ complex <double >* h_rhog = new complex <double >[npwk];
119109 complex <double >* h_rhogout = new complex <double >[npwk];
110+ complex <double >* h_rhor = new complex <double >[nrxx];
111+ for (int ig = 0 ; ig < npwk; ++ig)
112+ {
113+ h_rhog[ig] = 1.0 / (pwtest.getgk2 (ik, ig) + 1 );
114+ ModuleBase::Vector3<double > f = pwtest.getgdirect (ik, ig);
115+ if (f.y > 0 )
116+ h_rhog[ig] += ModuleBase::IMAG_UNIT / (std::abs (f.x + 1 ) + 1 );
117+ else if (f.y < 0 )
118+ h_rhog[ig] -= ModuleBase::IMAG_UNIT / (std::abs (-f.x + 1 ) + 1 );
119+ }
120120 complex <double >* d_rhog;
121121 complex <double >* d_rhor;
122- complex <double >* d_rhogout;
123122 cudaMalloc ((void **)&d_rhog, npwk * sizeof (complex <double >));
124- cudaMalloc ((void **)&d_rhor, npwk * sizeof (complex <double >));
125- cudaMalloc ((void **)&d_rhogout, npwk * sizeof (complex <double >));
126- cudaMemcpy (d_rhog,h_rhog,npwk*sizeof (complex <double >),cudaMemcpyHostToDevice);
127- pwtest.recip_to_real <std::complex <double >,std::complex <double >,base_device::DEVICE_GPU>(h_rhog,d_rhor,ik); // check out-of-place transform
128- cudaMemcpy (h_rhor,d_rhor,nrxx*sizeof (complex <double >),cudaMemcpyHostToDevice);
123+ cudaMalloc ((void **)&d_rhor, nrxx * sizeof (complex <double >));
124+ cudaMemcpy (d_rhog, h_rhog, npwk * sizeof (complex <double >), cudaMemcpyHostToDevice);
125+ pwtest.recip_to_real <std::complex <double >, base_device::DEVICE_GPU>(d_rhog,
126+ d_rhor,
127+ ik);
128+ cudaMemcpy (h_rhor, d_rhor, nrxx * sizeof (complex <double >), cudaMemcpyDeviceToHost);
129129 int startiz = pwtest.startz_current ;
130- for (int ixy = 0 ; ixy < nx * ny ; ++ixy)
130+ for (int ixy = 0 ; ixy < nx * ny; ++ixy)
131131 {
132- for (int iz = 0 ; iz < nplane ; ++iz)
132+ for (int iz = 0 ; iz < nplane; ++iz)
133133 {
134- EXPECT_NEAR (tmp[ixy * nz + startiz + iz].real (),h_rhor[ixy*nplane+iz].real (),1e-6 );
134+ EXPECT_NEAR (tmp[ixy * nz + startiz + iz].real (), h_rhor[ixy * nplane + iz].real (), 1e-6 );
135+ EXPECT_NEAR (tmp[ixy * nz + startiz + iz].imag (), h_rhor[ixy * nplane + iz].imag (), 1e-6 );
135136 }
136137 }
137138
138- delete [] h_rhog;
139- delete [] h_rhor;
140-
139+ pwtest.real_to_recip <std::complex <double >,base_device::DEVICE_GPU>(d_rhor,d_rhog,ik);
140+ cudaMemcpy (h_rhogout,d_rhog,npwk * sizeof (complex <double >),cudaMemcpyDeviceToHost);
141+ for (int ig = 0 ; ig < npwk; ++ig)
142+ {
143+ EXPECT_NEAR (h_rhog[ig].real (), h_rhogout[ig].real (), 1e-6 );
144+ EXPECT_NEAR (h_rhog[ig].imag (), h_rhogout[ig].imag (), 1e-6 );
145+ }
146+ delete[] h_rhog;
147+ delete[] h_rhor;
148+ delete[] h_rhogout;
149+ cudaFree (d_rhor);
150+ cudaFree (d_rhog);
151+ }
152+ delete[] tmp;
153+ delete[] rhor;
154+ delete[] kvec_d;
155+ fftw_cleanup ();
156+ }
141157
158+ TEST_F (PWTEST, pw_basis_k_C2C_float)
159+ {
160+ cout << " dividemthd 1, gamma_only: on, xprime: false, gamma kpoint, check fft" << endl;
161+ ModulePW::PW_Basis_K pwtest (" gpu" , " single" );
162+ ModuleBase::Matrix3 latvec (1 , 0.3 , 0 , 0 , 2 , 0 , 0 , 0 , 2 );
163+ double wfcecut =10 ;
164+ double lat0 = 2.7 ;
165+ bool gamma_only;
166+ ModuleBase::Vector3<double >* kvec_d;
167+ int nks = 1 ;
168+ kvec_d = new ModuleBase::Vector3<double >[nks];
169+ kvec_d[0 ].set (0 , 0 , 0 );
170+ wfcecut = 10 ;
171+ gamma_only = false ;
172+ int distribution_type = 1 ;
173+ bool xprime = false ;
174+ // --------------------------------------------------
175+ #ifdef __MPI
176+ pwtest.initmpi (nproc_in_pool, rank_in_pool, POOL_WORLD);
177+ #endif
178+ // init //real parameter
179+ pwtest.initgrids (lat0, latvec, 4 * wfcecut);
180+ pwtest.initparameters (gamma_only, wfcecut, nks, kvec_d, distribution_type, xprime);
181+ pwtest.setuptransform ();
182+ pwtest.collect_local_pw ();
183+
184+ const int nrxx = pwtest.nrxx ;
185+ const int nmaxgr = pwtest.nmaxgr ;
186+ const int nx = pwtest.nx ;
187+ const int ny = pwtest.ny ;
188+ const int nz = pwtest.nz ;
189+ const int nplane = pwtest.nplane ;
190+ const float tpiba2 = ModuleBase::TWO_PI * ModuleBase::TWO_PI / lat0 / lat0;
191+ const float ggecut = wfcecut / tpiba2;
192+ ModuleBase::Matrix3 GT, G, GGT;
193+ GT = latvec.Inverse ();
194+ G = GT.Transpose ();
195+ GGT = G * GT;
196+ complex <float >* tmp = new complex <float >[nx * ny * nz];
197+ for (int ik = 0 ; ik < nks; ++ik)
198+ {
199+ int npwk = pwtest.npwk [ik];
200+ if (rank_in_pool == 0 )
201+ {
202+ ModuleBase::Vector3<double > kk = kvec_d[ik];
203+ for (int ix = 0 ; ix < nx; ++ix)
204+ {
205+ for (int iy = 0 ; iy < ny; ++iy)
206+ {
207+ for (int iz = 0 ; iz < nz; ++iz)
208+ {
209+ tmp[ix * ny * nz + iy * nz + iz] = 0.0 ;
210+ double vx = ix - int (nx / 2 );
211+ double vy = iy - int (ny / 2 );
212+ double vz = iz - int (nz / 2 );
213+ ModuleBase::Vector3<double > v (vx, vy, vz);
214+ float modulusgk = float ((v + kk) * (GGT * (v + kk)));
215+ if (modulusgk <= ggecut)
216+ {
217+ tmp[ix * ny * nz + iy * nz + iz] = float (1.0 / (modulusgk + 1 ));
218+ if (vy > 0 )
219+ tmp[ix * ny * nz + iy * nz + iz] += std::complex <float >(0 ,1.0 ) / (std::abs (float (v.x ) + 1 ) + 1 );
220+ else if (vy < 0 )
221+ tmp[ix * ny * nz + iy * nz + iz] -= std::complex <float >(0 ,1.0 ) / (std::abs (float (-v.x ) + 1 ) + 1 );
222+ }
223+ }
224+ }
225+ }
226+ fftwf_plan pp
227+ = fftwf_plan_dft_3d (nx, ny, nz, (fftwf_complex*)tmp, (fftwf_complex*)tmp, FFTW_BACKWARD, FFTW_ESTIMATE);
228+ fftwf_execute (pp);
229+ fftwf_destroy_plan (pp);
230+
231+ ModuleBase::Vector3<float > delta_g (float (int (nx / 2 )) / nx,
232+ float (int (ny / 2 )) / ny,
233+ float (int (nz / 2 )) / nz);
234+ for (int ixy = 0 ; ixy < nx * ny; ++ixy)
235+ {
236+ for (int iz = 0 ; iz < nz; ++iz)
237+ {
238+ int ix = ixy / ny;
239+ int iy = ixy % ny;
240+ ModuleBase::Vector3<float > real_r (ix, iy, iz);
241+ float phase_im = -delta_g * real_r;
242+ complex <float > phase (0 , ModuleBase::TWO_PI * phase_im);
243+ tmp[ixy * nz + iz] *= exp (phase);
244+ }
245+ }
246+ }
247+ #ifdef __MPI
248+ MPI_Bcast (tmp, 2 * nx * ny * nz, MPI_DOUBLE, 0 , POOL_WORLD);
249+ #endif
250+ complex <float >* h_rhog = new complex <float >[npwk];
251+ complex <float >* h_rhogout = new complex <float >[npwk];
252+ complex <float >* h_rhor = new complex <float >[nrxx];
253+ for (int ig = 0 ; ig < npwk; ++ig)
254+ {
255+ h_rhog[ig] = float (1.0 / (pwtest.getgk2 (ik, ig) + 1 ));
256+ ModuleBase::Vector3<double > f = pwtest.getgdirect (ik, ig);
257+ if (f.y > 0 )
258+ h_rhog[ig] += std::complex <float >(0 ,1.0 ) / (std::abs (float (f.x ) + 1 ) + 1 );
259+ else if (f.y < 0 )
260+ h_rhog[ig] -= std::complex <float >(0 ,1.0 ) / (std::abs (float (-f.x ) + 1 ) + 1 );
261+ }
262+ complex <float >* d_rhog;
263+ complex <float >* d_rhor;
264+ cudaMalloc ((void **)&d_rhog, npwk * sizeof (complex <float >));
265+ cudaMalloc ((void **)&d_rhor, nrxx * sizeof (complex <float >));
266+ cudaMemcpy (d_rhog, h_rhog, npwk * sizeof (complex <float >), cudaMemcpyHostToDevice);
267+ pwtest.recip_to_real <std::complex <float >, base_device::DEVICE_GPU>(d_rhog,
268+ d_rhor,
269+ ik);
270+ cudaMemcpy (h_rhor, d_rhor, nrxx * sizeof (complex <float >), cudaMemcpyDeviceToHost);
271+ int startiz = pwtest.startz_current ;
272+ for (int ixy = 0 ; ixy < nx * ny; ++ixy)
273+ {
274+ for (int iz = 0 ; iz < nplane; ++iz)
275+ {
276+ EXPECT_NEAR (tmp[ixy * nz + startiz + iz].real (), h_rhor[ixy * nplane + iz].real (), 1e-4 );
277+ EXPECT_NEAR (tmp[ixy * nz + startiz + iz].imag (), h_rhor[ixy * nplane + iz].imag (), 1e-4 );
278+ }
279+ }
280+
281+ pwtest.real_to_recip <std::complex <float >,base_device::DEVICE_GPU>(d_rhor,d_rhog,ik);
282+ cudaMemcpy (h_rhogout,d_rhog,npwk * sizeof (complex <float >),cudaMemcpyDeviceToHost);
283+ for (int ig = 0 ; ig < npwk; ++ig)
284+ {
285+ EXPECT_NEAR (h_rhog[ig].real (), h_rhogout[ig].real (), 1e-4 );
286+ EXPECT_NEAR (h_rhog[ig].imag (), h_rhogout[ig].imag (), 1e-4 );
287+ }
288+ delete[] h_rhog;
289+ delete[] h_rhor;
290+ delete[] h_rhogout;
291+ cudaFree (d_rhor);
292+ cudaFree (d_rhog);
142293 }
143- delete [] tmp;
144- delete [] rhor;
294+ delete[] tmp;
145295 delete[] kvec_d;
146- delete[] rhogr;
147296 fftw_cleanup ();
148- }
297+ }
0 commit comments