11#include " fft.h"
2- #include " ../module_base/tool_quit.h"
32namespace ModulePW
43{
54
@@ -55,8 +54,9 @@ void FFT:: initfft(int nx_in, int bigny_in, int nz_in, int ns_in, int nplane_in,
5554 if (!this ->mpifft )
5655 {
5756 // It seems in-place fft is faster than out-of-place fft
58- if (this ->nproc == 1 ) c_gspace = (std::complex <double > *) fftw_malloc (sizeof (fftw_complex) * this ->nz * this ->ns );
59- else c_gspace = (std::complex <double > *) fftw_malloc (sizeof (fftw_complex) * maxgrids);
57+ // if(this->nproc == 1) c_gspace = (std::complex<double> *) fftw_malloc(sizeof(fftw_complex) * this->nz * this->ns);
58+ // else c_gspace = (std::complex<double> *) fftw_malloc(sizeof(fftw_complex) * maxgrids);
59+ c_gspace = (std::complex <double > *) fftw_malloc (sizeof (fftw_complex) * maxgrids);
6060 // c_gspace2 = (std::complex<double> *) fftw_malloc(sizeof(fftw_complex) * this->nz * this->ns);
6161 if (this ->gamma_only )
6262 {
@@ -71,9 +71,11 @@ void FFT:: initfft(int nx_in, int bigny_in, int nz_in, int ns_in, int nplane_in,
7171 }
7272 else
7373 {
74- if (this ->nproc == 1 ) c_rspace = (std::complex <double > *) fftw_malloc (sizeof (fftw_complex) * this ->bignxy * nplane);
75- else c_rspace = (std::complex <double > *) fftw_malloc (sizeof (fftw_complex) * maxgrids);
74+ // if(this->nproc == 1) c_rspace = (std::complex<double> *) fftw_malloc(sizeof(fftw_complex) * this->bignxy * nplane);
75+ // else c_rspace = (std::complex<double> *) fftw_malloc(sizeof(fftw_complex) * maxgrids);
76+ c_rspace = (std::complex <double > *) fftw_malloc (sizeof (fftw_complex) * maxgrids);
7677 }
78+ c_gspace2 = c_rspace;
7779#ifdef __MIX_PRECISION
7880 cf_gspace = (std::complex <float > *)fftw_malloc (sizeof (fftwf_complex) * this ->nz * this ->ns );
7981 cf_rspace = (std::complex <float > *)fftw_malloc (sizeof (fftwf_complex) * this ->bignxy * nplane);
@@ -121,13 +123,14 @@ void FFT :: initplan()
121123 // fftw_complex *in, const int *inembed, int istride, int idist,
122124 // fftw_complex *out, const int *onembed, int ostride, int odist, int sign, unsigned flags);
123125
126+ // It is better to use out-of-place fft for stride = 1.
124127 this ->planzfor = fftw_plan_many_dft ( 1 , &this ->nz , this ->ns ,
125128 (fftw_complex*) c_gspace, &this ->nz , 1 , this ->nz ,
126- (fftw_complex*) c_gspace , &this ->nz , 1 , this ->nz , FFTW_FORWARD, FFTW_MEASURE);
129+ (fftw_complex*) c_gspace2 , &this ->nz , 1 , this ->nz , FFTW_FORWARD, FFTW_MEASURE);
127130
128131 this ->planzbac = fftw_plan_many_dft ( 1 , &this ->nz , this ->ns ,
129132 (fftw_complex*) c_gspace, &this ->nz , 1 , this ->nz ,
130- (fftw_complex*) c_gspace , &this ->nz , 1 , this ->nz , FFTW_BACKWARD, FFTW_MEASURE);
133+ (fftw_complex*) c_gspace2 , &this ->nz , 1 , this ->nz , FFTW_BACKWARD, FFTW_MEASURE);
131134
132135 // this->planzfor = fftw_plan_dft_1d(this->nz,(fftw_complex*) c_gspace,(fftw_complex*) c_gspace, FFTW_FORWARD, FFTW_MEASURE);
133136 // this->planzbac = fftw_plan_dft_1d(this->nz,(fftw_complex*) c_gspace,(fftw_complex*) c_gspace,FFTW_BACKWARD, FFTW_MEASURE);
@@ -136,7 +139,7 @@ void FFT :: initplan()
136139 // 2 D
137140 // ---------------------------------------------------------
138141
139- int nrank[2 ] = {this ->nx ,this ->bigny };
142+ // int nrank[2] = {this->nx,this->bigny};
140143 int *embed = NULL ;
141144 if (this ->gamma_only )
142145 {
@@ -184,6 +187,8 @@ void FFT :: initplan()
184187 // this->plan2bac = fftw_plan_many_dft( 2, nrank, this->nplane,
185188 // (fftw_complex*) c_rspace, embed, this->nplane, 1,
186189 // (fftw_complex*) c_rspace, embed, this->nplane, 1, FFTW_BACKWARD, FFTW_MEASURE);
190+
191+ // It is better to use in-place for stride > 1
187192 int npy = this ->nplane * this ->ny ;
188193 this ->planxfor = fftw_plan_many_dft ( 1 , &this ->nx , npy, (fftw_complex *)c_rspace, embed, npy, 1 ,
189194 (fftw_complex *)c_rspace, embed, npy, 1 , FFTW_FORWARD, FFTW_MEASURE );
@@ -301,80 +306,78 @@ void FFT:: cleanFFT()
301306 return ;
302307}
303308
304- void FFT::executefftw (std::string instr )
309+ void FFT::fftzfor (std::complex < double >* & in, std:: complex < double >* & out )
305310{
306- if (instr == " 1for" )
307- {
308- // for(int i = 0 ; i < this->ns ; ++i)
309- // {
310- // fftw_execute_dft(this->planzfor,(fftw_complex *)&c_gspace[i*nz],(fftw_complex *)&c_gspace[i*nz]);
311- // }
312- fftw_execute_dft (this ->planzfor ,(fftw_complex *)c_gspace,(fftw_complex *)c_gspace);
313- // fftw_execute(this->planzfor);
314- }
315- else if (instr == " 1bac" )
316- {
317- // for(int i = 0 ; i < this->ns ; ++i)
318- // {
319- // fftw_execute_dft(this->planzbac,(fftw_complex *)&c_gspace[i*nz],(fftw_complex *)&c_gspace[i*nz]);
320- // }
321- fftw_execute_dft (this ->planzbac ,(fftw_complex *)c_gspace,(fftw_complex *)c_gspace);
322- // fftw_execute(this->planzbac);
323- }
324- else if (instr == " 2for" )
311+ // for(int i = 0 ; i < this->ns ; ++i)
312+ // {
313+ // fftw_execute_dft(this->planzfor,(fftw_complex *)&in[i*nz],(fftw_complex *)&out[i*nz]);
314+ // }
315+ fftw_execute_dft (this ->planzfor ,(fftw_complex *)in,(fftw_complex *)out);
316+ return ;
317+ }
318+
319+ void FFT::fftzbac (std::complex <double >* & in, std::complex <double >* & out)
320+ {
321+ // for(int i = 0 ; i < this->ns ; ++i)
322+ // {
323+ // fftw_execute_dft(this->planzbac,(fftw_complex *)&c_gspace[i*nz],(fftw_complex *)&c_gspace[i*nz]);
324+ // }
325+ fftw_execute_dft (this ->planzbac ,(fftw_complex *)in, (fftw_complex *)out);
326+ return ;
327+ }
328+
329+ void FFT::fftxyfor (std::complex <double >* & in, std::complex <double >* & out)
330+ {
331+ int npy = this ->nplane * this -> ny;
332+ fftw_execute_dft ( this ->planxfor , (fftw_complex *)in, (fftw_complex *)out);
333+ for (int i=0 ; i<this ->nx ;++i)
325334 {
326- int npy = this ->nplane * this -> ny;
327- fftw_execute_dft ( this ->planxfor , (fftw_complex *)c_rspace, (fftw_complex *)c_rspace);
328- for (int i=0 ; i<this ->nx ;++i)
329- {
330- fftw_execute_dft ( this ->planyfor , (fftw_complex*)&c_rspace[i*npy], (fftw_complex*)&c_rspace[i*npy] );
331- }
332- // fftw_execute(this->plan2for);
335+ fftw_execute_dft ( this ->planyfor , (fftw_complex*)&in[i*npy], (fftw_complex*)&out[i*npy] );
333336 }
334- else if (instr == " 2bac" )
335- {
336- // fftw_execute(this->plan2bac);
337- int npy = this ->nplane * this -> ny;
338-
339- for (int i=0 ; i<this ->nx ;++i)
340- {
341- fftw_execute_dft ( this ->planybac , (fftw_complex*)&c_rspace[i*npy], (fftw_complex*)&c_rspace[i*npy] );
342- }
343- fftw_execute_dft ( this ->planxbac , (fftw_complex *)c_rspace, (fftw_complex *)c_rspace);
337+ return ;
338+ }
339+
340+ void FFT::fftxybac (std::complex <double >* & in, std::complex <double >* & out)
341+ {
342+ int npy = this ->nplane * this -> ny;
344343
345- }
346- else if (instr == " 2r2c" )
344+ for (int i=0 ; i<this ->nx ;++i)
347345 {
348- // fftw_execute(this->plan2r2c);
349- // int npy = this->nplane * this-> ny;
350- int bignpy = this ->nplane * this -> bigny;
351- // int padnpy = this->nplane * this-> ny * 2;
352- for (int i=0 ; i<this ->nx ;++i)
353- {
354- fftw_execute_dft_r2c ( this ->planyr2c , &r_rspace[i*bignpy], (fftw_complex*)&c_rspace[i*bignpy] );
355- // fftw_execute_dft_r2c( this->planyfor, &r_rspace[4*i*padnpy], (fftw_complex*)&c_rspace[i*padnpy] );
356- }
357- fftw_execute_dft ( this ->planxfor , (fftw_complex *)c_rspace, (fftw_complex *)c_rspace);
346+ fftw_execute_dft ( this ->planybac , (fftw_complex*)&in[i*npy], (fftw_complex*)&out[i*npy] );
358347 }
359- else if (instr == " 2c2r" )
348+ fftw_execute_dft ( this ->planxbac , (fftw_complex *)in, (fftw_complex *)out);
349+ return ;
350+ }
351+
352+ void FFT::fftxyr2c (double * &in, std::complex <double >* & out)
353+ {
354+ // int npy = this->nplane * this-> ny;
355+ int bignpy = this ->nplane * this -> bigny;
356+ // int padnpy = this->nplane * this-> ny * 2;
357+ for (int i=0 ; i<this ->nx ;++i)
360358 {
361- // fftw_execute(this->plan2c2r);
362- // int npy = this->nplane * this-> ny;
363- int bignpy = this ->nplane * this -> bigny;
364- // int padnpy = this->nplane * this-> ny * 2;
365- fftw_execute_dft ( this ->planxbac , (fftw_complex *)c_rspace, (fftw_complex *)c_rspace);
366- for (int i=0 ; i<this ->nx ;++i)
367- {
368- fftw_execute_dft_c2r ( this ->planyc2r , (fftw_complex*)&c_rspace[i*bignpy], &r_rspace[i*bignpy] );
369- // fftw_execute_dft_c2r( this->planybac, (fftw_complex*)&c_rspace[i*padnpy], &r_rspace[4*i*padnpy] );
370- }
359+ fftw_execute_dft_r2c ( this ->planyr2c , &in[i*bignpy], (fftw_complex*)&out[i*bignpy] );
360+ // fftw_execute_dft_r2c( this->planyfor, &r_rspace[4*i*padnpy], (fftw_complex*)&c_rspace[i*padnpy] );
371361 }
372- else
362+ fftw_execute_dft ( this ->planxfor , (fftw_complex *)out, (fftw_complex *)out);
363+ return ;
364+ }
365+
366+ void FFT::fftxyc2r (std::complex <double >* & in, double * & out)
367+ {
368+ // int npy = this->nplane * this-> ny;
369+ int bignpy = this ->nplane * this -> bigny;
370+ // int padnpy = this->nplane * this-> ny * 2;
371+ fftw_execute_dft ( this ->planxbac , (fftw_complex *)in, (fftw_complex *)in);
372+ for (int i=0 ; i<this ->nx ;++i)
373373 {
374- ModuleBase::WARNING_QUIT (" FFT" , " Wrong input for excutefftw" );
374+ fftw_execute_dft_c2r ( this ->planyc2r , (fftw_complex*)&in[i*bignpy], &out[i*bignpy] );
375+ // fftw_execute_dft_c2r( this->planybac, (fftw_complex*)&c_rspace[i*padnpy], &r_rspace[4*i*padnpy] );
375376 }
377+ return ;
376378}
377379
380+
378381#ifdef __MIX_PRECISION
379382void executefftwf (std::string instr)
380383{
@@ -390,10 +393,6 @@ void executefftwf(std::string instr)
390393 fftwf_execute (this ->planf2r2c );
391394 else if (instr == " 2c2r" )
392395 fftwf_execute (this ->planf2c2r );
393- else
394- {
395- ModuleBase::WARNING_QUIT (" FFT" , " Wrong input for excutefftwf" );
396- }
397396}
398397#endif
399398}
0 commit comments