@@ -54,8 +54,9 @@ void FFT:: initfft(int nx_in, int bigny_in, int nz_in, int ns_in, int nplane_in,
5454 if (!this ->mpifft )
5555 {
5656 // It seems in-place fft is faster than out-of-place fft
57- if (this ->nproc == 1 ) c_gspace = (std::complex <double > *) fftw_malloc (sizeof (fftw_complex) * this ->nz * this ->ns );
58- else c_gspace = (std::complex <double > *) fftw_malloc (sizeof (fftw_complex) * maxgrids);
57+ // if(this->nproc == 1) c_gspace = (std::complex<double> *) fftw_malloc(sizeof(fftw_complex) * this->nz * this->ns);
58+ // else c_gspace = (std::complex<double> *) fftw_malloc(sizeof(fftw_complex) * maxgrids);
59+ c_gspace = (std::complex <double > *) fftw_malloc (sizeof (fftw_complex) * maxgrids);
5960 // c_gspace2 = (std::complex<double> *) fftw_malloc(sizeof(fftw_complex) * this->nz * this->ns);
6061 if (this ->gamma_only )
6162 {
@@ -70,9 +71,11 @@ void FFT:: initfft(int nx_in, int bigny_in, int nz_in, int ns_in, int nplane_in,
7071 }
7172 else
7273 {
73- if (this ->nproc == 1 ) c_rspace = (std::complex <double > *) fftw_malloc (sizeof (fftw_complex) * this ->bignxy * nplane);
74- else c_rspace = (std::complex <double > *) fftw_malloc (sizeof (fftw_complex) * maxgrids);
74+ // if(this->nproc == 1) c_rspace = (std::complex<double> *) fftw_malloc(sizeof(fftw_complex) * this->bignxy * nplane);
75+ // else c_rspace = (std::complex<double> *) fftw_malloc(sizeof(fftw_complex) * maxgrids);
76+ c_rspace = (std::complex <double > *) fftw_malloc (sizeof (fftw_complex) * maxgrids);
7577 }
78+ c_gspace2 = c_rspace;
7679#ifdef __MIX_PRECISION
7780 cf_gspace = (std::complex <float > *)fftw_malloc (sizeof (fftwf_complex) * this ->nz * this ->ns );
7881 cf_rspace = (std::complex <float > *)fftw_malloc (sizeof (fftwf_complex) * this ->bignxy * nplane);
@@ -120,13 +123,14 @@ void FFT :: initplan()
120123 // fftw_complex *in, const int *inembed, int istride, int idist,
121124 // fftw_complex *out, const int *onembed, int ostride, int odist, int sign, unsigned flags);
122125
126+ // It is better to use out-of-place fft for stride = 1.
123127 this ->planzfor = fftw_plan_many_dft ( 1 , &this ->nz , this ->ns ,
124128 (fftw_complex*) c_gspace, &this ->nz , 1 , this ->nz ,
125- (fftw_complex*) c_gspace , &this ->nz , 1 , this ->nz , FFTW_FORWARD, FFTW_MEASURE);
129+ (fftw_complex*) c_gspace2 , &this ->nz , 1 , this ->nz , FFTW_FORWARD, FFTW_MEASURE);
126130
127131 this ->planzbac = fftw_plan_many_dft ( 1 , &this ->nz , this ->ns ,
128132 (fftw_complex*) c_gspace, &this ->nz , 1 , this ->nz ,
129- (fftw_complex*) c_gspace , &this ->nz , 1 , this ->nz , FFTW_BACKWARD, FFTW_MEASURE);
133+ (fftw_complex*) c_gspace2 , &this ->nz , 1 , this ->nz , FFTW_BACKWARD, FFTW_MEASURE);
130134
131135 // this->planzfor = fftw_plan_dft_1d(this->nz,(fftw_complex*) c_gspace,(fftw_complex*) c_gspace, FFTW_FORWARD, FFTW_MEASURE);
132136 // this->planzbac = fftw_plan_dft_1d(this->nz,(fftw_complex*) c_gspace,(fftw_complex*) c_gspace,FFTW_BACKWARD, FFTW_MEASURE);
@@ -183,6 +187,8 @@ void FFT :: initplan()
183187 // this->plan2bac = fftw_plan_many_dft( 2, nrank, this->nplane,
184188 // (fftw_complex*) c_rspace, embed, this->nplane, 1,
185189 // (fftw_complex*) c_rspace, embed, this->nplane, 1, FFTW_BACKWARD, FFTW_MEASURE);
190+
191+ // It is better to use in-place for stride > 1
186192 int npy = this ->nplane * this ->ny ;
187193 this ->planxfor = fftw_plan_many_dft ( 1 , &this ->nx , npy, (fftw_complex *)c_rspace, embed, npy, 1 ,
188194 (fftw_complex *)c_rspace, embed, npy, 1 , FFTW_FORWARD, FFTW_MEASURE );
0 commit comments