Skip to content

Commit 09d5339

Browse files
authored
Merge pull request #593 from Qianruipku/planewave
change fft in x direction
2 parents f2b6e1f + 6260af7 commit 09d5339

File tree

6 files changed

+67
-84
lines changed

6 files changed

+67
-84
lines changed

source/module_pw/fft.cpp

Lines changed: 36 additions & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -133,9 +133,9 @@ void FFT :: initplan()
133133

134134
// int npy = this->nplane * this->ny;
135135
int bignpy = this->nplane * this->bigny;
136-
this->planxfor = fftw_plan_many_dft( 1, &this->nx, this->nplane, (fftw_complex *)aux2, embed, bignpy, 1,
136+
this->planxfor1 = fftw_plan_many_dft( 1, &this->nx, this->nplane * (liy + 1), (fftw_complex *)aux2, embed, bignpy, 1,
137137
(fftw_complex *)aux2, embed, bignpy, 1, FFTW_FORWARD, FFTW_MEASURE );
138-
this->planxbac = fftw_plan_many_dft( 1, &this->nx, this->nplane, (fftw_complex *)aux2, embed, bignpy, 1,
138+
this->planxbac1 = fftw_plan_many_dft( 1, &this->nx, this->nplane * (liy + 1), (fftw_complex *)aux2, embed, bignpy, 1,
139139
(fftw_complex *)aux2, embed, bignpy, 1, FFTW_BACKWARD, FFTW_MEASURE );
140140
this->planyr2c = fftw_plan_many_dft_r2c( 1, &this->bigny, this->nplane, r_rspace , embed, this->nplane, 1,
141141
(fftw_complex*)aux1, embed, this->nplane, 1, FFTW_MEASURE );
@@ -165,9 +165,13 @@ void FFT :: initplan()
165165

166166
//It is better to use in-place for stride > 1
167167
int bignpy = this->nplane * this->bigny;
168-
this->planxfor = fftw_plan_many_dft( 1, &this->nx, this->nplane, (fftw_complex *)aux2, embed, bignpy, 1,
168+
this->planxfor1 = fftw_plan_many_dft( 1, &this->nx, this->nplane * (liy + 1), (fftw_complex *)aux2, embed, bignpy, 1,
169169
(fftw_complex *)aux2, embed, bignpy, 1, FFTW_FORWARD, FFTW_MEASURE );
170-
this->planxbac = fftw_plan_many_dft( 1, &this->nx, this->nplane, (fftw_complex *)aux2, embed, bignpy, 1,
170+
this->planxbac1 = fftw_plan_many_dft( 1, &this->nx, this->nplane * (liy + 1), (fftw_complex *)aux2, embed, bignpy, 1,
171+
(fftw_complex *)aux2, embed, bignpy, 1, FFTW_BACKWARD, FFTW_MEASURE );
172+
this->planxfor2 = fftw_plan_many_dft( 1, &this->nx, this->nplane * (bigny - riy), (fftw_complex *)aux2, embed, bignpy, 1,
173+
(fftw_complex *)aux2, embed, bignpy, 1, FFTW_FORWARD, FFTW_MEASURE );
174+
this->planxbac2 = fftw_plan_many_dft( 1, &this->nx, this->nplane * (bigny - riy), (fftw_complex *)aux2, embed, bignpy, 1,
171175
(fftw_complex *)aux2, embed, bignpy, 1, FFTW_BACKWARD, FFTW_MEASURE );
172176
this->planyfor = fftw_plan_many_dft( 1, &this->bigny, this->nplane, (fftw_complex*)aux2 , embed, this->nplane, 1,
173177
(fftw_complex*)aux2, embed, this->nplane, 1, FFTW_FORWARD, FFTW_MEASURE );
@@ -205,9 +209,9 @@ void FFT :: initplanf()
205209

206210
int *embed = NULL;
207211
int bignpy = this->nplane * this->bigny;
208-
this->planfxfor = fftwf_plan_many_dft( 1, &this->nx, this->nplane, (fftwf_complex *)auxf2, embed, bignpy, 1,
212+
this->planfxfor1 = fftwf_plan_many_dft( 1, &this->nx, this->nplane * (liy + 1), (fftwf_complex *)auxf2, embed, bignpy, 1,
209213
(fftwf_complex *)auxf2, embed, bignpy, 1, FFTW_FORWARD, FFTW_MEASURE );
210-
this->planfxbac = fftwf_plan_many_dft( 1, &this->nx, this->nplane, (fftwf_complex *)auxf2, embed, bignpy, 1,
214+
this->planfxbac1 = fftwf_plan_many_dft( 1, &this->nx, this->nplane * (liy + 1), (fftwf_complex *)auxf2, embed, bignpy, 1,
211215
(fftwf_complex *)auxf2, embed, bignpy, 1, FFTW_BACKWARD, FFTW_MEASURE );
212216
if(this->gamma_only)
213217
{
@@ -218,6 +222,10 @@ void FFT :: initplanf()
218222
}
219223
else
220224
{
225+
this->planfxfor2 = fftwf_plan_many_dft( 1, &this->nx, this->nplane * (bigny - riy), (fftwf_complex *)auxf2, embed, bignpy, 1,
226+
(fftwf_complex *)auxf2, embed, bignpy, 1, FFTW_FORWARD, FFTW_MEASURE );
227+
this->planfxbac2 = fftwf_plan_many_dft( 1, &this->nx, this->nplane * (bigny - riy), (fftwf_complex *)auxf2, embed, bignpy, 1,
228+
(fftwf_complex *)auxf2, embed, bignpy, 1, FFTW_BACKWARD, FFTW_MEASURE );
221229
this->planfyfor = fftwf_plan_many_dft( 1, &this->bigny, this->nplane, (fftwf_complex*)auxf2 , embed, this->nplane, 1,
222230
(fftwf_complex*)auxf2, embed, this->nplane, 1, FFTW_FORWARD, FFTW_MEASURE );
223231
this->planfybac = fftwf_plan_many_dft( 1, &this->bigny, this->nplane, (fftwf_complex*)auxf2 , embed, this->nplane, 1,
@@ -244,15 +252,17 @@ void FFT:: cleanFFT()
244252
if(destroyp==true) return;
245253
fftw_destroy_plan(planzfor);
246254
fftw_destroy_plan(planzbac);
247-
fftw_destroy_plan(planxfor);
248-
fftw_destroy_plan(planxbac);
255+
fftw_destroy_plan(planxfor1);
256+
fftw_destroy_plan(planxbac1);
249257
if(this->gamma_only)
250258
{
251259
fftw_destroy_plan(planyr2c);
252260
fftw_destroy_plan(planyc2r);
253261
}
254262
else
255263
{
264+
fftw_destroy_plan(planxfor2);
265+
fftw_destroy_plan(planxbac2);
256266
fftw_destroy_plan(planyfor);
257267
fftw_destroy_plan(planybac);
258268
}
@@ -265,15 +275,17 @@ void FFT:: cleanfFFT()
265275
if(destroypf==true) return;
266276
fftwf_destroy_plan(planfzfor);
267277
fftwf_destroy_plan(planfzbac);
268-
fftwf_destroy_plan(planfxfor);
269-
fftwf_destroy_plan(planfxbac);
278+
fftwf_destroy_plan(planfxfor1);
279+
fftwf_destroy_plan(planfxbac1);
270280
if(this->gamma_only)
271281
{
272282
fftwf_destroy_plan(planfyr2c);
273283
fftwf_destroy_plan(planfyc2r);
274284
}
275285
else
276286
{
287+
fftwf_destroy_plan(planfxfor2);
288+
fftwf_destroy_plan(planfxbac2);
277289
fftwf_destroy_plan(planfyfor);
278290
fftwf_destroy_plan(planfybac);
279291
}
@@ -311,29 +323,18 @@ void FFT::fftxyfor(std::complex<double>* & in, std::complex<double>* & out)
311323
fftw_execute_dft( this->planyfor, (fftw_complex *)&in[i*bignpy], (fftw_complex *)&out[i*bignpy]);
312324
}
313325

314-
for (int i=0; i<=this->liy;++i)
315-
{
316-
fftw_execute_dft( this->planxfor, (fftw_complex *)&in[i*nplane], (fftw_complex *)&out[i*nplane]);
317-
}
318-
for (int i=this->riy; i<this->ny;++i)
319-
{
320-
fftw_execute_dft( this->planxfor, (fftw_complex *)&in[i*nplane], (fftw_complex *)&out[i*nplane]);
321-
}
326+
327+
fftw_execute_dft( this->planxfor1, (fftw_complex *)in, (fftw_complex *)out);
328+
fftw_execute_dft( this->planxfor2, (fftw_complex *)&in[riy*nplane], (fftw_complex *)&out[riy*nplane]);
322329
return;
323330
}
324331

325332
void FFT::fftxybac(std::complex<double>* & in, std::complex<double>* & out)
326333
{
327334
int bignpy = this->nplane * this-> bigny;
328335
//x-direction
329-
for (int i=0; i<=this->liy;++i)
330-
{
331-
fftw_execute_dft( this->planxbac, (fftw_complex *)&in[i*nplane], (fftw_complex *)&out[i*nplane]);
332-
}
333-
for (int i=this->riy; i<this->ny;++i)
334-
{
335-
fftw_execute_dft( this->planxbac, (fftw_complex *)&in[i*nplane], (fftw_complex *)&out[i*nplane]);
336-
}
336+
fftw_execute_dft( this->planxbac1, (fftw_complex *)in, (fftw_complex *)out);
337+
fftw_execute_dft( this->planxbac2, (fftw_complex *)&in[riy*nplane], (fftw_complex *)&out[riy*nplane]);
337338

338339
////y-direction
339340
for (int i=0; i<this->nx;++i)
@@ -352,30 +353,16 @@ void FFT::fftxyr2c(double* &in, std::complex<double>* & out)
352353
fftw_execute_dft_r2c( this->planyr2c, &in[i*bignpy*2], (fftw_complex*)&out[i*bignpy] );
353354
}
354355

355-
for (int i=0; i<=this->liy;++i)
356-
{
357-
fftw_execute_dft( this->planxfor, (fftw_complex *)&out[i*nplane], (fftw_complex *)&out[i*nplane]);
358-
}
359-
for (int i=this->riy; i<this->ny;++i)
360-
{
361-
fftw_execute_dft( this->planxfor, (fftw_complex *)&out[i*nplane], (fftw_complex *)&out[i*nplane]);
362-
}
356+
fftw_execute_dft( this->planxfor1, (fftw_complex *)out, (fftw_complex *)out);
363357
return;
364358
}
365359

366360

367361
void FFT::fftxyc2r(std::complex<double>* & in, double* & out)
368362
{
369363
int bignpy = this->nplane * this-> bigny;
370-
for (int i=0; i<=this->liy;++i)
371-
{
372-
fftw_execute_dft( this->planxbac, (fftw_complex *)&in[i*nplane], (fftw_complex *)&in[i*nplane]);
373-
}
374-
for (int i=this->riy; i<this->ny;++i)
375-
{
376-
fftw_execute_dft( this->planxbac, (fftw_complex *)&in[i*nplane], (fftw_complex *)&in[i*nplane]);
377-
}
378-
364+
fftw_execute_dft( this->planxbac1, (fftw_complex *)in, (fftw_complex *)in);
365+
379366
for (int i=0; i<this->nx;++i)
380367
{
381368
fftw_execute_dft_c2r( this->planyc2r, (fftw_complex*)&in[i*bignpy], &out[i*bignpy*2] );
@@ -405,29 +392,17 @@ void FFT::fftfxyfor(std::complex<float>* & in, std::complex<float>* & out)
405392
fftwf_execute_dft( this->planfyfor, (fftwf_complex *)&in[i*bignpy], (fftwf_complex *)&out[i*bignpy]);
406393
}
407394

408-
for (int i=0; i<=this->liy;++i)
409-
{
410-
fftwf_execute_dft( this->planfxfor, (fftwf_complex *)&in[i*nplane], (fftwf_complex *)&out[i*nplane]);
411-
}
412-
for (int i=this->riy; i<this->ny;++i)
413-
{
414-
fftwf_execute_dft( this->planfxfor, (fftwf_complex *)&in[i*nplane], (fftwf_complex *)&out[i*nplane]);
415-
}
395+
fftwf_execute_dft( this->planfxfor1, (fftwf_complex *)in, (fftwf_complex *)out);
396+
fftwf_execute_dft( this->planfxfor2, (fftwf_complex *)&in[riy*nplane], (fftwf_complex *)&out[riy*nplane]);
416397
return;
417398
}
418399

419400
void FFT::fftfxybac(std::complex<float>* & in, std::complex<float>* & out)
420401
{
421402
int bignpy = this->nplane * this-> bigny;
422403
//x-direction
423-
for (int i=0; i<=this->liy;++i)
424-
{
425-
fftwf_execute_dft( this->planfxbac, (fftwf_complex *)&in[i*nplane], (fftwf_complex *)&out[i*nplane]);
426-
}
427-
for (int i=this->riy; i<this->ny;++i)
428-
{
429-
fftwf_execute_dft( this->planfxbac, (fftwf_complex *)&in[i*nplane], (fftwf_complex *)&out[i*nplane]);
430-
}
404+
fftwf_execute_dft( this->planfxbac1, (fftwf_complex *)in, (fftwf_complex *)out);
405+
fftwf_execute_dft( this->planfxbac2, (fftwf_complex *)&in[riy*nplane], (fftwf_complex *)&out[riy*nplane]);
431406

432407
////y-direction
433408
for (int i=0; i<this->nx;++i)
@@ -446,29 +421,15 @@ void FFT::fftfxyr2c(float* &in, std::complex<float>* & out)
446421
fftwf_execute_dft_r2c( this->planfyr2c, &in[i*bignpy*2], (fftwf_complex*)&out[i*bignpy] );
447422
}
448423

449-
for (int i=0; i<=this->liy;++i)
450-
{
451-
fftwf_execute_dft( this->planfxfor, (fftwf_complex *)&out[i*nplane], (fftwf_complex *)&out[i*nplane]);
452-
}
453-
for (int i=this->riy; i<this->ny;++i)
454-
{
455-
fftwf_execute_dft( this->planfxfor, (fftwf_complex *)&out[i*nplane], (fftwf_complex *)&out[i*nplane]);
456-
}
424+
fftwf_execute_dft( this->planfxfor1, (fftwf_complex *)out, (fftwf_complex *)out);
457425
return;
458426
}
459427

460428

461429
void FFT::fftfxyc2r(std::complex<float>* & in, float* & out)
462430
{
463431
int bignpy = this->nplane * this-> bigny;
464-
for (int i=0; i<=this->liy;++i)
465-
{
466-
fftwf_execute_dft( this->planfxbac, (fftwf_complex *)&in[i*nplane], (fftwf_complex *)&in[i*nplane]);
467-
}
468-
for (int i=this->riy; i<this->ny;++i)
469-
{
470-
fftwf_execute_dft( this->planfxbac, (fftwf_complex *)&in[i*nplane], (fftwf_complex *)&in[i*nplane]);
471-
}
432+
fftwf_execute_dft( this->planfxbac1, (fftwf_complex *)in, (fftwf_complex *)in);
472433

473434
for (int i=0; i<this->nx;++i)
474435
{

source/module_pw/fft.h

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -87,8 +87,10 @@ class FFT
8787
// fftw_plan plan2bac;
8888
fftw_plan planzfor;
8989
fftw_plan planzbac;
90-
fftw_plan planxfor;
91-
fftw_plan planxbac;
90+
fftw_plan planxfor1;
91+
fftw_plan planxbac1;
92+
fftw_plan planxfor2;
93+
fftw_plan planxbac2;
9294
fftw_plan planyfor;
9395
fftw_plan planybac;
9496
fftw_plan planyr2c;
@@ -97,8 +99,10 @@ class FFT
9799
bool destroypf;
98100
fftwf_plan planfzfor;
99101
fftwf_plan planfzbac;
100-
fftwf_plan planfxfor;
101-
fftwf_plan planfxbac;
102+
fftwf_plan planfxfor1;
103+
fftwf_plan planfxbac1;
104+
fftwf_plan planfxfor2;
105+
fftwf_plan planfxbac2;
102106
fftwf_plan planfyfor;
103107
fftwf_plan planfybac;
104108
fftwf_plan planfyr2c;

source/module_pw/unittest/Makefile

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,11 +49,17 @@ TESTFILE0 = ${DOUBLEFILE}
4949
#==========================
5050
#No MPI
5151
# HONG = -D__NORMAL
52+
# CPLUSPULS = icpc
53+
54+
#Mix Precision
55+
# HONG = -D__MIX_PRECISION -D__NORMAL
56+
# TESTFILE0 = ${DOUBLEFILE} ${FLOATFILE}
57+
# CPLUSPULS = icpc
5258

5359
#Only MPI
5460
HONG = -D__MPI -D__NORMAL
5561

56-
#Mix Precision
62+
#MPI + Mix Precision
5763
# HONG = -D__MPI -D__MIX_PRECISION -D__NORMAL
5864
# TESTFILE0 = ${DOUBLEFILE} ${FLOATFILE}
5965

@@ -108,7 +114,7 @@ init :
108114
@ if [ ! -d $(BIN_DIR) ]; then mkdir $(BIN_DIR); fi
109115
@ if [ ! -d $(OBJ_DIR)/README ]; then echo "This directory contains all of the .o files" > $(OBJ_DIR)/README; fi
110116

111-
${BIN_DIR}/%.exe: %.cpp
117+
${BIN_DIR}/%.exe: %.cpp ${PW_OBJS}
112118
${CPLUSPULS} ${OPTS} $< test_tool.cpp ${PW_OBJS} ${LIBS} -o $@
113119
${OBJ_DIR}/%.o:%.cpp
114120
${CPLUSPULS} ${OPTS} -c ${HONG} $< -o $@

source/module_pw/unittest/Makefile.gnu

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,11 +49,17 @@ TESTFILE0 = ${DOUBLEFILE}
4949
#==========================
5050
#No MPI
5151
# HONG = -D__NORMAL
52+
# CPLUSPLUS = g++
53+
54+
#Mix Precision
55+
# HONG = -D__MIX_PRECISION -D__NORMAL
56+
# TESTFILE0 = ${DOUBLEFILE} ${FLOATFILE}
57+
# CPLUSPULS = g++
5258

5359
#Only MPI
5460
HONG = -D__MPI -D__NORMAL
5561

56-
#Mix Precision
62+
#MPI + Mix Precision
5763
# HONG = -D__MPI -D__MIX_PRECISION -D__NORMAL
5864
# TESTFILE0 = ${DOUBLEFILE} ${FLOATFILE}
5965

source/module_pw/unittest/Makefile.intel

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,11 +49,17 @@ TESTFILE0 = ${DOUBLEFILE}
4949
#==========================
5050
#No MPI
5151
# HONG = -D__NORMAL
52+
# CPLUSPULS = icpc
53+
54+
#Mix Precision
55+
# HONG = -D__MIX_PRECISION -D__NORMAL
56+
# TESTFILE0 = ${DOUBLEFILE} ${FLOATFILE}
57+
# CPLUSPULS = icpc
5258

5359
#Only MPI
5460
HONG = -D__MPI -D__NORMAL
5561

56-
#Mix Precision
62+
#MPI + Mix Precision
5763
# HONG = -D__MPI -D__MIX_PRECISION -D__NORMAL
5864
# TESTFILE0 = ${DOUBLEFILE} ${FLOATFILE}
5965

source/module_pw/unittest/test2.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -165,7 +165,7 @@ int main(int argc,char **argv)
165165
cout<<endl;
166166
}
167167

168-
ModuleBase::timer::finish(GlobalV::ofs_running, true);
168+
if(rank_in_pool==0) ModuleBase::timer::finish(GlobalV::ofs_running, true);
169169

170170
#ifdef __MPI
171171
MPI_Barrier(MPI_COMM_WORLD);

0 commit comments

Comments
 (0)