Skip to content

Commit 08b757b

Browse files
committed
< profile >
use more integrated many_fft to do fftxfor/fftxbac
1 parent 33f0e95 commit 08b757b

File tree

4 files changed

+26
-43
lines changed

4 files changed

+26
-43
lines changed

source/module_pw/fft.cpp

Lines changed: 20 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -133,9 +133,9 @@ void FFT :: initplan()
133133

134134
// int npy = this->nplane * this->ny;
135135
int bignpy = this->nplane * this->bigny;
136-
this->planxfor = fftw_plan_many_dft( 1, &this->nx, this->nplane, (fftw_complex *)aux2, embed, bignpy, 1,
136+
this->planxfor1 = fftw_plan_many_dft( 1, &this->nx, this->nplane * (liy + 1), (fftw_complex *)aux2, embed, bignpy, 1,
137137
(fftw_complex *)aux2, embed, bignpy, 1, FFTW_FORWARD, FFTW_MEASURE );
138-
this->planxbac = fftw_plan_many_dft( 1, &this->nx, this->nplane, (fftw_complex *)aux2, embed, bignpy, 1,
138+
this->planxbac1 = fftw_plan_many_dft( 1, &this->nx, this->nplane * (liy + 1), (fftw_complex *)aux2, embed, bignpy, 1,
139139
(fftw_complex *)aux2, embed, bignpy, 1, FFTW_BACKWARD, FFTW_MEASURE );
140140
this->planyr2c = fftw_plan_many_dft_r2c( 1, &this->bigny, this->nplane, r_rspace , embed, this->nplane, 1,
141141
(fftw_complex*)aux1, embed, this->nplane, 1, FFTW_MEASURE );
@@ -165,9 +165,13 @@ void FFT :: initplan()
165165

166166
//It is better to use in-place for stride > 1
167167
int bignpy = this->nplane * this->bigny;
168-
this->planxfor = fftw_plan_many_dft( 1, &this->nx, this->nplane, (fftw_complex *)aux2, embed, bignpy, 1,
168+
this->planxfor1 = fftw_plan_many_dft( 1, &this->nx, this->nplane * (liy + 1), (fftw_complex *)aux2, embed, bignpy, 1,
169169
(fftw_complex *)aux2, embed, bignpy, 1, FFTW_FORWARD, FFTW_MEASURE );
170-
this->planxbac = fftw_plan_many_dft( 1, &this->nx, this->nplane, (fftw_complex *)aux2, embed, bignpy, 1,
170+
this->planxbac1 = fftw_plan_many_dft( 1, &this->nx, this->nplane * (liy + 1), (fftw_complex *)aux2, embed, bignpy, 1,
171+
(fftw_complex *)aux2, embed, bignpy, 1, FFTW_BACKWARD, FFTW_MEASURE );
172+
this->planxfor2 = fftw_plan_many_dft( 1, &this->nx, this->nplane * (bigny - riy), (fftw_complex *)aux2, embed, bignpy, 1,
173+
(fftw_complex *)aux2, embed, bignpy, 1, FFTW_FORWARD, FFTW_MEASURE );
174+
this->planxbac2 = fftw_plan_many_dft( 1, &this->nx, this->nplane * (bigny - riy), (fftw_complex *)aux2, embed, bignpy, 1,
171175
(fftw_complex *)aux2, embed, bignpy, 1, FFTW_BACKWARD, FFTW_MEASURE );
172176
this->planyfor = fftw_plan_many_dft( 1, &this->bigny, this->nplane, (fftw_complex*)aux2 , embed, this->nplane, 1,
173177
(fftw_complex*)aux2, embed, this->nplane, 1, FFTW_FORWARD, FFTW_MEASURE );
@@ -244,15 +248,17 @@ void FFT:: cleanFFT()
244248
if(destroyp==true) return;
245249
fftw_destroy_plan(planzfor);
246250
fftw_destroy_plan(planzbac);
247-
fftw_destroy_plan(planxfor);
248-
fftw_destroy_plan(planxbac);
251+
fftw_destroy_plan(planxfor1);
252+
fftw_destroy_plan(planxbac1);
249253
if(this->gamma_only)
250254
{
251255
fftw_destroy_plan(planyr2c);
252256
fftw_destroy_plan(planyc2r);
253257
}
254258
else
255259
{
260+
fftw_destroy_plan(planxfor2);
261+
fftw_destroy_plan(planxbac2);
256262
fftw_destroy_plan(planyfor);
257263
fftw_destroy_plan(planybac);
258264
}
@@ -311,29 +317,18 @@ void FFT::fftxyfor(std::complex<double>* & in, std::complex<double>* & out)
311317
fftw_execute_dft( this->planyfor, (fftw_complex *)&in[i*bignpy], (fftw_complex *)&out[i*bignpy]);
312318
}
313319

314-
for (int i=0; i<=this->liy;++i)
315-
{
316-
fftw_execute_dft( this->planxfor, (fftw_complex *)&in[i*nplane], (fftw_complex *)&out[i*nplane]);
317-
}
318-
for (int i=this->riy; i<this->ny;++i)
319-
{
320-
fftw_execute_dft( this->planxfor, (fftw_complex *)&in[i*nplane], (fftw_complex *)&out[i*nplane]);
321-
}
320+
321+
fftw_execute_dft( this->planxfor1, (fftw_complex *)in, (fftw_complex *)out);
322+
fftw_execute_dft( this->planxfor2, (fftw_complex *)&in[riy*nplane], (fftw_complex *)&out[riy*nplane]);
322323
return;
323324
}
324325

325326
void FFT::fftxybac(std::complex<double>* & in, std::complex<double>* & out)
326327
{
327328
int bignpy = this->nplane * this-> bigny;
328329
//x-direction
329-
for (int i=0; i<=this->liy;++i)
330-
{
331-
fftw_execute_dft( this->planxbac, (fftw_complex *)&in[i*nplane], (fftw_complex *)&out[i*nplane]);
332-
}
333-
for (int i=this->riy; i<this->ny;++i)
334-
{
335-
fftw_execute_dft( this->planxbac, (fftw_complex *)&in[i*nplane], (fftw_complex *)&out[i*nplane]);
336-
}
330+
fftw_execute_dft( this->planxbac1, (fftw_complex *)in, (fftw_complex *)out);
331+
fftw_execute_dft( this->planxbac2, (fftw_complex *)&in[riy*nplane], (fftw_complex *)&out[riy*nplane]);
337332

338333
////y-direction
339334
for (int i=0; i<this->nx;++i)
@@ -352,30 +347,16 @@ void FFT::fftxyr2c(double* &in, std::complex<double>* & out)
352347
fftw_execute_dft_r2c( this->planyr2c, &in[i*bignpy*2], (fftw_complex*)&out[i*bignpy] );
353348
}
354349

355-
for (int i=0; i<=this->liy;++i)
356-
{
357-
fftw_execute_dft( this->planxfor, (fftw_complex *)&out[i*nplane], (fftw_complex *)&out[i*nplane]);
358-
}
359-
for (int i=this->riy; i<this->ny;++i)
360-
{
361-
fftw_execute_dft( this->planxfor, (fftw_complex *)&out[i*nplane], (fftw_complex *)&out[i*nplane]);
362-
}
350+
fftw_execute_dft( this->planxfor1, (fftw_complex *)out, (fftw_complex *)out);
363351
return;
364352
}
365353

366354

367355
void FFT::fftxyc2r(std::complex<double>* & in, double* & out)
368356
{
369357
int bignpy = this->nplane * this-> bigny;
370-
for (int i=0; i<=this->liy;++i)
371-
{
372-
fftw_execute_dft( this->planxbac, (fftw_complex *)&in[i*nplane], (fftw_complex *)&in[i*nplane]);
373-
}
374-
for (int i=this->riy; i<this->ny;++i)
375-
{
376-
fftw_execute_dft( this->planxbac, (fftw_complex *)&in[i*nplane], (fftw_complex *)&in[i*nplane]);
377-
}
378-
358+
fftw_execute_dft( this->planxbac1, (fftw_complex *)in, (fftw_complex *)in);
359+
379360
for (int i=0; i<this->nx;++i)
380361
{
381362
fftw_execute_dft_c2r( this->planyc2r, (fftw_complex*)&in[i*bignpy], &out[i*bignpy*2] );

source/module_pw/fft.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -87,8 +87,10 @@ class FFT
8787
// fftw_plan plan2bac;
8888
fftw_plan planzfor;
8989
fftw_plan planzbac;
90-
fftw_plan planxfor;
91-
fftw_plan planxbac;
90+
fftw_plan planxfor1;
91+
fftw_plan planxbac1;
92+
fftw_plan planxfor2;
93+
fftw_plan planxbac2;
9294
fftw_plan planyfor;
9395
fftw_plan planybac;
9496
fftw_plan planyr2c;

source/module_pw/unittest/Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,7 @@ init :
114114
@ if [ ! -d $(BIN_DIR) ]; then mkdir $(BIN_DIR); fi
115115
@ if [ ! -d $(OBJ_DIR)/README ]; then echo "This directory contains all of the .o files" > $(OBJ_DIR)/README; fi
116116

117-
${BIN_DIR}/%.exe: %.cpp
117+
${BIN_DIR}/%.exe: %.cpp ${PW_OBJS}
118118
${CPLUSPULS} ${OPTS} $< test_tool.cpp ${PW_OBJS} ${LIBS} -o $@
119119
${OBJ_DIR}/%.o:%.cpp
120120
${CPLUSPULS} ${OPTS} -c ${HONG} $< -o $@

source/module_pw/unittest/test2.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -165,7 +165,7 @@ int main(int argc,char **argv)
165165
cout<<endl;
166166
}
167167

168-
ModuleBase::timer::finish(GlobalV::ofs_running, true);
168+
if(rank_in_pool==0) ModuleBase::timer::finish(GlobalV::ofs_running, true);
169169

170170
#ifdef __MPI
171171
MPI_Barrier(MPI_COMM_WORLD);

0 commit comments

Comments
 (0)