Skip to content

Commit 968d123

Browse files
authored
Merge pull request #507 from Qianruipku/planewave
fix a bug & add Makefile.gnu
2 parents 828ed73 + e011110 commit 968d123

File tree

8 files changed

+262
-52
lines changed

8 files changed

+262
-52
lines changed

source/module_base/mymath.h

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,6 @@
55
#include "fftw.h"
66
#elif defined __FFTW3
77
#include "fftw3.h"
8-
#else
9-
#include <fftw3-mpi.h>
10-
//#include "fftw3-mpi_mkl.h"
118
#endif
129
namespace ModuleBase
1310
{

source/module_pw/fft.cpp

Lines changed: 61 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -118,29 +118,33 @@ void FFT :: initplan()
118118
this->plan1bac = fftw_plan_many_dft( 1, &this->nz, this->ns,
119119
(fftw_complex*) c_gspace, &this->nz, 1, this->nz,
120120
(fftw_complex*) c_gspace2, &this->nz, 1, this->nz, FFTW_BACKWARD, FFTW_MEASURE);
121-
122121

123122
//---------------------------------------------------------
124123
// 2 D
125124
//---------------------------------------------------------
126125

127126
int nrank[2] = {this->nx,this->bigny};
128127
int *embed = NULL;
129-
this->plan2for = fftw_plan_many_dft( 2, nrank, this->nplane,
130-
(fftw_complex*) c_rspace, embed, this->nplane, 1,
131-
(fftw_complex*) c_rspace2, embed, this->nplane, 1, FFTW_FORWARD, FFTW_MEASURE);
128+
if(this->gamma_only)
129+
{
130+
this->plan2r2c = fftw_plan_many_dft_r2c( 2, nrank, this->nplane,
131+
r_rspace, embed, this->nplane, 1,
132+
(fftw_complex*) c_rspace, embed, this->nplane, 1, FFTW_MEASURE);
132133

133-
this->plan2bac = fftw_plan_many_dft( 2, nrank, this->nplane,
134-
(fftw_complex*) c_rspace, embed, this->nplane, 1,
135-
(fftw_complex*) c_rspace2, embed, this->nplane, 1, FFTW_BACKWARD, FFTW_MEASURE);
136-
137-
this->plan2r2c = fftw_plan_many_dft_r2c( 2, nrank, this->nplane,
138-
r_rspace, embed, this->nplane, 1,
139-
(fftw_complex*) c_rspace, embed, this->nplane, 1, FFTW_MEASURE);
140-
141-
this->plan2c2r = fftw_plan_many_dft_c2r( 2, nrank, this->nplane,
142-
(fftw_complex*) c_rspace, embed, this->nplane, 1,
143-
r_rspace, embed, this->nplane, 1, FFTW_MEASURE);
134+
this->plan2c2r = fftw_plan_many_dft_c2r( 2, nrank, this->nplane,
135+
(fftw_complex*) c_rspace, embed, this->nplane, 1,
136+
r_rspace, embed, this->nplane, 1, FFTW_MEASURE);
137+
}
138+
else
139+
{
140+
this->plan2for = fftw_plan_many_dft( 2, nrank, this->nplane,
141+
(fftw_complex*) c_rspace, embed, this->nplane, 1,
142+
(fftw_complex*) c_rspace2, embed, this->nplane, 1, FFTW_FORWARD, FFTW_MEASURE);
143+
144+
this->plan2bac = fftw_plan_many_dft( 2, nrank, this->nplane,
145+
(fftw_complex*) c_rspace, embed, this->nplane, 1,
146+
(fftw_complex*) c_rspace2, embed, this->nplane, 1, FFTW_BACKWARD, FFTW_MEASURE);
147+
}
144148
destroyp = false;
145149
}
146150

@@ -169,21 +173,27 @@ void FFT :: initplanf()
169173
//---------------------------------------------------------
170174

171175
int nrank[2] = {this->nx,this->bigny};
172-
this->planf2for = fftwf_plan_many_dft( 2, nrank, this->nplane,
173-
(fftwf_complex*)c_rspace, nrank, this->nplane, 1,
174-
(fftwf_complex*)c_rspace, nrank, this->nplane, 1, FFTW_FORWARD, FFTW_MEASURE);
175-
176-
this->planf2bac = fftwf_plan_many_dft( 2, nrank, this->nplane,
177-
(fftwf_complex*)c_rspace, nrank, this->nplane, 1,
178-
(fftwf_complex*)c_rspace, nrank, this->nplane, 1, FFTW_BACKWARD, FFTW_MEASURE);
179176

180-
this->planf2r2c = fftwf_plan_many_dft_r2c( 2, nrank, this->nplane,
181-
r_rspace, nrank, this->nplane, 1,
182-
(fftwf_complex*)c_rspace, nrank, this->nplane, 1, FFTW_MEASURE);
183-
184-
this->planf2c2r = fftwf_plan_many_dft_c2r( 2, nrank, this->nplane,
185-
(fftwf_complex*)c_rspace, nrank, this->nplane, 1,
186-
r_rspace, nrank, this->nplane, 1, FFTW_MEASURE);
177+
if(this->gamma_only)
178+
{
179+
this->planf2r2c = fftwf_plan_many_dft_r2c( 2, nrank, this->nplane,
180+
r_rspace, nrank, this->nplane, 1,
181+
(fftwf_complex*)c_rspace, nrank, this->nplane, 1, FFTW_MEASURE);
182+
183+
this->planf2c2r = fftwf_plan_many_dft_c2r( 2, nrank, this->nplane,
184+
(fftwf_complex*)c_rspace, nrank, this->nplane, 1,
185+
r_rspace, nrank, this->nplane, 1, FFTW_MEASURE);
186+
}
187+
else
188+
{
189+
this->planf2for = fftwf_plan_many_dft( 2, nrank, this->nplane,
190+
(fftwf_complex*)c_rspace, nrank, this->nplane, 1,
191+
(fftwf_complex*)c_rspace, nrank, this->nplane, 1, FFTW_FORWARD, FFTW_MEASURE);
192+
193+
this->planf2bac = fftwf_plan_many_dft( 2, nrank, this->nplane,
194+
(fftwf_complex*)c_rspace, nrank, this->nplane, 1,
195+
(fftwf_complex*)c_rspace, nrank, this->nplane, 1, FFTW_BACKWARD, FFTW_MEASURE);
196+
}
187197
destroypf = false;
188198
}
189199
#endif
@@ -205,21 +215,33 @@ void FFT:: cleanFFT()
205215
if(destroyp==true) return;
206216
fftw_destroy_plan(plan1for);
207217
fftw_destroy_plan(plan1bac);
208-
fftw_destroy_plan(plan2for);
209-
fftw_destroy_plan(plan2bac);
210-
fftw_destroy_plan(plan2r2c);
211-
fftw_destroy_plan(plan2c2r);
212-
destroyp == true;
218+
if(this->gamma_only)
219+
{
220+
fftw_destroy_plan(plan2r2c);
221+
fftw_destroy_plan(plan2c2r);
222+
}
223+
else
224+
{
225+
fftw_destroy_plan(plan2for);
226+
fftw_destroy_plan(plan2bac);
227+
}
228+
destroyp = true;
213229

214230
#ifdef __MIX_PRECISION
215231
if(destroypf==true) return;
216232
fftw_destroy_plan(planf1for);
217233
fftw_destroy_plan(planf1bac);
218-
fftw_destroy_plan(planf2for);
219-
fftw_destroy_plan(planf2bac);
220-
fftw_destroy_plan(planf2r2c);
221-
fftw_destroy_plan(planf2c2r);
222-
destroypf == true;
234+
if(this->gamma_only)
235+
{
236+
fftw_destroy_plan(planf2r2c);
237+
fftw_destroy_plan(planf2c2r);
238+
}
239+
else
240+
{
241+
fftw_destroy_plan(planf2for);
242+
fftw_destroy_plan(planf2bac);
243+
}
244+
destroypf = true;
223245
#endif
224246

225247
return;

source/module_pw/pw_distributeg_method1.cpp

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@ void PW_Basis::distribution_method1()
2020
// initialize the variables needed by all processes.
2121
int tot_npw = 0; // total number of planewaves.
2222
this->nstot = 0; // total number of sticks.
23-
int st_start = 0; // index of the first stick on current proc.
2423
int *st_bottom2D = NULL; // st_bottom2D[ixy], minimum z of stick on (x, y).
2524
int *st_length2D = NULL; // st_length2D[ixy], number of planewaves in stick on (x, y).
2625

@@ -275,7 +274,6 @@ void PW_Basis::collect_st(
275274
ibox[2] = int(this->nz / 2) + 1; // scan z from -ibox[2] to ibox[2].
276275

277276
ModuleBase::Vector3<double> f;
278-
int ig = 0; // index of planewave.
279277
int is = 0; // index of stick.
280278

281279
int iy_start = -ibox[1]; // determine the scanning area along the y-direction; if gamma-only, only the positive axis is used.
@@ -335,10 +333,8 @@ void PW_Basis::collect_st(
335333
st_sorted_index[0] = 0;
336334
ModuleBase::heapsort(this->nstot, temp_st_length, st_sorted_index); // sort st_* in the order of length decreasing.
337335

338-
int index = 0; // indexs in the order of length decreasing.
339336
for (int istot = 0; istot < this->nstot; ++istot)
340337
{
341-
index = (this->nstot - 1) - istot;
342338
st_length[istot] = static_cast<int>(temp_st_length[istot]);
343339
st_i[istot] = temp_st_i[st_sorted_index[istot]];
344340
st_j[istot] = temp_st_j[st_sorted_index[istot]];

source/module_pw/pw_distributeg_method2.cpp

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@ void PW_Basis::distribution_method1()
2121
// initialize the variables needed by all processes.
2222
int tot_npw = 0; // total number of planewaves.
2323
this->nstot = 0;
24-
int st_start = 0; // index of the first stick on current proc.
2524
int *st_i = NULL; // x or x + nx (if x < 0) of stick.
2625
int *st_j = NULL; // y or y + ny (if y < 0) of stick.
2726
int *st_bottom = NULL; // minimum z of stick.
@@ -292,7 +291,6 @@ void PW_Basis::collect_st(
292291
ibox[2] = int(this->nz / 2) + 1; // scan z from -ibox[2] to ibox[2].
293292

294293
ModuleBase::Vector3<double> f;
295-
int ig = 0; // index of planewave.
296294
int is = 0; // index of stick.
297295

298296
int ix_start = -ibox[0]; // determine the scanning area along the x-direction; if gamma-only, only the positive axis is used.
@@ -353,10 +351,9 @@ void PW_Basis::collect_st(
353351
st_sorted_index[0] = 0;
354352
ModuleBase::heapsort(this->nstot, temp_st_length, st_sorted_index); // sort st_* in the order of length increasing.
355353

356-
int index = 0; // indexs in the order of length decreasing.
357354
for (int istot = 0; istot < this->nstot; ++istot)
358355
{
359-
index = (this->nstot - 1) - istot;
356+
int index = (this->nstot - 1) - istot;
360357
st_length[index] = static_cast<int>(temp_st_length[istot]);
361358
st_i[index] = temp_st_i[st_sorted_index[istot]];
362359
st_j[index] = temp_st_j[st_sorted_index[istot]];

source/module_pw/pw_transform.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ void PW_Basis:: recip2real(std::complex<double> * in, std::complex<double> * out
7979

8080
for(int ir = 0 ; ir < this->nrxx ; ++ir)
8181
{
82-
out[ir] = this->ft.c_rspace2[ir] / this->bignxyz;
82+
out[ir] = this->ft.c_rspace2[ir] / double(this->bignxyz);
8383
}
8484
return;
8585
}

source/module_pw/unittest/Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ PW_OBJS=$(patsubst %.o, ${OBJ_DIR}/%.o, ${PW_OBJS_0})
5555
#Use fftw package
5656
#FFTW_DIR = /home/qianrui/intelcompile/impi_fftw
5757
#FFTW_LIB_DIR = ${FFTW_DIR}/lib
58-
#FFTW_INCLUDE_DIR = ${FFTW_DIR}/lib
58+
#FFTW_INCLUDE_DIR = ${FFTW_DIR}/include
5959
#FFTW_LIB = -L${FFTW_LIB_DIR} -lfftw3 -Wl,-rpath=${FFTW_LIB_DIR}
6060

6161
#Use mkl_fftw
Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
2+
#==========================
3+
# Compiler information
4+
#==========================
5+
CPLUSPLUS = g++
6+
CPLUSPLUS_MPI = mpicxx
7+
CUDA_COMPILE = nvcc
8+
OBJ_DIR = pw_obj
9+
NP = 12
10+
11+
#==========================
12+
# Options
13+
#==========================
14+
#Only MPI
15+
HONG = -D__MPI -D__NORMAL
16+
17+
#Mix Precision
18+
#HONG = -D__MIX_PRECISION -D__NORMAL
19+
20+
#Cuda
21+
#HONG = -D__MPI -D__CUDA -D__NORMAL
22+
23+
#Cuda & Mix Precision
24+
#HONG = -D__MPI -D__CUDA -D__MIX_PRECISION -D__NORMAL
25+
26+
#==========================
27+
# Objects
28+
#==========================
29+
VPATH=../../src_parallel\
30+
:../../module_base\
31+
:../
32+
33+
PW_OBJS_0=intarray.o\
34+
matrix.o\
35+
matrix3.o\
36+
tool_quit.o\
37+
mymath3.o\
38+
timer.o\
39+
global_variable.o\
40+
parallel_global.o\
41+
pw_basis.o\
42+
pw_distributer.o\
43+
pw_gatherscatter.o\
44+
pw_init.o\
45+
pw_transform.o\
46+
pw_distributeg.o\
47+
pw_distributeg_method1.o\
48+
fft.o
49+
50+
PW_OBJS=$(patsubst %.o, ${OBJ_DIR}/%.o, ${PW_OBJS_0})
51+
52+
##==========================
53+
## FFTW package needed
54+
##==========================
55+
#Use fftw package
56+
FFTW_DIR = /home/qianrui/gnucompile/g_fftw-3.3.8
57+
FFTW_LIB_DIR = ${FFTW_DIR}/lib
58+
FFTW_INCLUDE_DIR = ${FFTW_DIR}/include
59+
FFTW_LIB = -L${FFTW_LIB_DIR} -lfftw3 -Wl,-rpath=${FFTW_LIB_DIR}
60+
61+
62+
63+
##==========================
64+
## CUDA needed
65+
##==========================
66+
# CUDA_DIR = /usr/local/cuda-11.0
67+
# CUDA_INCLUDE_DIR = ${CUDA_DIR}/include
68+
# CUDA_LIB_DIR = ${CUDA_DIR}/lib64
69+
# CUDA_LIB = -L${CUDA_LIB_DIR} -lcufft -lcublas -lcudart
70+
71+
LIBS = ${FFTW_LIB} ${CUDA_LIB}
72+
OPTS = -I${FFTW_INCLUDE_DIR} ${HONG} -Ofast -march=native -std=c++11 -Wall -g
73+
#==========================
74+
# MAKING OPTIONS
75+
#==========================
76+
pw :
77+
@ make init
78+
@ make -j $(NP) parallel
79+
80+
init :
81+
@ if [ ! -d $(OBJ_DIR) ]; then mkdir $(OBJ_DIR); fi
82+
@ if [ ! -d $(OBJ_DIR)/README ]; then echo "This directory contains all of the .o files" > $(OBJ_DIR)/README; fi
83+
84+
parallel : ${PW_OBJS}
85+
${CPLUSPLUS_MPI} ${OPTS} test1.cpp test_tool.cpp ${PW_OBJS} ${LIBS} -o test1.exe
86+
${CPLUSPLUS_MPI} ${OPTS} test2.cpp test_tool.cpp ${PW_OBJS} ${LIBS} -o test2.exe
87+
${CPLUSPLUS_MPI} ${OPTS} test3.cpp test_tool.cpp ${PW_OBJS} ${LIBS} -o test3.exe
88+
89+
${OBJ_DIR}/%.o:%.cpp
90+
${CPLUSPLUS_MPI} ${OPTS} -c ${HONG} $< -o $@
91+
92+
.PHONY:clean
93+
clean:
94+
@ if [ -d $(OBJ_DIR) ]; then rm -rf $(OBJ_DIR); fi
95+
@ if [ -e test1.exe ]; then rm -f test1.exe; fi
96+
@ if [ -e test2.exe ]; then rm -f test2.exe; fi
97+
@ if [ -e test3.exe ]; then rm -f test3.exe; fi

0 commit comments

Comments
 (0)