Skip to content

Commit af1b58a

Browse files
committed
merge from deepmodeling
2 parents 7188a3a + 41d38a0 commit af1b58a

File tree

10 files changed

+280
-60
lines changed

10 files changed

+280
-60
lines changed

source/module_base/mymath.h

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,6 @@
55
#include "fftw.h"
66
#elif defined __FFTW3
77
#include "fftw3.h"
8-
#else
9-
#include <fftw3-mpi.h>
10-
//#include "fftw3-mpi_mkl.h"
118
#endif
129
namespace ModuleBase
1310
{

source/module_pw/fft.cpp

Lines changed: 61 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -118,29 +118,33 @@ void FFT :: initplan()
118118
this->plan1bac = fftw_plan_many_dft( 1, &this->nz, this->ns,
119119
(fftw_complex*) c_gspace, &this->nz, 1, this->nz,
120120
(fftw_complex*) c_gspace2, &this->nz, 1, this->nz, FFTW_BACKWARD, FFTW_MEASURE);
121-
122121

123122
//---------------------------------------------------------
124123
// 2 D
125124
//---------------------------------------------------------
126125

127126
int nrank[2] = {this->nx,this->bigny};
128127
int *embed = NULL;
129-
this->plan2for = fftw_plan_many_dft( 2, nrank, this->nplane,
130-
(fftw_complex*) c_rspace, embed, this->nplane, 1,
131-
(fftw_complex*) c_rspace2, embed, this->nplane, 1, FFTW_FORWARD, FFTW_MEASURE);
128+
if(this->gamma_only)
129+
{
130+
this->plan2r2c = fftw_plan_many_dft_r2c( 2, nrank, this->nplane,
131+
r_rspace, embed, this->nplane, 1,
132+
(fftw_complex*) c_rspace, embed, this->nplane, 1, FFTW_MEASURE);
132133

133-
this->plan2bac = fftw_plan_many_dft( 2, nrank, this->nplane,
134-
(fftw_complex*) c_rspace, embed, this->nplane, 1,
135-
(fftw_complex*) c_rspace2, embed, this->nplane, 1, FFTW_BACKWARD, FFTW_MEASURE);
136-
137-
this->plan2r2c = fftw_plan_many_dft_r2c( 2, nrank, this->nplane,
138-
r_rspace, embed, this->nplane, 1,
139-
(fftw_complex*) c_rspace, embed, this->nplane, 1, FFTW_MEASURE);
140-
141-
this->plan2c2r = fftw_plan_many_dft_c2r( 2, nrank, this->nplane,
142-
(fftw_complex*) c_rspace, embed, this->nplane, 1,
143-
r_rspace, embed, this->nplane, 1, FFTW_MEASURE);
134+
this->plan2c2r = fftw_plan_many_dft_c2r( 2, nrank, this->nplane,
135+
(fftw_complex*) c_rspace, embed, this->nplane, 1,
136+
r_rspace, embed, this->nplane, 1, FFTW_MEASURE);
137+
}
138+
else
139+
{
140+
this->plan2for = fftw_plan_many_dft( 2, nrank, this->nplane,
141+
(fftw_complex*) c_rspace, embed, this->nplane, 1,
142+
(fftw_complex*) c_rspace2, embed, this->nplane, 1, FFTW_FORWARD, FFTW_MEASURE);
143+
144+
this->plan2bac = fftw_plan_many_dft( 2, nrank, this->nplane,
145+
(fftw_complex*) c_rspace, embed, this->nplane, 1,
146+
(fftw_complex*) c_rspace2, embed, this->nplane, 1, FFTW_BACKWARD, FFTW_MEASURE);
147+
}
144148
destroyp = false;
145149
}
146150

@@ -169,21 +173,27 @@ void FFT :: initplanf()
169173
//---------------------------------------------------------
170174

171175
int nrank[2] = {this->nx,this->bigny};
172-
this->planf2for = fftwf_plan_many_dft( 2, nrank, this->nplane,
173-
(fftwf_complex*)c_rspace, nrank, this->nplane, 1,
174-
(fftwf_complex*)c_rspace, nrank, this->nplane, 1, FFTW_FORWARD, FFTW_MEASURE);
175-
176-
this->planf2bac = fftwf_plan_many_dft( 2, nrank, this->nplane,
177-
(fftwf_complex*)c_rspace, nrank, this->nplane, 1,
178-
(fftwf_complex*)c_rspace, nrank, this->nplane, 1, FFTW_BACKWARD, FFTW_MEASURE);
179176

180-
this->planf2r2c = fftwf_plan_many_dft_r2c( 2, nrank, this->nplane,
181-
r_rspace, nrank, this->nplane, 1,
182-
(fftwf_complex*)c_rspace, nrank, this->nplane, 1, FFTW_MEASURE);
183-
184-
this->planf2c2r = fftwf_plan_many_dft_c2r( 2, nrank, this->nplane,
185-
(fftwf_complex*)c_rspace, nrank, this->nplane, 1,
186-
r_rspace, nrank, this->nplane, 1, FFTW_MEASURE);
177+
if(this->gamma_only)
178+
{
179+
this->planf2r2c = fftwf_plan_many_dft_r2c( 2, nrank, this->nplane,
180+
r_rspace, nrank, this->nplane, 1,
181+
(fftwf_complex*)c_rspace, nrank, this->nplane, 1, FFTW_MEASURE);
182+
183+
this->planf2c2r = fftwf_plan_many_dft_c2r( 2, nrank, this->nplane,
184+
(fftwf_complex*)c_rspace, nrank, this->nplane, 1,
185+
r_rspace, nrank, this->nplane, 1, FFTW_MEASURE);
186+
}
187+
else
188+
{
189+
this->planf2for = fftwf_plan_many_dft( 2, nrank, this->nplane,
190+
(fftwf_complex*)c_rspace, nrank, this->nplane, 1,
191+
(fftwf_complex*)c_rspace, nrank, this->nplane, 1, FFTW_FORWARD, FFTW_MEASURE);
192+
193+
this->planf2bac = fftwf_plan_many_dft( 2, nrank, this->nplane,
194+
(fftwf_complex*)c_rspace, nrank, this->nplane, 1,
195+
(fftwf_complex*)c_rspace, nrank, this->nplane, 1, FFTW_BACKWARD, FFTW_MEASURE);
196+
}
187197
destroypf = false;
188198
}
189199
#endif
@@ -205,21 +215,33 @@ void FFT:: cleanFFT()
205215
if(destroyp==true) return;
206216
fftw_destroy_plan(plan1for);
207217
fftw_destroy_plan(plan1bac);
208-
fftw_destroy_plan(plan2for);
209-
fftw_destroy_plan(plan2bac);
210-
fftw_destroy_plan(plan2r2c);
211-
fftw_destroy_plan(plan2c2r);
212-
destroyp == true;
218+
if(this->gamma_only)
219+
{
220+
fftw_destroy_plan(plan2r2c);
221+
fftw_destroy_plan(plan2c2r);
222+
}
223+
else
224+
{
225+
fftw_destroy_plan(plan2for);
226+
fftw_destroy_plan(plan2bac);
227+
}
228+
destroyp = true;
213229

214230
#ifdef __MIX_PRECISION
215231
if(destroypf==true) return;
216232
fftw_destroy_plan(planf1for);
217233
fftw_destroy_plan(planf1bac);
218-
fftw_destroy_plan(planf2for);
219-
fftw_destroy_plan(planf2bac);
220-
fftw_destroy_plan(planf2r2c);
221-
fftw_destroy_plan(planf2c2r);
222-
destroypf == true;
234+
if(this->gamma_only)
235+
{
236+
fftw_destroy_plan(planf2r2c);
237+
fftw_destroy_plan(planf2c2r);
238+
}
239+
else
240+
{
241+
fftw_destroy_plan(planf2for);
242+
fftw_destroy_plan(planf2bac);
243+
}
244+
destroypf = true;
223245
#endif
224246

225247
return;

source/module_pw/pw_distributeg_method1.cpp

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,6 @@ void PW_Basis::distribution_method1()
2828
// initial the variables needed by all proc.
2929
int tot_npw = 0; // total number of planewaves.
3030
this->nstot = 0; // total number of sticks.
31-
int st_start = 0; // index of the first stick on current proc.
3231
int *st_bottom2D = NULL; // st_bottom2D[ixy], minimum z of stick on (x, y).
3332
int *st_length2D = NULL; // st_length2D[ixy], number of planewaves in stick on (x, y).
3433

@@ -217,7 +216,6 @@ void PW_Basis::collect_st(
217216
ibox[2] = int(this->nz / 2) + 1; // scan z from -ibox[2] to ibox[2].
218217

219218
ModuleBase::Vector3<double> f;
220-
int ig = 0; // index of planewave.
221219
int is = 0; // index of stick.
222220

223221
int iy_start = -ibox[1]; // determine the scaning area along x-direct, if gamma-only, only positive axis is used.
@@ -277,10 +275,8 @@ void PW_Basis::collect_st(
277275
st_sorted_index[0] = 0;
278276
ModuleBase::heapsort(this->nstot, temp_st_length, st_sorted_index); // sort st_* in the order of length decreasing.
279277

280-
int index = 0; // indexs in the order of length decreasing.
281278
for (int istot = 0; istot < this->nstot; ++istot)
282279
{
283-
index = (this->nstot - 1) - istot;
284280
st_length[istot] = static_cast<int>(temp_st_length[istot]);
285281
st_i[istot] = temp_st_i[st_sorted_index[istot]];
286282
st_j[istot] = temp_st_j[st_sorted_index[istot]];

source/module_pw/pw_gatherscatter.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ void PW_Basis:: gatherp_scatters(complex<double> *in, complex<double> *out)
2323
return;
2424
}
2525
#ifdef __MPI
26-
std::complex<double> * tmp;
26+
std::complex<double> * tmp = NULL;
2727
if(this->poolrank == 0) tmp = new std::complex<double> [this->nz * this->nstot];
2828

2929
//gather planes of different processors
@@ -40,7 +40,7 @@ void PW_Basis:: gatherp_scatters(complex<double> *in, complex<double> *out)
4040
MPI_Scatterv(tmp, this->nstnz_per, this->startnsz_per,mpicomplex,out,
4141
this->nstnz,mpicomplex,0, POOL_WORLD);
4242

43-
if(this->poolrank == 0) delete[] tmp;
43+
if(tmp!=NULL) delete[] tmp;
4444
#endif
4545
return;
4646
}
@@ -107,7 +107,7 @@ void PW_Basis:: gathers_scatterp(complex<double> *in, complex<double> *out)
107107
}
108108
#ifdef __MPI
109109
if(this->poolnproc == 1) return;
110-
std::complex<double> * tmp;
110+
std::complex<double> * tmp = NULL;
111111
if(this->poolrank == 0) tmp = new std::complex<double> [this->nz * this->nstot];
112112

113113
//scatter sticks to different processors
@@ -121,7 +121,7 @@ void PW_Basis:: gathers_scatterp(complex<double> *in, complex<double> *out)
121121
MPI_Scatterv(&tmp[istot*this->nz], this->numz,this->startz, mpicomplex, &out[ixy*this->nplane],
122122
this->nplane,mpicomplex,0,POOL_WORLD);
123123
}
124-
if(this->poolrank == 0) delete[] tmp;
124+
if(tmp!=NULL) delete[] tmp;
125125
#endif
126126
return;
127127
}

source/module_pw/pw_transform.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ void PW_Basis:: recip2real(std::complex<double> * in, std::complex<double> * out
7979

8080
for(int ir = 0 ; ir < this->nrxx ; ++ir)
8181
{
82-
out[ir] = this->ft.c_rspace2[ir] / this->bignxyz;
82+
out[ir] = this->ft.c_rspace2[ir] / double(this->bignxyz);
8383
}
8484
return;
8585
}

source/module_pw/unittest/Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ PW_OBJS=$(patsubst %.o, ${OBJ_DIR}/%.o, ${PW_OBJS_0})
5656
#Use fftw package
5757
#FFTW_DIR = /home/qianrui/intelcompile/impi_fftw
5858
#FFTW_LIB_DIR = ${FFTW_DIR}/lib
59-
#FFTW_INCLUDE_DIR = ${FFTW_DIR}/lib
59+
#FFTW_INCLUDE_DIR = ${FFTW_DIR}/include
6060
#FFTW_LIB = -L${FFTW_LIB_DIR} -lfftw3 -Wl,-rpath=${FFTW_LIB_DIR}
6161

6262
#Use mkl_fftw
Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
# Makefile for the module_pw unit tests.
# Compiles the objects listed in PW_OBJS_0 into $(OBJ_DIR) and links them with
# test1.cpp, test2.cpp and test3.cpp (each together with test_tool.cpp) into
# test1.exe, test2.exe and test3.exe.

#==========================
# Compiler information
#==========================
CPLUSPLUS = g++
CPLUSPLUS_MPI = mpicxx
CUDA_COMPILE = nvcc
OBJ_DIR = pw_obj
# Number of parallel jobs the default target passes to the sub-make.
NP = 12

#==========================
# Options
#==========================
# HONG holds the preprocessor definitions (-D flags) handed to the compiler
# through OPTS. Keep exactly one of the alternatives below uncommented.

#Only MPI
HONG = -D__MPI -D__NORMAL

#Mix Precision
#HONG = -D__MIX_PRECISION -D__NORMAL

#Cuda
#HONG = -D__MPI -D__CUDA -D__NORMAL

#Cuda & Mix Precision
#HONG = -D__MPI -D__CUDA -D__MIX_PRECISION -D__NORMAL

#==========================
# Objects
#==========================
# Directories searched for the .cpp source of each object listed below.
VPATH=../../src_parallel\
:../../module_base\
:../

PW_OBJS_0=intarray.o\
matrix.o\
matrix3.o\
tool_quit.o\
mymath3.o\
timer.o\
global_variable.o\
parallel_global.o\
pw_basis.o\
pw_distributer.o\
pw_gatherscatter.o\
pw_init.o\
pw_transform.o\
pw_distributeg.o\
pw_distributeg_method1.o\
fft.o

# The same object list with every file placed under $(OBJ_DIR).
PW_OBJS=$(patsubst %.o, ${OBJ_DIR}/%.o, ${PW_OBJS_0})

##==========================
## FFTW package needed
##==========================
#Use fftw package
FFTW_DIR = /home/qianrui/gnucompile/g_fftw-3.3.8
FFTW_LIB_DIR = ${FFTW_DIR}/lib
FFTW_INCLUDE_DIR = ${FFTW_DIR}/include
FFTW_LIB = -L${FFTW_LIB_DIR} -lfftw3 -Wl,-rpath=${FFTW_LIB_DIR}



##==========================
## CUDA needed
##==========================
# CUDA_DIR = /usr/local/cuda-11.0
# CUDA_INCLUDE_DIR = ${CUDA_DIR}/include
# CUDA_LIB_DIR = ${CUDA_DIR}/lib64
# CUDA_LIB = -L${CUDA_LIB_DIR} -lcufft -lcublas -lcudart

# CUDA_LIB expands to nothing unless the CUDA section above is uncommented.
LIBS = ${FFTW_LIB} ${CUDA_LIB}
OPTS = -I${FFTW_INCLUDE_DIR} ${HONG} -Ofast -std=c++11 -Wall -g
#==========================
# MAKING OPTIONS
#==========================
# None of these targets produces a file of the same name, so declare them all
# phony; otherwise a stray file called "pw", "init" or "parallel" would stop
# the corresponding recipe from running.
.PHONY: pw init parallel clean

# Default target: prepare the object directory, then build with $(NP) jobs.
# $(MAKE) (instead of a literal "make") re-invokes the same make program and
# forwards the command-line flags and variable overrides to the sub-make.
pw :
	@ $(MAKE) init
	@ $(MAKE) -j $(NP) parallel

# Create the object directory and drop a short README into it.
# The README is a regular file, so its existence is tested with -f; the
# previous -d test never matched and rewrote the file on every run.
init :
	@ mkdir -p $(OBJ_DIR)
	@ if [ ! -f $(OBJ_DIR)/README ]; then echo "This directory contains all of the .o files" > $(OBJ_DIR)/README; fi

# Link the three test executables against the plane-wave objects.
parallel : ${PW_OBJS}
	${CPLUSPLUS_MPI} ${OPTS} test1.cpp test_tool.cpp ${PW_OBJS} ${LIBS} -o test1.exe
	${CPLUSPLUS_MPI} ${OPTS} test2.cpp test_tool.cpp ${PW_OBJS} ${LIBS} -o test2.exe
	${CPLUSPLUS_MPI} ${OPTS} test3.cpp test_tool.cpp ${PW_OBJS} ${LIBS} -o test3.exe

# Compile one source file (located through VPATH) into $(OBJ_DIR).
# The output directory is created on demand so that "make parallel" also
# works when "make init" has not been run. ${HONG} is already part of
# ${OPTS}, so it is not repeated here.
${OBJ_DIR}/%.o:%.cpp
	@ mkdir -p $(@D)
	${CPLUSPLUS_MPI} ${OPTS} -c $< -o $@

# Remove the object directory and the test executables.
# rm -rf / rm -f already succeed silently when the path does not exist.
clean:
	@ rm -rf $(OBJ_DIR)
	@ rm -f test1.exe test2.exe test3.exe

0 commit comments

Comments
 (0)