Skip to content

Commit e5f5a42

Browse files
committed
reset the for
1 parent af0b97f commit e5f5a42

File tree

4 files changed

+91
-60
lines changed

4 files changed

+91
-60
lines changed

source/module_basis/module_pw/test_gpu/pw_basis_C2C.cpp

Lines changed: 29 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -49,24 +49,28 @@ TEST_F(PWTEST, recip_to_real_C2C_double)
4949
{
5050
for (int ix = 0; ix < nx; ++ix)
5151
{
52+
const double vx = ix - int(nx / 2);
5253
for (int iy = 0; iy < ny; ++iy)
5354
{
5455
const int offset = (ix * ny + iy) * nz;
56+
const double vy = iy - int(ny / 2);
5557
for (int iz = 0; iz < nz; ++iz)
5658
{
5759
tmp[offset + iz] = 0.0;
58-
double vx = ix - int(nx / 2);
59-
double vy = iy - int(ny / 2);
60-
double vz = iz - int(nz / 2);
60+
const double vz = iz - int(nz / 2);
6161
ModuleBase::Vector3<double> v(vx, vy, vz);
6262
double modulus = v * (GGT * v);
6363
if (modulus <= ggecut)
6464
{
6565
tmp[offset + iz] = 1.0 / (modulus + 1);
6666
if (vy > 0)
67+
{
6768
tmp[offset + iz] += ModuleBase::IMAG_UNIT / (std::abs(v.x + 1) + 1);
69+
}
6870
else if (vy < 0)
71+
{
6972
tmp[offset + iz] -= ModuleBase::IMAG_UNIT / (std::abs(-v.x + 1) + 1);
73+
}
7074
}
7175
}
7276
}
@@ -81,10 +85,10 @@ TEST_F(PWTEST, recip_to_real_C2C_double)
8185
double(int(nz / 2)) / nz);
8286
for (int ixy = 0; ixy < nx * ny; ++ixy)
8387
{
88+
const int ix = ixy / ny;
89+
const int iy = ixy % ny;
8490
for (int iz = 0; iz < nz; ++iz)
8591
{
86-
int ix = ixy / ny;
87-
int iy = ixy % ny;
8892
ModuleBase::Vector3<double> real_r(ix, iy, iz);
8993
double phase_im = -delta_g * real_r;
9094
complex<double> phase(0, ModuleBase::TWO_PI * phase_im);
@@ -95,9 +99,9 @@ TEST_F(PWTEST, recip_to_real_C2C_double)
9599
// const int size = nx * ny * nz;
96100
complex<double>* h_rhog = new complex<double>[npw];
97101
complex<double>* h_rhogout = new complex<double>[npw];
98-
complex<double>* d_rhog;
99-
complex<double>* d_rhogr;
100-
complex<double>* d_rhogout;
102+
complex<double>* d_rhog = nullptr;
103+
complex<double>* d_rhogr = nullptr;
104+
complex<double>* d_rhogout = nullptr;
101105
cudaMalloc((void**)&d_rhog, npw * sizeof(complex<double>));
102106
cudaMalloc((void**)&d_rhogr, npw * sizeof(complex<double>));
103107
cudaMalloc((void**)&d_rhogout, npw * sizeof(complex<double>));
@@ -117,18 +121,20 @@ TEST_F(PWTEST, recip_to_real_C2C_double)
117121
cudaMemcpy(d_rhog, h_rhog, npw * sizeof(complex<double>), cudaMemcpyHostToDevice);
118122

119123
std::complex<double>* h_rhor = new std::complex<double>[nrxx];
120-
std::complex<double>* d_rhor;
124+
std::complex<double>* d_rhor = nullptr;
121125
cudaMalloc((void**)&d_rhor, nrxx * sizeof(std::complex<double>));
122126
pwtest.recip_to_real<std::complex<double>, std::complex<double>, base_device::DEVICE_GPU>(d_rhog, d_rhor);
123127
cudaMemcpy(h_rhor, d_rhor, nrxx * sizeof(std::complex<double>), cudaMemcpyDeviceToHost);
124128

125129
int startiz = pwtest.startz_current;
126130
for (int ixy = 0; ixy < nx * ny; ++ixy)
127131
{
132+
const int offset = ixy * nz + startiz;
133+
const int startz = ixy * nplane ;
128134
for (int iz = 0; iz < nplane; ++iz)
129135
{
130-
EXPECT_NEAR(tmp[ixy * nz + startiz + iz].real(), h_rhor[ixy * nplane + iz].real(), 1e-6);
131-
EXPECT_NEAR(tmp[ixy * nz + startiz + iz].imag(), h_rhor[ixy * nplane + iz].imag(), 1e-6);
136+
EXPECT_NEAR(tmp[offset + iz].real(), h_rhor[startz + iz].real(), 1e-6);
137+
EXPECT_NEAR(tmp[offset + iz].imag(), h_rhor[startz + iz].imag(), 1e-6);
132138
}
133139
}
134140

@@ -185,14 +191,14 @@ TEST_F(PWTEST, recip_to_real_C2C_float)
185191
{
186192
for (int ix = 0; ix < nx; ++ix)
187193
{
194+
const float vx = ix - int(nx / 2);
188195
for (int iy = 0; iy < ny; ++iy)
189196
{
197+
const float vy = iy - int(ny / 2);
190198
const int offset = (ix * ny + iy) * nz;
191199
for (int iz = 0; iz < nz; ++iz)
192200
{
193201
tmp[offset+ iz] = 0.0;
194-
float vx = ix - int(nx / 2);
195-
float vy = iy - int(ny / 2);
196202
float vz = iz - int(nz / 2);
197203
ModuleBase::Vector3<double> v(vx, vy, vz);
198204
float modulus = v * (GGT * v);
@@ -215,10 +221,10 @@ TEST_F(PWTEST, recip_to_real_C2C_float)
215221
ModuleBase::Vector3<float> delta_g(float(int(nx / 2)) / nx, float(int(ny / 2)) / ny, float(int(nz / 2)) / nz);
216222
for (int ixy = 0; ixy < nx * ny; ++ixy)
217223
{
224+
const int ix = ixy / ny;
225+
const int iy = ixy % ny;
218226
for (int iz = 0; iz < nz; ++iz)
219227
{
220-
int ix = ixy / ny;
221-
int iy = ixy % ny;
222228
ModuleBase::Vector3<float> real_r(ix, iy, iz);
223229
float phase_im = -delta_g * real_r;
224230
complex<float> phase(0, ModuleBase::TWO_PI * phase_im);
@@ -229,9 +235,9 @@ TEST_F(PWTEST, recip_to_real_C2C_float)
229235
// const int size = nx * ny * nz;
230236
complex<float>* h_rhog = new complex<float>[npw];
231237
complex<float>* h_rhogout = new complex<float>[npw];
232-
complex<float>* d_rhog;
233-
complex<float>* d_rhogr;
234-
complex<float>* d_rhogout;
238+
complex<float>* d_rhog = nullptr;
239+
complex<float>* d_rhogr = nullptr;
240+
complex<float>* d_rhogout = nullptr;
235241
cudaMalloc((void**)&d_rhog, npw * sizeof(complex<float>));
236242
cudaMalloc((void**)&d_rhogr, npw * sizeof(complex<float>));
237243
cudaMalloc((void**)&d_rhogout, npw * sizeof(complex<float>));
@@ -252,18 +258,20 @@ TEST_F(PWTEST, recip_to_real_C2C_float)
252258
cudaMemcpy(d_rhogout, h_rhogout, npw * sizeof(complex<float>), cudaMemcpyHostToDevice);
253259

254260
std::complex<float>* h_rhor = new std::complex<float>[nrxx];
255-
std::complex<float>* d_rhor;
261+
std::complex<float>* d_rhor = nullptr;
256262
cudaMalloc((void**)&d_rhor, nrxx * sizeof(std::complex<float>));
257263
pwtest.recip_to_real<std::complex<float>, std::complex<float>, base_device::DEVICE_GPU>(d_rhog, d_rhor);
258264
cudaMemcpy(h_rhor, d_rhor, nrxx * sizeof(std::complex<float>), cudaMemcpyDeviceToHost);
259265

260266
int startiz = pwtest.startz_current;
261267
for (int ixy = 0; ixy < nx * ny; ++ixy)
262268
{
269+
const int offset = ixy * nz + startiz;
270+
const int startz = ixy * nplane;
263271
for (int iz = 0; iz < nplane; ++iz)
264272
{
265-
EXPECT_NEAR(tmp[ixy * nz + startiz + iz].real(), h_rhor[ixy * nplane + iz].real(), 1e-4);
266-
EXPECT_NEAR(tmp[ixy * nz + startiz + iz].imag(), h_rhor[ixy * nplane + iz].imag(), 1e-4);
273+
EXPECT_NEAR(tmp[offset + iz].real(), h_rhor[startz + iz].real(), 1e-4);
274+
EXPECT_NEAR(tmp[offset + iz].imag(), h_rhor[startz + iz].imag(), 1e-4);
267275
}
268276
}
269277

source/module_basis/module_pw/test_gpu/pw_basis_C2R.cpp

Lines changed: 31 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -52,24 +52,28 @@ TEST_F(PWTEST, recip_to_real_double)
5252
{
5353
for (int ix = 0; ix < nx; ++ix)
5454
{
55+
const double vx = ix -int(nx/2);
5556
for (int iy = 0; iy < ny; ++iy)
5657
{
5758
const int offset = (ix * ny + iy) * nz;
59+
const double vy = iy - int(ny / 2);
5860
for (int iz = 0; iz < nz; ++iz)
5961
{
6062
tmp[offset+ iz] = 0.0;
61-
double vx = ix - int(nx / 2);
62-
double vy = iy - int(ny / 2);
6363
double vz = iz - int(nz / 2);
6464
ModuleBase::Vector3<double> v(vx, vy, vz);
6565
double modulus = v * (GGT * v);
6666
if (modulus <= ggecut)
6767
{
6868
tmp[offset+ iz] = 1.0 / (modulus + 1);
6969
if (vy > 0)
70+
{
7071
tmp[offset+ iz] += ModuleBase::IMAG_UNIT / (std::abs(v.x + 1) + 1);
72+
}
7173
else if (vy < 0)
74+
{
7275
tmp[offset+ iz] -= ModuleBase::IMAG_UNIT / (std::abs(-v.x + 1) + 1);
76+
}
7377
}
7478
}
7579
}
@@ -84,10 +88,10 @@ TEST_F(PWTEST, recip_to_real_double)
8488
double(int(nz / 2)) / nz);
8589
for (int ixy = 0; ixy < nx * ny; ++ixy)
8690
{
91+
const int ix = ixy / ny;
92+
const int iy = ixy % ny;
8793
for (int iz = 0; iz < nz; ++iz)
8894
{
89-
int ix = ixy / ny;
90-
int iy = ixy % ny;
9195
ModuleBase::Vector3<double> real_r(ix, iy, iz);
9296
double phase_im = -delta_g * real_r;
9397
complex<double> phase(0, ModuleBase::TWO_PI * phase_im);
@@ -98,9 +102,9 @@ TEST_F(PWTEST, recip_to_real_double)
98102
// const int size = nx * ny * nz;
99103
complex<double>* h_rhog = new complex<double>[npw];
100104
complex<double>* h_rhogout = new complex<double>[npw];
101-
complex<double>* d_rhog;
102-
complex<double>* d_rhogr;
103-
complex<double>* d_rhogout;
105+
complex<double>* d_rhog = nullptr;
106+
complex<double>* d_rhogr = nullptr;
107+
complex<double>* d_rhogout = nullptr;
104108
cudaMalloc((void**)&d_rhog, npw * sizeof(complex<double>));
105109
cudaMalloc((void**)&d_rhogr, npw * sizeof(complex<double>));
106110
cudaMalloc((void**)&d_rhogout, npw * sizeof(complex<double>));
@@ -120,17 +124,19 @@ TEST_F(PWTEST, recip_to_real_double)
120124
cudaMemcpy(d_rhog, h_rhog, npw * sizeof(complex<double>), cudaMemcpyHostToDevice);
121125

122126
double* h_rhor = new double[nrxx];
123-
double* d_rhor;
127+
double* d_rhor = nullptr;
124128
cudaMalloc((void**)&d_rhor, nrxx * sizeof(double));
125129
pwtest.recip_to_real<std::complex<double>, double, base_device::DEVICE_GPU>(d_rhog, d_rhor);
126130
cudaMemcpy(h_rhor, d_rhor, nrxx * sizeof(double), cudaMemcpyDeviceToHost);
127131

128132
int startiz = pwtest.startz_current;
129133
for (int ixy = 0; ixy < nx * ny; ++ixy)
130134
{
135+
const int offset = ixy * nz + startiz;
136+
const int startz = ixy * nplane;
131137
for (int iz = 0; iz < nplane; ++iz)
132138
{
133-
EXPECT_NEAR(tmp[ixy * nz + startiz + iz].real(), h_rhor[ixy * nplane + iz], 1e-6);
139+
EXPECT_NEAR(tmp[offset + iz].real(), h_rhor[startz + iz], 1e-6);
134140
}
135141
}
136142

@@ -188,24 +194,28 @@ TEST_F(PWTEST, recip_to_real_float)
188194
{
189195
for (int ix = 0; ix < nx; ++ix)
190196
{
197+
const float vx = ix - int(nx / 2);
191198
for (int iy = 0; iy < ny; ++iy)
192199
{
193200
const int offset = (ix * ny + iy) * nz;
201+
const float vy = iy - int(ny / 2);
194202
for (int iz = 0; iz < nz; ++iz)
195203
{
196204
tmp[offset+ iz] = 0.0;
197-
float vx = ix - int(nx / 2);
198-
float vy = iy - int(ny / 2);
199-
float vz = iz - int(nz / 2);
205+
const float vz = iz - int(nz / 2);
200206
ModuleBase::Vector3<double> v(vx, vy, vz);
201207
float modulus = v * (GGT * v);
202208
if (modulus <= ggecut)
203209
{
204210
tmp[offset+ iz] = 1.0 / (modulus + 1);
205211
if (vy > 0)
212+
{
206213
tmp[offset+ iz] += std::complex<float>(0, 1.0) / (std::abs(vx + 1) + 1);
214+
}
207215
else if (vy < 0)
216+
{
208217
tmp[offset+ iz] -= std::complex<float>(0, 1.0) / (std::abs(-vx + 1) + 1);
218+
}
209219
}
210220
}
211221
}
@@ -218,10 +228,10 @@ TEST_F(PWTEST, recip_to_real_float)
218228
ModuleBase::Vector3<float> delta_g(float(int(nx / 2)) / nx, float(int(ny / 2)) / ny, float(int(nz / 2)) / nz);
219229
for (int ixy = 0; ixy < nx * ny; ++ixy)
220230
{
231+
const int ix = ixy / ny;
232+
const int iy = ixy % ny;
221233
for (int iz = 0; iz < nz; ++iz)
222234
{
223-
int ix = ixy / ny;
224-
int iy = ixy % ny;
225235
ModuleBase::Vector3<float> real_r(ix, iy, iz);
226236
float phase_im = -delta_g * real_r;
227237
complex<float> phase(0, ModuleBase::TWO_PI * phase_im);
@@ -232,9 +242,9 @@ TEST_F(PWTEST, recip_to_real_float)
232242
// const int size = nx * ny * nz;
233243
complex<float>* h_rhog = new complex<float>[npw];
234244
complex<float>* h_rhogout = new complex<float>[npw];
235-
complex<float>* d_rhog;
236-
complex<float>* d_rhogr;
237-
complex<float>* d_rhogout;
245+
complex<float>* d_rhog = nullptr;
246+
complex<float>* d_rhogr = nullptr;
247+
complex<float>* d_rhogout = nullptr;
238248
cudaMalloc((void**)&d_rhog, npw * sizeof(complex<float>));
239249
cudaMalloc((void**)&d_rhogr, npw * sizeof(complex<float>));
240250
cudaMalloc((void**)&d_rhogout, npw * sizeof(complex<float>));
@@ -256,17 +266,19 @@ TEST_F(PWTEST, recip_to_real_float)
256266

257267
float* h_rhor = new float[nrxx];
258268
float* h_rhogrout = new float[nrxx];
259-
float* d_rhor;
269+
float* d_rhor = nullptr;
260270
cudaMalloc((void**)&d_rhor, nrxx * sizeof(float));
261271
pwtest.recip_to_real<std::complex<float>, float, base_device::DEVICE_GPU>(d_rhog, d_rhor);
262272
cudaMemcpy(h_rhor, d_rhor, nrxx * sizeof(float), cudaMemcpyDeviceToHost);
263273

264274
int startiz = pwtest.startz_current;
265275
for (int ixy = 0; ixy < nx * ny; ++ixy)
266276
{
277+
const int offset = ixy * nz + startiz;
278+
const int startz = ixy * nplane;
267279
for (int iz = 0; iz < nplane; ++iz)
268280
{
269-
EXPECT_NEAR(tmp[ixy * nz + startiz + iz].real(), h_rhor[ixy * nplane + iz], 1e-6);
281+
EXPECT_NEAR(tmp[offset + iz].real(), h_rhor[startz + iz], 1e-6);
270282
}
271283
}
272284

0 commit comments

Comments
 (0)