Skip to content

Commit 6ffb2a3

Browse files
committed
fix GPU bug
1 parent d2d9970 commit 6ffb2a3

File tree

12 files changed

+101
-15
lines changed

12 files changed

+101
-15
lines changed

source/module_base/parallel_device.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -165,7 +165,7 @@ void gatherv_dev(const T* sendbuf,
165165
T* sendbuf_cpu = o1.get(sendbuf, sendcount, tmp_sspace);
166166
T* recvbuf_cpu = o2.get(recvbuf, gather_space, tmp_rspace);
167167
o1.sync_d2h(sendbuf_cpu, sendbuf, sendcount);
168-
gatherv_data(sendbuf_cpu, sendcount, recvbuf, recvcounts, displs, comm);
168+
gatherv_data(sendbuf_cpu, sendcount, recvbuf_cpu, recvcounts, displs, comm);
169169
o2.sync_h2d(recvbuf, recvbuf_cpu, gather_space);
170170
o1.del(sendbuf_cpu);
171171
o2.del(recvbuf_cpu);

source/module_hamilt_pw/hamilt_pwdft/VNL_in_pw.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717

1818
pseudopot_cell_vnl::pseudopot_cell_vnl()
1919
{
20-
this->use_gpu_ = (this->use_gpu_);
20+
this->use_gpu_ = (PARAM.inp.device == "gpu");
2121
}
2222

2323
pseudopot_cell_vnl::~pseudopot_cell_vnl()

source/module_hamilt_pw/hamilt_pwdft/elecond.cpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -155,15 +155,16 @@ void EleCond<FPTYPE, Device>::jjresponse_ks(const int ik,
155155
nbands);
156156

157157
std::complex<FPTYPE>* pij_c = nullptr;
158+
std::vector<std::complex<FPTYPE>> pij_h_;
158159
if(std::is_same<Device, base_device::DEVICE_CPU>::value)
159160
{
160161
pij_c = pij_d;
161162
}
162163
else
163164
{
164-
std::vector<std::complex<FPTYPE>> pij_h(nbands * nbands);
165-
syncmem_complex_d2h_op()(pij_h.data(), pij_d, nbands * nbands);
166-
pij_c = pij_h.data();
165+
pij_h_.resize(nbands * nbands);
166+
syncmem_complex_d2h_op()(pij_h_.data(), pij_d, nbands * nbands);
167+
pij_c = pij_h_.data();
167168
}
168169

169170
#ifdef __MPI

source/module_hamilt_pw/hamilt_stodft/sto_elecond.cpp

Lines changed: 2 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -200,12 +200,7 @@ void Sto_EleCond<FPTYPE, Device>::cal_jmatrix(hamilt::HamiltSdftPW<std::complex<
200200
{
201201
vec_rightf_all.resize(allbands_ks);
202202
rightf_all = vec_rightf_all.data();
203-
Parallel_Common::gatherv_dev<std::complex<FPTYPE>, Device>(rightfact,
204-
perbands_ks,
205-
rightf_all,
206-
ks_fact->nrecv,
207-
ks_fact->displs,
208-
BP_WORLD);
203+
Parallel_Common::gatherv_data(rightfact, perbands_ks, rightf_all, ks_fact->nrecv, ks_fact->displs, BP_WORLD);
209204
}
210205
#endif
211206

@@ -747,10 +742,7 @@ void Sto_EleCond<FPTYPE, Device>::sKG(const int& smear_type,
747742
{
748743
for (int ib = 0; ib < perbands_ks; ++ib)
749744
{
750-
for (int ig = 0; ig < npw; ++ig)
751-
{
752-
kspsi(0, ib, ig) = this->p_psi[0](ib0_ks + ib, ig);
753-
}
745+
cpymem_complex_op()(&kspsi(0, ib, 0), &this->p_psi[0](ib0_ks + ib, 0), npw);
754746
FPTYPE fi = this->stofunc.fd(FPTYPE(en[ib]));
755747
expmtmf_fact[ib] = 1 - fi;
756748
expmtf_fact[ib] = fi;

source/module_hamilt_pw/hamilt_stodft/sto_elecond.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ class Sto_EleCond : protected EleCond<FPTYPE, Device>
2020
using delmem_lcomplex_op = base_device::memory::delete_memory_op<std::complex<lowTYPE>, Device>;
2121
using cpymem_lcomplex_op = base_device::memory::synchronize_memory_op<std::complex<lowTYPE>, Device, Device>;
2222
using castmem_lcomplex_op = base_device::memory::cast_memory_op<std::complex<lowTYPE>, std::complex<FPTYPE>, Device, Device>;
23+
using cpymem_complex_op = base_device::memory::synchronize_memory_op<std::complex<FPTYPE>, Device, Device>;
2324
public:
2425
Sto_EleCond(UnitCell* p_ucell_in,
2526
K_Vectors* p_kv_in,
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
INPUT_PARAMETERS
2+
#Parameters (1.General)
3+
suffix autotest
4+
calculation scf
5+
esolver_type sdft
6+
method_sto 1
7+
8+
nbands 5
9+
nbands_sto all
10+
nche_sto 120
11+
emax_sto 0
12+
emin_sto 0
13+
seed_sto 20000
14+
pseudo_dir ../../PP_ORB
15+
symmetry 1
16+
kpar 1
17+
bndpar 2
18+
device gpu
19+
20+
#Parameters (2.Iteration)
21+
ecutwfc 20
22+
scf_thr 1e-6
23+
scf_nmax 20
24+
25+
26+
#Parameters (3.Basis)
27+
basis_type pw
28+
29+
#Parameters (4.Smearing)
30+
smearing_method fd
31+
smearing_sigma 0.6
32+
33+
#Parameters (5.Mixing)
34+
mixing_type broyden
35+
mixing_beta 0.4
36+
mixing_gg0 0.0
37+
38+
cal_cond 1
39+
cond_fwhm 16
40+
cond_wcut 10
41+
cond_dw 1
42+
cond_dt 0.237464
43+
cond_nonlocal 1
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
K_POINTS
2+
0
3+
Gamma
4+
1 1 1 0 0 0
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
This test is for:
2+
*MDFT
3+
*Si
4+
*kpoints 1*1*1
5+
*complete basis
6+
*kpar 1
7+
*bndpar 2
8+
*cal_cond 1
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
ATOMIC_SPECIES
2+
Si 14 Si.pz-vbc.UPF
3+
4+
LATTICE_CONSTANT
5+
5 // add lattice constant
6+
7+
LATTICE_VECTORS
8+
1 0 0
9+
0 1 0
10+
0 0 1
11+
12+
ATOMIC_POSITIONS
13+
Direct
14+
15+
Si // Element type
16+
0.0 // magnetism
17+
1
18+
0.00 0.00 0.00 1 1 1
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
## w(eV) sigma(Sm^-1) kappa(W(mK)^-1) L12/e(Am^-1) L22/e^2(Wm^-1)
2+
0.5 145953 177.614 -3.70776e+06 1.11017e+08
3+
1.5 144420 175.681 -3.67128e+06 1.0997e+08
4+
2.5 141416 171.896 -3.59972e+06 1.07914e+08
5+
3.5 137057 166.415 -3.49581e+06 1.0493e+08
6+
4.5 131513 159.459 -3.36351e+06 1.01129e+08
7+
5.5 124997 151.302 -3.20782e+06 9.66558e+07
8+
6.5 117754 142.252 -3.03448e+06 9.16732e+07
9+
7.5 110048 132.637 -2.84972e+06 8.63592e+07
10+
8.5 102146 122.779 -2.65987e+06 8.08944e+07
11+
9.5 94302.5 112.982 -2.47103e+06 7.54522e+07

0 commit comments

Comments
 (0)