Skip to content

Commit 10c69d7

Browse files
Merge branch 'develop' into prbranch
2 parents fc7a7a3 + 32ba8d4 commit 10c69d7

File tree

8 files changed

+54
-46
lines changed

8 files changed

+54
-46
lines changed

source/module_base/blas_connector.cpp

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -69,17 +69,17 @@ float BlasConnector::dot( const int n, const float *X, const int incX, const flo
6969
{
7070
if (device_type == base_device::AbacusDevice_t::CpuDevice) {
7171
return sdot_(&n, X, &incX, Y, &incY);
72+
}
7273
return sdot_(&n, X, &incX, Y, &incY);
7374
}
74-
}
7575

7676
double BlasConnector::dot( const int n, const double *X, const int incX, const double *Y, const int incY, base_device::AbacusDevice_t device_type)
7777
{
7878
if (device_type == base_device::AbacusDevice_t::CpuDevice) {
7979
return ddot_(&n, X, &incX, Y, &incY);
80+
}
8081
return ddot_(&n, X, &incX, Y, &incY);
8182
}
82-
}
8383

8484
// C = a * A.? * B.? + b * C
8585
void BlasConnector::gemm(const char transa, const char transb, const int m, const int n, const int k,
@@ -196,39 +196,39 @@ float BlasConnector::nrm2( const int n, const float *X, const int incX, base_dev
196196
{
197197
if (device_type == base_device::AbacusDevice_t::CpuDevice) {
198198
return snrm2_( &n, X, &incX );
199+
}
199200
return snrm2_( &n, X, &incX );
200201
}
201-
}
202202

203203

204204
double BlasConnector::nrm2( const int n, const double *X, const int incX, base_device::AbacusDevice_t device_type )
205205
{
206206
if (device_type == base_device::AbacusDevice_t::CpuDevice) {
207207
return dnrm2_( &n, X, &incX );
208+
}
208209
return dnrm2_( &n, X, &incX );
209210
}
210-
}
211211

212212

213213
double BlasConnector::nrm2( const int n, const std::complex<double> *X, const int incX, base_device::AbacusDevice_t device_type )
214214
{
215215
if (device_type == base_device::AbacusDevice_t::CpuDevice) {
216216
return dznrm2_( &n, X, &incX );
217+
}
217218
return dznrm2_( &n, X, &incX );
218219
}
219-
}
220220

221221
// copies a into b
222222
void BlasConnector::copy(const long n, const double *a, const int incx, double *b, const int incy, base_device::AbacusDevice_t device_type)
223223
{
224224
if (device_type == base_device::AbacusDevice_t::CpuDevice) {
225225
dcopy_(&n, a, &incx, b, &incy);
226-
}
226+
}
227227
}
228228

229229
void BlasConnector::copy(const long n, const std::complex<double> *a, const int incx, std::complex<double> *b, const int incy, base_device::AbacusDevice_t device_type)
230230
{
231231
if (device_type == base_device::AbacusDevice_t::CpuDevice) {
232232
zcopy_(&n, a, &incx, b, &incy);
233-
}
233+
}
234234
}

source/module_base/lapack_connector.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -61,10 +61,10 @@ extern "C"
6161

6262

6363
void dsygvx_(const int* itype, const char* jobz, const char* range, const char* uplo,
64-
const int* n, double* A, const int* lda, double* B, const int* ldb,
65-
const double* vl, const double* vu, const int* il, const int* iu,
66-
const double* abstol, const int* m, double* w, double* Z, const int* ldz,
67-
double* work, int* lwork, int*iwork, int* ifail, int* info);
64+
const int* n, double* A, const int* lda, double* B, const int* ldb,
65+
const double* vl, const double* vu, const int* il, const int* iu,
66+
const double* abstol, const int* m, double* w, double* Z, const int* ldz,
67+
double* work, const int* lwork, int* iwork, int* ifail, int* info);
6868

6969
void chegvx_(const int* itype,const char* jobz,const char* range,const char* uplo,
7070
const int* n,std::complex<float> *a,const int* lda,std::complex<float> *b,

source/module_base/lapack_wrapper.h

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
#ifndef LAPACK_HPP
22
#define LAPACK_HPP
3-
3+
#include <iostream>
44
extern "C"
55
{
66
// =================================================================================
@@ -49,10 +49,10 @@ extern "C"
4949
// =================================================================================
5050
// gvx
5151
void dsygvx_(const int* itype, const char* jobz, const char* range, const char* uplo,
52-
const int* n, double* A, const int* lda, double* B, const int* ldb,
53-
const double* vl, const double* vu, const int* il, const int* iu,
54-
const double* abstol, const int* m, double* w, double* Z, const int* ldz,
55-
double* work, int* lwork, int*iwork, int* ifail, int* info);
52+
const int* n, double* A, const int* lda, double* B, const int* ldb,
53+
const double* vl, const double* vu, const int* il, const int* iu,
54+
const double* abstol, const int* m, double* w, double* Z, const int* ldz,
55+
double* work, const int* lwork, int* iwork, int* ifail, int* info);
5656

5757
void chegvx_(const int* itype,const char* jobz,const char* range,const char* uplo,
5858
const int* n,std::complex<float> *a,const int* lda,std::complex<float> *b,
@@ -424,8 +424,8 @@ class LapackWrapper
424424
int* ifail,
425425
int& info)
426426
{
427-
// dsygvx_(&itype, &jobz, &range, &uplo, &n, a, &lda, b, &ldb, &vl,
428-
// &vu, &il,&iu, &abstol, &m, w, z, &ldz, work, &lwork, rwork, iwork, ifail, &info);
427+
dsygvx_(&itype, &jobz, &range, &uplo, &n, a, &lda, b, &ldb, &vl,
428+
&vu, &il, &iu, &abstol, &m, w, z, &ldz, work, &lwork, iwork, ifail, &info);
429429
}
430430

431431
// wrap function of fortran lapack routine xhegvx ( pointer version ).

source/module_elecstate/module_charge/charge_mixing.cpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1708,11 +1708,6 @@ bool Charge_Mixing::if_scf_oscillate(const int iteration, const double drho, con
17081708
ModuleBase::TITLE("Charge_Mixing", "if_scf_oscillate");
17091709
ModuleBase::timer::tick("Charge_Mixing", "if_scf_oscillate");
17101710

1711-
if(threshold >= 0) // close the function
1712-
{
1713-
return false;
1714-
}
1715-
17161711
if(this->_drho_history.size() == 0)
17171712
{
17181713
this->_drho_history.resize(PARAM.inp.scf_nmax);
@@ -1721,8 +1716,13 @@ bool Charge_Mixing::if_scf_oscillate(const int iteration, const double drho, con
17211716
// add drho into history
17221717
this->_drho_history[iteration - 1] = drho;
17231718

1719+
if(threshold >= 0) // close the function
1720+
{
1721+
return false;
1722+
}
1723+
17241724
// check if the history is long enough
1725-
if(iteration < iternum_used)
1725+
if(iteration < iternum_used + this->mixing_restart_last)
17261726
{
17271727
return false;
17281728
}

source/module_elecstate/module_charge/charge_mixing.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -102,8 +102,9 @@ class Charge_Mixing
102102
Base_Mixing::Mixing* get_mixing() const {return mixing;}
103103

104104
// for mixing restart
105-
int mixing_restart_step = 0; //which step to restart mixing during SCF
105+
int mixing_restart_step = 0; //which step to restart mixing during SCF, always equal to scf_nmax except for the mixing restart
106106
int mixing_restart_count = 0; // the number of restart mixing during SCF. Do not set mixing_restart_count as bool since I want to keep some flexibility in the future
107+
int mixing_restart_last = 0; // the label of mixing restart step, store the step number of the last mixing restart
107108

108109
// to calculate the slope of drho curve during SCF, which is used to determine if SCF oscillate
109110
bool if_scf_oscillate(const int iteration, const double drho, const int iternum_used, const double threshold);

source/module_esolver/esolver_ks.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -681,6 +681,7 @@ void ESolver_KS<T, Device>::iter_finish(const int istep, int& iter)
681681
// notice for restart
682682
if (PARAM.inp.mixing_restart > 0 && iter == this->p_chgmix->mixing_restart_step - 1 && iter != PARAM.inp.scf_nmax)
683683
{
684+
this->p_chgmix->mixing_restart_last = iter;
684685
std::cout << " SCF restart after this step!" << std::endl;
685686
}
686687
}

source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/dftu_lcao.cpp

100755 → 100644
Lines changed: 21 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -47,9 +47,6 @@ void hamilt::DFTU<hamilt::OperatorLCAO<TK, TR>>::initialize_HR(Grid_Driver* Grid
4747
ModuleBase::TITLE("DFTU", "initialize_HR");
4848
ModuleBase::timer::tick("DFTU", "initialize_HR");
4949

50-
auto* paraV = this->hR->get_paraV();// get parallel orbitals from HR
51-
// TODO: if paraV is nullptr, AtomPair can not use paraV for constructor, I will repair it in the future.
52-
5350
this->adjs_all.clear();
5451
this->adjs_all.reserve(this->ucell->nat);
5552
for (int iat0 = 0; iat0 < ucell->nat; iat0++)
@@ -58,8 +55,9 @@ void hamilt::DFTU<hamilt::OperatorLCAO<TK, TR>>::initialize_HR(Grid_Driver* Grid
5855
int T0, I0;
5956
ucell->iat2iait(iat0, &I0, &T0);
6057
const int target_L = this->dftu->orbital_corr[T0];
61-
if (target_L == -1)
58+
if (target_L == -1) {
6259
continue;
60+
}
6361

6462
AdjacentAtomInfo adjs;
6563
GridD->Find_atom(*ucell, tau0, T0, I0, &adjs);
@@ -92,8 +90,9 @@ template <typename TK, typename TR>
9290
void hamilt::DFTU<hamilt::OperatorLCAO<TK, TR>>::cal_nlm_all(const Parallel_Orbitals* paraV)
9391
{
9492
ModuleBase::TITLE("DFTU", "cal_nlm_all");
95-
if (this->precal_nlm_done)
93+
if (this->precal_nlm_done) {
9694
return;
95+
}
9796
ModuleBase::timer::tick("DFTU", "cal_nlm_all");
9897
nlm_tot.resize(this->ucell->nat);
9998
const int npol = this->ucell->get_npol();
@@ -104,8 +103,9 @@ void hamilt::DFTU<hamilt::OperatorLCAO<TK, TR>>::cal_nlm_all(const Parallel_Orbi
104103
int T0, I0;
105104
ucell->iat2iait(iat0, &I0, &T0);
106105
const int target_L = this->dftu->orbital_corr[T0];
107-
if (target_L == -1)
106+
if (target_L == -1) {
108107
continue;
108+
}
109109
const int tlp1 = 2 * target_L + 1;
110110
AdjacentAtomInfo& adjs = this->adjs_all[atom_index++];
111111

@@ -144,7 +144,7 @@ void hamilt::DFTU<hamilt::OperatorLCAO<TK, TR>>::cal_nlm_all(const Parallel_Orbi
144144
const int M1 = (m1 % 2 == 0) ? -m1 / 2 : (m1 + 1) / 2;
145145

146146
ModuleBase::Vector3<double> dtau = tau0 - tau1;
147-
intor_->snap(T1, L1, N1, M1, T0, dtau * this->ucell->lat0, 0 /*cal_deri*/, nlm);
147+
intor_->snap(T1, L1, N1, M1, T0, dtau * this->ucell->lat0, false /*cal_deri*/, nlm);
148148
// select the elements of nlm with target_L
149149
for (int iw = 0; iw < this->ucell->atoms[T0].nw; iw++)
150150
{
@@ -178,8 +178,9 @@ void hamilt::DFTU<hamilt::OperatorLCAO<TK, TR>>::contributeHR()
178178
else
179179
{
180180
// will update this->dftu->locale and this->dftu->EU
181-
if (this->current_spin == 0)
181+
if (this->current_spin == 0) {
182182
this->dftu->EU = 0.0;
183+
}
183184
}
184185
ModuleBase::timer::tick("DFTU", "contributeHR");
185186

@@ -196,8 +197,9 @@ void hamilt::DFTU<hamilt::OperatorLCAO<TK, TR>>::contributeHR()
196197
int T0, I0;
197198
ucell->iat2iait(iat0, &I0, &T0);
198199
const int target_L = this->dftu->orbital_corr[T0];
199-
if (target_L == -1)
200+
if (target_L == -1) {
200201
continue;
202+
}
201203
const int tlp1 = 2 * target_L + 1;
202204
AdjacentAtomInfo& adjs = this->adjs_all[atom_index++];
203205

@@ -241,8 +243,9 @@ void hamilt::DFTU<hamilt::OperatorLCAO<TK, TR>>::contributeHR()
241243
// save occ to dftu
242244
for (int i = 0; i < occ.size(); i++)
243245
{
244-
if (this->nspin == 1)
246+
if (this->nspin == 1) {
245247
occ[i] *= 0.5;
248+
}
246249
this->dftu->locale[iat0][target_L][0][this->current_spin].c[i] = occ[i];
247250
}
248251
}
@@ -297,17 +300,20 @@ void hamilt::DFTU<hamilt::OperatorLCAO<TK, TR>>::contributeHR()
297300
}
298301

299302
// energy correction for NSPIN=1
300-
if (this->nspin == 1)
303+
if (this->nspin == 1) {
301304
this->dftu->EU *= 2.0;
305+
}
302306
// for readin onsite_dm, set initialed_locale to false to avoid using readin locale in next iteration
303-
if (this->current_spin == this->nspin - 1 || this->nspin == 4)
307+
if (this->current_spin == this->nspin - 1 || this->nspin == 4) {
304308
this->dftu->initialed_locale = false;
309+
}
305310

306311
// update this->current_spin: only nspin=2 iterate change it between 0 and 1
307312
// the key point is only nspin=2 calculate spin-up and spin-down separately,
308313
// and won't calculate spin-up twice without spin-down
309-
if (this->nspin == 2)
314+
if (this->nspin == 2) {
310315
this->current_spin = 1 - this->current_spin;
316+
}
311317

312318
ModuleBase::timer::tick("DFTU", "contributeHR");
313319
}
@@ -488,7 +494,7 @@ void hamilt::DFTU<hamilt::OperatorLCAO<TK, TR>>::cal_v_of_u(const std::vector<do
488494
{
489495
// calculate the local matrix
490496
int spin_fold = occ.size() / m_size / m_size;
491-
if (spin_fold < 4)
497+
if (spin_fold < 4) {
492498
for (int is = 0; is < spin_fold; ++is)
493499
{
494500
int start = is * m_size * m_size;
@@ -501,7 +507,7 @@ void hamilt::DFTU<hamilt::OperatorLCAO<TK, TR>>::cal_v_of_u(const std::vector<do
501507
}
502508
}
503509
}
504-
else
510+
} else
505511
{
506512
for (int m1 = 0; m1 < m_size; m1++)
507513
{

source/module_hsolver/kernels/cuda/math_kernel_op.cu

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
namespace hsolver
1313
{
1414
const int warp_size = 32;
15-
const unsigned int full_mask = 0xffffffff;
15+
// const unsigned int full_mask = 0xffffffff;
1616
const int thread_per_block = 256;
1717
}
1818

@@ -65,11 +65,11 @@ void destoryBLAShandle(){
6565
}
6666
}
6767

68-
template <typename FPTYPE>
69-
__forceinline__ __device__ void warp_reduce(FPTYPE& val) {
70-
for (int offset = 16; offset > 0; offset >>= 1)
71-
val += __shfl_down_sync(full_mask, val, offset);
72-
}
68+
// template <typename FPTYPE>
69+
// __forceinline__ __device__ void warp_reduce(FPTYPE& val) {
70+
// for (int offset = 16; offset > 0; offset >>= 1)
71+
// val += __shfl_down_sync(full_mask, val, offset);
72+
// }
7373

7474
template <typename Real>
7575
__global__ void line_minimize_with_block(

0 commit comments

Comments
 (0)