Skip to content
16 changes: 16 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ option(USE_CUDA_MPI "Enable CUDA-aware MPI" OFF)
option(USE_CUDA_ON_DCU "Enable CUDA on DCU" OFF)
option(USE_ROCM "Enable ROCm" OFF)
option(USE_DSP "Enable DSP" OFF)
option(USE_SW "Enable SW Architecture" OFF)

option(USE_ABACUS_LIBM "Build libmath from source to speed up" OFF)
option(ENABLE_LIBXC "Enable using the LibXC package" OFF)
Expand Down Expand Up @@ -281,6 +282,18 @@ if (USE_DSP)
target_link_libraries(${ABACUS_BIN_NAME} ${MT_HOST_DIR}/hthreads/lib/libhthread_device.a)
target_link_libraries(${ABACUS_BIN_NAME} ${MT_HOST_DIR}/hthreads/lib/libhthread_host.a)
endif()
if (USE_SW)
add_compile_definitions(__SW)
set(SW ON)
include_directories(${SW_MATH}/include)
include_directories(${SW_FFT}/include)

target_link_libraries(${ABACUS_BIN_NAME} ${SW_FFT}/lib/libfftw3.a)
target_link_libraries(${ABACUS_BIN_NAME} ${SW_MATH}/libswfft.a)
target_link_libraries(${ABACUS_BIN_NAME} ${SW_MATH}/libswscalapack.a)
target_link_libraries(${ABACUS_BIN_NAME} ${SW_MATH}/libswlapack.a)
target_link_libraries(${ABACUS_BIN_NAME} ${SW_MATH}/libswblas.a)
endif()

find_package(Threads REQUIRED)
target_link_libraries(${ABACUS_BIN_NAME} Threads::Threads)
Expand Down Expand Up @@ -459,6 +472,9 @@ if(MKLROOT)
if(CMAKE_CXX_COMPILER_ID MATCHES Intel)
list(APPEND math_libs ifcore)
endif()
elseif(USE_SW)
list(APPEND math_libs gfortran)
# SW architecture can only use its own math library
else()
find_package(FFTW3 REQUIRED)
find_package(Lapack REQUIRED)
Expand Down
2 changes: 2 additions & 0 deletions source/source_base/global_function.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ void OUT(std::ofstream &ofs, const std::string &name)
void MAKE_DIR(const std::string &fn)
{
// ModuleBase::TITLE("global_function","MAKE_DIR");
#ifndef __SW
if (GlobalV::MY_RANK == 0)
{
std::stringstream ss;
Expand All @@ -73,6 +74,7 @@ void MAKE_DIR(const std::string &fn)
ModuleBase::WARNING_QUIT("MAKE_DIR", fn);
}
}
#endif
return;
}

Expand Down
7 changes: 7 additions & 0 deletions source/source_basis/module_pw/pw_basis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,7 @@ void PW_Basis::collect_local_pw()
delete[] this->gcar; this->gcar = new ModuleBase::Vector3<double>[this->npw];

ModuleBase::Vector3<double> f;
int gamma_num = 0;
for(int ig = 0 ; ig < this-> npw ; ++ig)
{
int isz = this->ig2isz[ig];
Expand Down Expand Up @@ -173,6 +174,12 @@ void PW_Basis::collect_local_pw()
if(this->gg[ig] < 1e-8)
{
this->ig_gge0 = ig;
++gamma_num;
if (gamma_num > 1)
{
ModuleBase::WARNING_QUIT("PW_Basis::collect_local_pw",
"More than one gamma point found in the plane wave basis set.\n");
}
}
}
return;
Expand Down
2 changes: 1 addition & 1 deletion source/source_basis/module_pw/pw_gatherscatter.h
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ void PW_Basis::gathers_scatterp(std::complex<T>* in, std::complex<T>* out) const
outp[iz] = inp[iz];
}
}
ModuleBase::timer::tick(this->classname, "gathers_scatterp");
// ModuleBase::timer::tick(this->classname, "gathers_scatterp");
return;
}
#ifdef __MPI
Expand Down
2 changes: 1 addition & 1 deletion source/source_cell/module_symmetry/symm_rho.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -285,7 +285,7 @@ void Symmetry::rhog_symmetry(std::complex<double> *rhogtot,
//assert(rot_count<=nrotk);
}//end if section
}//end c_index loop
sum /= rot_count;
if (rot_count!=0) sum/= rot_count;
for (int isym = 0; isym < rot_count; ++isym)
{
rhogtot[ipw_record[isym]] = sum/gphase_record[isym];
Expand Down
28 changes: 28 additions & 0 deletions source/source_cell/pseudo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,34 @@ pseudo::~pseudo()
{
}

void pseudo::check_betar()
{
bool min_flag = false;
for (int ib = 0; ib < nbeta; ib++)
{
for (int ir = 0; ir < mesh; ir++)
{
// Get the bit representation of the double
uint64_t bits = *(uint64_t*)&betar(ib, ir);
// Extract exponent field (bits 52-62)
uint64_t exponent = (bits >> 52) & 0x7FF;
// Define exponent threshold for 1e-30
// Calculated as: bias + floor(log2(1e-30))
// Where bias = 1023 and log2(1e-30) ≈ -99.657
// Thus threshold is approximately 923
if ((exponent <= 923))
{
min_flag = true;
betar(ib, ir) = 0.0;
}
}
}
if (min_flag)
{
std::cout << "WARNING: some of potential function is set to zero cause of less than 1e-30.\n";
}
}

void pseudo::print_pseudo(std::ofstream& ofs)
{
print_pseudo_vl(ofs);
Expand Down
6 changes: 6 additions & 0 deletions source/source_cell/pseudo.h
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,12 @@ class pseudo
// uspp
ModuleBase::realArray qfuncl; // qfuncl(2*lmax+1,nbeta*(nbeta+1)/2,mesh) Q_{mu,nu}(|r|) function for |r|> r_L
ModuleBase::matrix qqq; // qqq(nbeta,nbeta) q_{mu,nu}
/**
* @brief Check the input data for non-normal numbers in the betar.
* Subsequent values following non-normal numbers will be reset to zero
* to prevent potential computational issues arising from invalid data.
*/
void check_betar();

void print_pseudo_h(std::ofstream& ofs);
void print_pseudo_atom(std::ofstream& ofs);
Expand Down
3 changes: 2 additions & 1 deletion source/source_cell/read_pp_upf100.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -319,7 +319,8 @@ void Pseudopot_upf::read_pseudo_nl(std::ifstream& ifs, Atom_pseudo& pp)
ModuleBase::GlobalFunc::SCAN_END(ifs, "</PP_BETA>");
pp.kkbeta = (this->kbeta[i] > pp.kkbeta) ? this->kbeta[i] : pp.kkbeta;
}

//check the betar for non-normal number
pp.check_betar();
// DIJ
ModuleBase::GlobalFunc::SCAN_BEGIN(ifs, "<PP_DIJ>", false);
ModuleBase::GlobalFunc::READ_VALUE(ifs, this->nd); // nl_4
Expand Down
3 changes: 2 additions & 1 deletion source/source_cell/read_pp_upf201.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -530,7 +530,8 @@ void Pseudopot_upf::read_pseudo_upf201_nonlocal(std::ifstream& ifs, Atom_pseudo&
word = "</PP_BETA." + std::to_string(ib + 1) + ">";
ModuleBase::GlobalFunc::SCAN_END(ifs, word);
}

//check the betar for non-normal number
pp.check_betar();
// Read the hamiltonian terms D_ij
if (ModuleBase::GlobalFunc::SCAN_BEGIN(ifs, "<PP_DIJ", true, false))
{
Expand Down
7 changes: 5 additions & 2 deletions source/source_estate/module_charge/charge.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -279,8 +279,11 @@ void Charge::atomic_rho(const int spin_number_need,
for (int ir = 0; ir < mesh; ++ir)
{
double r2 = atom->ncpp.r[ir] * atom->ncpp.r[ir];
rhoatm[ir] = atom->ncpp.rho_at[ir] / ModuleBase::FOUR_PI / r2;
}
if (r2!=0)
{
rhoatm[ir] = atom->ncpp.rho_at[ir] / ModuleBase::FOUR_PI / r2;
}
}
rhoatm[0]
= pow((rhoatm[2] / rhoatm[1]), atom->ncpp.r[1] / (atom->ncpp.r[2] - atom->ncpp.r[1])); // zws add, sunliang updated 2024-03-04
if (rhoatm[0] < 1e-12)
Expand Down
40 changes: 20 additions & 20 deletions source/source_estate/module_charge/charge_mixing_residual.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -129,15 +129,13 @@ double Charge_Mixing::inner_product_recip_rho(std::complex<double>* rho1, std::c
auto part_of_noncolin = [&]()
{
double sum = 0.0;
const int ig0 = this->rhopw->ig_gge0;
#ifdef _OPENMP
#pragma omp parallel for reduction(+ : sum)
#endif
for (int ig = 0; ig < this->rhopw->npw; ++ig)
{
if (this->rhopw->gg[ig] < 1e-8)
{
continue;
}
if (ig == ig0) {continue;}
sum += (conj(rhog1[0][ig]) * rhog2[0][ig]).real() / this->rhopw->gg[ig];
}
sum *= fac;
Expand All @@ -152,14 +150,13 @@ double Charge_Mixing::inner_product_recip_rho(std::complex<double>* rho1, std::c

case 2: {
// (1) First part of density error.
const int ig0 = this->rhopw->ig_gge0;
#ifdef _OPENMP
#pragma omp parallel for reduction(+ : sum)
#endif
for (int ig = 0; ig < this->rhopw->npw; ++ig)
{
if (this->rhopw->gg[ig] < 1e-8) {
continue;
}
if (ig == ig0) {continue;}
sum += (conj(rhog1[0][ig] + rhog1[1][ig]) * (rhog2[0][ig] + rhog2[1][ig])).real() / this->rhopw->gg[ig];
}
sum *= fac;
Expand Down Expand Up @@ -203,19 +200,19 @@ double Charge_Mixing::inner_product_recip_rho(std::complex<double>* rho1, std::c
} else
{
// another part with magnetization
const int ig0 = this->rhopw->ig_gge0;
#ifdef _OPENMP
#pragma omp parallel for reduction(+ : sum)
#endif
for (int ig = 0; ig < this->rhopw->npw; ig++)
{
if (ig == this->rhopw->ig_gge0)
if (ig == ig0)
{
continue;
}
sum += (conj(rhog1[0][ig]) * rhog2[0][ig]).real() / this->rhopw->gg[ig];
}
sum *= fac;
const int ig0 = this->rhopw->ig_gge0;
if (ig0 > 0)
{
sum += fac2
Expand Down Expand Up @@ -298,14 +295,16 @@ double Charge_Mixing::inner_product_recip_hartree(std::complex<double>* rhog1, s
auto part_of_rho = [&]()
{
double sum = 0.0;
const int ig0 = this->rhopw->ig_gge0;
#ifdef _OPENMP
#pragma omp parallel for reduction(+ : sum)
#endif
for (int ig = 0; ig < this->rhopw->npw; ++ig)
{
if (this->rhopw->gg[ig] < 1e-8) {
if (ig == ig0)
{
continue;
}
}
sum += (conj(rhog1[ig]) * rhog2[ig]).real() / this->rhopw->gg[ig];
}
sum *= fac;
Expand All @@ -319,14 +318,16 @@ double Charge_Mixing::inner_product_recip_hartree(std::complex<double>* rhog1, s
else if (PARAM.inp.nspin==2)
{
// charge density part
const int ig0 = this->rhopw->ig_gge0;
#ifdef _OPENMP
#pragma omp parallel for reduction(+ : sum)
#endif
for (int ig = 0; ig < this->rhopw->npw; ++ig)
{
if (this->rhopw->gg[ig] < 1e-8) {
if (ig == ig0)
{
continue;
}
}
sum += (conj(rhog1[ig]) * (rhog2[ig])).real() / this->rhopw->gg[ig];
}
sum *= fac;
Expand Down Expand Up @@ -369,18 +370,16 @@ double Charge_Mixing::inner_product_recip_hartree(std::complex<double>* rhog1, s
else if (this->mixing_angle <= 0)
{
// sum for tradtional mixing
const int ig0 = this->rhopw->ig_gge0;
#ifdef _OPENMP
#pragma omp parallel for reduction(+ : sum)
#endif
for (int ig = 0; ig < this->rhopw->npw; ig++)
{
if (ig == this->rhopw->ig_gge0) {
continue;
}
if (ig == ig0) {continue;}
sum += (conj(rhog1[ig]) * rhog2[ig]).real() / this->rhopw->gg[ig];
}
sum *= fac;
const int ig0 = this->rhopw->ig_gge0;
if (ig0 > 0)
{
sum += fac2
Expand Down Expand Up @@ -408,18 +407,19 @@ double Charge_Mixing::inner_product_recip_hartree(std::complex<double>* rhog1, s
else if (this->mixing_angle > 0)
{
// sum for angle mixing
const int ig0 = this->rhopw->ig_gge0;
#ifdef _OPENMP
#pragma omp parallel for reduction(+ : sum)
#endif
for (int ig = 0; ig < this->rhopw->npw; ig++)
{
if (ig == this->rhopw->ig_gge0) {
if (ig == ig0)
{
continue;
}
}
sum += (conj(rhog1[ig]) * rhog2[ig]).real() / this->rhopw->gg[ig];
}
sum *= fac;
const int ig0 = this->rhopw->ig_gge0;
if (ig0 > 0)
{
sum += fac2
Expand Down
12 changes: 7 additions & 5 deletions source/source_estate/module_pot/H_Hartree_pw.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,18 +44,20 @@ ModuleBase::matrix H_Hartree_pw::v_hartree(const UnitCell &cell,
double ehart = 0.0;

std::vector<std::complex<double>> vh_g(rho_basis->npw);
const int ig0 = rho_basis->ig_gge0;
#ifdef _OPENMP
#pragma omp parallel for reduction(+:ehart)
#endif
for (int ig = 0; ig < rho_basis->npw; ig++)
{
if (rho_basis->gg[ig] >= 1.0e-8) // LiuXh 20180410
if (ig == ig0)
{
const double fac = ModuleBase::e2 * ModuleBase::FOUR_PI / (cell.tpiba2 * rho_basis->gg[ig]);

ehart += (conj(Porter[ig]) * Porter[ig]).real() * fac;
vh_g[ig] = fac * Porter[ig];
continue; // skip G=0
}
const double fac = ModuleBase::e2 * ModuleBase::FOUR_PI / (cell.tpiba2 * rho_basis->gg[ig]);
ehart += (conj(Porter[ig]) * Porter[ig]).real() * fac;
vh_g[ig] = fac * Porter[ig];

}

Parallel_Reduce::reduce_pool(ehart);
Expand Down
4 changes: 2 additions & 2 deletions source/source_hamilt/module_ewald/H_Ewald_pw.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -124,10 +124,10 @@ double H_Ewald_pw::compute_ewald(const UnitCell& cell,
fact = 1.0;

//GlobalV::ofs_running << "\n pwb.gstart = " << pwb.gstart << std::endl;

const int ig0 = rho_basis->ig_gge0;
for (int ig = 0; ig < rho_basis->npw; ig++)
{
if(ig == rho_basis->ig_gge0)
if(ig == ig0)
{
continue;
}
Expand Down
1 change: 1 addition & 0 deletions source/source_hamilt/module_surchem/cal_pseudo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ void surchem::gauss_charge(const UnitCell& cell,
Structure_Factor* sf)
{
sf->setup_structure_factor(&cell, pgrid, rho_basis); // call strucFac(ntype,ngmc)
const int ig0 = rho_basis->ig_gge0; // G=0 index
for (int it = 0; it < cell.ntype; it++)
{
double RCS = GetAtom.atom_RCS[cell.atoms[it].ncpp.psd];
Expand Down
13 changes: 6 additions & 7 deletions source/source_hamilt/module_surchem/cal_totn.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,9 @@ void surchem::cal_totn(const UnitCell& cell,
ModuleBase::GlobalFunc::ZEROS(vloc_g, rho_basis->npw);

rho_basis->real2recip(vlocal, vloc_g); // now n is vloc in Recispace
const int ig0 = rho_basis->ig_gge0; // G=0 index
for (int ig = 0; ig < rho_basis->npw; ig++) {
if(ig==rho_basis->ig_gge0)
if(ig==ig0)
{
N[ig] = Porter_g[ig];
continue;
Expand All @@ -39,17 +40,15 @@ void surchem::induced_charge(const UnitCell& cell, const ModulePW::PW_Basis* rho
std::complex<double> *induced_rhog = new std::complex<double>[rho_basis->npw];
ModuleBase::GlobalFunc::ZEROS(induced_rhog, rho_basis->npw);
rho_basis->real2recip(delta_phi, delta_phig);
const int ig0 = rho_basis->ig_gge0; // G=0 index
for (int ig = 0; ig < rho_basis->npw; ig++)
{
if(rho_basis->ig_gge0 == ig)
if(ig==ig0)
{
continue;
}
else
{
const double fac = ModuleBase::e2 * ModuleBase::FOUR_PI /(cell.tpiba2 * rho_basis->gg[ig]);
induced_rhog[ig] = -delta_phig[ig] / fac;
}
const double fac = ModuleBase::e2 * ModuleBase::FOUR_PI /(cell.tpiba2 * rho_basis->gg[ig]);
induced_rhog[ig] = -delta_phig[ig] / fac;
}

rho_basis->recip2real(induced_rhog, induced_rho);
Expand Down
Loading
Loading