Skip to content
16 changes: 16 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ option(USE_CUDA_MPI "Enable CUDA-aware MPI" OFF)
option(USE_CUDA_ON_DCU "Enable CUDA on DCU" OFF)
option(USE_ROCM "Enable ROCm" OFF)
option(USE_DSP "Enable DSP" OFF)
option(USE_SW "Enable SW Architecture" OFF)

option(USE_ABACUS_LIBM "Build libmath from source to speed up" OFF)
option(ENABLE_LIBXC "Enable using the LibXC package" OFF)
Expand Down Expand Up @@ -281,6 +282,18 @@ if (USE_DSP)
target_link_libraries(${ABACUS_BIN_NAME} ${MT_HOST_DIR}/hthreads/lib/libhthread_device.a)
target_link_libraries(${ABACUS_BIN_NAME} ${MT_HOST_DIR}/hthreads/lib/libhthread_host.a)
endif()
if (USE_SW)
add_compile_definitions(__SW)
set(SW ON)
include_directories(${SW_MATH}/include)
include_directories(${SW_FFT}/include)

target_link_libraries(${ABACUS_BIN_NAME} ${SW_FFT}/lib/libfftw3.a)
target_link_libraries(${ABACUS_BIN_NAME} ${SW_MATH}/libswfft.a)
target_link_libraries(${ABACUS_BIN_NAME} ${SW_MATH}/libswscalapack.a)
target_link_libraries(${ABACUS_BIN_NAME} ${SW_MATH}/libswlapack.a)
target_link_libraries(${ABACUS_BIN_NAME} ${SW_MATH}/libswblas.a)
endif()

find_package(Threads REQUIRED)
target_link_libraries(${ABACUS_BIN_NAME} Threads::Threads)
Expand Down Expand Up @@ -459,6 +472,9 @@ if(MKLROOT)
if(CMAKE_CXX_COMPILER_ID MATCHES Intel)
list(APPEND math_libs ifcore)
endif()
elseif(USE_SW)
list(APPEND math_libs gfortran)
# SW architecture can only use its own math library
else()
find_package(FFTW3 REQUIRED)
find_package(Lapack REQUIRED)
Expand Down
2 changes: 2 additions & 0 deletions source/source_base/global_function.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ void OUT(std::ofstream &ofs, const std::string &name)
void MAKE_DIR(const std::string &fn)
{
// ModuleBase::TITLE("global_function","MAKE_DIR");
#ifndef __SW
if (GlobalV::MY_RANK == 0)
{
std::stringstream ss;
Expand All @@ -73,6 +74,7 @@ void MAKE_DIR(const std::string &fn)
ModuleBase::WARNING_QUIT("MAKE_DIR", fn);
}
}
#endif
return;
}

Expand Down
2 changes: 1 addition & 1 deletion source/source_basis/module_pw/pw_gatherscatter.h
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ void PW_Basis::gathers_scatterp(std::complex<T>* in, std::complex<T>* out) const
outp[iz] = inp[iz];
}
}
ModuleBase::timer::tick(this->classname, "gathers_scatterp");
// ModuleBase::timer::tick(this->classname, "gathers_scatterp");
return;
}
#ifdef __MPI
Expand Down
2 changes: 1 addition & 1 deletion source/source_cell/module_symmetry/symm_rho.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -285,7 +285,7 @@ void Symmetry::rhog_symmetry(std::complex<double> *rhogtot,
//assert(rot_count<=nrotk);
}//end if section
}//end c_index loop
sum /= rot_count;
if (rot_count!=0) sum/= rot_count;
for (int isym = 0; isym < rot_count; ++isym)
{
rhogtot[ipw_record[isym]] = sum/gphase_record[isym];
Expand Down
7 changes: 5 additions & 2 deletions source/source_estate/module_charge/charge.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -279,8 +279,11 @@ void Charge::atomic_rho(const int spin_number_need,
for (int ir = 0; ir < mesh; ++ir)
{
double r2 = atom->ncpp.r[ir] * atom->ncpp.r[ir];
rhoatm[ir] = atom->ncpp.rho_at[ir] / ModuleBase::FOUR_PI / r2;
}
if (r2!=0)
{
rhoatm[ir] = atom->ncpp.rho_at[ir] / ModuleBase::FOUR_PI / r2;
}
}
rhoatm[0]
= pow((rhoatm[2] / rhoatm[1]), atom->ncpp.r[1] / (atom->ncpp.r[2] - atom->ncpp.r[1])); // zws add, sunliang updated 2024-03-04
if (rhoatm[0] < 1e-12)
Expand Down
6 changes: 3 additions & 3 deletions source/source_estate/module_pot/H_Hartree_pw.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,18 +44,18 @@ ModuleBase::matrix H_Hartree_pw::v_hartree(const UnitCell &cell,
double ehart = 0.0;

std::vector<std::complex<double>> vh_g(rho_basis->npw);
const int ig0 = rho_basis->ig_gge0;
#ifdef _OPENMP
#pragma omp parallel for reduction(+:ehart)
#endif
for (int ig = 0; ig < rho_basis->npw; ig++)
{
if (rho_basis->gg[ig] >= 1.0e-8) // LiuXh 20180410
{
if (ig == ig0) continue; // skip G=0
const double fac = ModuleBase::e2 * ModuleBase::FOUR_PI / (cell.tpiba2 * rho_basis->gg[ig]);

ehart += (conj(Porter[ig]) * Porter[ig]).real() * fac;
vh_g[ig] = fac * Porter[ig];
}

}

Parallel_Reduce::reduce_pool(ehart);
Expand Down
4 changes: 2 additions & 2 deletions source/source_hamilt/module_ewald/H_Ewald_pw.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -124,10 +124,10 @@ double H_Ewald_pw::compute_ewald(const UnitCell& cell,
fact = 1.0;

//GlobalV::ofs_running << "\n pwb.gstart = " << pwb.gstart << std::endl;

const int ig0 = rho_basis->ig_gge0;
for (int ig = 0; ig < rho_basis->npw; ig++)
{
if(ig == rho_basis->ig_gge0)
if(ig == ig0)
{
continue;
}
Expand Down
1 change: 1 addition & 0 deletions source/source_hamilt/module_surchem/cal_pseudo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ void surchem::gauss_charge(const UnitCell& cell,
Structure_Factor* sf)
{
sf->setup_structure_factor(&cell, pgrid, rho_basis); // call strucFac(ntype,ngmc)
const int ig0 = rho_basis->ig_gge0; // G=0 index
for (int it = 0; it < cell.ntype; it++)
{
double RCS = GetAtom.atom_RCS[cell.atoms[it].ncpp.psd];
Expand Down
6 changes: 4 additions & 2 deletions source/source_hamilt/module_surchem/cal_totn.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,9 @@ void surchem::cal_totn(const UnitCell& cell,
ModuleBase::GlobalFunc::ZEROS(vloc_g, rho_basis->npw);

rho_basis->real2recip(vlocal, vloc_g); // now n is vloc in Recispace
const int ig0 = rho_basis->ig_gge0; // G=0 index
for (int ig = 0; ig < rho_basis->npw; ig++) {
if(ig==rho_basis->ig_gge0)
if(ig==ig0)
{
N[ig] = Porter_g[ig];
continue;
Expand All @@ -39,9 +40,10 @@ void surchem::induced_charge(const UnitCell& cell, const ModulePW::PW_Basis* rho
std::complex<double> *induced_rhog = new std::complex<double>[rho_basis->npw];
ModuleBase::GlobalFunc::ZEROS(induced_rhog, rho_basis->npw);
rho_basis->real2recip(delta_phi, delta_phig);
const int ig0 = rho_basis->ig_gge0; // G=0 index
for (int ig = 0; ig < rho_basis->npw; ig++)
{
if(rho_basis->ig_gge0 == ig)
if(ig==ig0)
{
continue;
}
Expand Down
6 changes: 4 additions & 2 deletions source/source_io/read_input.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -191,19 +191,21 @@ void ReadInput::create_directory(const Parameter& param)
}
// NOTE: "make_dir_out" must be called by all processes!!!
// Maybe it is not good, because only rank 0 can create the directory.
#ifndef __SW
ModuleBase::Global_File::make_dir_out(param.input.suffix,
param.input.calculation,
out_dir,
this->rank,
param.input.mdp.md_restart,
param.input.out_alllog); // xiaohui add 2013-09-01

#endif
const std::string ss = "test -d " + PARAM.inp.read_file_dir;
#ifndef __SW
if (system(ss.c_str()))
{
ModuleBase::WARNING_QUIT("ReadInput", "please set right files directory for reading in.");
}

#endif
return;
}

Expand Down
35 changes: 28 additions & 7 deletions source/source_pw/module_pwdft/VNL_in_pw.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -724,21 +724,42 @@ void pseudopot_cell_vnl::init_vnl(UnitCell& cell, const ModulePW::PW_Basis* rho_

double* jl = new double[kkbeta];
double* aux = new double[kkbeta];

#ifdef __SW
for (int ib = 0; ib < nbeta; ib++)
{
int flag = 0;
for (int ir = 0; ir < kkbeta; ir++)
{
if (std::abs(cell.atoms[it].ncpp.betar(ib, ir)) < 1e-280)
{
std::cout << "ir is " << ir << " " << std::abs(cell.atoms[it].ncpp.betar(ib, ir)) << std::endl;
flag = ir;
break;
}
}
if (flag!=0)
{
for (int ir = flag; ir < kkbeta; ir++)
{
cell.atoms[it].ncpp.betar(ib, ir) = 0;
}
}
}
#endif
for (int ib = 0; ib < nbeta; ib++)
{
const int l = cell.atoms[it].ncpp.lll[ib];
for (int iq = 0; iq < PARAM.globalv.nqx; iq++)
{
const double q = iq * PARAM.globalv.dq;
ModuleBase::Sphbes::Spherical_Bessel(kkbeta, cell.atoms[it].ncpp.r.data(), q, l, jl);

ModuleBase::Sphbes::Spherical_Bessel(kkbeta, cell.atoms[it].ncpp.r.data(), q, l, jl);
for (int ir = 0; ir < kkbeta; ir++)
{
aux[ir] = cell.atoms[it].ncpp.betar(ib, ir) * jl[ir] * cell.atoms[it].ncpp.r[ir];
{
aux[ir] = cell.atoms[it].ncpp.betar(ib, ir) * jl[ir] * cell.atoms[it].ncpp.r[ir];
}
double vqint;
ModuleBase::Integral::Simpson_Integral(kkbeta, aux, cell.atoms[it].ncpp.rab.data(), vqint);

double vqint;
ModuleBase::Integral::Simpson_Integral(kkbeta, aux, cell.atoms[it].ncpp.rab.data(), vqint);
this->tab(it, ib, iq) = vqint * pref;
}
}
Expand Down
13 changes: 10 additions & 3 deletions source/source_pw/module_pwdft/forces.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -472,14 +472,21 @@ void Forces<FPTYPE, Device>::cal_force_ew(const UnitCell& ucell,
upperbound = 2.0 * charge * charge * sqrt(2.0 * alpha / ModuleBase::TWO_PI)
* erfc(sqrt(ucell.tpiba2 * rho_basis->ggecut / 4.0 / alpha));
} while (upperbound > 1.0e-6);

const int ig0 = rho_basis->ig_gge0;
#ifdef _OPENMP
#pragma omp parallel for
#endif
for (int ig = 0; ig < rho_basis->npw; ig++)
{
aux[ig] *= ModuleBase::libm::exp(-1.0 * rho_basis->gg[ig] * ucell.tpiba2 / alpha / 4.0)
/ (rho_basis->gg[ig] * ucell.tpiba2);
if (ig== ig0)
{
continue; // skip G=0
}
else
{
aux[ig] *= ModuleBase::libm::exp(-1.0 * rho_basis->gg[ig] * ucell.tpiba2 / alpha / 4.0)
/ (rho_basis->gg[ig] * ucell.tpiba2);
}
}

// set pos rho_basis->ig_gge0 to zero
Expand Down
3 changes: 1 addition & 2 deletions source/source_pw/module_pwdft/stress_func_cc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -159,8 +159,7 @@ void Stress_Func<FPTYPE, Device>::stress_cc(ModuleBase::matrix& sigma,
for(int ig = 0;ig< rho_basis->npw;ig++)
{
const FPTYPE norm_g = sqrt(rho_basis->gg[ig]);
if(norm_g < 1e-4) { continue;
}
if(norm_g < 1e-4) { continue;}
for (int l = 0; l < 3; l++)
{
for (int m = 0;m< 3;m++)
Expand Down
Loading