diff --git a/CMakeLists.txt b/CMakeLists.txt index 2cc19db8f2..97f5d06203 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -17,6 +17,7 @@ option(USE_CUDA_MPI "Enable CUDA-aware MPI" OFF) option(USE_CUDA_ON_DCU "Enable CUDA on DCU" OFF) option(USE_ROCM "Enable ROCm" OFF) option(USE_DSP "Enable DSP" OFF) +option(USE_SW "Enable SW Architecture" OFF) option(USE_ABACUS_LIBM "Build libmath from source to speed up" OFF) option(ENABLE_LIBXC "Enable using the LibXC package" OFF) @@ -281,6 +282,18 @@ if (USE_DSP) target_link_libraries(${ABACUS_BIN_NAME} ${MT_HOST_DIR}/hthreads/lib/libhthread_device.a) target_link_libraries(${ABACUS_BIN_NAME} ${MT_HOST_DIR}/hthreads/lib/libhthread_host.a) endif() +if (USE_SW) + add_compile_definitions(__SW) + set(SW ON) + include_directories(${SW_MATH}/include) + include_directories(${SW_FFT}/include) + include_directories(${SW_FFT}/lib/libfftw3.a) + + target_link_libraries(${ABACUS_BIN_NAME} ${SW_MATH}/libswfft.a) + target_link_libraries(${ABACUS_BIN_NAME} ${SW_MATH}/libswscalapack.a) + target_link_libraries(${ABACUS_BIN_NAME} ${SW_MATH}/libswlapack.a) + target_link_libraries(${ABACUS_BIN_NAME} ${SW_MATH}/libswblas.a) +endif() find_package(Threads REQUIRED) target_link_libraries(${ABACUS_BIN_NAME} Threads::Threads) @@ -459,6 +472,8 @@ if(MKLROOT) if(CMAKE_CXX_COMPILER_ID MATCHES Intel) list(APPEND math_libs ifcore) endif() +elseif(USE_SW) + # SW architecture can only use its own math library else() find_package(FFTW3 REQUIRED) find_package(Lapack REQUIRED) diff --git a/source/source_basis/module_pw/pw_gatherscatter.h b/source/source_basis/module_pw/pw_gatherscatter.h index 827182d997..e6b5998446 100644 --- a/source/source_basis/module_pw/pw_gatherscatter.h +++ b/source/source_basis/module_pw/pw_gatherscatter.h @@ -123,7 +123,7 @@ void PW_Basis::gathers_scatterp(std::complex* in, std::complex* out) const outp[iz] = inp[iz]; } } - ModuleBase::timer::tick(this->classname, "gathers_scatterp"); + // ModuleBase::timer::tick(this->classname, "gathers_scatterp"); return; } #ifdef __MPI diff --git a/source/source_cell/module_symmetry/symm_rho.cpp b/source/source_cell/module_symmetry/symm_rho.cpp index 05c4f322fe..754279ffbc 100644 --- a/source/source_cell/module_symmetry/symm_rho.cpp +++ b/source/source_cell/module_symmetry/symm_rho.cpp @@ -285,7 +285,7 @@ void Symmetry::rhog_symmetry(std::complex *rhogtot, //assert(rot_count<=nrotk); }//end if section }//end c_index loop - sum /= rot_count; + if (rot_count!=0) sum/= rot_count; for (int isym = 0; isym < rot_count; ++isym) { rhogtot[ipw_record[isym]] = sum/gphase_record[isym]; diff --git a/source/source_estate/module_charge/charge.cpp b/source/source_estate/module_charge/charge.cpp index e22dbaba8f..be86a04800 100644 --- a/source/source_estate/module_charge/charge.cpp +++ b/source/source_estate/module_charge/charge.cpp @@ -279,8 +279,11 @@ void Charge::atomic_rho(const int spin_number_need, for (int ir = 0; ir < mesh; ++ir) { double r2 = atom->ncpp.r[ir] * atom->ncpp.r[ir]; - rhoatm[ir] = atom->ncpp.rho_at[ir] / ModuleBase::FOUR_PI / r2; - } + if (r2!=0) + { + rhoatm[ir] = atom->ncpp.rho_at[ir] / ModuleBase::FOUR_PI / r2; + } + } rhoatm[0] = pow((rhoatm[2] / rhoatm[1]), atom->ncpp.r[1] / (atom->ncpp.r[2] - atom->ncpp.r[1])); // zws add, sunliang updated 2024-03-04 if (rhoatm[0] < 1e-12) diff --git a/source/source_estate/module_pot/H_Hartree_pw.cpp b/source/source_estate/module_pot/H_Hartree_pw.cpp index fab0230f2c..30f8d80484 100644 --- a/source/source_estate/module_pot/H_Hartree_pw.cpp +++ b/source/source_estate/module_pot/H_Hartree_pw.cpp @@ -44,18 +44,18 @@ ModuleBase::matrix H_Hartree_pw::v_hartree(const UnitCell &cell, double ehart = 0.0; std::vector> vh_g(rho_basis->npw); + const int ig0 = rho_basis->ig_gge0; #ifdef _OPENMP #pragma omp parallel for reduction(+:ehart) #endif for (int ig = 0; ig < rho_basis->npw; ig++) { - if (rho_basis->gg[ig] >= 1.0e-8) // LiuXh 20180410 - { + if (ig == ig0) continue; // skip G=0 const double fac = ModuleBase::e2 * ModuleBase::FOUR_PI / (cell.tpiba2 * rho_basis->gg[ig]); ehart += (conj(Porter[ig]) * Porter[ig]).real() * fac; vh_g[ig] = fac * Porter[ig]; - } + } Parallel_Reduce::reduce_pool(ehart); diff --git a/source/source_hamilt/module_ewald/H_Ewald_pw.cpp b/source/source_hamilt/module_ewald/H_Ewald_pw.cpp index 154fd0877d..20e6762cc0 100644 --- a/source/source_hamilt/module_ewald/H_Ewald_pw.cpp +++ b/source/source_hamilt/module_ewald/H_Ewald_pw.cpp @@ -124,10 +124,10 @@ double H_Ewald_pw::compute_ewald(const UnitCell& cell, fact = 1.0; //GlobalV::ofs_running << "\n pwb.gstart = " << pwb.gstart << std::endl; - + const int ig0 = rho_basis->ig_gge0; for (int ig = 0; ig < rho_basis->npw; ig++) { - if(ig == rho_basis->ig_gge0) + if(ig == ig0) { continue; } diff --git a/source/source_hamilt/module_surchem/cal_pseudo.cpp b/source/source_hamilt/module_surchem/cal_pseudo.cpp index c99324a5dc..1e0a64c5b5 100644 --- a/source/source_hamilt/module_surchem/cal_pseudo.cpp +++ b/source/source_hamilt/module_surchem/cal_pseudo.cpp @@ -10,6 +10,7 @@ void surchem::gauss_charge(const UnitCell& cell, Structure_Factor* sf) { sf->setup_structure_factor(&cell, pgrid, rho_basis); // call strucFac(ntype,ngmc) + const int ig0 = rho_basis->ig_gge0; // G=0 index for (int it = 0; it < cell.ntype; it++) { double RCS = GetAtom.atom_RCS[cell.atoms[it].ncpp.psd]; diff --git a/source/source_hamilt/module_surchem/cal_totn.cpp b/source/source_hamilt/module_surchem/cal_totn.cpp index b74e7e44d0..1e014c33f4 100644 --- a/source/source_hamilt/module_surchem/cal_totn.cpp +++ b/source/source_hamilt/module_surchem/cal_totn.cpp @@ -12,8 +12,9 @@ void surchem::cal_totn(const UnitCell& cell, ModuleBase::GlobalFunc::ZEROS(vloc_g, rho_basis->npw); rho_basis->real2recip(vlocal, vloc_g); // now n is vloc in Recispace + const int ig0 = rho_basis->ig_gge0; // G=0 index for (int ig = 0; ig < rho_basis->npw; ig++) { - if(ig==rho_basis->ig_gge0) + if(ig==ig0) { N[ig] = Porter_g[ig]; continue; @@ -39,9 +40,10 @@ void surchem::induced_charge(const UnitCell& cell, const ModulePW::PW_Basis* rho std::complex *induced_rhog = new std::complex[rho_basis->npw]; ModuleBase::GlobalFunc::ZEROS(induced_rhog, rho_basis->npw); rho_basis->real2recip(delta_phi, delta_phig); + const int ig0 = rho_basis->ig_gge0; // G=0 index for (int ig = 0; ig < rho_basis->npw; ig++) { - if(rho_basis->ig_gge0 == ig) + if(ig==ig0) { continue; } diff --git a/source/source_io/read_input.cpp b/source/source_io/read_input.cpp index e8ee9ef3fd..d08e1ac4de 100644 --- a/source/source_io/read_input.cpp +++ b/source/source_io/read_input.cpp @@ -191,19 +191,21 @@ void ReadInput::create_directory(const Parameter& param) } // NOTE: "make_dir_out" must be called by all processes!!! // Maybe it is not good, because only rank 0 can create the directory. + #ifndef __SW ModuleBase::Global_File::make_dir_out(param.input.suffix, param.input.calculation, out_dir, this->rank, param.input.mdp.md_restart, param.input.out_alllog); // xiaohui add 2013-09-01 - + #endif const std::string ss = "test -d " + PARAM.inp.read_file_dir; + #ifndef __SW if (system(ss.c_str())) { ModuleBase::WARNING_QUIT("ReadInput", "please set right files directory for reading in."); } - + #endif return; } diff --git a/source/source_pw/module_pwdft/VNL_in_pw.cpp b/source/source_pw/module_pwdft/VNL_in_pw.cpp index 7aaf2dce45..02911c5918 100644 --- a/source/source_pw/module_pwdft/VNL_in_pw.cpp +++ b/source/source_pw/module_pwdft/VNL_in_pw.cpp @@ -724,21 +724,42 @@ void pseudopot_cell_vnl::init_vnl(UnitCell& cell, const ModulePW::PW_Basis* rho_ double* jl = new double[kkbeta]; double* aux = new double[kkbeta]; - + #ifdef __SW + for (int ib = 0; ib < nbeta; ib++) + { + int flag = 0; + for (int ir = 0; ir < kkbeta; ir++) + { + if (std::abs(cell.atoms[it].ncpp.betar(ib, ir)) < 1e-280) + { + std::cout << "ir is " << ir << " " << std::abs(cell.atoms[it].ncpp.betar(ib, ir)) << std::endl; + flag = ir; + break; + } + } + if (flag!=0) + { + for (int ir = flag; ir < kkbeta; ir++) + { + cell.atoms[it].ncpp.betar(ib, ir) = 0; + } + } + } + #endif for (int ib = 0; ib < nbeta; ib++) { const int l = cell.atoms[it].ncpp.lll[ib]; for (int iq = 0; iq < PARAM.globalv.nqx; iq++) { const double q = iq * PARAM.globalv.dq; - ModuleBase::Sphbes::Spherical_Bessel(kkbeta, cell.atoms[it].ncpp.r.data(), q, l, jl); - + ModuleBase::Sphbes::Spherical_Bessel(kkbeta, cell.atoms[it].ncpp.r.data(), q, l, jl); for (int ir = 0; ir < kkbeta; ir++) - { - aux[ir] = cell.atoms[it].ncpp.betar(ib, ir) * jl[ir] * cell.atoms[it].ncpp.r[ir]; + { + aux[ir] = cell.atoms[it].ncpp.betar(ib, ir) * jl[ir] * cell.atoms[it].ncpp.r[ir]; } - double vqint; - ModuleBase::Integral::Simpson_Integral(kkbeta, aux, cell.atoms[it].ncpp.rab.data(), vqint); + + double vqint; + ModuleBase::Integral::Simpson_Integral(kkbeta, aux, cell.atoms[it].ncpp.rab.data(), vqint); this->tab(it, ib, iq) = vqint * pref; } } diff --git a/source/source_pw/module_pwdft/forces.cpp b/source/source_pw/module_pwdft/forces.cpp index 0fb9d63325..155ffa0484 100644 --- a/source/source_pw/module_pwdft/forces.cpp +++ b/source/source_pw/module_pwdft/forces.cpp @@ -472,14 +472,21 @@ void Forces::cal_force_ew(const UnitCell& ucell, upperbound = 2.0 * charge * charge * sqrt(2.0 * alpha / ModuleBase::TWO_PI) * erfc(sqrt(ucell.tpiba2 * rho_basis->ggecut / 4.0 / alpha)); } while (upperbound > 1.0e-6); - + const int ig0 = rho_basis->ig_gge0; #ifdef _OPENMP #pragma omp parallel for #endif for (int ig = 0; ig < rho_basis->npw; ig++) { - aux[ig] *= ModuleBase::libm::exp(-1.0 * rho_basis->gg[ig] * ucell.tpiba2 / alpha / 4.0) - / (rho_basis->gg[ig] * ucell.tpiba2); + if (ig== ig0) + { + continue; // skip G=0 + } + else + { + aux[ig] *= ModuleBase::libm::exp(-1.0 * rho_basis->gg[ig] * ucell.tpiba2 / alpha / 4.0) + / (rho_basis->gg[ig] * ucell.tpiba2); + } } // set pos rho_basis->ig_gge0 to zero diff --git a/source/source_pw/module_pwdft/stress_func_cc.cpp b/source/source_pw/module_pwdft/stress_func_cc.cpp index 2e062bca8b..b58eb41295 100644 --- a/source/source_pw/module_pwdft/stress_func_cc.cpp +++ b/source/source_pw/module_pwdft/stress_func_cc.cpp @@ -159,8 +159,7 @@ void Stress_Func::stress_cc(ModuleBase::matrix& sigma, for(int ig = 0;ig< rho_basis->npw;ig++) { const FPTYPE norm_g = sqrt(rho_basis->gg[ig]); - if(norm_g < 1e-4) { continue; -} + if(norm_g < 1e-4) { continue;} for (int l = 0; l < 3; l++) { for (int m = 0;m< 3;m++)