From 8c713022b44356cc167da72faf59bbeb1ea51443 Mon Sep 17 00:00:00 2001 From: ubuntu <3158793232@qq.com> Date: Mon, 7 Jul 2025 16:56:34 +0800 Subject: [PATCH 1/3] remove float error in sunway --- CMakeLists.txt | 15 ++++++++ .../module_basis/module_pw/pw_gatherscatter.h | 2 +- .../module_cell/module_symmetry/symm_rho.cpp | 2 +- .../module_elecstate/module_charge/charge.cpp | 7 ++-- .../hamilt_pwdft/VNL_in_pw.cpp | 35 +++++++++++++++---- .../module_hamilt_pw/hamilt_pwdft/forces.cpp | 7 ++-- source/module_io/read_input.cpp | 6 ++-- 7 files changed, 59 insertions(+), 15 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index e5afbe4793..3b25f6c3e8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -17,6 +17,7 @@ option(USE_CUDA_MPI "Enable CUDA-aware MPI" OFF) option(USE_CUDA_ON_DCU "Enable CUDA on DCU" OFF) option(USE_ROCM "Enable ROCm" OFF) option(USE_DSP "Enable DSP" OFF) +option(USE_SUNWAY "Enable Sunway Architecture" OFF) option(USE_ABACUS_LIBM "Build libmath from source to speed up" OFF) option(ENABLE_LIBXC "Enable using the LibXC package" OFF) @@ -278,6 +279,18 @@ if (USE_DSP) target_link_libraries(${ABACUS_BIN_NAME} ${MT_HOST_DIR}/hthreads/lib/libhthread_device.a) target_link_libraries(${ABACUS_BIN_NAME} ${MT_HOST_DIR}/hthreads/lib/libhthread_host.a) endif() +if (USE_SUNWAY) + add_compile_definitions(__SUNWAY) + set(SUNWAY ON) + include_directories(${SUNWAY_MATH}/include) + include_directories(${SUNWAY_FFT}/include) + include_directories(${SUNWAY_FFT}/lib/libfftw3.a) + + target_link_libraries(${ABACUS_BIN_NAME} ${SUNWAY_MATH}/libswfft.a) + target_link_libraries(${ABACUS_BIN_NAME} ${SUNWAY_MATH}/libswscalapack.a) + target_link_libraries(${ABACUS_BIN_NAME} ${SUNWAY_MATH}/libswlapack.a) + target_link_libraries(${ABACUS_BIN_NAME} ${SUNWAY_MATH}/libswblas.a) +endif() find_package(Threads REQUIRED) target_link_libraries(${ABACUS_BIN_NAME} Threads::Threads) @@ -452,6 +465,8 @@ if(MKLROOT) if(CMAKE_CXX_COMPILER_ID MATCHES Intel) list(APPEND math_libs ifcore) endif() +elseif(USE_SUNWAY) + # Sunway architecture can only use its own math library else() find_package(FFTW3 REQUIRED) find_package(Lapack REQUIRED) diff --git a/source/module_basis/module_pw/pw_gatherscatter.h b/source/module_basis/module_pw/pw_gatherscatter.h index 97be6e5c23..3488ec2ddb 100644 --- a/source/module_basis/module_pw/pw_gatherscatter.h +++ b/source/module_basis/module_pw/pw_gatherscatter.h @@ -123,7 +123,7 @@ void PW_Basis::gathers_scatterp(std::complex* in, std::complex* out) const outp[iz] = inp[iz]; } } - ModuleBase::timer::tick(this->classname, "gathers_scatterp"); + // ModuleBase::timer::tick(this->classname, "gathers_scatterp"); return; } #ifdef __MPI diff --git a/source/module_cell/module_symmetry/symm_rho.cpp b/source/module_cell/module_symmetry/symm_rho.cpp index 858054df23..3b369e9592 100644 --- a/source/module_cell/module_symmetry/symm_rho.cpp +++ b/source/module_cell/module_symmetry/symm_rho.cpp @@ -285,7 +285,7 @@ void Symmetry::rhog_symmetry(std::complex *rhogtot, //assert(rot_count<=nrotk); }//end if section }//end c_index loop - sum /= rot_count; + if (rot_count!=0) sum/= rot_count; for (int isym = 0; isym < rot_count; ++isym) { rhogtot[ipw_record[isym]] = sum/gphase_record[isym]; diff --git a/source/module_elecstate/module_charge/charge.cpp b/source/module_elecstate/module_charge/charge.cpp index 252b336af6..30e020e420 100644 --- a/source/module_elecstate/module_charge/charge.cpp +++ b/source/module_elecstate/module_charge/charge.cpp @@ -279,8 +279,11 @@ void Charge::atomic_rho(const int spin_number_need, for (int ir = 0; ir < mesh; ++ir) { double r2 = atom->ncpp.r[ir] * atom->ncpp.r[ir]; - rhoatm[ir] = atom->ncpp.rho_at[ir] / ModuleBase::FOUR_PI / r2; - } + if (r2!=0) + { + rhoatm[ir] = atom->ncpp.rho_at[ir] / ModuleBase::FOUR_PI / r2; + } + } rhoatm[0] = pow((rhoatm[2] / rhoatm[1]), atom->ncpp.r[1] / (atom->ncpp.r[2] - atom->ncpp.r[1])); // zws add, sunliang updated 2024-03-04 if (rhoatm[0] < 1e-12) diff --git a/source/module_hamilt_pw/hamilt_pwdft/VNL_in_pw.cpp b/source/module_hamilt_pw/hamilt_pwdft/VNL_in_pw.cpp index ee9b2bb659..751a5c25e8 100644 --- a/source/module_hamilt_pw/hamilt_pwdft/VNL_in_pw.cpp +++ b/source/module_hamilt_pw/hamilt_pwdft/VNL_in_pw.cpp @@ -724,21 +724,42 @@ void pseudopot_cell_vnl::init_vnl(UnitCell& cell, const ModulePW::PW_Basis* rho_ double* jl = new double[kkbeta]; double* aux = new double[kkbeta]; - + #ifdef __SUNWAY + for (int ib = 0; ib < nbeta; ib++) + { + int flag = 0; + for (int ir = 0; ir < kkbeta; ir++) + { + if (std::abs(cell.atoms[it].ncpp.betar(ib, ir)) < 1e-280) + { + std::cout << "ir is " << ir << " " << std::abs(cell.atoms[it].ncpp.betar(ib, ir)) << std::endl; + flag = ir; + break; + } + } + if (flag!=0) + { + for (int ir = flag; ir < kkbeta; ir++) + { + cell.atoms[it].ncpp.betar(ib, ir) = 0; + } + } + } + #endif for (int ib = 0; ib < nbeta; ib++) { const int l = cell.atoms[it].ncpp.lll[ib]; for (int iq = 0; iq < PARAM.globalv.nqx; iq++) { const double q = iq * PARAM.globalv.dq; - ModuleBase::Sphbes::Spherical_Bessel(kkbeta, cell.atoms[it].ncpp.r.data(), q, l, jl); - + ModuleBase::Sphbes::Spherical_Bessel(kkbeta, cell.atoms[it].ncpp.r.data(), q, l, jl); for (int ir = 0; ir < kkbeta; ir++) - { - aux[ir] = cell.atoms[it].ncpp.betar(ib, ir) * jl[ir] * cell.atoms[it].ncpp.r[ir]; + { + aux[ir] = cell.atoms[it].ncpp.betar(ib, ir) * jl[ir] * cell.atoms[it].ncpp.r[ir]; } - double vqint; - ModuleBase::Integral::Simpson_Integral(kkbeta, aux, cell.atoms[it].ncpp.rab.data(), vqint); + + double vqint; + ModuleBase::Integral::Simpson_Integral(kkbeta, aux, cell.atoms[it].ncpp.rab.data(), vqint); this->tab(it, ib, iq) = vqint * pref; } } diff --git a/source/module_hamilt_pw/hamilt_pwdft/forces.cpp b/source/module_hamilt_pw/hamilt_pwdft/forces.cpp index a722bc5449..de9343ff4d 100644 --- a/source/module_hamilt_pw/hamilt_pwdft/forces.cpp +++ b/source/module_hamilt_pw/hamilt_pwdft/forces.cpp @@ -478,8 +478,11 @@ void Forces::cal_force_ew(const UnitCell& ucell, #endif for (int ig = 0; ig < rho_basis->npw; ig++) { - aux[ig] *= ModuleBase::libm::exp(-1.0 * rho_basis->gg[ig] * ucell.tpiba2 / alpha / 4.0) - / (rho_basis->gg[ig] * ucell.tpiba2); + if (rho_basis->gg[ig] !=0) + { + aux[ig] *= ModuleBase::libm::exp(-1.0 * rho_basis->gg[ig] * ucell.tpiba2 / alpha / 4.0) + / (rho_basis->gg[ig] * ucell.tpiba2); + } } // set pos rho_basis->ig_gge0 to zero diff --git a/source/module_io/read_input.cpp b/source/module_io/read_input.cpp index 14d6438010..09111824ff 100644 --- a/source/module_io/read_input.cpp +++ b/source/module_io/read_input.cpp @@ -188,19 +188,21 @@ void ReadInput::create_directory(const Parameter& param) } // NOTE: "make_dir_out" must be called by all processes!!! // Maybe it is not good, because only rank 0 can create the directory. + #ifndef __SUNWAY ModuleBase::Global_File::make_dir_out(param.input.suffix, param.input.calculation, out_dir, this->rank, param.input.mdp.md_restart, param.input.out_alllog); // xiaohui add 2013-09-01 - + #endif const std::string ss = "test -d " + PARAM.inp.read_file_dir; + #ifndef __SUNWAY if (system(ss.c_str())) { ModuleBase::WARNING_QUIT("ReadInput", "please set right files directory for reading in."); } - + #endif return; } From 03c1172159bda22340507ac3c2a5c7f15adb048b Mon Sep 17 00:00:00 2001 From: ubuntu <3158793232@qq.com> Date: Mon, 7 Jul 2025 17:03:27 +0800 Subject: [PATCH 2/3] fix ig=0 --- source/module_elecstate/module_pot/H_Hartree_pw.cpp | 6 +++--- source/module_hamilt_general/module_ewald/H_Ewald_pw.cpp | 4 ++-- .../module_hamilt_general/module_surchem/cal_pseudo.cpp | 1 + source/module_hamilt_general/module_surchem/cal_totn.cpp | 6 ++++-- source/module_hamilt_pw/hamilt_pwdft/forces.cpp | 8 ++++++-- source/module_hamilt_pw/hamilt_pwdft/stress_func_cc.cpp | 3 +-- 6 files changed, 17 insertions(+), 11 deletions(-) diff --git a/source/module_elecstate/module_pot/H_Hartree_pw.cpp b/source/module_elecstate/module_pot/H_Hartree_pw.cpp index 898817f736..f3c4ffe70e 100644 --- a/source/module_elecstate/module_pot/H_Hartree_pw.cpp +++ b/source/module_elecstate/module_pot/H_Hartree_pw.cpp @@ -44,18 +44,18 @@ ModuleBase::matrix H_Hartree_pw::v_hartree(const UnitCell &cell, double ehart = 0.0; std::vector> vh_g(rho_basis->npw); + const int ig0 = rho_basis->ig_gge0; #ifdef _OPENMP #pragma omp parallel for reduction(+:ehart) #endif for (int ig = 0; ig < rho_basis->npw; ig++) { - if (rho_basis->gg[ig] >= 1.0e-8) // LiuXh 20180410 - { + if (ig == ig0) continue; // skip G=0 const double fac = ModuleBase::e2 * ModuleBase::FOUR_PI / (cell.tpiba2 * rho_basis->gg[ig]); ehart += (conj(Porter[ig]) * Porter[ig]).real() * fac; vh_g[ig] = fac * Porter[ig]; - } + } Parallel_Reduce::reduce_pool(ehart); diff --git a/source/module_hamilt_general/module_ewald/H_Ewald_pw.cpp b/source/module_hamilt_general/module_ewald/H_Ewald_pw.cpp index 2c8cc60f9d..fea3368a06 100644 --- a/source/module_hamilt_general/module_ewald/H_Ewald_pw.cpp +++ b/source/module_hamilt_general/module_ewald/H_Ewald_pw.cpp @@ -124,10 +124,10 @@ double H_Ewald_pw::compute_ewald(const UnitCell& cell, fact = 1.0; //GlobalV::ofs_running << "\n pwb.gstart = " << pwb.gstart << std::endl; - + const int ig0 = rho_basis->ig_gge0; for (int ig = 0; ig < rho_basis->npw; ig++) { - if(ig == rho_basis->ig_gge0) + if(ig == ig0) { continue; } diff --git a/source/module_hamilt_general/module_surchem/cal_pseudo.cpp b/source/module_hamilt_general/module_surchem/cal_pseudo.cpp index 7ff5bb507c..81ef7eb6fc 100644 --- a/source/module_hamilt_general/module_surchem/cal_pseudo.cpp +++ b/source/module_hamilt_general/module_surchem/cal_pseudo.cpp @@ -10,6 +10,7 @@ void surchem::gauss_charge(const UnitCell& cell, Structure_Factor* sf) { sf->setup_structure_factor(&cell, pgrid, rho_basis); // call strucFac(ntype,ngmc) + const int ig0 = rho_basis->ig_gge0; // G=0 index for (int it = 0; it < cell.ntype; it++) { double RCS = GetAtom.atom_RCS[cell.atoms[it].ncpp.psd]; diff --git a/source/module_hamilt_general/module_surchem/cal_totn.cpp b/source/module_hamilt_general/module_surchem/cal_totn.cpp index 3650a67dfa..34b4a4da39 100644 --- a/source/module_hamilt_general/module_surchem/cal_totn.cpp +++ b/source/module_hamilt_general/module_surchem/cal_totn.cpp @@ -12,8 +12,9 @@ void surchem::cal_totn(const UnitCell& cell, ModuleBase::GlobalFunc::ZEROS(vloc_g, rho_basis->npw); rho_basis->real2recip(vlocal, vloc_g); // now n is vloc in Recispace + const int ig0 = rho_basis->ig_gge0; // G=0 index for (int ig = 0; ig < rho_basis->npw; ig++) { - if(ig==rho_basis->ig_gge0) + if(ig==ig0) { N[ig] = Porter_g[ig]; continue; @@ -39,9 +40,10 @@ void surchem::induced_charge(const UnitCell& cell, const ModulePW::PW_Basis* rho std::complex *induced_rhog = new complex[rho_basis->npw]; ModuleBase::GlobalFunc::ZEROS(induced_rhog, rho_basis->npw); rho_basis->real2recip(delta_phi, delta_phig); + const int ig0 = rho_basis->ig_gge0; // G=0 index for (int ig = 0; ig < rho_basis->npw; ig++) { - if(rho_basis->ig_gge0 == ig) + if(ig==ig0) { continue; } diff --git a/source/module_hamilt_pw/hamilt_pwdft/forces.cpp b/source/module_hamilt_pw/hamilt_pwdft/forces.cpp index de9343ff4d..89d06af65b 100644 --- a/source/module_hamilt_pw/hamilt_pwdft/forces.cpp +++ b/source/module_hamilt_pw/hamilt_pwdft/forces.cpp @@ -472,13 +472,17 @@ void Forces::cal_force_ew(const UnitCell& ucell, upperbound = 2.0 * charge * charge * sqrt(2.0 * alpha / ModuleBase::TWO_PI) * erfc(sqrt(ucell.tpiba2 * rho_basis->ggecut / 4.0 / alpha)); } while (upperbound > 1.0e-6); - + const int ig0 = rho_basis->ig_gge0; #ifdef _OPENMP #pragma omp parallel for #endif for (int ig = 0; ig < rho_basis->npw; ig++) { - if (rho_basis->gg[ig] !=0) + if (ig== ig0) + { + continue; // skip G=0 + } + else { aux[ig] *= ModuleBase::libm::exp(-1.0 * rho_basis->gg[ig] * ucell.tpiba2 / alpha / 4.0) / (rho_basis->gg[ig] * ucell.tpiba2); diff --git a/source/module_hamilt_pw/hamilt_pwdft/stress_func_cc.cpp b/source/module_hamilt_pw/hamilt_pwdft/stress_func_cc.cpp index 32d5c2339d..c87231149c 100644 --- a/source/module_hamilt_pw/hamilt_pwdft/stress_func_cc.cpp +++ b/source/module_hamilt_pw/hamilt_pwdft/stress_func_cc.cpp @@ -159,8 +159,7 @@ void Stress_Func::stress_cc(ModuleBase::matrix& sigma, for(int ig = 0;ig< rho_basis->npw;ig++) { const FPTYPE norm_g = sqrt(rho_basis->gg[ig]); - if(norm_g < 1e-4) { continue; -} + if(norm_g < 1e-4) { continue;} for (int l = 0; l < 3; l++) { for (int m = 0;m< 3;m++) From cf96e9398433bcb6192f0d580ce0ae081ab1e7ef Mon Sep 17 00:00:00 2001 From: ubuntu <3158793232@qq.com> Date: Mon, 7 Jul 2025 17:30:12 +0800 Subject: [PATCH 3/3] add the sw --- CMakeLists.txt | 26 ++++++++++----------- source/source_io/read_input.cpp | 4 ++-- source/source_pw/module_pwdft/VNL_in_pw.cpp | 2 +- 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 4505306534..97f5d06203 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -17,7 +17,7 @@ option(USE_CUDA_MPI "Enable CUDA-aware MPI" OFF) option(USE_CUDA_ON_DCU "Enable CUDA on DCU" OFF) option(USE_ROCM "Enable ROCm" OFF) option(USE_DSP "Enable DSP" OFF) -option(USE_SUNWAY "Enable Sunway Architecture" OFF) +option(USE_SW "Enable SW Architecture" OFF) option(USE_ABACUS_LIBM "Build libmath from source to speed up" OFF) option(ENABLE_LIBXC "Enable using the LibXC package" OFF) @@ -282,17 +282,17 @@ if (USE_DSP) target_link_libraries(${ABACUS_BIN_NAME} ${MT_HOST_DIR}/hthreads/lib/libhthread_device.a) target_link_libraries(${ABACUS_BIN_NAME} ${MT_HOST_DIR}/hthreads/lib/libhthread_host.a) endif() -if (USE_SUNWAY) - add_compile_definitions(__SUNWAY) - set(SUNWAY ON) - include_directories(${SUNWAY_MATH}/include) - include_directories(${SUNWAY_FFT}/include) - include_directories(${SUNWAY_FFT}/lib/libfftw3.a) +if (USE_SW) + add_compile_definitions(__SW) + set(SW ON) + include_directories(${SW_MATH}/include) + include_directories(${SW_FFT}/include) + include_directories(${SW_FFT}/lib/libfftw3.a) - target_link_libraries(${ABACUS_BIN_NAME} ${SUNWAY_MATH}/libswfft.a) - target_link_libraries(${ABACUS_BIN_NAME} ${SUNWAY_MATH}/libswscalapack.a) - target_link_libraries(${ABACUS_BIN_NAME} ${SUNWAY_MATH}/libswlapack.a) - target_link_libraries(${ABACUS_BIN_NAME} ${SUNWAY_MATH}/libswblas.a) + target_link_libraries(${ABACUS_BIN_NAME} ${SW_MATH}/libswfft.a) + target_link_libraries(${ABACUS_BIN_NAME} ${SW_MATH}/libswscalapack.a) + target_link_libraries(${ABACUS_BIN_NAME} ${SW_MATH}/libswlapack.a) + target_link_libraries(${ABACUS_BIN_NAME} ${SW_MATH}/libswblas.a) endif() find_package(Threads REQUIRED) @@ -472,8 +472,8 @@ if(MKLROOT) if(CMAKE_CXX_COMPILER_ID MATCHES Intel) list(APPEND math_libs ifcore) endif() -elseif(USE_SUNWAY) - # Sunway architecture can only use its own math library +elseif(USE_SW) + # SW architecture can only use its own math library else() find_package(FFTW3 REQUIRED) find_package(Lapack REQUIRED) diff --git a/source/source_io/read_input.cpp b/source/source_io/read_input.cpp index 17502e3f86..d08e1ac4de 100644 --- a/source/source_io/read_input.cpp +++ b/source/source_io/read_input.cpp @@ -191,7 +191,7 @@ void ReadInput::create_directory(const Parameter& param) } // NOTE: "make_dir_out" must be called by all processes!!! // Maybe it is not good, because only rank 0 can create the directory. - #ifndef __SUNWAY + #ifndef __SW ModuleBase::Global_File::make_dir_out(param.input.suffix, param.input.calculation, out_dir, @@ -200,7 +200,7 @@ void ReadInput::create_directory(const Parameter& param) param.input.out_alllog); // xiaohui add 2013-09-01 #endif const std::string ss = "test -d " + PARAM.inp.read_file_dir; - #ifndef __SUNWAY + #ifndef __SW if (system(ss.c_str())) { ModuleBase::WARNING_QUIT("ReadInput", "please set right files directory for reading in."); diff --git a/source/source_pw/module_pwdft/VNL_in_pw.cpp b/source/source_pw/module_pwdft/VNL_in_pw.cpp index e874634e3b..02911c5918 100644 --- a/source/source_pw/module_pwdft/VNL_in_pw.cpp +++ b/source/source_pw/module_pwdft/VNL_in_pw.cpp @@ -724,7 +724,7 @@ void pseudopot_cell_vnl::init_vnl(UnitCell& cell, const ModulePW::PW_Basis* rho_ double* jl = new double[kkbeta]; double* aux = new double[kkbeta]; - #ifdef __SUNWAY + #ifdef __SW for (int ib = 0; ib < nbeta; ib++) { int flag = 0;