deepmodeling · mohanchen · Jul 12, 2025 · Jul 7, 2025 · Jul 7, 2025 · Jul 7, 2025
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -17,6 +17,7 @@ option(USE_CUDA_MPI "Enable CUDA-aware MPI" OFF)
 option(USE_CUDA_ON_DCU "Enable CUDA on DCU" OFF)
 option(USE_ROCM "Enable ROCm" OFF)
 option(USE_DSP "Enable DSP" OFF)
+option(USE_SW "Enable SW Architecture" OFF)
 
 option(USE_ABACUS_LIBM "Build libmath from source to speed up" OFF)
 option(ENABLE_LIBXC "Enable using the LibXC package" OFF)
@@ -281,6 +282,18 @@ if (USE_DSP)
   target_link_libraries(${ABACUS_BIN_NAME} ${MT_HOST_DIR}/hthreads/lib/libhthread_device.a)
   target_link_libraries(${ABACUS_BIN_NAME} ${MT_HOST_DIR}/hthreads/lib/libhthread_host.a)
 endif()
+if (USE_SW)
+  add_compile_definitions(__SW)
+  set(SW ON)
+  include_directories(${SW_MATH}/include)
+  include_directories(${SW_FFT}/include)
+
+  target_link_libraries(${ABACUS_BIN_NAME} ${SW_FFT}/lib/libfftw3.a)
+  target_link_libraries(${ABACUS_BIN_NAME} ${SW_MATH}/libswfft.a)
+  target_link_libraries(${ABACUS_BIN_NAME} ${SW_MATH}/libswscalapack.a)
+  target_link_libraries(${ABACUS_BIN_NAME} ${SW_MATH}/libswlapack.a)
+  target_link_libraries(${ABACUS_BIN_NAME} ${SW_MATH}/libswblas.a)
+endif()
 
 find_package(Threads REQUIRED)
 target_link_libraries(${ABACUS_BIN_NAME} Threads::Threads)
@@ -459,6 +472,9 @@ if(MKLROOT)
   if(CMAKE_CXX_COMPILER_ID MATCHES Intel)
     list(APPEND math_libs ifcore)
   endif()
+elseif(USE_SW)
+   list(APPEND math_libs gfortran)
+  # SW architecture can only use its own math library
 else()
   find_package(FFTW3 REQUIRED)
   find_package(Lapack REQUIRED)

diff --git a/source/source_base/global_function.cpp b/source/source_base/global_function.cpp
@@ -61,6 +61,7 @@ void OUT(std::ofstream &ofs, const std::string &name)
 void MAKE_DIR(const std::string &fn)
 {
     //	ModuleBase::TITLE("global_function","MAKE_DIR");
+    #ifndef __SW
     if (GlobalV::MY_RANK == 0)
     {
         std::stringstream ss;
@@ -73,6 +74,7 @@ void MAKE_DIR(const std::string &fn)
             ModuleBase::WARNING_QUIT("MAKE_DIR", fn);
         }
     }
+    #endif
     return;
 }
 

diff --git a/source/source_basis/module_pw/pw_basis.cpp b/source/source_basis/module_pw/pw_basis.cpp
@@ -144,6 +144,7 @@ void PW_Basis::collect_local_pw()
     delete[] this->gcar; this->gcar = new ModuleBase::Vector3<double>[this->npw];
 
     ModuleBase::Vector3<double> f;
+    int gamma_num = 0;
     for(int ig = 0 ; ig < this-> npw ; ++ig)
     {
         int isz = this->ig2isz[ig];
@@ -173,6 +174,12 @@ void PW_Basis::collect_local_pw()
         if(this->gg[ig] < 1e-8)
         {
             this->ig_gge0 = ig;
+            ++gamma_num;
+            if (gamma_num > 1)
+            {
+                ModuleBase::WARNING_QUIT("PW_Basis::collect_local_pw", 
+                                        "More than one gamma point found in the plane wave basis set.\n");
+            }
         }
     }
     return;

diff --git a/source/source_basis/module_pw/pw_gatherscatter.h b/source/source_basis/module_pw/pw_gatherscatter.h
@@ -123,7 +123,7 @@ void PW_Basis::gathers_scatterp(std::complex<T>* in, std::complex<T>* out) const
                 outp[iz] = inp[iz];
             }
         }
-        ModuleBase::timer::tick(this->classname, "gathers_scatterp");
+        // ModuleBase::timer::tick(this->classname, "gathers_scatterp");
         return;
     }
 #ifdef __MPI

diff --git a/source/source_cell/module_symmetry/symm_rho.cpp b/source/source_cell/module_symmetry/symm_rho.cpp
@@ -285,7 +285,7 @@ void Symmetry::rhog_symmetry(std::complex<double> *rhogtot,
 				//assert(rot_count<=nrotk);
 			}//end if section
 		}//end c_index loop
-		sum /= rot_count;
+		if (rot_count!=0) sum/= rot_count;
 		for (int isym = 0; isym < rot_count; ++isym)
 		{
 			rhogtot[ipw_record[isym]] = sum/gphase_record[isym];

diff --git a/source/source_cell/pseudo.cpp b/source/source_cell/pseudo.cpp
@@ -9,6 +9,34 @@ pseudo::~pseudo()
 {
 }
 
+void pseudo::check_betar()
+{
+	bool min_flag = false;
+	for (int ib = 0; ib < nbeta; ib++)
+	{
+		for (int ir = 0; ir < mesh; ir++)
+		{
+			// Get the bit representation of the double
+			uint64_t bits = *(uint64_t*)&betar(ib, ir);
+    		// Extract exponent field (bits 52-62)
+			uint64_t exponent = (bits >> 52) & 0x7FF;
+			// Define exponent threshold for 1e-30
+    		// Calculated as: bias + floor(log2(1e-30))
+    		// Where bias = 1023 and log2(1e-30) ≈ -99.657
+			// Thus threshold is approximately 923
+			if ((exponent <= 923))
+			{
+				min_flag = true;
+				betar(ib, ir) = 0.0;
+			}
+		}
+	}
+	if (min_flag)
+	{
+		std::cout << "WARNING: some of potential function is set to zero cause of less than 1e-30.\n";
+	}
+}
+
 void pseudo::print_pseudo(std::ofstream& ofs)
 {
 	print_pseudo_vl(ofs);

diff --git a/source/source_cell/pseudo.h b/source/source_cell/pseudo.h
@@ -74,6 +74,12 @@ class pseudo
     // uspp
     ModuleBase::realArray qfuncl; // qfuncl(2*lmax+1,nbeta*(nbeta+1)/2,mesh) Q_{mu,nu}(|r|) function for |r|> r_L
     ModuleBase::matrix qqq;       // qqq(nbeta,nbeta) q_{mu,nu}
+    /**
+     * @brief Check the input data for non-normal numbers in the betar.
+     * Subsequent values following non-normal numbers will be reset to zero
+     * to prevent potential computational issues arising from invalid data.
+     */
+    void check_betar();
 
     void print_pseudo_h(std::ofstream& ofs);
     void print_pseudo_atom(std::ofstream& ofs);

diff --git a/source/source_cell/read_pp_upf100.cpp b/source/source_cell/read_pp_upf100.cpp
@@ -319,7 +319,8 @@ void Pseudopot_upf::read_pseudo_nl(std::ifstream& ifs, Atom_pseudo& pp)
             ModuleBase::GlobalFunc::SCAN_END(ifs, "</PP_BETA>");
             pp.kkbeta = (this->kbeta[i] > pp.kkbeta) ? this->kbeta[i] : pp.kkbeta;
         }
-
+        //check the betar for non-normal number
+        pp.check_betar();
         // DIJ
         ModuleBase::GlobalFunc::SCAN_BEGIN(ifs, "<PP_DIJ>", false);
         ModuleBase::GlobalFunc::READ_VALUE(ifs, this->nd); // nl_4

diff --git a/source/source_cell/read_pp_upf201.cpp b/source/source_cell/read_pp_upf201.cpp
@@ -530,7 +530,8 @@ void Pseudopot_upf::read_pseudo_upf201_nonlocal(std::ifstream& ifs, Atom_pseudo&
         word = "</PP_BETA." + std::to_string(ib + 1) + ">";
         ModuleBase::GlobalFunc::SCAN_END(ifs, word);
     }
-
+    //check the betar for non-normal number
+    pp.check_betar();
     // Read the hamiltonian terms D_ij
     if (ModuleBase::GlobalFunc::SCAN_BEGIN(ifs, "<PP_DIJ", true, false))
     {

diff --git a/source/source_estate/module_charge/charge.cpp b/source/source_estate/module_charge/charge.cpp
@@ -279,8 +279,11 @@ void Charge::atomic_rho(const int spin_number_need,
                                 for (int ir = 0; ir < mesh; ++ir)
                                 {
                                     double r2 = atom->ncpp.r[ir] * atom->ncpp.r[ir];
-                                    rhoatm[ir] = atom->ncpp.rho_at[ir] / ModuleBase::FOUR_PI / r2;
-                                }
+                                    if (r2!=0)
+                                    {
+                                     rhoatm[ir] = atom->ncpp.rho_at[ir] / ModuleBase::FOUR_PI / r2;
+                                    }
+                                 }
                                 rhoatm[0]
                                     = pow((rhoatm[2] / rhoatm[1]), atom->ncpp.r[1] / (atom->ncpp.r[2] - atom->ncpp.r[1])); // zws add, sunliang updated 2024-03-04
                                 if (rhoatm[0] < 1e-12)

diff --git a/source/source_estate/module_charge/charge_mixing_residual.cpp b/source/source_estate/module_charge/charge_mixing_residual.cpp
@@ -129,15 +129,13 @@ double Charge_Mixing::inner_product_recip_rho(std::complex<double>* rho1, std::c
     auto part_of_noncolin = [&]()
     {
         double sum = 0.0;
+        const int ig0 = this->rhopw->ig_gge0;
 #ifdef _OPENMP
 #pragma omp parallel for reduction(+ : sum)
 #endif
         for (int ig = 0; ig < this->rhopw->npw; ++ig)
         {
-			if (this->rhopw->gg[ig] < 1e-8) 
-			{
-				continue;
-			}
+			if (ig == ig0) {continue;}
 			sum += (conj(rhog1[0][ig]) * rhog2[0][ig]).real() / this->rhopw->gg[ig];
         }
         sum *= fac;
@@ -152,14 +150,13 @@ double Charge_Mixing::inner_product_recip_rho(std::complex<double>* rho1, std::c
 
     case 2: {
         // (1) First part of density error.
+        const int ig0 = this->rhopw->ig_gge0;
 #ifdef _OPENMP
 #pragma omp parallel for reduction(+ : sum)
 #endif
         for (int ig = 0; ig < this->rhopw->npw; ++ig)
         {
-            if (this->rhopw->gg[ig] < 1e-8) {
-                continue;
-}
+            if (ig == ig0) {continue;}
             sum += (conj(rhog1[0][ig] + rhog1[1][ig]) * (rhog2[0][ig] + rhog2[1][ig])).real() / this->rhopw->gg[ig];
         }
         sum *= fac;
@@ -203,19 +200,19 @@ double Charge_Mixing::inner_product_recip_rho(std::complex<double>* rho1, std::c
         } else
         {
             // another part with magnetization
+            const int ig0 = this->rhopw->ig_gge0;
 #ifdef _OPENMP
 #pragma omp parallel for reduction(+ : sum)
 #endif
             for (int ig = 0; ig < this->rhopw->npw; ig++)
             {
-				if (ig == this->rhopw->ig_gge0) 
+				if (ig == ig0) 
 				{
 					continue;
 				}
                 sum += (conj(rhog1[0][ig]) * rhog2[0][ig]).real() / this->rhopw->gg[ig];
             }
             sum *= fac;
-            const int ig0 = this->rhopw->ig_gge0;
             if (ig0 > 0)
             {
                 sum += fac2
@@ -298,14 +295,16 @@ double Charge_Mixing::inner_product_recip_hartree(std::complex<double>* rhog1, s
     auto part_of_rho = [&]()
     {
         double sum = 0.0;
+        const int ig0 = this->rhopw->ig_gge0;
 #ifdef _OPENMP
 #pragma omp parallel for reduction(+ : sum)
 #endif
         for (int ig = 0; ig < this->rhopw->npw; ++ig)
         {
-            if (this->rhopw->gg[ig] < 1e-8) {
+            if (ig == ig0) 
+            {
                 continue;
-}
+            }
             sum += (conj(rhog1[ig]) * rhog2[ig]).real() / this->rhopw->gg[ig];
         }
         sum *= fac;
@@ -319,14 +318,16 @@ double Charge_Mixing::inner_product_recip_hartree(std::complex<double>* rhog1, s
     else if (PARAM.inp.nspin==2)
     {
         // charge density part
+        const int ig0 = this->rhopw->ig_gge0;
 #ifdef _OPENMP
 #pragma omp parallel for reduction(+ : sum)
 #endif
         for (int ig = 0; ig < this->rhopw->npw; ++ig)
         {
-            if (this->rhopw->gg[ig] < 1e-8) {
+            if (ig == ig0) 
+            {
                 continue;
-}
+            }
             sum += (conj(rhog1[ig]) * (rhog2[ig])).real() / this->rhopw->gg[ig];
         }
         sum *= fac;
@@ -369,18 +370,16 @@ double Charge_Mixing::inner_product_recip_hartree(std::complex<double>* rhog1, s
         else if (this->mixing_angle <= 0)
         {
             // sum for tradtional mixing
+            const int ig0 = this->rhopw->ig_gge0;
 #ifdef _OPENMP
 #pragma omp parallel for reduction(+ : sum)
 #endif
             for (int ig = 0; ig < this->rhopw->npw; ig++)
             {
-                if (ig == this->rhopw->ig_gge0) {
-                    continue;
-}
+                if (ig == ig0) {continue;}
                 sum += (conj(rhog1[ig]) * rhog2[ig]).real() / this->rhopw->gg[ig];
             }
             sum *= fac;
-            const int ig0 = this->rhopw->ig_gge0;
             if (ig0 > 0)
             {
                 sum += fac2
@@ -408,18 +407,19 @@ double Charge_Mixing::inner_product_recip_hartree(std::complex<double>* rhog1, s
         else if (this->mixing_angle > 0)
         {
             // sum for angle mixing
+            const int ig0 = this->rhopw->ig_gge0;
 #ifdef _OPENMP
 #pragma omp parallel for reduction(+ : sum)
 #endif
             for (int ig = 0; ig < this->rhopw->npw; ig++)
             {
-                if (ig == this->rhopw->ig_gge0) {
+                if (ig == ig0) 
+                {
                     continue;
-}
+                }
                 sum += (conj(rhog1[ig]) * rhog2[ig]).real() / this->rhopw->gg[ig];
             }
             sum *= fac;
-            const int ig0 = this->rhopw->ig_gge0;
             if (ig0 > 0)
             {
                 sum += fac2

diff --git a/source/source_estate/module_pot/H_Hartree_pw.cpp b/source/source_estate/module_pot/H_Hartree_pw.cpp
@@ -44,18 +44,20 @@ ModuleBase::matrix H_Hartree_pw::v_hartree(const UnitCell &cell,
     double ehart = 0.0;
 
     std::vector<std::complex<double>> vh_g(rho_basis->npw);
+    const int ig0 = rho_basis->ig_gge0;
 #ifdef _OPENMP
 #pragma omp parallel for reduction(+:ehart)
 #endif
     for (int ig = 0; ig < rho_basis->npw; ig++)
     {
-        if (rho_basis->gg[ig] >= 1.0e-8) // LiuXh 20180410
+        if (ig == ig0) 
         {
-            const double fac = ModuleBase::e2 * ModuleBase::FOUR_PI / (cell.tpiba2 * rho_basis->gg[ig]);
-
-            ehart += (conj(Porter[ig]) * Porter[ig]).real() * fac;
-            vh_g[ig] = fac * Porter[ig];
+            continue; // skip G=0
         }
+        const double fac = ModuleBase::e2 * ModuleBase::FOUR_PI / (cell.tpiba2 * rho_basis->gg[ig]);
+        ehart += (conj(Porter[ig]) * Porter[ig]).real() * fac;
+        vh_g[ig] = fac * Porter[ig];
+
     }
 
     Parallel_Reduce::reduce_pool(ehart);

diff --git a/source/source_hamilt/module_ewald/H_Ewald_pw.cpp b/source/source_hamilt/module_ewald/H_Ewald_pw.cpp
@@ -124,10 +124,10 @@ double H_Ewald_pw::compute_ewald(const UnitCell& cell,
 	fact = 1.0;
 
     //GlobalV::ofs_running << "\n pwb.gstart = " << pwb.gstart << std::endl;
-
+    const int ig0 = rho_basis->ig_gge0;
     for (int ig = 0; ig < rho_basis->npw; ig++)
     {
-        if(ig == rho_basis->ig_gge0) 
+        if(ig == ig0) 
         { 
             continue;
         }

diff --git a/source/source_hamilt/module_surchem/cal_pseudo.cpp b/source/source_hamilt/module_surchem/cal_pseudo.cpp
@@ -10,6 +10,7 @@ void surchem::gauss_charge(const UnitCell& cell,
                            Structure_Factor* sf)
 {
     sf->setup_structure_factor(&cell, pgrid, rho_basis); // call strucFac(ntype,ngmc)
+    const int ig0 = rho_basis->ig_gge0; // G=0 index
     for (int it = 0; it < cell.ntype; it++)
     {
         double RCS = GetAtom.atom_RCS[cell.atoms[it].ncpp.psd];

diff --git a/source/source_hamilt/module_surchem/cal_totn.cpp b/source/source_hamilt/module_surchem/cal_totn.cpp
@@ -12,8 +12,9 @@ void surchem::cal_totn(const UnitCell& cell,
     ModuleBase::GlobalFunc::ZEROS(vloc_g, rho_basis->npw);
 
     rho_basis->real2recip(vlocal, vloc_g);  // now n is vloc in Recispace
+    const int ig0 = rho_basis->ig_gge0; // G=0 index
     for (int ig = 0; ig < rho_basis->npw; ig++) {
-        if(ig==rho_basis->ig_gge0)
+        if(ig==ig0)
         {
             N[ig] = Porter_g[ig];
             continue;
@@ -39,17 +40,15 @@ void surchem::induced_charge(const UnitCell& cell, const ModulePW::PW_Basis* rho
     std::complex<double> *induced_rhog = new std::complex<double>[rho_basis->npw];
     ModuleBase::GlobalFunc::ZEROS(induced_rhog, rho_basis->npw);
     rho_basis->real2recip(delta_phi, delta_phig);
+    const int ig0 = rho_basis->ig_gge0; // G=0 index
     for (int ig = 0; ig < rho_basis->npw; ig++)
     {   
-        if(rho_basis->ig_gge0 == ig)
+        if(ig==ig0)
         {
             continue;
         }
-        else
-        {
-            const double fac = ModuleBase::e2 * ModuleBase::FOUR_PI /(cell.tpiba2 * rho_basis->gg[ig]);
-            induced_rhog[ig] = -delta_phig[ig] / fac;
-        }
+        const double fac = ModuleBase::e2 * ModuleBase::FOUR_PI /(cell.tpiba2 * rho_basis->gg[ig]);
+        induced_rhog[ig] = -delta_phig[ig] / fac;
     }
 
     rho_basis->recip2real(induced_rhog, induced_rho);