diff --git a/source/driver_run.cpp b/source/driver_run.cpp
index 9955503d90..95066d0fa7 100644
--- a/source/driver_run.cpp
+++ b/source/driver_run.cpp
@@ -46,7 +46,7 @@ void Driver::driver_run()
                 PARAM.inp.fixed_axes);
 
     ucell.setup_cell(PARAM.globalv.global_in_stru, GlobalV::ofs_running);
-    Check_Atomic_Stru::check_atomic_stru(ucell, PARAM.inp.min_dist_coef);
+    unitcell::check_atomic_stru(ucell, PARAM.inp.min_dist_coef);
 
     //! 2: initialize the ESolver (depends on a set-up ucell after `setup_cell`)
     ModuleESolver::ESolver* p_esolver = ModuleESolver::init_esolver(PARAM.inp, ucell);
diff --git a/source/module_base/mathzone_add1.cpp b/source/module_base/mathzone_add1.cpp
index 0ec48e29b7..770226263a 100644
--- a/source/module_base/mathzone_add1.cpp
+++ b/source/module_base/mathzone_add1.cpp
@@ -138,7 +138,7 @@ void Mathzone_Add1::Cubic_Spline_Interpolation
 	double * const dy
 )
 {	
-	ModuleBase::timer::tick("Mathzone_Add1","Cubic_Spline_Interpolation");
+	ModuleBase::timer::tick("Mathzone","cubic_spline");
 
 #ifdef _OPENMP
 	#pragma omp parallel for schedule(static)
@@ -175,7 +175,7 @@ void Mathzone_Add1::Cubic_Spline_Interpolation
 		//ddy[m] = ddy_tmp;
 	}
 
-	ModuleBase::timer::tick("Mathzone_Add1","Cubic_Spline_Interpolation");
+	ModuleBase::timer::tick("Mathzone","cubic_spline");
 }
 
 /// Interpolation for Numerical Orbitals
diff --git a/source/module_base/module_mixing/broyden_mixing.cpp b/source/module_base/module_mixing/broyden_mixing.cpp
index 3b0f53d19d..0d769ede0e 100644
--- a/source/module_base/module_mixing/broyden_mixing.cpp
+++ b/source/module_base/module_mixing/broyden_mixing.cpp
@@ -104,15 +104,20 @@ template void Broyden_Mixing::tem_cal_coef(
 template <class FPTYPE>
 void Broyden_Mixing::tem_cal_coef(const Mixing_Data& mdata, std::function<double(FPTYPE*, FPTYPE*)> inner_product)
 {
-    ModuleBase::TITLE("Charge_Mixing", "Simplified_Broyden_mixing");
-    ModuleBase::timer::tick("Charge", "Broyden_mixing");
-    if (address != &mdata && address != nullptr)
-        ModuleBase::WARNING_QUIT(
-            "Broyden_mixing",
-            "One Broyden_Mixing object can only bind one Mixing_Data object to calculate coefficients");
-    const int length = mdata.length;
+    ModuleBase::TITLE("Broyden_Mixing", "Simplified_Broyden_mixing");
+    ModuleBase::timer::tick("Broyden_Mixing", "tem_cal_coef");
+
+	if (address != &mdata && address != nullptr)
+	{
+		ModuleBase::WARNING_QUIT(
+				"Broyden_mixing",
+				"One Broyden_Mixing object can only bind one Mixing_Data object to calculate coefficients");
+	}
+
+	const int length = mdata.length;
     FPTYPE* FP_dF = static_cast<FPTYPE*>(dF);
     FPTYPE* FP_F = static_cast<FPTYPE*>(F);
+
     if (ndim_cal_dF > 0)
     {
         ModuleBase::matrix beta_tmp(ndim_cal_dF, ndim_cal_dF);
@@ -140,7 +145,7 @@ void Broyden_Mixing::tem_cal_coef(const Mixing_Data& mdata, std::function<double
         double* work = new double[ndim_cal_dF];   // workspace
         int* iwork = new int[ndim_cal_dF];   // ipiv
         char uu = 'U';
-        int info;
+        int info = 0;
         int m = 1;
         // gamma means the coeficients for mixing
         // but now gamma store <dFi|Fm>, namely c
@@ -150,10 +155,25 @@ void Broyden_Mixing::tem_cal_coef(const Mixing_Data& mdata, std::function<double
             FPTYPE* dFi = FP_dF + i * length;
             gamma[i] = inner_product(dFi, FP_F);
         }
-        // solve aG = c 
-        dsysv_(&uu, &ndim_cal_dF, &m, beta_tmp.c, &ndim_cal_dF, iwork, gamma.data(), &ndim_cal_dF, work, &ndim_cal_dF, &info);
-        if (info != 0)
-            ModuleBase::WARNING_QUIT("Charge_Mixing", "Error when DSYSV.");
+
+		// solve aG = c 
+		dsysv_(&uu, 
+				&ndim_cal_dF, 
+				&m, 
+				beta_tmp.c, 
+				&ndim_cal_dF, 
+				iwork, 
+				gamma.data(), 
+				&ndim_cal_dF, 
+				work, 
+				&ndim_cal_dF, 
+				&info);
+
+		if (info != 0)
+		{
+			ModuleBase::WARNING_QUIT("Charge_Mixing", "Error when DSYSV.");
+		}
+
         // after solving, gamma store the coeficients for mixing
         coef[mdata.start] = 1 + gamma[dFindex_move(0)];
         for (int i = 1; i < ndim_cal_dF; ++i)
@@ -178,6 +198,6 @@ void Broyden_Mixing::tem_cal_coef(const Mixing_Data& mdata, std::function<double
     {
         dFnext[i] = FP_F[i];
     }
-    ModuleBase::timer::tick("Charge", "Broyden_mixing");
+    ModuleBase::timer::tick("Broyden_Mixing", "tem_cal_coef");
 };
 } // namespace Base_Mixing
diff --git a/source/module_base/timer.cpp b/source/module_base/timer.cpp
index 25e676c6e1..824f923557 100644
--- a/source/module_base/timer.cpp
+++ b/source/module_base/timer.cpp
@@ -1,8 +1,3 @@
-//==========================================================
-// AUTHOR : fangwei , mohan
-// DATE : 2008-11-06,
-// UPDATE : Peize Lin at 2019-11-21
-//==========================================================
 #include "timer.h"
 
 #include <cmath>
@@ -53,19 +48,10 @@ double timer::cpu_time()
 // only first call can let t0 = 0,clock begin
 // when enter this function second time , t0 > 0
 //----------------------------------------------------------
-	// static clock_t t0 = clock();
-	// const clock_t t1 = clock() - t0;
-	// return (t1<0) ? 0 : (double)t1/CLOCKS_PER_SEC;
-
-	// static time_t t0 = time(NULL);
-	// const time_t t1 = time(NULL);
-	// double res = difftime(t1, t0);
-	// return (res<0) ? 0 : res;
 	static auto t1 = std::chrono::system_clock::now();
 	const auto t2 = std::chrono::system_clock::now();
 	auto duration = std::chrono::duration_cast<std::chrono::microseconds>(t2 - t1);
 	return double(duration.count()) * std::chrono::microseconds::period::num / std::chrono::microseconds::period::den;
-		// mohan add, abandon the cross point time 2^32 ~ -2^32 .
 }
 
 void timer::tick(const std::string &class_name,const std::string &name)
@@ -118,13 +104,6 @@ void timer::tick(const std::string &class_name,const std::string &name)
 				timer_one.cpu_second += MPI_Wtime() - timer_one.cpu_start;
 			}
 #else
-			// if(class_name=="electrons"&&name=="c_bands")
-			// {
-			// 	cout<<"call times"<<timer_one.calls<<endl;
-			// 	cout<<"electrons c_bands cost time:"<<endl;
-			// 	cout<<cpu_time()<<"-"<<timer_one.cpu_start<<endl;
-			// }
-
 			timer_one.cpu_second += (cpu_time() - timer_one.cpu_start);
 #endif
 			timer_one.start_flag = true;
@@ -201,7 +180,9 @@ void timer::write_to_json(std::string file_name)
 		order_a ++;
 		// if calss_name == "", it means total time, so we skip it
 		if(timer_pool_A.first == "")
+		{
 			continue;
+		}
 		int order_b = 0;
 		const std::string class_name = timer_pool_A.first;
 		ofs << indent << indent << "{\n";
@@ -214,20 +195,32 @@ void timer::write_to_json(std::string file_name)
 			const Timer_One timer_one = timer_pool_B.second;
 			ofs << indent << indent << indent << indent << "{\n";
 			ofs << indent << indent << indent << indent << "\"name\": \"" << name << "\",\n";
-			ofs << indent << indent << indent << indent << "\"cpu_second\": " << std::setprecision(15) << timer_one.cpu_second << ",\n";
+			ofs << indent << indent << indent << indent << "\"cpu_second\": " 
+				<< std::setprecision(15) << timer_one.cpu_second << ",\n";
 			ofs << indent << indent << indent << indent << "\"calls\": " << timer_one.calls << ",\n";
-			ofs << indent << indent << indent << indent << "\"cpu_second_per_call\": " << double_to_string(timer_one.cpu_second/timer_one.calls) << ",\n";
-			ofs << indent << indent << indent << indent << "\"cpu_second_per_total\": " << double_to_string(timer_one.cpu_second/timer_pool[""]["total"].cpu_second) << "\n";
+			ofs << indent << indent << indent << indent << "\"cpu_second_per_call\": " 
+				<< double_to_string(timer_one.cpu_second/timer_one.calls) << ",\n";
+			ofs << indent << indent << indent << indent << "\"cpu_second_per_total\": " 
+				<< double_to_string(timer_one.cpu_second/timer_pool[""]["total"].cpu_second) << "\n";
+
 			if (order_b == timer_pool_A.second.size())
+			{
 				ofs << indent << indent << indent << indent << "}\n";
+			}
 			else
+			{
 				ofs << indent << indent << indent << indent << "},\n";
+			}
 		}
 		ofs << indent << indent << indent << "]\n";
 		if (order_a == timer_pool.size())
+		{
 			ofs << indent << indent << "}\n";
+		}
 		else
+		{
 			ofs << indent << indent << "},\n";
+		}
 	}
 	ofs << indent << "]\n";
 	ofs << "}\n";
@@ -248,9 +241,12 @@ void timer::print_all(std::ofstream &ofs)
 			const std::string name = timer_pool_B.first;
 			const Timer_One timer_one = timer_pool_B.second;
 			if(timer_pool_order.size() < timer_one.order+1)
+			{
 				timer_pool_order.resize(timer_one.order+1);
+			}
 			//timer_pool_order[timer_one.order] = {{class_name, name}, timer_one}; //qianrui change it to make it compatible with old compiler version
-			timer_pool_order[timer_one.order] = std::pair<std::pair<std::string,std::string>, Timer_One> {std::pair<std::string,std::string >{class_name,name}, timer_one};
+			timer_pool_order[timer_one.order] = std::pair<std::pair<std::string,std::string>, Timer_One> {
+            std::pair<std::string,std::string >{class_name,name}, timer_one};
 		}
 	}
 	std::vector<std::string> class_names;
@@ -266,18 +262,34 @@ void timer::print_all(std::ofstream &ofs)
 		const Timer_One &timer_one = timer_pool_order_A.second;
 
 		if(timer_one.cpu_second < 0)
+		{
 			continue;
+		}
+
+		// only print out timers that are larger than 1%
+		// mohan add 2025-03-09
+		const double percentage_thr = 1.0;
+        const double percentage = timer_one.cpu_second / timer_pool_order[0].second.cpu_second * 100;
+		if(percentage<percentage_thr)
+		{
+			continue;
+		}
+
 		class_names.push_back(class_name);
 		names.push_back(name);
 		times.push_back(timer_one.cpu_second);
 		calls.push_back(timer_one.calls);
 		avgs.push_back(timer_one.cpu_second/timer_one.calls);
 
+
 		// if the total time is too small, we do not calculate the percentage
-		if (timer_pool_order[0].second.cpu_second < 1e-9) {
-    		pers.push_back(0);
-		} else {
-    		pers.push_back(timer_one.cpu_second / timer_pool_order[0].second.cpu_second * 100);
+		if (timer_pool_order[0].second.cpu_second < 1e-9) 
+		{
+			pers.push_back(0);
+		} 
+		else 
+		{
+			pers.push_back(percentage);
 		}
 	}
 	assert(class_names.size() == names.size());
@@ -285,6 +297,7 @@ void timer::print_all(std::ofstream &ofs)
 	assert(class_names.size() == calls.size());
 	assert(class_names.size() == avgs.size());
 	assert(class_names.size() == pers.size());
+
 	std::vector<std::string> titles = {"CLASS_NAME", "NAME", "TIME/s", "CALLS", "AVG/s", "PER/%"};
 	std::vector<std::string> formats = {"%-10s", "%-10s", "%6.2f", "%8d", "%6.2f", "%6.2f"};
 	FmtTable time_statistics(titles, pers.size(), formats, {FmtTable::Align::LEFT, FmtTable::Align::CENTER});
@@ -293,107 +306,6 @@ void timer::print_all(std::ofstream &ofs)
 	std::cout<<table<<std::endl;
 	ofs<<table<<std::endl;
 	write_to_json("time.json");
-}
-}
 
-/*
-void timer::print_all(std::ofstream &ofs)
-{
-//	std::cout<<"\n timer::print_all()"<<std::endl;
-	const double small = 0.1; // cpu = 10^6
-	// if want to print > 1s , set small = 10^6
-
-	std::cout << std::setprecision(2);
-
-	// prepare
-	bool *print_flag = new bool[n_clock];
-	for(int i=0; i<n_clock; i++)
-	{
-		print_flag[i] = false;
-	}
-
-	int type = 1; // 2:calls 1:total_time
-	bool non_reorder = 1;
-
-	std::cout<<"\n  |CLASS_NAME---------|NAME---------------|TIME(Sec)-----|CALLS----|AVG------|PER%-------" << std::endl;
-	ofs <<"\n\n\n\n  |CLASS_NAME---------|NAME---------------|TIME(Sec)-----|CALLS----|AVG------|PER%-------" << std::endl;
-	ofs << std::setprecision(3);
-	for (int i=0; i<n_clock; i++)
-	{
-		int k = 0;
-		double tmp = -1.0;
-
-		if(non_reorder)
-		{
-			k = i;
-		}
-		else
-		{
-			// search in all clocks
-			for(int j=0; j<n_clock; j++)
-			{
-				if(print_flag[j])
-				{
-					continue;
-				}
-				if(type==1)
-				{
-					if(tmp < cpu_second[j])
-					{
-						k = j;
-						tmp = cpu_second[j];
-					}
-				}
-				else if(type==2)
-				{
-					if(tmp < calls[j])
-					{
-						k = j;
-						tmp = calls[j];
-					}
-				}
-			}
-		}
-		print_flag[k]=true;
-
-		if ((cpu_second[k] >= 0 && cpu_second[k] < small) ||
-		        (cpu_second[k] <= 0 && cpu_second[k] > -small))
-		{
-			continue;
-		}
-
-		if( level[k] > 'X' ) continue;
-
-
-		const long double spend_time = cpu_second[k];
-		const double average_spend_time = spend_time/calls[k];
-
-
-		ofs  << " "
-			 << std::setw(2) << level[k]
-			 << std::setw(20) << class_name[k]
-			 << std::setw(20) << name[k]
-			 << std::setw(15) << spend_time
-			 << std::setw(10) << calls[k]
-			 << std::setw(10) << std::setprecision(2) << average_spend_time
-			 << std::setw(10) << spend_time / cpu_second[0] * 100 << "%" << std::endl;
-
-
-		std::cout << std::resetiosflags(ios::scientific);
-
-		std::cout  << " "
-		     << std::setw(2) << level[k]
-			 << std::setw(20) << class_name[k]
-			 << std::setw(20) << name[k]
-			 << std::setw(15) << spend_time
-			 << std::setw(10) << calls[k]
-			 << std::setw(10) << std::setprecision(2) << average_spend_time
-			 << std::setw(10) << spend_time / cpu_second[0] * 100 << "%" << std::endl;
-
-	}
-	std::cout<<" ----------------------------------------------------------------------------------------"<<std::endl;
-	ofs <<" ----------------------------------------------------------------------------------------"<<std::endl;
-	delete[] print_flag;
-	return;
 }
-*/
+}
diff --git a/source/module_cell/check_atomic_stru.cpp b/source/module_cell/check_atomic_stru.cpp
index acdc30076f..b22cfe985d 100644
--- a/source/module_cell/check_atomic_stru.cpp
+++ b/source/module_cell/check_atomic_stru.cpp
@@ -3,8 +3,12 @@
 #include "module_base/element_covalent_radius.h"
 #include "module_base/timer.h"
 
-void Check_Atomic_Stru::check_atomic_stru(UnitCell& ucell, const double& factor)
+namespace unitcell
 {
+
+void check_atomic_stru(UnitCell& ucell, const double& factor)
+{
+    ModuleBase::timer::tick("unitcell", "check_atomic_stru");
     // First we calculate all bond length in the structure,
     // and compare with the covalent_bond_length,
     // if there has bond length is shorter than covalent_bond_length * factor,
@@ -17,7 +21,6 @@ void Check_Atomic_Stru::check_atomic_stru(UnitCell& ucell, const double& factor)
 
     if (GlobalV::MY_RANK == 0)
     {
-        ModuleBase::timer::tick("Check_Atomic_Stru", "Check_Atomic_Stru");
         
         const int ntype = ucell.ntype;
         const double lat0 = ucell.lat0;
@@ -138,7 +141,6 @@ void Check_Atomic_Stru::check_atomic_stru(UnitCell& ucell, const double& factor)
                 } // it2
             } // iat
         }
-        ModuleBase::timer::tick("Check_Atomic_Stru", "Check_Atomic_Stru");
     }
     if (!all_pass || !no_warning)
     {
@@ -165,4 +167,8 @@ void Check_Atomic_Stru::check_atomic_stru(UnitCell& ucell, const double& factor)
             ModuleBase::WARNING_QUIT("Input", "The structure is unreasonable!");
         }
     }
+
+    ModuleBase::timer::tick("unitcell", "check_atomic_stru");
+}
+
 }
diff --git a/source/module_cell/check_atomic_stru.h b/source/module_cell/check_atomic_stru.h
index 03e69918d4..15cc218f83 100644
--- a/source/module_cell/check_atomic_stru.h
+++ b/source/module_cell/check_atomic_stru.h
@@ -3,9 +3,9 @@
 
 #include "unitcell.h"
 
-class Check_Atomic_Stru {
-  public:
-    static void check_atomic_stru(UnitCell& ucell, const double& factor);
+namespace unitcell
+{
+    void check_atomic_stru(UnitCell& ucell, const double& factor);
 };
 
-#endif
\ No newline at end of file
+#endif
diff --git a/source/module_cell/module_neighbor/sltk_grid.cpp b/source/module_cell/module_neighbor/sltk_grid.cpp
index 6fd6575c23..4a409940fe 100644
--- a/source/module_cell/module_neighbor/sltk_grid.cpp
+++ b/source/module_cell/module_neighbor/sltk_grid.cpp
@@ -266,7 +266,7 @@ void Grid::setMemberVariables(std::ofstream& ofs_in, //  output data to ofs
 
 void Grid::Construct_Adjacent(const UnitCell& ucell)
 {
-    ModuleBase::timer::tick("Grid", "Construct_Adjacent_expand");
+    ModuleBase::timer::tick("Grid", "constru_adj");
 
     for  (int i_type = 0; i_type < ucell.ntype; i_type++)
     {
@@ -283,23 +283,17 @@ void Grid::Construct_Adjacent(const UnitCell& ucell)
             this->Construct_Adjacent_near_box(atom);
         }
     }
-    ModuleBase::timer::tick("Grid", "Construct_Adjacent_expand");
+    ModuleBase::timer::tick("Grid", "constru_adj");
 }
 
 void Grid::Construct_Adjacent_near_box(const FAtom& fatom)
 {
-    //	if (test_grid)ModuleBase::TITLE(ofs_running, "Grid", "Construct_Adjacent_expand_periodic");
-    ModuleBase::timer::tick("Grid", "Construct_Adjacent_expand_periodic");
-    int box_i_x, box_i_y, box_i_z;
+    ModuleBase::timer::tick("Grid", "adj_near_box");
+    int box_i_x=0;
+    int box_i_y=0;
+    int box_i_z=0;
     this->getBox(box_i_x, box_i_y, box_i_z, fatom.x, fatom.y, fatom.z);
 
-/*     for (int box_i_x_adj = std::max(box_i_x - 1, 0); box_i_x_adj <= std::min(box_i_x + 1, box_nx - 1); box_i_x_adj++)
-    {
-        for (int box_i_y_adj = std::max(box_i_y - 1, 0); box_i_y_adj <= std::min(box_i_y + 1, box_ny - 1); box_i_y_adj++)
-        {
-            for (int box_i_z_adj = std::max(box_i_z - 1, 0); box_i_z_adj <= std::min(box_i_z + 1, box_nz - 1); box_i_z_adj++)
-            {
- */             
     for (int box_i_x_adj = 0; box_i_x_adj < glayerX + glayerX_minus; box_i_x_adj++)
     {
         for (int box_i_y_adj = 0; box_i_y_adj < glayerY + glayerY_minus; box_i_y_adj++)
@@ -313,7 +307,7 @@ void Grid::Construct_Adjacent_near_box(const FAtom& fatom)
             }
         }
     }
-    ModuleBase::timer::tick("Grid", "Construct_Adjacent_expand_periodic");
+    ModuleBase::timer::tick("Grid", "adj_near_box");
 }
 
 void Grid::Construct_Adjacent_final(const FAtom& fatom1,
diff --git a/source/module_cell/module_symmetry/symmetry.cpp b/source/module_cell/module_symmetry/symmetry.cpp
index 7d892725eb..e9b9624b22 100644
--- a/source/module_cell/module_symmetry/symmetry.cpp
+++ b/source/module_cell/module_symmetry/symmetry.cpp
@@ -1566,7 +1566,7 @@ void Symmetry::rhog_symmetry(std::complex<double> *rhogtot,
     by using int* symflag.
 */ 
 // -------------------------------------------
-ModuleBase::timer::tick("Symmetry","group fft grids");
+    ModuleBase::timer::tick("Symmetry","group_fft_grids");
     for (int i = 0; i< fftnx; ++i)
     {
         //tmp variable
@@ -1622,7 +1622,7 @@ ModuleBase::timer::tick("Symmetry","group fft grids");
             }
         }
     }
-ModuleBase::timer::tick("Symmetry","group fft grids");
+    ModuleBase::timer::tick("Symmetry","group_fft_grids");
 
 // -------------------------------------------
 /*  This code performs symmetry operations on the reciprocal space 
diff --git a/source/module_cell/test/unitcell_test_readpp.cpp b/source/module_cell/test/unitcell_test_readpp.cpp
index 0f25123799..efd8a34336 100644
--- a/source/module_cell/test/unitcell_test_readpp.cpp
+++ b/source/module_cell/test/unitcell_test_readpp.cpp
@@ -346,14 +346,14 @@ TEST_F(UcellDeathTest, CheckStructure) {
     testing::internal::CaptureStdout();
     double factor = 0.2;
     ucell->set_iat2itia();
-    EXPECT_NO_THROW(Check_Atomic_Stru::check_atomic_stru(*ucell, factor));
+    EXPECT_NO_THROW(unitcell::check_atomic_stru(*ucell, factor));
     output = testing::internal::GetCapturedStdout();
     EXPECT_THAT(output,testing::HasSubstr("WARNING: Some atoms are too close!!!"));
     // trial 2
     GlobalV::ofs_running.open("CheckStructure2.txt");
     ::testing::FLAGS_gtest_death_test_style = "threadsafe";
     factor = 0.4;
-    EXPECT_EXIT(Check_Atomic_Stru::check_atomic_stru(*ucell, factor),
+    EXPECT_EXIT(unitcell::check_atomic_stru(*ucell, factor),
                 ::testing::ExitedWithCode(1),
                 "");
     std::ifstream ifs("CheckStructure2.txt");
@@ -370,7 +370,7 @@ TEST_F(UcellDeathTest, CheckStructure) {
     ucell->atoms[0].label = "arbitrary";
     testing::internal::CaptureStdout();
     factor = 0.2;
-    EXPECT_NO_THROW(Check_Atomic_Stru::check_atomic_stru(*ucell, factor));
+    EXPECT_NO_THROW(unitcell::check_atomic_stru(*ucell, factor));
     output = testing::internal::GetCapturedStdout();
     EXPECT_THAT(output,testing::HasSubstr("Notice: symbol 'arbitrary' is not an element "
                            "symbol!!!! set the covalent radius to be 0."));
@@ -378,7 +378,7 @@ TEST_F(UcellDeathTest, CheckStructure) {
     ucell->atoms[0].label = "Fe1";
     testing::internal::CaptureStdout();
     factor = 0.2;
-    EXPECT_NO_THROW(Check_Atomic_Stru::check_atomic_stru(*ucell, factor));
+    EXPECT_NO_THROW(unitcell::check_atomic_stru(*ucell, factor));
     output = testing::internal::GetCapturedStdout();
     EXPECT_THAT(output,testing::HasSubstr("WARNING: Some atoms are too close!!!"));
 }
diff --git a/source/module_elecstate/module_charge/charge.cpp b/source/module_elecstate/module_charge/charge.cpp
index dfd9b4d633..b0d04e9226 100644
--- a/source/module_elecstate/module_charge/charge.cpp
+++ b/source/module_elecstate/module_charge/charge.cpp
@@ -282,8 +282,10 @@ void Charge::atomic_rho(const int spin_number_need,
         double ne_tot = 0.0;
         std::vector<double> ne(spin_number_need);
         int spin0 = 1;
-        if (spin_number_need == 2) { spin0 = spin_number_need;
-}
+		if (spin_number_need == 2) 
+		{ 
+			spin0 = spin_number_need;
+		}
 
         for (int is = 0; is < spin0; ++is)
         {
@@ -301,27 +303,32 @@ void Charge::atomic_rho(const int spin_number_need,
         }
         ModuleBase::GlobalFunc::OUT(GlobalV::ofs_warning, "total electron number from rho", ne_tot);
         ModuleBase::GlobalFunc::OUT(GlobalV::ofs_warning, "should be", PARAM.inp.nelec);
-        for (int is = 0; is < spin_number_need; ++is) {
-            for (int ir = 0; ir < this->rhopw->nrxx; ++ir) {
-                rho_in[is][ir] = rho_in[is][ir] / ne_tot * PARAM.inp.nelec;
-}
-}
+		for (int is = 0; is < spin_number_need; ++is) 
+		{
+			for (int ir = 0; ir < this->rhopw->nrxx; ++ir) 
+			{
+				rho_in[is][ir] = rho_in[is][ir] / ne_tot * PARAM.inp.nelec;
+			}
+		}
         
-        double* nhatgr;
+        double* nhatgr=nullptr;
         GlobalC::paw_cell.get_nhat(nhat,nhatgr);
 
-        for (int is = 0; is < spin_number_need; ++is) {
-            for (int ir = 0; ir < this->rhopw->nrxx; ++ir) {
-                rho_in[is][ir] -= nhat[is][ir];
-}
-}
+		for (int is = 0; is < spin_number_need; ++is) 
+		{
+			for (int ir = 0; ir < this->rhopw->nrxx; ++ir) 
+			{
+				rho_in[is][ir] -= nhat[is][ir];
+			}
+		}
 #endif
     }
     else
     {
-        ModuleBase::ComplexMatrix rho_g3d = [&]() -> ModuleBase::ComplexMatrix {
-            // use interpolation to get three dimension charge density.
-            ModuleBase::ComplexMatrix rho_g3d(spin_number_need, this->rhopw->npw);
+		ModuleBase::ComplexMatrix rho_g3d = [&]() -> ModuleBase::ComplexMatrix 
+		{
+			// use interpolation to get three dimension charge density.
+			ModuleBase::ComplexMatrix rho_g3d(spin_number_need, this->rhopw->npw);
 
             for (int it = 0; it < ucell.ntype; it++)
             {
@@ -469,8 +476,10 @@ void Charge::atomic_rho(const int spin_number_need,
     #ifdef _OPENMP
     #pragma omp for
     #endif
-                            for (int igg = 0; igg < this->rhopw->ngg; igg++)
-                                rho_lgl[igg] /= omega;
+							for (int igg = 0; igg < this->rhopw->ngg; igg++)
+							{
+								rho_lgl[igg] /= omega;
+							}
     #ifdef _OPENMP
                         }
     #endif
@@ -632,10 +641,12 @@ void Charge::atomic_rho(const int spin_number_need,
         {
             this->rhopw->recip2real(&rho_g3d(is, 0), rho_in[is]);
 
-            for (int ir = 0; ir < this->rhopw->nrxx; ++ir) {
-                ne[is] += rho_in[is][ir];
-}
-            ne[is] *= omega / (double)this->rhopw->nxyz;
+			for (int ir = 0; ir < this->rhopw->nrxx; ++ir) 
+			{
+				ne[is] += rho_in[is][ir];
+			}
+
+			ne[is] *= omega / (double)this->rhopw->nxyz;
     #ifdef __MPI
             Parallel_Reduce::reduce_pool(ne[is]);
     #endif
@@ -668,16 +679,14 @@ void Charge::atomic_rho(const int spin_number_need,
                 GlobalV::ofs_warning << " neg = " << neg << " ima = " << ima << " SPIN = " << is << std::endl;
             }
 
-            //		std::cout << " sum rho for spin " << is << " = " << sumrea << std::endl;
-            //		std::cout << " sum rho for spin " << is << " = " << sumrea << std::endl;
-
         } // end is
 
         double ne_tot = 0.0;
         int spin0 = 1;
-        if (spin_number_need == 2) {
-            spin0 = spin_number_need;
-}
+		if (spin_number_need == 2) 
+		{
+			spin0 = spin_number_need;
+		}
         for (int is = 0; is < spin0; ++is)
         {
             GlobalV::ofs_warning << "\n SETUP ATOMIC RHO FOR SPIN " << is + 1 << std::endl;
@@ -686,11 +695,14 @@ void Charge::atomic_rho(const int spin_number_need,
         }
         ModuleBase::GlobalFunc::OUT(GlobalV::ofs_warning, "total electron number from rho", ne_tot);
         ModuleBase::GlobalFunc::OUT(GlobalV::ofs_warning, "should be", PARAM.inp.nelec);
-        for (int is = 0; is < spin_number_need; ++is) {
-            for (int ir = 0; ir < this->rhopw->nrxx; ++ir) {
-                rho_in[is][ir] = rho_in[is][ir] / ne_tot * PARAM.inp.nelec;
-}
-}
+
+        for (int is = 0; is < spin_number_need; ++is) 
+		{
+			for (int ir = 0; ir < this->rhopw->nrxx; ++ir) 
+			{
+				rho_in[is][ir] = rho_in[is][ir] / ne_tot * PARAM.inp.nelec;
+			}
+		}
     }
 
     ModuleBase::timer::tick("Charge", "atomic_rho");
@@ -707,9 +719,10 @@ void Charge::save_rho_before_sum_band()
             ModuleBase::GlobalFunc::DCOPY(kin_r[is], kin_r_save[is], this->rhopw->nrxx);
         }
 #ifdef USE_PAW
-        if(PARAM.inp.use_paw) {
-            ModuleBase::GlobalFunc::DCOPY(nhat[is], nhat_save[is], this->rhopw->nrxx);
-}
+		if(PARAM.inp.use_paw) 
+		{
+			ModuleBase::GlobalFunc::DCOPY(nhat[is], nhat_save[is], this->rhopw->nrxx);
+		}
 #endif
     }
     return;
@@ -810,4 +823,4 @@ void Charge::init_final_scf()
 
     this->allocate_rho_final_scf = true;
     return;
-}
\ No newline at end of file
+}
diff --git a/source/module_elecstate/module_charge/charge.h b/source/module_elecstate/module_charge/charge.h
index 3d18ee7134..1a2c874fd7 100644
--- a/source/module_elecstate/module_charge/charge.h
+++ b/source/module_elecstate/module_charge/charge.h
@@ -46,10 +46,14 @@ class Charge
                                    // wenfei 2021-07-28
     const Parallel_Grid* pgrid = nullptr;
   private:
+
     //temporary
-    double *_space_rho = nullptr, *_space_rho_save = nullptr;
-    std::complex<double> *_space_rhog = nullptr, *_space_rhog_save = nullptr;
-    double *_space_kin_r = nullptr, *_space_kin_r_save = nullptr;
+    double *_space_rho = nullptr; 
+    double *_space_rho_save = nullptr;
+    std::complex<double> *_space_rhog = nullptr;
+    std::complex<double> *_space_rhog_save = nullptr;
+    double *_space_kin_r = nullptr;
+    double *_space_kin_r_save = nullptr;
 
   public:
     double **nhat = nullptr; //compensation charge for PAW
diff --git a/source/module_elecstate/module_charge/charge_mixing_residual.cpp b/source/module_elecstate/module_charge/charge_mixing_residual.cpp
index 5d97dc0fce..b195f1c790 100644
--- a/source/module_elecstate/module_charge/charge_mixing_residual.cpp
+++ b/source/module_elecstate/module_charge/charge_mixing_residual.cpp
@@ -110,8 +110,8 @@ double Charge_Mixing::get_dkin(Charge* chr, const double nelec)
 
 double Charge_Mixing::inner_product_recip_rho(std::complex<double>* rho1, std::complex<double>* rho2)
 {
-    ModuleBase::TITLE("Charge_Mixing", "inner_product_recip_rho");
-    ModuleBase::timer::tick("Charge_Mixing", "inner_product_recip_rho");
+    ModuleBase::TITLE("Charge_Mixing", "recip_rho");
+    ModuleBase::timer::tick("Charge_Mixing", "recip_rho");
 
     std::complex<double>** rhog1 = new std::complex<double>*[PARAM.inp.nspin];
     std::complex<double>** rhog2 = new std::complex<double>*[PARAM.inp.nspin];
@@ -134,10 +134,11 @@ double Charge_Mixing::inner_product_recip_rho(std::complex<double>* rho1, std::c
 #endif
         for (int ig = 0; ig < this->rhopw->npw; ++ig)
         {
-            if (this->rhopw->gg[ig] < 1e-8) {
-                continue;
-}
-            sum += (conj(rhog1[0][ig]) * rhog2[0][ig]).real() / this->rhopw->gg[ig];
+			if (this->rhopw->gg[ig] < 1e-8) 
+			{
+				continue;
+			}
+			sum += (conj(rhog1[0][ig]) * rhog2[0][ig]).real() / this->rhopw->gg[ig];
         }
         sum *= fac;
         return sum;
@@ -207,9 +208,10 @@ double Charge_Mixing::inner_product_recip_rho(std::complex<double>* rho1, std::c
 #endif
             for (int ig = 0; ig < this->rhopw->npw; ig++)
             {
-                if (ig == this->rhopw->ig_gge0) {
-                    continue;
-}
+				if (ig == this->rhopw->ig_gge0) 
+				{
+					continue;
+				}
                 sum += (conj(rhog1[0][ig]) * rhog2[0][ig]).real() / this->rhopw->gg[ig];
             }
             sum *= fac;
@@ -243,20 +245,21 @@ double Charge_Mixing::inner_product_recip_rho(std::complex<double>* rho1, std::c
 #ifdef __MPI
     Parallel_Reduce::reduce_pool(sum);
 #endif
-    ModuleBase::timer::tick("Charge_Mixing", "inner_product_recip_rho");
 
     sum *= *this->omega * 0.5;
 
     delete[] rhog1;
     delete[] rhog2;
+
+    ModuleBase::timer::tick("Charge_Mixing", "recip_rho");
     return sum;
 }
 
 // a simple inner product, now is not used anywhere. For test only.
 double Charge_Mixing::inner_product_recip_simple(std::complex<double>* rho1, std::complex<double>* rho2)
 {
-    ModuleBase::TITLE("Charge_Mixing", "inner_product_recip_simple");
-    ModuleBase::timer::tick("Charge_Mixing", "inner_product_recip_simple");
+    ModuleBase::TITLE("Charge_Mixing", "recip_simple");
+    ModuleBase::timer::tick("Charge_Mixing", "recip_simple");
 
     double rnorm = 0.0;
     // consider a resize for mixing_angle
@@ -274,7 +277,7 @@ double Charge_Mixing::inner_product_recip_simple(std::complex<double>* rho1, std
     Parallel_Reduce::reduce_pool(rnorm);
 #endif
 
-    ModuleBase::timer::tick("Charge_Mixing", "inner_product_recip_simple");
+    ModuleBase::timer::tick("Charge_Mixing", "recip_simple");
 
     return rnorm;
 }
@@ -282,8 +285,8 @@ double Charge_Mixing::inner_product_recip_simple(std::complex<double>* rho1, std
 // a Hartree-like inner product
 double Charge_Mixing::inner_product_recip_hartree(std::complex<double>* rhog1, std::complex<double>* rhog2)
 {
-    ModuleBase::TITLE("Charge_Mixing", "inner_product_recip_hartree");
-    ModuleBase::timer::tick("Charge_Mixing", "inner_product_recip_hartree");
+    ModuleBase::TITLE("Charge_Mixing", "recip_hartree");
+    ModuleBase::timer::tick("Charge_Mixing", "recip_hartree");
 
     static const double fac = ModuleBase::e2 * ModuleBase::FOUR_PI / ((*this->tpiba) * (*this->tpiba));
     static const double fac2 = ModuleBase::e2 * ModuleBase::FOUR_PI / (ModuleBase::TWO_PI * ModuleBase::TWO_PI);
@@ -444,10 +447,10 @@ double Charge_Mixing::inner_product_recip_hartree(std::complex<double>* rhog1, s
     Parallel_Reduce::reduce_pool(sum);
 #endif
 
-    ModuleBase::timer::tick("Charge_Mixing", "inner_product_recip_hartree");
-
     sum *= *this->omega * 0.5;
 
+    ModuleBase::timer::tick("Charge_Mixing", "recip_hartree");
+
     return sum;
 }
 
@@ -456,8 +459,10 @@ double Charge_Mixing::inner_product_real(double* rho1, double* rho2)
     double rnorm = 0.0;
     // consider a resize for mixing_angle
     int resize_tmp = 1;
-    if (PARAM.inp.nspin == 4 && this->mixing_angle > 0) { resize_tmp = 2;
-}
+	if (PARAM.inp.nspin == 4 && this->mixing_angle > 0) 
+	{ 
+		resize_tmp = 2;
+	}
 
 #ifdef _OPENMP
 #pragma omp parallel for reduction(+ : rnorm)
@@ -470,4 +475,4 @@ double Charge_Mixing::inner_product_real(double* rho1, double* rho2)
     Parallel_Reduce::reduce_pool(rnorm);
 #endif
     return rnorm;
-}
\ No newline at end of file
+}
diff --git a/source/module_elecstate/module_charge/charge_mixing_rho.cpp b/source/module_elecstate/module_charge/charge_mixing_rho.cpp
index 772cd69666..724f587ab2 100644
--- a/source/module_elecstate/module_charge/charge_mixing_rho.cpp
+++ b/source/module_elecstate/module_charge/charge_mixing_rho.cpp
@@ -313,8 +313,9 @@ void Charge_Mixing::mix_rho_recip(Charge* chr)
 
 void Charge_Mixing::mix_rho_real(Charge* chr)
 {
-    double* rhor_in;
-    double* rhor_out;
+    double* rhor_in=nullptr;
+    double* rhor_out=nullptr;
+
     if (PARAM.inp.nspin == 1)
     {
         rhor_in = chr->rho_save[0];
@@ -437,12 +438,17 @@ void Charge_Mixing::mix_rho_real(Charge* chr)
             rho_magabs[ir] = chr->rho[0][ir]; // rho
             rho_magabs_save[ir] = chr->rho_save[0][ir]; // rho_save
             // |m| for rho
-            rho_magabs[nrxx + ir] = std::sqrt(chr->rho[1][ir] * chr->rho[1][ir] + chr->rho[2][ir] * chr->rho[2][ir] + chr->rho[3][ir] * chr->rho[3][ir]);
-            // |m| for rho_save
-            rho_magabs_save[nrxx + ir] = std::sqrt(chr->rho_save[1][ir] * chr->rho_save[1][ir] + chr->rho_save[2][ir] * chr->rho_save[2][ir] + chr->rho_save[3][ir] * chr->rho_save[3][ir]);
-        }
+			rho_magabs[nrxx + ir] = std::sqrt(chr->rho[1][ir] * chr->rho[1][ir] 
+					+ chr->rho[2][ir] * chr->rho[2][ir] 
+					+ chr->rho[3][ir] * chr->rho[3][ir]);
+			// |m| for rho_save
+			rho_magabs_save[nrxx + ir] = std::sqrt(chr->rho_save[1][ir] * chr->rho_save[1][ir] 
+					+ chr->rho_save[2][ir] * chr->rho_save[2][ir] 
+					+ chr->rho_save[3][ir] * chr->rho_save[3][ir]);
+		}
         rhor_in = rho_magabs_save;
         rhor_out = rho_magabs;
+
         auto screen = std::bind(&Charge_Mixing::Kerker_screen_real, this, std::placeholders::_1);
         auto twobeta_mix
             = [this, nrxx](double* out, const double* in, const double* sres) {
@@ -467,13 +473,19 @@ void Charge_Mixing::mix_rho_real(Charge* chr)
             = std::bind(&Charge_Mixing::inner_product_real, this, std::placeholders::_1, std::placeholders::_2);
         this->mixing->cal_coef(this->rho_mdata, inner_product);
         this->mixing->mix_data(this->rho_mdata, rhor_out);
+
         // use new |m| and angle to update {mx, my, mz}
         for (int ir = 0; ir < nrxx; ir++)
         {
             chr->rho[0][ir] = rho_magabs[ir]; // rho
-            double norm = std::sqrt(chr->rho[1][ir] * chr->rho[1][ir] + chr->rho[2][ir] * chr->rho[2][ir] + chr->rho[3][ir] * chr->rho[3][ir]);
-            if (norm < 1e-10) { continue;
-}
+			double norm = std::sqrt(chr->rho[1][ir] * chr->rho[1][ir] 
+					+ chr->rho[2][ir] * chr->rho[2][ir] 
+					+ chr->rho[3][ir] * chr->rho[3][ir]);
+
+			if (norm < 1e-10) 
+			{ 
+				continue;
+			}
             double rescale_tmp = rho_magabs[nrxx + ir] / norm; 
             chr->rho[1][ir] *= rescale_tmp;
             chr->rho[2][ir] *= rescale_tmp;
@@ -484,7 +496,8 @@ void Charge_Mixing::mix_rho_real(Charge* chr)
         delete[] rho_magabs_save;
     }
     
-    double *taur_out, *taur_in;
+    double *taur_out=nullptr;
+    double *taur_in=nullptr;
     if ((XC_Functional::get_ked_flag()) && mixing_tau)
     {
         taur_in = chr->kin_r_save[0];
@@ -498,10 +511,11 @@ void Charge_Mixing::mix_rho_real(Charge* chr)
 
 }
 
+
 void Charge_Mixing::mix_rho(Charge* chr)
 {
     ModuleBase::TITLE("Charge_Mixing", "mix_rho");
-    ModuleBase::timer::tick("Charge", "mix_rho");
+    ModuleBase::timer::tick("Charge_Mixing", "mix_rho");
 
     // the charge before mixing.
     const int nrxx = chr->rhopw->nrxx;
@@ -612,9 +626,11 @@ void Charge_Mixing::mix_rho(Charge* chr)
     }
 #endif
 
-    if (new_e_iteration) {
-        new_e_iteration = false;
-}
-    ModuleBase::timer::tick("Charge", "mix_rho");
+	if (new_e_iteration) 
+	{
+		new_e_iteration = false;
+	}
+
+	ModuleBase::timer::tick("Charge_Mixing", "mix_rho");
     return;
-}
\ No newline at end of file
+}
diff --git a/source/module_elecstate/potentials/pot_local.cpp b/source/module_elecstate/potentials/pot_local.cpp
index b387fc0328..101f527efc 100644
--- a/source/module_elecstate/potentials/pot_local.cpp
+++ b/source/module_elecstate/potentials/pot_local.cpp
@@ -11,8 +11,7 @@ namespace elecstate
 //==========================================================
 // This routine computes the local potential in real space
 //==========================================================
-void PotLocal::cal_fixed_v(double *vl_pseudo // store the local pseudopotential
-)
+void PotLocal::cal_fixed_v(double *vl_pseudo) // store the local pseudopotential
 {
     ModuleBase::TITLE("PotLocal", "cal_fixed_v");
     ModuleBase::timer::tick("PotLocal", "cal_fixed_v");
@@ -46,4 +45,4 @@ void PotLocal::cal_fixed_v(double *vl_pseudo // store the local pseudopotential
     return;
 }
 
-} // namespace elecstate
\ No newline at end of file
+} // namespace elecstate
diff --git a/source/module_elecstate/potentials/potential_new.cpp b/source/module_elecstate/potentials/potential_new.cpp
index a5e992beee..0f6b7fd9a1 100644
--- a/source/module_elecstate/potentials/potential_new.cpp
+++ b/source/module_elecstate/potentials/potential_new.cpp
@@ -18,6 +18,7 @@
 
 namespace elecstate
 {
+
 Potential::Potential(const ModulePW::PW_Basis* rho_basis_in,
                      const ModulePW::PW_Basis* rho_basis_smooth_in,
                      const UnitCell* ucell_in,
@@ -101,12 +102,14 @@ void Potential::allocate()
     ModuleBase::TITLE("Potential", "allocate");
     int nrxx = this->rho_basis_->nrxx;
     int nrxx_smooth = this->rho_basis_smooth_->nrxx;
-    if (nrxx == 0) {
-        return;
-}
-    if (nrxx_smooth == 0) {
-        return;
-}
+    if (nrxx == 0) 
+	{
+		return;
+	}
+	if (nrxx_smooth == 0) 
+	{
+		return;
+	}
 
     this->v_effective_fixed.resize(nrxx);
     ModuleBase::Memory::record("Pot::veff_fix", sizeof(double) * nrxx);
@@ -363,4 +366,4 @@ double* Potential::get_vofk_smooth_data()
     return this->vofk_smooth.nc > 0 ? this->d_vofk_smooth : nullptr;
 }
 
-} // namespace elecstate
\ No newline at end of file
+} // namespace elecstate
diff --git a/source/module_esolver/esolver_ks_lcao_tddft.cpp b/source/module_esolver/esolver_ks_lcao_tddft.cpp
index 664e666aac..c1ec58ba6c 100644
--- a/source/module_esolver/esolver_ks_lcao_tddft.cpp
+++ b/source/module_esolver/esolver_ks_lcao_tddft.cpp
@@ -41,7 +41,7 @@ namespace ModuleESolver
 template <typename Device>
 ESolver_KS_LCAO_TDDFT<Device>::ESolver_KS_LCAO_TDDFT()
 {
-    classname = "ESolver_KS_LCAO_TDDFT";
+    classname = "ESolver_rtTDDFT";
     basisname = "LCAO";
 
     // If the device is GPU, we must open use_tensor and use_lapack
@@ -172,24 +172,23 @@ void ESolver_KS_LCAO_TDDFT<Device>::iter_finish(
     // print occupation of each band
     if (iter == 1 && istep <= 2)
     {
-        GlobalV::ofs_running << "---------------------------------------------------------------"
-                                "---------------------------------"
+        GlobalV::ofs_running << " ---------------------------------------------------------"
                              << std::endl;
-        GlobalV::ofs_running << "occupation : " << std::endl;
-        GlobalV::ofs_running << "ik  iband     occ " << std::endl;
-        GlobalV::ofs_running << std::setprecision(6);
+        GlobalV::ofs_running << " occupations of electrons" << std::endl;
+        GlobalV::ofs_running << " k-point  state   occupation" << std::endl;
         GlobalV::ofs_running << std::setiosflags(std::ios::showpoint);
+        GlobalV::ofs_running << std::left;
+        std::setprecision(6);
         for (int ik = 0; ik < kv.get_nks(); ik++)
         {
             for (int ib = 0; ib < PARAM.inp.nbands; ib++)
             {
-                std::setprecision(6);
-                GlobalV::ofs_running << ik + 1 << "     " << ib + 1 << "      " << this->pelec->wg(ik, ib) << std::endl;
+                GlobalV::ofs_running << " " << std::setw(9) 
+                 << ik+1 << std::setw(8) << ib + 1 
+                 << std::setw(12) << this->pelec->wg(ik, ib) << std::endl;
             }
         }
-        GlobalV::ofs_running << std::endl;
-        GlobalV::ofs_running << "---------------------------------------------------------------"
-                                "---------------------------------"
+        GlobalV::ofs_running << " ---------------------------------------------------------"
                              << std::endl;
     }
 
@@ -324,13 +323,16 @@ void ESolver_KS_LCAO_TDDFT<Device>::update_pot(UnitCell& ucell,
     }
 
     // print "eigen value" for tddft
+// it seems uncessary to print out E_ii because the band energies are printed
+/*
     if (conv_esolver)
     {
-        GlobalV::ofs_running << "---------------------------------------------------------------"
-                                "---------------------------------"
+        GlobalV::ofs_running << "----------------------------------------------------------"
+                             << std::endl;
+        GlobalV::ofs_running << " Print E=<psi_i|H|psi_i> " << std::endl;
+        GlobalV::ofs_running << " k-point  state    energy (eV)" << std::endl;
+        GlobalV::ofs_running << "----------------------------------------------------------"
                              << std::endl;
-        GlobalV::ofs_running << "Eii : " << std::endl;
-        GlobalV::ofs_running << "ik  iband    Eii (eV)" << std::endl;
         GlobalV::ofs_running << std::setprecision(6);
         GlobalV::ofs_running << std::setiosflags(std::ios::showpoint);
 
@@ -338,15 +340,14 @@ void ESolver_KS_LCAO_TDDFT<Device>::update_pot(UnitCell& ucell,
         {
             for (int ib = 0; ib < PARAM.inp.nbands; ib++)
             {
-                GlobalV::ofs_running << ik + 1 << "     " << ib + 1 << "      "
-                                     << this->pelec->ekb(ik, ib) * ModuleBase::Ry_to_eV << std::endl;
+                GlobalV::ofs_running << " " << std::setw(7) << ik + 1 
+                                     << std::setw(7) << ib + 1 
+                                     << std::setw(10) << this->pelec->ekb(ik, ib) * ModuleBase::Ry_to_eV 
+                                     << std::endl;
             }
         }
-        GlobalV::ofs_running << std::endl;
-        GlobalV::ofs_running << "---------------------------------------------------------------"
-                                "---------------------------------"
-                             << std::endl;
     }
+*/
 }
 
 template <typename Device>
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_gamma.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_gamma.cpp
index 5a48c52c73..2c556ae11e 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_gamma.cpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_gamma.cpp
@@ -25,8 +25,8 @@ void Force_LCAO<double>::allocate(const UnitCell& ucell,
                                   const int& nks,
                                   const std::vector<ModuleBase::Vector3<double>>& kvec_d)
 {
-    ModuleBase::TITLE("Force_LCAO", "allocate");
-    ModuleBase::timer::tick("Force_LCAO", "allocate");
+    ModuleBase::TITLE("Forces", "allocate");
+    ModuleBase::timer::tick("Forces", "allocate");
 
     // need to calculate the derivative in build_ST_new
     bool cal_deri = true;
@@ -36,6 +36,8 @@ void Force_LCAO<double>::allocate(const UnitCell& ucell,
     // liaochen add on 2010/7/12
     // save the results in dense matrix by now.
     // pv.nloc: number of H elements in this proc.
+
+    assert(pv.nloc>0);
     fsr.DSloc_x = new double[pv.nloc];
     fsr.DSloc_y = new double[pv.nloc];
     fsr.DSloc_z = new double[pv.nloc];
@@ -112,10 +114,11 @@ void Force_LCAO<double>::allocate(const UnitCell& ucell,
     if (PARAM.inp.cal_syns)
     {
         cal_deri = false;
+        ModuleBase::timer::tick("Forces", "allocate");
         ModuleBase::WARNING_QUIT("cal_syns", "this function has been broken and will be fixed later.");
     }
 
-    ModuleBase::timer::tick("Force_LCAO", "allocate");
+    ModuleBase::timer::tick("Forces", "allocate");
     return;
 }
 
@@ -147,29 +150,6 @@ void Force_LCAO<double>::finish_ftable(ForceStressArrays& fsr)
     return;
 }
 
-// template <>
-// void Force_LCAO<double>::test(Parallel_Orbitals& pv, double* mm, const std::string& name)
-//{
-//     std::cout << "\n PRINT " << name << std::endl;
-//     std::cout << std::setprecision(6) << std::endl;
-//     for (int i = 0; i < PARAM.globalv.nlocal; i++)
-//     {
-//         for (int j = 0; j < PARAM.globalv.nlocal; j++)
-//         {
-//             if (std::abs(mm[i * PARAM.globalv.nlocal + j]) > 1.0e-5)
-//             {
-//                 std::cout << std::setw(12) << mm[i * PARAM.globalv.nlocal + j];
-//             }
-//             else
-//             {
-//                 std::cout << std::setw(12) << "0";
-//             }
-//         }
-//         std::cout << std::endl;
-//     }
-//     return;
-// }
-
 // be called in force_lo.cpp
 template <>
 void Force_LCAO<double>::ftable(const bool isforce,
@@ -199,8 +179,8 @@ void Force_LCAO<double>::ftable(const bool isforce,
                                 const K_Vectors* kv,
                                 Record_adj* ra)
 {
-    ModuleBase::TITLE("Force_LCAO", "ftable");
-    ModuleBase::timer::tick("Force_LCAO", "ftable");
+    ModuleBase::TITLE("Forces", "ftable");
+    ModuleBase::timer::tick("Forces", "ftable");
 
     // get DM
     const elecstate::DensityMatrix<double, double>* dm
@@ -302,6 +282,6 @@ void Force_LCAO<double>::ftable(const bool isforce,
     // delete DHloc_fixed_x, DHloc_fixed_y, DHloc_fixed_z
     this->finish_ftable(fsr);
 
-    ModuleBase::timer::tick("Force_LCAO", "ftable");
+    ModuleBase::timer::tick("Forces", "ftable");
     return;
 }
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_k.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_k.cpp
index 3fc8390b7c..4b89aa93ed 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_k.cpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_k.cpp
@@ -35,8 +35,8 @@ void Force_LCAO<std::complex<double>>::allocate(const UnitCell& ucell,
                                                 const int& nks,
                                                 const std::vector<ModuleBase::Vector3<double>>& kvec_d)
 {
-    ModuleBase::TITLE("Force_LCAO", "allocate");
-    ModuleBase::timer::tick("Force_LCAO", "allocate");
+    ModuleBase::TITLE("Forces", "allocate");
+    ModuleBase::timer::tick("Forces", "allocate");
 
     const int nnr = pv.nnr;
 
@@ -160,7 +160,7 @@ void Force_LCAO<std::complex<double>>::allocate(const UnitCell& ucell,
         }
     }
 
-    ModuleBase::timer::tick("Force_LCAO", "allocate");
+    ModuleBase::timer::tick("Forces", "allocate");
     return;
 }
 
@@ -216,8 +216,8 @@ void Force_LCAO<std::complex<double>>::ftable(const bool isforce,
                                               const K_Vectors* kv,
                                               Record_adj* ra)
 {
-    ModuleBase::TITLE("Force_LCAO", "ftable");
-    ModuleBase::timer::tick("Force_LCAO", "ftable");
+    ModuleBase::TITLE("Forces", "ftable");
+    ModuleBase::timer::tick("Forces", "ftable");
 
     elecstate::DensityMatrix<complex<double>, double>* dm
         = dynamic_cast<const elecstate::ElecStateLCAO<std::complex<double>>*>(pelec)->get_DM();
@@ -318,6 +318,6 @@ void Force_LCAO<std::complex<double>>::ftable(const bool isforce,
     }
 #endif
 
-    ModuleBase::timer::tick("Force_LCAO", "ftable");
+    ModuleBase::timer::tick("Forces", "ftable");
     return;
 }
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/pulay_force_stress_center2.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/pulay_force_stress_center2.cpp
index ea49096da5..37f75cd5f0 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/pulay_force_stress_center2.cpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/pulay_force_stress_center2.cpp
@@ -16,8 +16,8 @@ namespace PulayForceStress
         const double& factor_force,
         const double& factor_stress)
     {
-        ModuleBase::TITLE("Force_LCAO", "cal_pulay_fs_center2");
-        ModuleBase::timer::tick("Force_LCAO", "cal_pulay_fs_center2");
+        ModuleBase::TITLE("Forces", "cal_pulay");
+        ModuleBase::timer::tick("Forces", "cal_pulay");
 
         const int nspin = PARAM.inp.nspin;
         const int nlocal = PARAM.globalv.nlocal;
@@ -52,7 +52,7 @@ namespace PulayForceStress
 
         if (isstress) { StressTools::stress_fill(ucell.lat0, ucell.omega, s); }
 
-        ModuleBase::timer::tick("Force_LCAO", "cal_pulay_fs_center2");
+        ModuleBase::timer::tick("Forces", "cal_pulay");
     }
 
     template<>  //multi-k, provided xy
@@ -70,13 +70,26 @@ namespace PulayForceStress
         const double& factor_force,
         const double& factor_stress)
     {
-        auto stress_func = [](ModuleBase::matrix& local_s, const double& dm2d1_s, const double** dHSx, const double** dHSxy, const double* dtau, const int& irr)
-            {
+		auto stress_func = [](ModuleBase::matrix& local_s, 
+				const double& dm2d1_s, 
+				const double** dHSx, 
+				const double** dHSxy, 
+				const double* dtau, 
+				const int& irr)
+		{
                 int ij = 0;
-                for (int i = 0; i < 3; ++i) { for (int j = i; j < 3; ++j) { local_s(i, j) += dm2d1_s * dHSxy[ij++][irr]; } }
-            };
-        cal_pulay_fs(f, s, dm, ucell, pv, dHSx, dHSxy, nullptr, isforce, isstress, ra, factor_force, factor_stress, stress_func);
-    }
+				for (int i = 0; i < 3; ++i) 
+				{ 
+					for (int j = i; j < 3; ++j) 
+					{ 
+						local_s(i, j) += dm2d1_s * dHSxy[ij++][irr]; 
+					} 
+				}
+		};
+		cal_pulay_fs(f, s, dm, ucell, pv, dHSx, dHSxy, 
+				nullptr, isforce, isstress, ra, 
+				factor_force, factor_stress, stress_func);
+	}
 
     template<>  // multi-k, provided x
     void cal_pulay_fs(
@@ -93,11 +106,24 @@ namespace PulayForceStress
         const double& factor_force,
         const double& factor_stress)
     {
-        auto stress_func = [](ModuleBase::matrix& local_s, const double& dm2d1_s, const double** dHSx, const double** dHSxy, const double* dtau, const int& irr)
+		auto stress_func = [](ModuleBase::matrix& local_s, 
+				const double& dm2d1_s, 
+				const double** dHSx, 
+				const double** dHSxy, 
+				const double* dtau, 
+				const int& irr)
             {
-                for (int i = 0; i < 3; ++i) { for (int j = i; j < 3; ++j) { local_s(i, j) += dm2d1_s * dHSx[i][irr] * dtau[irr * 3 + j]; } }
-            };
-        cal_pulay_fs(f, s, dm, ucell, pv, dHSx, nullptr, dtau, isforce, isstress, ra, factor_force, factor_stress, stress_func);
+                for (int i = 0; i < 3; ++i) 
+				{ 
+					for (int j = i; j < 3; ++j) 
+					{ 
+						local_s(i, j) += dm2d1_s * dHSx[i][irr] * dtau[irr * 3 + j]; 
+					} 
+				}
+			};
+		cal_pulay_fs(f, s, dm, ucell, pv, dHSx, 
+				nullptr, dtau, isforce, isstress, ra, 
+				factor_force, factor_stress, stress_func);
     }
 
-}
\ No newline at end of file
+}
diff --git a/source/module_hamilt_lcao/module_tddft/evolve_psi.cpp b/source/module_hamilt_lcao/module_tddft/evolve_psi.cpp
index b05d062e20..b92a7215f8 100644
--- a/source/module_hamilt_lcao/module_tddft/evolve_psi.cpp
+++ b/source/module_hamilt_lcao/module_tddft/evolve_psi.cpp
@@ -33,15 +33,16 @@ void evolve_psi(const int nband,
                 std::ofstream& ofs_running,
                 const int print_matrix)
 {
-    ofs_running << " evolve_psi::start " << std::endl;
-
     ModuleBase::TITLE("Evolve_psi", "evolve_psi");
+    ofs_running << " Evolving electronic wave functions begins" << std::endl;
+
     time_t time_start = time(nullptr);
     ofs_running << " Start Time : " << ctime(&time_start);
 
 #ifdef __MPI
 
-    hamilt::MatrixBlock<std::complex<double>> h_mat, s_mat;
+    hamilt::MatrixBlock<std::complex<double>> h_mat;
+    hamilt::MatrixBlock<std::complex<double>> s_mat;
     p_hamilt->matrix(h_mat, s_mat);
 
     std::complex<double>* Stmp = new std::complex<double>[pv->nloc];
@@ -59,7 +60,7 @@ void evolve_psi(const int nband,
     std::complex<double>* U_operator = new std::complex<double>[pv->nloc];
     ModuleBase::GlobalFunc::ZEROS(U_operator, pv->nloc);
 
-    // (1)->>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
+    // (1)->>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
 
     /// @brief compute H(t+dt/2)
     /// @input H_laststep, Htmp, print_matrix
@@ -69,7 +70,7 @@ void evolve_psi(const int nband,
         half_Hmatrix(pv, nband, nlocal, Htmp, Stmp, H_laststep, S_laststep, ofs_running, print_matrix);
     }
 
-    // (2)->>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
+    // (2)->>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
     if (propagator != 3)
     {
         /// @brief compute U_operator
@@ -79,7 +80,7 @@ void evolve_psi(const int nband,
         prop.compute_propagator(nlocal, Stmp, Htmp, H_laststep, U_operator, ofs_running, print_matrix);
     }
 
-    // (3)->>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
+    // (3)->>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
     if (propagator != 3)
     {
         /// @brief apply U_operator to the wave function of the previous step for new wave function
@@ -95,14 +96,14 @@ void evolve_psi(const int nband,
         solve_propagation(pv, nband, nlocal, PARAM.mdp.md_dt / ModuleBase::AU_to_FS, Stmp, Htmp, psi_k_laststep, psi_k);
     }
 
-    // (4)->>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
+    // (4)->>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
 
     /// @brief normalize psi_k
     /// @input Stmp, psi_not_norm, psi_k, print_matrix
     /// @output psi_k
     norm_psi(pv, nband, nlocal, Stmp, psi_k, ofs_running, print_matrix);
 
-    // (5)->>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
+    // (5)->>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
 
     /// @brief compute ekb
     /// @input Htmp, psi_k
@@ -119,7 +120,7 @@ void evolve_psi(const int nband,
     time_t time_end = time(nullptr);
     ModuleBase::GlobalFunc::OUT_TIME("evolve(std::complex)", time_start, time_end);
 
-    ofs_running << " evolve_psi::end " << std::endl;
+    ofs_running << " Evolving electronic wave functions ends" << std::endl;
 
     return;
 }
@@ -348,4 +349,4 @@ template void evolve_psi_tensor<base_device::DEVICE_GPU>(const int nband,
                                                          const bool use_lapack);
 #endif // __CUDA
 
-} // namespace module_tddft
\ No newline at end of file
+} // namespace module_tddft
diff --git a/source/module_hamilt_pw/hamilt_pwdft/fs_nonlocal_tools.cpp b/source/module_hamilt_pw/hamilt_pwdft/fs_nonlocal_tools.cpp
index 22dc8c38e8..e59f7d4dab 100644
--- a/source/module_hamilt_pw/hamilt_pwdft/fs_nonlocal_tools.cpp
+++ b/source/module_hamilt_pw/hamilt_pwdft/fs_nonlocal_tools.cpp
@@ -282,7 +282,6 @@ void FS_Nonlocal_tools<FPTYPE, Device>::cal_becp(const int& ik,
               &ModuleBase::ZERO,
               this->becp + index0,
               this->nkb);
-    ModuleBase::timer::tick("FS_Nonlocal_tools", "cal_becp");
 }
 
 template <typename FPTYPE, typename Device>
diff --git a/source/module_hamilt_pw/hamilt_pwdft/stress_func_cc.cpp b/source/module_hamilt_pw/hamilt_pwdft/stress_func_cc.cpp
index af297fba60..32d5c2339d 100644
--- a/source/module_hamilt_pw/hamilt_pwdft/stress_func_cc.cpp
+++ b/source/module_hamilt_pw/hamilt_pwdft/stress_func_cc.cpp
@@ -21,8 +21,8 @@ void Stress_Func<FPTYPE, Device>::stress_cc(ModuleBase::matrix& sigma,
 											const bool *numeric,
                                             const Charge* const chr)
 {
-    ModuleBase::TITLE("Stress_Func","stress_cc");
-	ModuleBase::timer::tick("Stress_Func","stress_cc");
+    ModuleBase::TITLE("Stress","stress_cc");
+	ModuleBase::timer::tick("Stress","stress_cc");
         
 	FPTYPE fact=1.0;
 
@@ -45,7 +45,7 @@ void Stress_Func<FPTYPE, Device>::stress_cc(ModuleBase::matrix& sigma,
 
 	if(judge==0) 
 	{
-		ModuleBase::timer::tick("Stress_Func","stress_cc");
+		ModuleBase::timer::tick("Stress","stress_cc");
 		return;
 	}
 
@@ -192,7 +192,6 @@ void Stress_Func<FPTYPE, Device>::stress_cc(ModuleBase::matrix& sigma,
 	for(int l = 0;l< 3;l++)
 	{
 		sigma(l,l) += sigmadiag;
-//		sigmacc [l][ l] += sigmadiag.real();
 	}
 	for(int l = 0;l< 3;l++)
 	{
@@ -205,7 +204,7 @@ void Stress_Func<FPTYPE, Device>::stress_cc(ModuleBase::matrix& sigma,
 	delete[] rhocg;
 	delete[] psic;
 
-	ModuleBase::timer::tick("Stress_Func","stress_cc");
+	ModuleBase::timer::tick("Stress","stress_cc");
 	return;
 }
 
@@ -225,17 +224,18 @@ void Stress_Func<FPTYPE, Device>::deriv_drhoc
 	int type
 )
 {
-	int  igl0;
-	double gx = 0, rhocg1 = 0;
-	//double *aux = new double[mesh];
+	int igl0=0;
+	double gx = 0.0;
+    double rhocg1 = 0.0;
 	std::vector<double> aux(mesh);
 	this->device = base_device::get_device_type<Device>(this->ctx);
+
 	// the modulus of g for a given shell
 	// the fourier transform
 	// auxiliary memory for integration
-	//double *gx_arr = new double[rho_basis->ngg];
 	std::vector<double> gx_arr(rho_basis->ngg);
 	double *gx_arr_d = nullptr;
+
 	// counter on radial mesh points
 	// counter on g shells
 	// lower limit for loop on ngl
@@ -243,7 +243,8 @@ void Stress_Func<FPTYPE, Device>::deriv_drhoc
 	//
 	// G=0 term
 	//
-	if(type == 0){
+	if(type == 0)
+	{
 		if (rho_basis->gg_uniq[0] < 1.0e-8)
 		{
 			drhocg [0] = 0.0;
@@ -253,7 +254,9 @@ void Stress_Func<FPTYPE, Device>::deriv_drhoc
 		{
 			igl0 = 0;
 		}
-	} else {
+	} 
+	else 
+	{
 		if (rho_basis->gg_uniq[0] < 1.0e-8)
 		{
 			for (int ir = 0;ir < mesh; ir++)
@@ -273,7 +276,7 @@ void Stress_Func<FPTYPE, Device>::deriv_drhoc
 
 	//
 	// G <> 0 term
-	//]
+	//
 
 #ifdef _OPENMP
 #pragma omp parallel for
@@ -288,7 +291,9 @@ void Stress_Func<FPTYPE, Device>::deriv_drhoc
 	double *rab_d = nullptr;
 	double *aux_d = nullptr;
 	double *drhocg_d = nullptr;
-	if(this->device == base_device::GpuDevice ) {
+
+	if(this->device == base_device::GpuDevice) 
+	{
 		resmem_var_op()(r_d, mesh);
 		resmem_var_op()(rhoc_d, mesh);
 		resmem_var_op()(rab_d, mesh);
@@ -303,25 +308,29 @@ void Stress_Func<FPTYPE, Device>::deriv_drhoc
 		syncmem_var_h2d_op()(rhoc_d, rhoc, mesh);
 	}
 
-	if(this->device == base_device::GpuDevice) {
+	if(this->device == base_device::GpuDevice) 
+	{
 		hamilt::cal_stress_drhoc_aux_op<FPTYPE, Device>()(
-			r_d,rhoc_d,gx_arr_d+igl0,rab_d,drhocg_d+igl0,mesh,igl0,rho_basis->ngg-igl0,omega,type);
+				r_d,rhoc_d,gx_arr_d+igl0,rab_d,drhocg_d+igl0,mesh,igl0,rho_basis->ngg-igl0,omega,type);
 		syncmem_var_d2h_op()(drhocg+igl0, drhocg_d+igl0, rho_basis->ngg-igl0);	
 
-	} else {
+	} 
+	else 
+	{
 		hamilt::cal_stress_drhoc_aux_op<FPTYPE, Device>()(
-			r,rhoc,gx_arr.data()+igl0,rab,drhocg+igl0,mesh,igl0,rho_basis->ngg-igl0,omega,type);
+				r,rhoc,gx_arr.data()+igl0,rab,drhocg+igl0,mesh,igl0,rho_basis->ngg-igl0,omega,type);
 
 	}
-    delmem_var_op()(r_d);
+	delmem_var_op()(r_d);
     delmem_var_op()(rhoc_d);
     delmem_var_op()(rab_d);
     delmem_var_op()(gx_arr_d);
     delmem_var_op()(drhocg_d);
+
 	return;
 }
 
 template class Stress_Func<double, base_device::DEVICE_CPU>;
 #if ((defined __CUDA) || (defined __ROCM))
 template class Stress_Func<double, base_device::DEVICE_GPU>;
-#endif
\ No newline at end of file
+#endif
diff --git a/source/module_hamilt_pw/hamilt_pwdft/stress_func_ewa.cpp b/source/module_hamilt_pw/hamilt_pwdft/stress_func_ewa.cpp
index 5dbe2cd6b4..8cd845e313 100644
--- a/source/module_hamilt_pw/hamilt_pwdft/stress_func_ewa.cpp
+++ b/source/module_hamilt_pw/hamilt_pwdft/stress_func_ewa.cpp
@@ -16,8 +16,8 @@ void Stress_Func<FPTYPE, Device>::stress_ewa(const UnitCell& ucell,
 											 ModulePW::PW_Basis* rho_basis, 
 											 const bool is_pw)
 {
-    ModuleBase::TITLE("Stress_Func","stress_ewa");
-    ModuleBase::timer::tick("Stress_Func","stress_ewa");
+    ModuleBase::TITLE("Stress","stress_ewa");
+    ModuleBase::timer::tick("Stress","stress_ewa");
 
     FPTYPE charge=0;
     for(int it=0; it < ucell.ntype; it++)
@@ -28,18 +28,23 @@ void Stress_Func<FPTYPE, Device>::stress_ewa(const UnitCell& ucell,
     //upperbound is a safe upper bound for the error ON THE ENERGY
 
     FPTYPE alpha=2.9;
-    FPTYPE upperbound;
-    do{
-       alpha-=0.1;
-       if(alpha==0.0)
-          ModuleBase::WARNING_QUIT("stres_ew", "optimal alpha not found");
-       upperbound =ModuleBase::e2 * pow(charge,2) * sqrt( 2 * alpha / (ModuleBase::TWO_PI)) * erfc(sqrt(ucell.tpiba2 * rho_basis->ggecut / 4.0 / alpha));
-    }
+    FPTYPE upperbound=0.0;
+
+	do{
+		alpha-=0.1;
+		if(alpha==0.0)
+		{
+			ModuleBase::WARNING_QUIT("stres_ew", "optimal alpha not found");
+		}
+		upperbound =ModuleBase::e2 * pow(charge,2) * 
+         sqrt( 2 * alpha / (ModuleBase::TWO_PI)) 
+         * erfc(sqrt(ucell.tpiba2 * rho_basis->ggecut / 4.0 / alpha));
+	}
     while(upperbound>1e-7);
 
     //G-space sum here
     //Determine if this processor contains G=0 and set the constant term 
-    FPTYPE sdewald;
+    FPTYPE sdewald=0.0;
 	const int ig0 = rho_basis->ig_gge0;
     if( ig0 >= 0)
 	{
@@ -53,7 +58,10 @@ void Stress_Func<FPTYPE, Device>::stress_ewa(const UnitCell& ucell,
     //sdewald is the diagonal term 
 
     FPTYPE fact=1.0;
-    if (PARAM.globalv.gamma_only_pw && is_pw) fact=2.0;
+	if (PARAM.globalv.gamma_only_pw && is_pw) 
+	{
+		fact=2.0;
+	}
 //    else fact=1.0;
 
 #ifdef _OPENMP
@@ -71,7 +79,8 @@ void Stress_Func<FPTYPE, Device>::stress_ewa(const UnitCell& ucell,
 #endif
 
 	// Calculate ig range of this thread, avoid thread sync
-	int ig, ig_end;
+	int ig=0;
+    int ig_end=0;
 	ModuleBase::TASK_DIST_1D(num_threads, thread_id, rho_basis->npw, ig, ig_end);
 	ig_end = ig + ig_end;
 
@@ -79,12 +88,18 @@ void Stress_Func<FPTYPE, Device>::stress_ewa(const UnitCell& ucell,
     FPTYPE arg;
     std::complex<FPTYPE> rhostar;
     FPTYPE sewald;
+
     for(; ig < ig_end; ig++)
 	{
-		if(ig == ig0)  continue;
+		if(ig == ig0)  
+		{
+			continue;
+		}
+
 		g2 = rho_basis->gg[ig]* ucell.tpiba2;
 		g2a = g2 /4.0/alpha;
 		rhostar=std::complex<FPTYPE>(0.0,0.0);
+
 		for(int it=0; it < ucell.ntype; it++)
 		{
 			for(int i=0; i<ucell.atoms[it].na; i++)
@@ -102,22 +117,24 @@ void Stress_Func<FPTYPE, Device>::stress_ewa(const UnitCell& ucell,
 		{
 			for(int m=0;m<l+1;m++)
 			{
-				local_sigma(l, m) += sewald * ucell.tpiba2 * 2.0 * rho_basis->gcar[ig][l] * rho_basis->gcar[ig][m] / g2 * (g2a + 1);
+				local_sigma(l, m) += sewald * ucell.tpiba2 * 2.0 
+					* rho_basis->gcar[ig][l] * rho_basis->gcar[ig][m] / g2 * (g2a + 1);
 			}
 		}
 	}
 
     //R-space sum here (only for the processor that contains G=0) 
     int mxr = 200;
-    int *irr;
+    int *irr=nullptr;
     ModuleBase::Vector3<FPTYPE> *r;
-    FPTYPE *r2;
-    FPTYPE rr;
+    FPTYPE *r2=nullptr;
+    FPTYPE rr=0.0;
     ModuleBase::Vector3<FPTYPE> d_tau;
     FPTYPE r0[3];
     FPTYPE rmax=0.0;
     int nrm=0;
-    FPTYPE fac;
+    FPTYPE fac=0.0;
+
 	if(ig0 >= 0)
 	{
 		r = new ModuleBase::Vector3<FPTYPE>[mxr];
@@ -129,8 +146,13 @@ void Stress_Func<FPTYPE, Device>::stress_ewa(const UnitCell& ucell,
 		rmax = 4.0/sqa/ucell.lat0;
 
 		// collapse it, ia, jt, ja loop into a single loop
-		long long ijat, ijat_end;
-		int it, i, jt, j;
+		long long ijat;
+        long long ijat_end;
+		int it=0;
+        int i=0;
+        int jt=0;
+        int j=0;
+
 		ModuleBase::TASK_DIST_1D(num_threads, thread_id, (long long)ucell.nat * ucell.nat, ijat, ijat_end);
 		ijat_end = ijat + ijat_end;
 		ucell.ijat2iaitjajt(ijat, &i, &it, &j, &jt);
@@ -143,10 +165,13 @@ void Stress_Func<FPTYPE, Device>::stress_ewa(const UnitCell& ucell,
 				d_tau = ucell.atoms[it].tau[i] - ucell.atoms[jt].tau[j];
 				//generates nearest-neighbors shells 
 				H_Ewald_pw::rgen(d_tau, rmax, irr, ucell.latvec, ucell.G, r, r2, nrm);
-				for(int nr=0 ; nr<nrm ; nr++)
+				for(int nr=0; nr<nrm; nr++)
 				{
 					rr=sqrt(r2[nr]) * ucell.lat0;
-					fac = -ModuleBase::e2/2.0/ucell.omega*pow(ucell.lat0,2)*ucell.atoms[it].ncpp.zv * ucell.atoms[jt].ncpp.zv / pow(rr,3) * (erfc(sqa*rr)+rr * sq8a_2pi *  ModuleBase::libm::exp(-alpha * pow(rr,2)));
+					fac = -ModuleBase::e2/2.0/ucell.omega*
+                          pow(ucell.lat0,2)*ucell.atoms[it].ncpp.zv * ucell.atoms[jt].ncpp.zv 
+                          / pow(rr,3) * (erfc(sqa*rr)+rr * sq8a_2pi *  ModuleBase::libm::exp(-alpha * pow(rr,2)));
+
 					for(int l=0; l<3; l++)
 					{
 						for(int m=0; m<l+1; m++)
@@ -204,8 +229,7 @@ void Stress_Func<FPTYPE, Device>::stress_ewa(const UnitCell& ucell,
 		}
 	}
 
-	// this->print(GlobalV::ofs_running, "ewald stress", stression);
-	ModuleBase::timer::tick("Stress_Func","stress_ewa");
+	ModuleBase::timer::tick("Stress","stress_ewa");
 
 	return;
 }
@@ -213,4 +237,4 @@ void Stress_Func<FPTYPE, Device>::stress_ewa(const UnitCell& ucell,
 template class Stress_Func<double, base_device::DEVICE_CPU>;
 #if ((defined __CUDA) || (defined __ROCM))
 template class Stress_Func<double, base_device::DEVICE_GPU>;
-#endif
\ No newline at end of file
+#endif
diff --git a/source/module_hamilt_pw/hamilt_pwdft/stress_func_gga.cpp b/source/module_hamilt_pw/hamilt_pwdft/stress_func_gga.cpp
index 54c2ee3b51..8fd5dcd7e4 100644
--- a/source/module_hamilt_pw/hamilt_pwdft/stress_func_gga.cpp
+++ b/source/module_hamilt_pw/hamilt_pwdft/stress_func_gga.cpp
@@ -10,19 +10,20 @@ void Stress_Func<FPTYPE, Device>::stress_gga(const UnitCell& ucell,
                                              ModulePW::PW_Basis* rho_basis,
                                              const Charge* const chr)
 {
-    ModuleBase::TITLE("Stress_Func","stress_gga");
-	ModuleBase::timer::tick("Stress_Func","stress_gga");
+    ModuleBase::TITLE("Stress","stress_gga");
+	ModuleBase::timer::tick("Stress","stress_gga");
      
 	int func_type = XC_Functional::get_func_type();
 	if (func_type == 0 || func_type == 1)
 	{
-		ModuleBase::timer::tick("Stress_Func","stress_gga");
+		ModuleBase::timer::tick("Stress","stress_gga");
 		return;
 	}
 
 	FPTYPE sigma_gradcorr[3][3];
 	std::vector<FPTYPE> stress_gga;
-	FPTYPE dum1, dum2;
+	FPTYPE dum1=0.0;
+    FPTYPE dum2=0.0;
 	ModuleBase::matrix dum3;
 	// call gradcorr to evaluate gradient correction to stress
 	// the first three terms are etxc, vtxc and v, which
@@ -55,11 +56,11 @@ void Stress_Func<FPTYPE, Device>::stress_gga(const UnitCell& ucell,
         }
 	}
 
-	ModuleBase::timer::tick("Stress_Func","stress_gga");
+	ModuleBase::timer::tick("Stress","stress_gga");
 	return;
 }
 
 template class Stress_Func<double, base_device::DEVICE_CPU>;
 #if ((defined __CUDA) || (defined __ROCM))
 template class Stress_Func<double, base_device::DEVICE_GPU>;
-#endif
\ No newline at end of file
+#endif
diff --git a/source/module_hamilt_pw/hamilt_pwdft/stress_func_har.cpp b/source/module_hamilt_pw/hamilt_pwdft/stress_func_har.cpp
index 9f2e702147..4d6cfb3387 100644
--- a/source/module_hamilt_pw/hamilt_pwdft/stress_func_har.cpp
+++ b/source/module_hamilt_pw/hamilt_pwdft/stress_func_har.cpp
@@ -12,8 +12,10 @@ void Stress_Func<FPTYPE, Device>::stress_har(const UnitCell& ucell,
 											 const bool is_pw, 
 											 const Charge* const chr)
 {
-    ModuleBase::TITLE("Stress_Func","stress_har");
-	ModuleBase::timer::tick("Stress_Func","stress_har");
+    ModuleBase::TITLE("Stress","stress_har");
+	ModuleBase::timer::tick("Stress","stress_har");
+
+    assert(rho_basis->nmaxgr>0);
 
 	std::complex<FPTYPE> *aux = new std::complex<FPTYPE>[rho_basis->nmaxgr];
 
@@ -55,44 +57,44 @@ void Stress_Func<FPTYPE, Device>::stress_har(const UnitCell& ucell,
 	//=============================
 	rho_basis->real2recip(aux, aux);
 
-//	FPTYPE ehart=0;
+
 #ifndef _OPENMP
 	ModuleBase::matrix& local_sigma = sigma;
 #else
 #pragma omp parallel
-{
-	ModuleBase::matrix local_sigma(3, 3);
-	#pragma omp for
-#endif
-	for (int ig = 0 ; ig < rho_basis->npw ; ++ig)
 	{
-		const FPTYPE g2 = rho_basis->gg[ig];
-		if(g2 < 1e-8) { continue;
-}
-		//const FPTYPE fac = ModuleBase::e2 * ModuleBase::FOUR_PI / (ucell.tpiba2 * GlobalC::sf.gg [ig]);
-		//ehart += ( conj( Porter[j] ) * Porter[j] ).real() * fac;
-		//vh_g[ig] = fac * Porter[j];
-		FPTYPE shart= ( conj( aux[ig] ) * aux[ig] ).real()/(ucell.tpiba2 * g2);
-		for(int l=0;l<3;l++)
+		ModuleBase::matrix local_sigma(3, 3);
+#pragma omp for
+#endif
+		for (int ig = 0 ; ig < rho_basis->npw ; ++ig)
 		{
-			for(int m=0;m<l+1;m++)
+			const FPTYPE g2 = rho_basis->gg[ig];
+			if(g2 < 1e-8) 
+			{ 
+				continue;
+			}
+
+			FPTYPE shart= ( conj( aux[ig] ) * aux[ig] ).real()/(ucell.tpiba2 * g2);
+			for(int l=0;l<3;l++)
 			{
-				local_sigma(l, m) += shart * 2 * rho_basis->gcar[ig][l] * rho_basis->gcar[ig][m] / g2;
+				for(int m=0;m<l+1;m++)
+				{
+					local_sigma(l, m) += shart * 2 * rho_basis->gcar[ig][l] * rho_basis->gcar[ig][m] / g2;
+				}
 			}
 		}
-	}
 #ifdef _OPENMP
-	#pragma omp critical(stress_har_reduce)
-	{
-		for(int l=0;l<3;l++)
+#pragma omp critical(stress_har_reduce)
 		{
-			for(int m=0;m<l+1;m++)
+			for(int l=0;l<3;l++)
 			{
-				sigma(l,m) += local_sigma(l,m);
+				for(int m=0;m<l+1;m++)
+				{
+					sigma(l,m) += local_sigma(l,m);
+				}
 			}
 		}
 	}
-}
 #endif
 
     for(int l=0;l<3;l++)
@@ -103,10 +105,7 @@ void Stress_Func<FPTYPE, Device>::stress_har(const UnitCell& ucell,
 		}
 	}
 
-    //        Parallel_Reduce::reduce_pool( ehart );
-//        ehart *= 0.5 * ucell.omega;
-        //psic(:)=(0.0,0.0)
-	if(is_pw&&PARAM.globalv.gamma_only_pw)
+	if(is_pw && PARAM.globalv.gamma_only_pw)
 	{
 		for(int l=0;l<3;l++)
 		{
@@ -126,18 +125,23 @@ void Stress_Func<FPTYPE, Device>::stress_har(const UnitCell& ucell,
 			}
 		}
 	}
-	
+
 	for(int l=0;l<3;l++)
 	{
-		if(is_pw) { sigma(l,l) -= elecstate::H_Hartree_pw::hartree_energy /ucell.omega;
-		} else { sigma(l,l) += elecstate::H_Hartree_pw::hartree_energy /ucell.omega;
-}
+		if(is_pw) 
+		{ 
+			sigma(l,l) -= elecstate::H_Hartree_pw::hartree_energy /ucell.omega;
+		} 
+		else 
+		{ 
+			sigma(l,l) += elecstate::H_Hartree_pw::hartree_energy /ucell.omega;
+		}
 		for(int m=0;m<l;m++)
 		{
 			sigma(m,l)=sigma(l,m);
 		}
 	}
-	
+
 	for(int l=0;l<3;l++)
 	{
 		for(int m=0;m<3;m++)
@@ -147,11 +151,11 @@ void Stress_Func<FPTYPE, Device>::stress_har(const UnitCell& ucell,
 	}
 
 	delete[] aux;
-	ModuleBase::timer::tick("Stress_Func","stress_har");
+	ModuleBase::timer::tick("Stress","stress_har");
 	return;
 }
 
 template class Stress_Func<double, base_device::DEVICE_CPU>;
 #if ((defined __CUDA) || (defined __ROCM))
 template class Stress_Func<double, base_device::DEVICE_GPU>;
-#endif
\ No newline at end of file
+#endif
diff --git a/source/module_hamilt_pw/hamilt_pwdft/stress_func_kin.cpp b/source/module_hamilt_pw/hamilt_pwdft/stress_func_kin.cpp
index a5afae2340..0de21d6178 100644
--- a/source/module_hamilt_pw/hamilt_pwdft/stress_func_kin.cpp
+++ b/source/module_hamilt_pw/hamilt_pwdft/stress_func_kin.cpp
@@ -14,9 +14,11 @@ void Stress_Func<FPTYPE, Device>::stress_kin(ModuleBase::matrix& sigma,
                                              const UnitCell& ucell_in,
                                              const psi::Psi<complex<FPTYPE>, Device>* psi_in)
 {
-    ModuleBase::TITLE("Stress_Func","stress_kin");
-	ModuleBase::timer::tick("Stress_Func","stress_kin");
+    ModuleBase::TITLE("Stress","stress_kin");
+	ModuleBase::timer::tick("Stress","stress_kin");
+
 	this->ucell = &ucell_in;
+
 	hamilt::FS_Kin_tools<FPTYPE, Device> kin_tool(*this->ucell, p_kv, wfc_basis, wg);
     for (int ik = 0; ik < wfc_basis->nks; ++ik)
     {
@@ -35,11 +37,11 @@ void Stress_Func<FPTYPE, Device>::stress_kin(ModuleBase::matrix& sigma,
     }
     kin_tool.symmetrize_stress(p_symm, sigma);
 		
-	ModuleBase::timer::tick("Stress_Func","stress_kin");
+	ModuleBase::timer::tick("Stress","stress_kin");
 	return;
 }
 
 template class Stress_Func<double, base_device::DEVICE_CPU>;
 #if ((defined __CUDA) || (defined __ROCM))
 template class Stress_Func<double, base_device::DEVICE_GPU>;
-#endif
\ No newline at end of file
+#endif
diff --git a/source/module_hamilt_pw/hamilt_pwdft/stress_func_loc.cpp b/source/module_hamilt_pw/hamilt_pwdft/stress_func_loc.cpp
index 42e619c9bc..a24fe639d1 100644
--- a/source/module_hamilt_pw/hamilt_pwdft/stress_func_loc.cpp
+++ b/source/module_hamilt_pw/hamilt_pwdft/stress_func_loc.cpp
@@ -16,8 +16,8 @@ void Stress_Func<FPTYPE, Device>::stress_loc(const UnitCell& ucell,
                                              const bool is_pw,
                                              const Charge* const chr)
 {
-    ModuleBase::TITLE("Stress_Func","stress_loc");
-    ModuleBase::timer::tick("Stress_Func","stress_loc");
+    ModuleBase::TITLE("Stress","stress_loc");
+    ModuleBase::timer::tick("Stress","stress_loc");
 
 	std::vector<FPTYPE> dvloc(rho_basis->npw);
     FPTYPE evloc=0.0;
@@ -48,16 +48,15 @@ void Stress_Func<FPTYPE, Device>::stress_loc(const UnitCell& ucell,
 		// calculate the actual task length of this block
 		int ir_end = std::min(irb + block_ir, rho_basis->nrxx);
 
-		{ // is = 0
-			for (int ir = irb; ir < ir_end; ++ir)
-			{ // initialize aux
-				aux[ir] = std::complex<FPTYPE>(chr->rho[0][ir], 0.0 );
-			}
+		for (int ir = irb; ir < ir_end; ++ir)
+		{
+			aux[ir] = std::complex<FPTYPE>(chr->rho[0][ir], 0.0 );
 		}
+
 		if(nspin_rho == 2)
 		{
 			for (int ir = irb; ir < ir_end; ++ir)
-			{ // accumulate aux
+			{ 
 				aux[ir] += std::complex<FPTYPE>(chr->rho[1][ir], 0.0 );
 			}
 		}
@@ -74,69 +73,68 @@ void Stress_Func<FPTYPE, Device>::stress_loc(const UnitCell& ucell,
 		{
 			for (int ig=0; ig<rho_basis->npw; ig++)
 			{
-                if (rho_basis->ig_gge0 == ig) {
-                    evloc += vloc(it, rho_basis->ig2igg[ig])
-                             * (p_sf->strucFac(it, ig) * conj(aux[ig])).real();
-                } else {
-                    evloc += vloc(it, rho_basis->ig2igg[ig])
-                             * (p_sf->strucFac(it, ig) * conj(aux[ig]) * fact).real();
-}
-            }
+				if (rho_basis->ig_gge0 == ig) 
+				{
+					evloc += vloc(it, rho_basis->ig2igg[ig])
+						* (p_sf->strucFac(it, ig) * conj(aux[ig])).real();
+				} 
+				else 
+				{
+					evloc += vloc(it, rho_basis->ig2igg[ig])
+						* (p_sf->strucFac(it, ig) * conj(aux[ig]) * fact).real();
+				}
+			}
 		}
     }
+
+
     for (int it = 0; it < ucell.ntype; ++it)
     {
         const Atom* atom = &ucell.atoms[it];
 		if(atom->coulomb_potential)
 		{
-		//
-		// special case: pseudopotential is coulomb 1/r potential
-		//
+		    // special case: pseudopotential is coulomb 1/r potential
 			this->dvloc_coulomb (ucell,atom->ncpp.zv, dvloc.data(), rho_basis);
-		//
 		}
 		else
 		{
-		//
-		// normal case: dvloc contains dV_loc(G)/dG
-		//
+		    // normal case: dvloc contains dV_loc(G)/dG
 			this->dvloc_of_g ( atom->ncpp.msh, atom->ncpp.rab.data(), atom->ncpp.r.data(),
 					atom->ncpp.vloc_at.data(), atom->ncpp.zv, dvloc.data(), rho_basis, ucell);
-		//
 		}
 #ifndef _OPENMP
 		ModuleBase::matrix &local_sigma = sigma;
 #else
 #pragma omp parallel
-{
-		ModuleBase::matrix local_sigma(3, 3);
-		#pragma omp for
-#endif
-		for(int ig = 0;ig< rho_basis->npw;ig++)
 		{
-			for (int l = 0;l< 3;l++)
+			ModuleBase::matrix local_sigma(3, 3);
+#pragma omp for
+#endif
+			for(int ig = 0;ig< rho_basis->npw;ig++)
 			{
-				for (int m = 0; m<l+1;m++)
+				for (int l = 0;l< 3;l++)
 				{
-                    local_sigma(l, m) = local_sigma(l, m)
-                                        + (conj(aux[ig]) * p_sf->strucFac(it, ig)).real() * 2.0
-                                              * dvloc[rho_basis->ig2igg[ig]] * ucell.tpiba2
-                                              * rho_basis->gcar[ig][l] * rho_basis->gcar[ig][m] * fact;
-                }
+					for (int m = 0; m<l+1;m++)
+					{
+						local_sigma(l, m) = local_sigma(l, m)
+							+ (conj(aux[ig]) * p_sf->strucFac(it, ig)).real() * 2.0
+							* dvloc[rho_basis->ig2igg[ig]] * ucell.tpiba2
+							* rho_basis->gcar[ig][l] * rho_basis->gcar[ig][m] * fact;
+					}
+				}
 			}
-		}
 #ifdef _OPENMP
-		#pragma omp critical(stress_loc_reduce)
-		{
-			for(int l=0;l<3;l++)
+#pragma omp critical(stress_loc_reduce)
 			{
-				for(int m=0;m<l+1;m++)
+				for(int l=0;l<3;l++)
 				{
-					sigma(l,m) += local_sigma(l,m);
+					for(int m=0;m<l+1;m++)
+					{
+						sigma(l,m) += local_sigma(l,m);
+					}
 				}
 			}
 		}
-}
 #endif
 	}
 
@@ -162,9 +160,7 @@ void Stress_Func<FPTYPE, Device>::stress_loc(const UnitCell& ucell,
 		}
 	}
 
-
-
-	ModuleBase::timer::tick("Stress_Func","stress_loc");
+	ModuleBase::timer::tick("Stress","stress_loc");
 	return;
 }
 
@@ -307,4 +303,4 @@ void Stress_Func<FPTYPE, Device>::dvloc_coulomb(const UnitCell& ucell,
 template class Stress_Func<double, base_device::DEVICE_CPU>;
 #if ((defined __CUDA) || (defined __ROCM))
 template class Stress_Func<double, base_device::DEVICE_GPU>;
-#endif
\ No newline at end of file
+#endif
diff --git a/source/module_hamilt_pw/hamilt_pwdft/stress_func_mgga.cpp b/source/module_hamilt_pw/hamilt_pwdft/stress_func_mgga.cpp
index 85f49d1138..c528b0f417 100644
--- a/source/module_hamilt_pw/hamilt_pwdft/stress_func_mgga.cpp
+++ b/source/module_hamilt_pw/hamilt_pwdft/stress_func_mgga.cpp
@@ -18,11 +18,12 @@ void Stress_Func<FPTYPE, Device>::stress_mgga(const UnitCell& ucell,
                                               ModulePW::PW_Basis_K* wfc_basis,
                                               const psi::Psi<complex<FPTYPE>, Device>* psi_in)
 {
-    ModuleBase::timer::tick("Stress_Func", "stress_mgga");
+	if (PARAM.inp.nspin == 4) 
+	{
+		ModuleBase::WARNING_QUIT("stress_mgga", "noncollinear stress + mGGA not implemented");
+	}
 
-    if (PARAM.inp.nspin == 4) {
-        ModuleBase::WARNING_QUIT("stress_mgga", "noncollinear stress + mGGA not implemented");
-}
+    ModuleBase::timer::tick("Stress", "stress_mgga");
 
     int current_spin = 0;
     const int nrxx = wfc_basis->nrxx;
@@ -37,6 +38,7 @@ void Stress_Func<FPTYPE, Device>::stress_mgga(const UnitCell& ucell,
         ct::DataTypeToEnum<std::complex<FPTYPE>>::value,
         ct::DeviceTypeToEnum<ct_Device>::value,
         {nrxx * 3});
+
     auto crosstaus = ct::Tensor(
         ct::DataTypeToEnum<FPTYPE>::value,
         ct::DeviceTypeToEnum<ct_Device>::value,
@@ -46,18 +48,28 @@ void Stress_Func<FPTYPE, Device>::stress_mgga(const UnitCell& ucell,
     auto cal_stress_mgga_solver = hamilt::cal_stress_mgga_op<std::complex<FPTYPE>, Device>();
     for (int ik = 0; ik < p_kv->get_nks(); ik++)
     {
-        if (PARAM.inp.nspin == 2) {
-            current_spin = p_kv->isk[ik];
-}
+        if (PARAM.inp.nspin == 2) 
+		{
+			current_spin = p_kv->isk[ik];
+		}
         const int npw = p_kv->ngk[ik];
 
         for (int ibnd = 0; ibnd < PARAM.inp.nbands; ibnd++)
         {
             const FPTYPE w1 = wg(ik, ibnd) / ucell.omega;
             const std::complex<FPTYPE>* psi = &psi_in[0](ik, ibnd, 0);
-            XC_Functional::grad_wfc<std::complex<FPTYPE>, Device>(ik, ucell.tpiba, wfc_basis, psi, gradwfc.data<std::complex<FPTYPE>>());
-            cal_stress_mgga_solver(
-                current_spin, nrxx, w1, gradwfc.data<std::complex<FPTYPE>>(), crosstaus.data<FPTYPE>());
+			XC_Functional::grad_wfc<std::complex<FPTYPE>, Device>(ik, 
+					ucell.tpiba, 
+					wfc_basis, 
+					psi, 
+					gradwfc.data<std::complex<FPTYPE>>());
+
+			cal_stress_mgga_solver(
+					current_spin, 
+					nrxx, 
+					w1, 
+					gradwfc.data<std::complex<FPTYPE>>(), 
+					crosstaus.data<FPTYPE>());
         } // band loop
         // delete[] psi;
     } // k loop
@@ -88,13 +100,15 @@ void Stress_Func<FPTYPE, Device>::stress_mgga(const UnitCell& ucell,
             for (int iy = 0; iy < 3; iy++)
             {
                 FPTYPE delta = 0.0;
-                if (ix == iy) {
-                    delta = 1.0;
-}
-                for (int ir = 0; ir < nrxx; ir++)
+				if (ix == iy) 
+				{
+					delta = 1.0;
+				}
+				for (int ir = 0; ir < nrxx; ir++)
                 {
-                    FPTYPE x
-                        = v_ofk(is, ir) * (chr->kin_r[is][ir] * delta + crosstaus_pack[is][ipol2xy[ix][iy] * nrxx + ir]);
+                    FPTYPE x = v_ofk(is, ir) * 
+                          (chr->kin_r[is][ir] * delta 
+                           + crosstaus_pack[is][ipol2xy[ix][iy] * nrxx + ir]);
                     sigma_mgga[ix][iy] += x;
                 }
             }
@@ -117,11 +131,11 @@ void Stress_Func<FPTYPE, Device>::stress_mgga(const UnitCell& ucell,
             sigma(i, j) += sigma_mgga[i][j] / wfc_basis->nxyz;
         }
     }
-    ModuleBase::timer::tick("Stress_Func", "stress_mgga");
+    ModuleBase::timer::tick("Stress", "stress_mgga");
     return;
 }
 
 template class Stress_Func<double, base_device::DEVICE_CPU>;
 #if ((defined __CUDA) || (defined __ROCM))
 template class Stress_Func<double, base_device::DEVICE_GPU>;
-#endif
\ No newline at end of file
+#endif
diff --git a/source/module_hamilt_pw/hamilt_pwdft/stress_func_nl.cpp b/source/module_hamilt_pw/hamilt_pwdft/stress_func_nl.cpp
index 1af82ba153..41554e37ba 100644
--- a/source/module_hamilt_pw/hamilt_pwdft/stress_func_nl.cpp
+++ b/source/module_hamilt_pw/hamilt_pwdft/stress_func_nl.cpp
@@ -21,13 +21,13 @@ void Stress_Func<FPTYPE, Device>::stress_nl(ModuleBase::matrix& sigma,
                                             const pseudopot_cell_vnl& nlpp_in,
                                             const UnitCell& ucell_in)
 {
-    ModuleBase::TITLE("Stress_Func", "stress_nl");
+    ModuleBase::TITLE("Stress", "stress_nl");
     // skip nkb==0
     if (nlpp_in.nkb == 0 || psi_in == nullptr || wfc_basis == nullptr)
     {
         return;
     }
-    ModuleBase::timer::tick("Stress_Func", "stress_nl");
+    ModuleBase::timer::tick("Stress", "stress_nl");
 
     FPTYPE* stress_device = nullptr;
     resmem_var_op()(stress_device, 9);
@@ -98,7 +98,7 @@ void Stress_Func<FPTYPE, Device>::stress_nl(ModuleBase::matrix& sigma,
         p_symm->symmetrize_mat3(sigma, ucell_in.lat);
     } // end symmetry
 
-    ModuleBase::timer::tick("Stress_Func", "stress_nl");
+    ModuleBase::timer::tick("Stress", "stress_nl");
 }
 
 template <typename FPTYPE, typename Device>
@@ -108,9 +108,10 @@ void Stress_Func<FPTYPE, Device>::get_dvnl1(ModuleBase::ComplexMatrix& vkb,
                                             Structure_Factor* p_sf,
                                             ModulePW::PW_Basis_K* wfc_basis)
 {
-    if (PARAM.inp.test_pp) {
-        ModuleBase::TITLE("Stress_Func", "get_dvnl1");
-}
+	if (PARAM.inp.test_pp) 
+	{
+		ModuleBase::TITLE("Stress", "get_dvnl1");
+	}
 
     const int npw = wfc_basis->npwk[ik];
     const int lmaxkb = nlpp->lmaxkb;
@@ -120,6 +121,9 @@ void Stress_Func<FPTYPE, Device>::get_dvnl1(ModuleBase::ComplexMatrix& vkb,
     }
 
     const int nhm = nlpp->nhm;
+
+    assert(npw>0);
+
     ModuleBase::matrix vkb1(nhm, npw);
     vkb1.zero_out();
     FPTYPE* vq = new FPTYPE[npw];
@@ -143,22 +147,25 @@ void Stress_Func<FPTYPE, Device>::get_dvnl1(ModuleBase::ComplexMatrix& vkb,
     int jkb = 0;
     for (int it = 0; it < this->ucell->ntype; it++)
     {
-        if (PARAM.inp.test_pp > 1) {
-            ModuleBase::GlobalFunc::OUT("it", it);
-}
-        // calculate beta in G-space using an interpolation table
+		if (PARAM.inp.test_pp > 1) 
+		{
+			ModuleBase::GlobalFunc::OUT("it", it);
+		}
+		// calculate beta in G-space using an interpolation table
         const int nbeta = this->ucell->atoms[it].ncpp.nbeta;
         const int nh = this->ucell->atoms[it].ncpp.nh;
 
-        if (PARAM.inp.test_pp > 1) {
-            ModuleBase::GlobalFunc::OUT("nbeta", nbeta);
-}
+		if (PARAM.inp.test_pp > 1) 
+		{
+			ModuleBase::GlobalFunc::OUT("nbeta", nbeta);
+		}
 
         for (int nb = 0; nb < nbeta; nb++)
         {
-            if (PARAM.inp.test_pp > 1) {
-                ModuleBase::GlobalFunc::OUT("ib", nb);
-}
+			if (PARAM.inp.test_pp > 1) 
+			{
+				ModuleBase::GlobalFunc::OUT("ib", nb);
+			}
 #ifdef _OPENMP
 #pragma omp parallel for
 #endif
@@ -166,9 +173,6 @@ void Stress_Func<FPTYPE, Device>::get_dvnl1(ModuleBase::ComplexMatrix& vkb,
             {
                 const FPTYPE gnorm = gk[ig].norm() * this->ucell->tpiba;
 
-                // cout << "\n gk[ig] = " << gk[ig].x << " " << gk[ig].y << " " << gk[ig].z;
-                // cout << "\n gk.norm = " << gnorm;
-
                 vq[ig] = ModuleBase::PolyInt::Polynomial_Interpolation(nlpp->tab,
                                                                        it,
                                                                        nb,
@@ -176,7 +180,7 @@ void Stress_Func<FPTYPE, Device>::get_dvnl1(ModuleBase::ComplexMatrix& vkb,
                                                                        PARAM.globalv.dq,
                                                                        gnorm);
 
-            } // enddo
+            }
 
             // add spherical harmonic part
             for (int ih = 0; ih < nh; ih++)
@@ -209,7 +213,7 @@ void Stress_Func<FPTYPE, Device>::get_dvnl1(ModuleBase::ComplexMatrix& vkb,
             {
                 for (int ig = 0; ig < npw; ig++)
                 {
-                    std::complex<FPTYPE> pref = pref_tab[int(nlpp->nhtol(it, ih)) % imag_pow_period]; //?
+                    std::complex<FPTYPE> pref = pref_tab[int(nlpp->nhtol(it, ih)) % imag_pow_period];
                     vkb(jkb + ih, ig) = vkb1(ih, ig) * sk[ig] * pref;
                 }
 
@@ -223,15 +227,17 @@ void Stress_Func<FPTYPE, Device>::get_dvnl1(ModuleBase::ComplexMatrix& vkb,
     return;
 } // end get_dvnl1
 
+
 template <typename FPTYPE, typename Device>
 void Stress_Func<FPTYPE, Device>::get_dvnl2(ModuleBase::ComplexMatrix& vkb,
                                             const int ik,
                                             Structure_Factor* p_sf,
                                             ModulePW::PW_Basis_K* wfc_basis)
 {
-    if (PARAM.inp.test_pp) {
-        ModuleBase::TITLE("Stress", "get_dvnl2");
-}
+	if (PARAM.inp.test_pp) 
+	{
+		ModuleBase::TITLE("Stress", "get_dvnl2");
+	}
     //	ModuleBase::timer::tick("Stress","get_dvnl2");
     const int npw = wfc_basis->npwk[ik];
     const int lmaxkb = nlpp->lmaxkb;
@@ -241,6 +247,9 @@ void Stress_Func<FPTYPE, Device>::get_dvnl2(ModuleBase::ComplexMatrix& vkb,
     }
 
     const int nhm = nlpp->nhm;
+
+    assert(npw>0);
+
     ModuleBase::matrix vkb1(nhm, npw);
     FPTYPE* vq = new FPTYPE[npw];
     const int x1 = (lmaxkb + 1) * (lmaxkb + 1);
@@ -262,30 +271,31 @@ void Stress_Func<FPTYPE, Device>::get_dvnl2(ModuleBase::ComplexMatrix& vkb,
     int jkb = 0;
     for (int it = 0; it < this->ucell->ntype; it++)
     {
-        if (PARAM.inp.test_pp > 1) {
-            ModuleBase::GlobalFunc::OUT("it", it);
-}
+        if (PARAM.inp.test_pp > 1) 
+		{
+			ModuleBase::GlobalFunc::OUT("it", it);
+		}
         // calculate beta in G-space using an interpolation table
         const int nbeta = this->ucell->atoms[it].ncpp.nbeta;
         const int nh = this->ucell->atoms[it].ncpp.nh;
 
-        if (PARAM.inp.test_pp > 1) {
-            ModuleBase::GlobalFunc::OUT("nbeta", nbeta);
-}
+		if (PARAM.inp.test_pp > 1) 
+		{
+			ModuleBase::GlobalFunc::OUT("nbeta", nbeta);
+		}
 
         for (int nb = 0; nb < nbeta; nb++)
         {
-            if (PARAM.inp.test_pp > 1) {
-                ModuleBase::GlobalFunc::OUT("ib", nb);
-}
+			if (PARAM.inp.test_pp > 1) 
+			{
+				ModuleBase::GlobalFunc::OUT("ib", nb);
+			}
 #ifdef _OPENMP
 #pragma omp parallel for
 #endif
             for (int ig = 0; ig < npw; ig++)
             {
                 const FPTYPE gnorm = gk[ig].norm() * this->ucell->tpiba;
-                // cout << "\n gk[ig] = " << gk[ig].x << " " << gk[ig].y << " " << gk[ig].z;
-                // cout << "\n gk.norm = " << gnorm;
                 vq[ig] = hamilt::Nonlocal_maths<FPTYPE, Device>::Polynomial_Interpolation_nl(nlpp->tab,
                                                                                              it,
                                                                                              nb,
@@ -309,7 +319,7 @@ void Stress_Func<FPTYPE, Device>::get_dvnl2(ModuleBase::ComplexMatrix& vkb,
                     }
                 }
             } // end ih
-        }     // end nbeta
+        } // end nbeta
 
         // vkb1 contains all betas including angular part for type nt
         // now add the structure factor and factor (-i)^l
@@ -331,11 +341,10 @@ void Stress_Func<FPTYPE, Device>::get_dvnl2(ModuleBase::ComplexMatrix& vkb,
             jkb += nh;
             delete[] sk;
         } // end ia
-    }     // enddo
+    } // enddo
 
     delete[] gk;
     delete[] vq;
-    //	ModuleBase::timer::tick("Stress","get_dvnl2");
 
     return;
 }
@@ -370,4 +379,4 @@ FPTYPE Stress_Func<FPTYPE, Device>::Polynomial_Interpolation_nl(const ModuleBase
 template class Stress_Func<double, base_device::DEVICE_CPU>;
 #if ((defined __CUDA) || (defined __ROCM))
 template class Stress_Func<double, base_device::DEVICE_GPU>;
-#endif
\ No newline at end of file
+#endif
diff --git a/source/module_hamilt_pw/hamilt_pwdft/stress_func_onsite.cpp b/source/module_hamilt_pw/hamilt_pwdft/stress_func_onsite.cpp
index acce052e83..b649d219b8 100644
--- a/source/module_hamilt_pw/hamilt_pwdft/stress_func_onsite.cpp
+++ b/source/module_hamilt_pw/hamilt_pwdft/stress_func_onsite.cpp
@@ -14,12 +14,12 @@ void Stress_Func<FPTYPE, Device>::stress_onsite(ModuleBase::matrix& sigma,
                                             const psi::Psi<complex<FPTYPE>, Device>* psi_in,
                                             ModuleSymmetry::Symmetry* p_symm)
 {
-    ModuleBase::TITLE("Stress_Func", "stress_onsite");
+    ModuleBase::TITLE("Stress", "stress_onsite");
     if(psi_in == nullptr || wfc_basis == nullptr)
     {
         return;
     }
-    ModuleBase::timer::tick("Stress_Func", "stress_onsite");
+    ModuleBase::timer::tick("Stress", "stress_onsite");
 
     FPTYPE* stress_device = nullptr;
     resmem_var_op()(stress_device, 9);
@@ -29,7 +29,7 @@ void Stress_Func<FPTYPE, Device>::stress_onsite(ModuleBase::matrix& sigma,
     auto* onsite_p = projectors::OnsiteProjector<FPTYPE, Device>::get_instance();
 
     const int nks = wfc_basis->nks;
-    for (int ik = 0; ik < nks; ik++) // loop k points
+    for (int ik = 0; ik < nks; ik++)
     {
         // skip zero weights to speed up
         int nbands_occ = wg.nc;
@@ -45,6 +45,7 @@ void Stress_Func<FPTYPE, Device>::stress_onsite(ModuleBase::matrix& sigma,
 
         // calculate becp = <psi|beta> for all beta functions
         onsite_p->get_fs_tools()->cal_becp(ik, npm);
+
         // calculate dbecp = <psi|d(beta)/dR> for all beta functions
         // calculate stress = \sum <psi|d(beta_j)/dR> * <psi|beta_i> * D_{ij}
         for (int ipol = 0; ipol < 3; ipol++)
@@ -56,20 +57,29 @@ void Stress_Func<FPTYPE, Device>::stress_onsite(ModuleBase::matrix& sigma,
                 if(PARAM.inp.dft_plus_u)
                 {
                     auto* dftu = ModuleDFTU::DFTU::get_instance();
-                    onsite_p->get_fs_tools()->cal_stress_dftu(ik, npm, stress_device_tmp, dftu->orbital_corr.data(), dftu->get_eff_pot_pw(0), dftu->get_size_eff_pot_pw(), wg.c);
+					onsite_p->get_fs_tools()->cal_stress_dftu(ik, 
+							npm, 
+							stress_device_tmp, 
+							dftu->orbital_corr.data(), 
+							dftu->get_eff_pot_pw(0), 
+							dftu->get_size_eff_pot_pw(), 
+							wg.c);
                 }
                 if(PARAM.inp.sc_mag_switch)
                 {
-                    spinconstrain::SpinConstrain<std::complex<double>>& sc = spinconstrain::SpinConstrain<std::complex<double>>::getScInstance();
+                    spinconstrain::SpinConstrain<std::complex<double>>& sc = 
+                        spinconstrain::SpinConstrain<std::complex<double>>::getScInstance();
                     const std::vector<ModuleBase::Vector3<double>>& lambda = sc.get_sc_lambda();
                     onsite_p->get_fs_tools()->cal_stress_dspin(ik, npm, stress_device_tmp, lambda.data(), wg.c);
                 }
             }
         }
     }
+
     // transfer stress from device to host
     syncmem_var_d2h_op()(sigma_onsite.data(), stress_device, 9);
     delmem_var_op()(stress_device);
+
     // sum up forcenl from all processors
     for (int l = 0; l < 3; l++)
     {
@@ -82,6 +92,7 @@ void Stress_Func<FPTYPE, Device>::stress_onsite(ModuleBase::matrix& sigma,
             Parallel_Reduce::reduce_all(sigma_onsite[l * 3 + m]); // qianrui fix a bug for kpar > 1
         }
     }
+
     // rescale the stress with 1/omega
     for (int ipol = 0; ipol < 3; ipol++)
     {
@@ -98,16 +109,16 @@ void Stress_Func<FPTYPE, Device>::stress_onsite(ModuleBase::matrix& sigma,
             sigma(ipol, jpol) = sigma_onsite[ipol * 3 + jpol];
         }
     }
-    // do symmetry
+
     if (ModuleSymmetry::Symmetry::symm_flag == 1)
     {
         p_symm->symmetrize_mat3(sigma, ucell_in.lat);
-    } // end symmetry
+    }
 
-    ModuleBase::timer::tick("Stress_Func", "stress_onsite");
+    ModuleBase::timer::tick("Stress", "stress_onsite");
 }
 
 template class Stress_Func<double, base_device::DEVICE_CPU>;
 #if ((defined __CUDA) || (defined __ROCM))
 template class Stress_Func<double, base_device::DEVICE_GPU>;
-#endif
\ No newline at end of file
+#endif
diff --git a/source/module_hamilt_pw/hamilt_pwdft/stress_func_us.cpp b/source/module_hamilt_pw/hamilt_pwdft/stress_func_us.cpp
index 464308f317..3273584050 100644
--- a/source/module_hamilt_pw/hamilt_pwdft/stress_func_us.cpp
+++ b/source/module_hamilt_pw/hamilt_pwdft/stress_func_us.cpp
@@ -15,8 +15,8 @@ void Stress_PW<FPTYPE, Device>::stress_us(ModuleBase::matrix& sigma,
                                           const pseudopot_cell_vnl& nlpp,
                                           const UnitCell& ucell)
 {
-    ModuleBase::TITLE("Stress_Func", "stress_us");
-    ModuleBase::timer::tick("Stress_Func", "stress_us");
+    ModuleBase::TITLE("Stress", "stress_us");
+    ModuleBase::timer::tick("Stress", "stress_us");
 
     const int npw = rho_basis->npw;
     const int nh_tot = nlpp.nhm * (nlpp.nhm + 1) / 2;
@@ -184,7 +184,7 @@ void Stress_PW<FPTYPE, Device>::stress_us(ModuleBase::matrix& sigma,
     }
     sigma += stressus;
 
-    ModuleBase::timer::tick("Stress_Func", "stress_us");
+    ModuleBase::timer::tick("Stress", "stress_us");
     return;
 }
 
@@ -202,9 +202,10 @@ void Stress_Func<FPTYPE, Device>::dqvan2(const pseudopot_cell_vnl& nlpp,
                                          const ModuleBase::matrix& dylmk0,
                                          std::complex<FPTYPE>* dqg)
 {
-    if (PARAM.inp.test_pp) {
-        ModuleBase::TITLE("Stress", "dqvan2");
-}
+	if (PARAM.inp.test_pp) 
+	{
+		ModuleBase::TITLE("Stress", "dqvan2");
+	}
 
     // computes the indices which correspond to ih,jh
     const int nb = nlpp.indv(itype, ih);
@@ -294,4 +295,4 @@ void Stress_Func<FPTYPE, Device>::dqvan2(const pseudopot_cell_vnl& nlpp,
 template class Stress_PW<double, base_device::DEVICE_CPU>;
 #if ((defined __CUDA) || (defined __ROCM))
 template class Stress_PW<double, base_device::DEVICE_GPU>;
-#endif
\ No newline at end of file
+#endif
diff --git a/source/module_hsolver/diago_bpcg.cpp b/source/module_hsolver/diago_bpcg.cpp
old mode 100755
new mode 100644
diff --git a/source/module_hsolver/hsolver_pw.cpp b/source/module_hsolver/hsolver_pw.cpp
index 02f44f857b..3206a85672 100644
--- a/source/module_hsolver/hsolver_pw.cpp
+++ b/source/module_hsolver/hsolver_pw.cpp
@@ -482,7 +482,7 @@ void HSolverPW<T, Device>::hamiltSolvePsiK(hamilt::Hamilt<T, Device>* hm,
         const int ndim = psi.get_current_ngk();
         // hpsi_func (X, HX, ld, nvec) -> HX = H(X), X and HX blockvectors of size ld x nvec
         auto hpsi_func = [hm, cur_nbasis](T* psi_in, T* hpsi_out, const int ld_psi, const int nvec) {
-            ModuleBase::timer::tick("DavSubspace", "hpsi_func");
+            ModuleBase::timer::tick("diago_bpcg", "hpsi_func");
 
             // Convert "pointer data stucture" to a psi::Psi object
             auto psi_iter_wrapper = psi::Psi<T, Device>(psi_in, 1, nvec, ld_psi, cur_nbasis);
@@ -493,7 +493,7 @@ void HSolverPW<T, Device>::hamiltSolvePsiK(hamilt::Hamilt<T, Device>* hm,
             hpsi_info info(&psi_iter_wrapper, bands_range, hpsi_out);
             hm->ops->hPsi(info);
 
-            ModuleBase::timer::tick("DavSubspace", "hpsi_func");
+            ModuleBase::timer::tick("diago_bpcg", "hpsi_func");
         };
         DiagoBPCG<T, Device> bpcg(pre_condition.data());
         bpcg.init_iter(PARAM.inp.nbands, nband_l, nbasis, ndim);
@@ -670,4 +670,4 @@ template class HSolverPW<std::complex<float>, base_device::DEVICE_GPU>;
 template class HSolverPW<std::complex<double>, base_device::DEVICE_GPU>;
 #endif
 
-} // namespace hsolver
\ No newline at end of file
+} // namespace hsolver
diff --git a/source/module_psi/psi_initializer.cpp b/source/module_psi/psi_initializer.cpp
index 982ef2fdce..03b25c0f79 100644
--- a/source/module_psi/psi_initializer.cpp
+++ b/source/module_psi/psi_initializer.cpp
@@ -32,7 +32,7 @@ void psi_initializer<T>::initialize(const Structure_Factor* sf,
 template <typename T>
 void psi_initializer<T>::random_t(T* psi, const int iw_start, const int iw_end, const int ik, const int mode)
 {
-    ModuleBase::timer::tick("psi_initializer", "random_t");
+    ModuleBase::timer::tick("psi_init", "random_t");
     assert(mode <= 1);
     assert(iw_start >= 0);
     const int ng = this->pw_wfc_->npwk[ik];
@@ -163,14 +163,14 @@ void psi_initializer<T>::random_t(T* psi, const int iw_start, const int iw_end,
             }
         }
     }
-    ModuleBase::timer::tick("psi_initializer_random", "random_t");
+    ModuleBase::timer::tick("psi_init", "random_t");
 }
 
 #ifdef __MPI
 template <typename T>
 void psi_initializer<T>::stick_to_pool(Real* stick, const int& ir, Real* out) const
 {
-    ModuleBase::timer::tick("psi_initializer", "stick_to_pool");
+    ModuleBase::timer::tick("psi_init", "stick_to_pool");
     MPI_Status ierror;
     const int is = this->ixy2is_[ir];
     const int ip = this->pw_wfc_->fftixy2ip[ir];
@@ -218,8 +218,8 @@ void psi_initializer<T>::stick_to_pool(Real* stick, const int& ir, Real* out) co
         }
     }
 
+    ModuleBase::timer::tick("psi_init", "stick_to_pool");
     return;
-    ModuleBase::timer::tick("psi_initializer", "stick_to_pool");
 }
 #endif
 
diff --git a/source/module_psi/psi_initializer_atomic.cpp b/source/module_psi/psi_initializer_atomic.cpp
index d5ea4583b0..d4914a8a73 100644
--- a/source/module_psi/psi_initializer_atomic.cpp
+++ b/source/module_psi/psi_initializer_atomic.cpp
@@ -55,7 +55,8 @@ void psi_initializer_atomic<T>::initialize(const Structure_Factor* sf,         /
                                            const pseudopot_cell_vnl* p_pspot_nl,
                                            const int& rank)
 {
-    ModuleBase::timer::tick("psi_initializer_atomic", "initialize");
+    ModuleBase::timer::tick("psi_init_atomic", "initialize");
+
     if(p_pspot_nl == nullptr)
     {
         ModuleBase::WARNING_QUIT("psi_initializer_atomic<T>::initialize", 
@@ -71,7 +72,8 @@ void psi_initializer_atomic<T>::initialize(const Structure_Factor* sf,         /
     this->ixy2is_.clear();
     this->ixy2is_.resize(this->pw_wfc_->fftnxy);
     this->pw_wfc_->getfftixy2is(this->ixy2is_.data());
-    ModuleBase::timer::tick("psi_initializer_atomic", "initialize_only_once");
+
+    ModuleBase::timer::tick("psi_init_atomic", "initialize");
 }
 
 template <typename T>
@@ -81,7 +83,7 @@ void psi_initializer_atomic<T>::tabulate()
     {
         return;
     }
-    ModuleBase::timer::tick("psi_initializer_atomic", "cal_ovlp_pswfcjlq");
+    ModuleBase::timer::tick("psi_init_atomic", "tabulate");
     
     GlobalV::ofs_running << "\n Make real space PAO into reciprocal space." << std::endl;
     ModuleIO::print_PAOs(*this->p_ucell_);
@@ -219,7 +221,7 @@ void psi_initializer_atomic<T>::tabulate()
             }
         }
     }
-    ModuleBase::timer::tick("psi_initializer_atomic", "cal_ovlp_pswfcjlq");
+    ModuleBase::timer::tick("psi_init_atomic", "tabulate");
 }
 
 std::complex<double> phase_factor(double arg, int mode)
@@ -233,7 +235,7 @@ std::complex<double> phase_factor(double arg, int mode)
 template <typename T>
 void psi_initializer_atomic<T>::init_psig(T* psig,  const int& ik)
 {
-    ModuleBase::timer::tick("psi_initializer_atomic", "init_psig");
+    ModuleBase::timer::tick("psi_init_atomic", "init_psig");
     const int npw = this->pw_wfc_->npwk[ik];
     const int npwk_max = this->pw_wfc_->npwk_max;
     int lmax = this->p_ucell_->lmax_ppwf;
@@ -484,7 +486,7 @@ void psi_initializer_atomic<T>::init_psig(T* psig,  const int& ik)
 	{
 		this->random_t(psig, index, this->nbands_start_, ik);
 	}
-    ModuleBase::timer::tick("psi_initializer_atomic", "init_psig");
+    ModuleBase::timer::tick("psi_init_atomic", "init_psig");
 }
 
 template class psi_initializer_atomic<std::complex<double>>;
diff --git a/source/module_psi/psi_initializer_file.cpp b/source/module_psi/psi_initializer_file.cpp
index 169772aa49..aa49f79d1e 100644
--- a/source/module_psi/psi_initializer_file.cpp
+++ b/source/module_psi/psi_initializer_file.cpp
@@ -21,7 +21,7 @@ void psi_initializer_file<T>::initialize(const Structure_Factor* sf,
 template <typename T>
 void psi_initializer_file<T>::init_psig(T* psig, const int& ik)
 {
-    ModuleBase::timer::tick("psi_initializer_file", "init_psig");
+    ModuleBase::timer::tick("psi_init_file", "init_psig");
     const int npol = PARAM.globalv.npol;
     const int nbasis = this->pw_wfc_->npwk_max * npol;
     const int nkstot = this->p_kv->get_nkstot();
@@ -39,8 +39,8 @@ void psi_initializer_file<T>::init_psig(T* psig, const int& ik)
             psig[ib * nbasis + ig] = this->template cast_to_T<T>(wfcatom(ib, ig));
         }
     }
-    ModuleBase::timer::tick("psi_initializer_file", "init_psig");
+    ModuleBase::timer::tick("psi_init_file", "init_psig");
 }
 
 template class psi_initializer_file<std::complex<double>>;
-template class psi_initializer_file<std::complex<float>>;
\ No newline at end of file
+template class psi_initializer_file<std::complex<float>>;
diff --git a/source/module_psi/psi_initializer_nao.cpp b/source/module_psi/psi_initializer_nao.cpp
index c2bb249501..e88c759cba 100644
--- a/source/module_psi/psi_initializer_nao.cpp
+++ b/source/module_psi/psi_initializer_nao.cpp
@@ -45,7 +45,7 @@ void normalize(const std::vector<double>& r, std::vector<double>& flz)
 template <typename T>
 void psi_initializer_nao<T>::read_external_orbs(const std::string* orbital_files, const int& rank)
 {
-    ModuleBase::timer::tick("psi_initializer_nao", "read_external_orbs");
+    ModuleBase::timer::tick("psi_init_nao", "read_external_orbs");
 
     this->orbital_files_.resize(this->p_ucell_->ntype);
     this->nr_.resize(this->p_ucell_->ntype);
@@ -116,7 +116,7 @@ void psi_initializer_nao<T>::read_external_orbs(const std::string* orbital_files
             std::copy(radials[ichi].begin(), radials[ichi].end(), this->chi_[it][ichi].begin());
         }
     }
-    ModuleBase::timer::tick("psi_initializer_nao", "read_external_orbs");
+    ModuleBase::timer::tick("psi_init_nao", "read_external_orbs");
 }
 
 template <typename T>
@@ -155,7 +155,7 @@ void psi_initializer_nao<T>::initialize(const Structure_Factor* sf,
                                         const pseudopot_cell_vnl* p_pspot_nl,
                                         const int& rank)
 {
-    ModuleBase::timer::tick("psi_initializer_nao", "initialize_mpi");
+    ModuleBase::timer::tick("psi_init_nao", "initialize");
 
     // import
     psi_initializer<T>::initialize(sf, pw_wfc, p_ucell, p_kv_in, random_seed, p_pspot_nl, rank);
@@ -191,13 +191,13 @@ void psi_initializer_nao<T>::initialize(const Structure_Factor* sf,
     this->nbands_start_ = std::max(nbands_local, PARAM.inp.nbands);
     this->nbands_complem_ = this->nbands_start_ - nbands_local;
 
-    ModuleBase::timer::tick("psi_initializer_nao", "initialize_mpi");
+    ModuleBase::timer::tick("psi_init_nao", "initialize");
 }
 
 template <typename T>
 void psi_initializer_nao<T>::tabulate()
 {
-    ModuleBase::timer::tick("psi_initializer_nao", "tabulate");
+    ModuleBase::timer::tick("psi_init_nao", "tabulate");
 
     // a uniformed qgrid
     std::vector<double> qgrid(PARAM.globalv.nqx);
@@ -246,13 +246,13 @@ void psi_initializer_nao<T>::tabulate()
             }
         }
     }
-    ModuleBase::timer::tick("psi_initializer_nao", "tabulate");
+    ModuleBase::timer::tick("psi_init_nao", "tabulate");
 }
 
 template <typename T>
 void psi_initializer_nao<T>::init_psig(T* psig, const int& ik)
 {
-    ModuleBase::timer::tick("psi_initializer_nao", "initialize");
+    ModuleBase::timer::tick("psi_init_nao", "init_psig");
     assert(ik >= 0);
     const int npw = this->pw_wfc_->npwk[ik];
     const int npwk_max = this->pw_wfc_->npwk_max;
@@ -379,7 +379,7 @@ void psi_initializer_nao<T>::init_psig(T* psig, const int& ik)
     {
         this->random_t(psig, ibasis, this->nbands_start_, ik);
     }
-    ModuleBase::timer::tick("psi_initializer_nao", "initialize");
+    ModuleBase::timer::tick("psi_init_nao", "init_psig");
 }
 
 template class psi_initializer_nao<std::complex<double>>;
diff --git a/source/module_psi/psi_initializer_random.cpp b/source/module_psi/psi_initializer_random.cpp
index 84905513e3..e781f32dfb 100644
--- a/source/module_psi/psi_initializer_random.cpp
+++ b/source/module_psi/psi_initializer_random.cpp
@@ -27,13 +27,13 @@ void psi_initializer_random<T>::initialize(const Structure_Factor* sf,
 template <typename T>
 void psi_initializer_random<T>::init_psig(T* psig, const int& ik)
 {
-    ModuleBase::timer::tick("psi_initializer_random", "initialize");
+    ModuleBase::timer::tick("psi_init_random", "init_psig");
     this->random_t(psig, 0, this->nbands_start_, ik);
-    ModuleBase::timer::tick("psi_initializer_random", "initialize");
+    ModuleBase::timer::tick("psi_init_random", "init_psig");
 }
 
 template class psi_initializer_random<std::complex<double>>;
 template class psi_initializer_random<std::complex<float>>;
 // gamma point calculation
 template class psi_initializer_random<double>;
-template class psi_initializer_random<float>;
\ No newline at end of file
+template class psi_initializer_random<float>;