diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 8571ab6309..8225aba614 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -31,7 +31,7 @@ jobs:
 
       - name: Configure
         run: |
-          cmake -B build -DBUILD_TESTING=ON -DENABLE_DEEPKS=ON -DENABLE_MLKEDF=ON -DENABLE_LIBXC=ON -DENABLE_LIBRI=ON -DENABLE_PAW=ON -DENABLE_GOOGLEBENCH=ON -DENABLE_RAPIDJSON=ON  -DCMAKE_EXPORT_COMPILE_COMMANDS=1
+          cmake -B build -DBUILD_TESTING=ON -DENABLE_DEEPKS=ON -DENABLE_MLKEDF=ON -DENABLE_LIBXC=ON -DENABLE_LIBRI=ON -DENABLE_PAW=ON -DENABLE_GOOGLEBENCH=ON -DENABLE_RAPIDJSON=ON  -DCMAKE_EXPORT_COMPILE_COMMANDS=1 -DENABLE_FLOAT_FFTW=ON
 
 # Temporarily removed because no one maintains this now.
 # And it will break the CI test workflow.
diff --git a/.gitignore b/.gitignore
index 444e237950..ebad4b553d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -23,4 +23,4 @@ __pycache__
 abacus.json
 *.npy
 toolchain/install/
-toolchain/abacus_env.sh
+toolchain/abacus_env.sh
\ No newline at end of file
diff --git a/source/module_base/test/math_chebyshev_test.cpp b/source/module_base/test/math_chebyshev_test.cpp
index a7ea215266..ada96fe0f9 100644
--- a/source/module_base/test/math_chebyshev_test.cpp
+++ b/source/module_base/test/math_chebyshev_test.cpp
@@ -14,9 +14,6 @@
  *   - calfinalvec_real
  *   - calfinalvec_complex
  *   - tracepolyA
- *   - checkconverge
- *
- *
  */
 class toolfunc
 {
@@ -625,6 +622,8 @@ TEST_F(MathChebyshevTest, tracepolyA_float)
 
 TEST_F(MathChebyshevTest, checkconverge_float)
 {
+    #ifdef __MPI
+    #undef __MPI
     const int norder = 100;
     p_fchetest = new ModuleBase::Chebyshev<float>(norder);
 
@@ -648,5 +647,6 @@ TEST_F(MathChebyshevTest, checkconverge_float)
 
     delete[] v;
     delete p_fchetest;
+    #endif
 }
 #endif
\ No newline at end of file
diff --git a/source/module_base/test_parallel/CMakeLists.txt b/source/module_base/test_parallel/CMakeLists.txt
index 5132549f7a..52f467690a 100644
--- a/source/module_base/test_parallel/CMakeLists.txt
+++ b/source/module_base/test_parallel/CMakeLists.txt
@@ -40,6 +40,12 @@ AddTest(
   SOURCES test_para_gemm.cpp
 )
 
+AddTest(
+  TARGET base_math_chebyshev_mpi
+  LIBS MPI::MPI_CXX parameter ${math_libs} base device container
+  SOURCES math_chebyshev_mpi_test.cpp
+)
+
 add_test(NAME base_para_gemm_parallel
       COMMAND mpirun -np 4 ./base_para_gemm
       WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
diff --git a/source/module_base/test_parallel/math_chebyshev_mpi_test.cpp b/source/module_base/test_parallel/math_chebyshev_mpi_test.cpp
new file mode 100644
index 0000000000..5ca222bb3c
--- /dev/null
+++ b/source/module_base/test_parallel/math_chebyshev_mpi_test.cpp
@@ -0,0 +1,207 @@
+#include "../math_chebyshev.h"
+#include "mpi.h"
+#include "module_base/parallel_comm.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+/************************************************
+ *  unit test of class Chebyshev MPI part
+ ***********************************************/
+
+ /**
+  * - Tested Functions:
+  * - checkconverge
+  */
+/// Scalar test functions and the sigma_y operator used to drive the Chebyshev tests.
+/// `LDA` is the stride between consecutive columns; `factor` scales sigma_y.
+class toolfunc
+{
+  public:
+    double x7(double x)
+    {
+        return pow(x, 7);
+    }
+    double x6(double x)
+    {
+        return pow(x, 6);
+    }
+    double expr(double x)
+    {
+        return exp(x);
+    }
+    std::complex<double> expi(std::complex<double> x)
+    {
+        const std::complex<double> j(0.0, 1.0);
+        return exp(j * x);
+    }
+    std::complex<double> expi2(std::complex<double> x)
+    {
+        const std::complex<double> j(0.0, 1.0);
+        const double PI = 3.14159265358979323846;
+        return exp(j * PI / 2.0 * x);
+    }
+    // Pauli matrix [0,-i;i,0] times `factor`, applied to m columns of stride LDA
+    int LDA = 2;
+    double factor = 1;
+    void sigma_y(std::complex<double>* spin_in, std::complex<double>* spin_out, const int m = 1)
+    {
+        const std::complex<double> j(0.0, 1.0);
+        if (this->LDA < 2) { this->LDA = 2; }
+        for (int i = 0; i < m; ++i)
+        {
+            spin_out[LDA * i] = -factor * j * spin_in[LDA * i + 1];
+            spin_out[LDA * i + 1] = factor * j * spin_in[LDA * i];
+        }
+    }
+#ifdef __ENABLE_FLOAT_FFTW
+    float x7(float x)
+    {
+        return pow(x, 7);
+    }
+    float x6(float x)
+    {
+        return pow(x, 6);
+    }
+    float expr(float x)
+    {
+        return exp(x);
+    }
+    std::complex<float> expi(std::complex<float> x)
+    {
+        const std::complex<float> j(0.0, 1.0);
+        return exp(j * x);
+    }
+    std::complex<float> expi2(std::complex<float> x)
+    {
+        const std::complex<float> j(0.0, 1.0);
+        const float PI = 3.14159265358979323846;
+        return exp(j * PI / 2.0f * x);
+    }
+    // Pauli matrix [0,-i;i,0] times `factor` (single precision)
+    void sigma_y(std::complex<float>* spin_in, std::complex<float>* spin_out, const int m = 1)
+    {
+        const std::complex<float> j(0.0, 1.0);
+        const float scale = static_cast<float>(factor); // same scaling as the double overload
+        if (this->LDA < 2) { this->LDA = 2; }
+        for (int i = 0; i < m; ++i)
+        {
+            spin_out[LDA * i] = -scale * j * spin_in[LDA * i + 1];
+            spin_out[LDA * i + 1] = scale * j * spin_in[LDA * i];
+        }
+    }
+#endif
+};
+/// Fixture: before every test MPI_COMM_WORLD is split in two and stored in POOL_WORLD.
+class MathChebyshevTest : public testing::Test
+{
+  protected:
+    ModuleBase::Chebyshev<double>* p_chetest = nullptr;
+    ModuleBase::Chebyshev<float>* p_fchetest = nullptr;
+    toolfunc fun;
+    int dsize = 0;
+    int my_rank = 0;
+    void SetUp() override
+    {
+        int world_rank = 0;
+        int world_size = 1;
+        MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);
+        MPI_Comm_size(MPI_COMM_WORLD, &world_size);
+
+        // Lower half of the world ranks gets color 0, the rest color 1; the
+        // world rank is the key, so rank order is preserved inside each half.
+        const int color = (world_rank < world_size / 2) ? 0 : 1;
+        MPI_Comm_split(MPI_COMM_WORLD, color, world_rank, &POOL_WORLD);
+    }
+    void TearDown() override
+    {
+        // SetUp creates a new communicator for every test; free it here so the
+        // communicators do not pile up (MPI_Comm_free is collective, as is SetUp).
+        MPI_Comm_free(&POOL_WORLD);
+    }
+};
+
+// checkconverge for Chebyshev<double>, driven by the sigma_y operator.
+TEST_F(MathChebyshevTest, checkconverge)
+{
+    const int norder = 100;
+    p_chetest = new ModuleBase::Chebyshev<double>(norder);
+    auto apply_sigma_y = [&](std::complex<double>* src, std::complex<double>* dst, const int ncol = 1) {
+        fun.sigma_y(src, dst, ncol);
+    };
+
+    // two spinors stored back to back: [1 0; 0 1]
+    std::complex<double>* spinors = new std::complex<double>[4];
+    spinors[0] = 1.0;
+    spinors[1] = 0.0;
+    spinors[2] = 0.0;
+    spinors[3] = 1.0;
+    double tmin = -1.1;
+    double tmax = 1.1;
+    // factor = 1: both spinors are expected to converge with the bounds left at -1.1 / 1.1
+    EXPECT_TRUE(p_chetest->checkconverge(apply_sigma_y, spinors, 2, 2, tmax, tmin, 0.2));
+    EXPECT_TRUE(p_chetest->checkconverge(apply_sigma_y, spinors + 2, 2, 2, tmax, tmin, 0.2));
+    EXPECT_NEAR(tmin, -1.1, 1e-8);
+    EXPECT_NEAR(tmax, 1.1, 1e-8);
+
+    // an inverted upper bound is expected to come back as 1.1
+    tmax = -1.1;
+    EXPECT_TRUE(p_chetest->checkconverge(apply_sigma_y, spinors, 2, 2, tmax, tmin, 2.2));
+    EXPECT_NEAR(tmin, -1.1, 1e-8);
+    EXPECT_NEAR(tmax, 1.1, 1e-8);
+
+    // factor = +/-1.5: expected to be reported as not converged
+    spinors[0] = std::complex<double>(0, 1);
+    spinors[1] = 1;
+    for (const double scale : {1.5, -1.5})
+    {
+        fun.factor = scale;
+        tmin = -1.1;
+        tmax = 1.1;
+        EXPECT_FALSE(p_chetest->checkconverge(apply_sigma_y, spinors, 2, 2, tmax, tmin, 0.2));
+    }
+    fun.factor = 1;
+
+    delete[] spinors;
+    delete p_chetest;
+}
+
+#ifdef __ENABLE_FLOAT_FFTW
+// Single-precision counterpart: sigma_y applied to the two unit spinors is
+// expected to converge and to leave tmin/tmax at -1.1/1.1.
+TEST_F(MathChebyshevTest, checkconverge_float)
+{
+    const int norder = 100;
+    p_fchetest = new ModuleBase::Chebyshev<float>(norder);
+    auto apply_sigma_yf = [&](std::complex<float>* src, std::complex<float>* dst, const int ncol = 1) {
+        fun.sigma_y(src, dst, ncol);
+    };
+
+    // two spinors stored back to back: [1 0; 0 1]
+    std::complex<float>* spinors = new std::complex<float>[4];
+    spinors[0] = 1.0;
+    spinors[1] = 0.0;
+    spinors[2] = 0.0;
+    spinors[3] = 1.0;
+    float tmin = -1.1;
+    float tmax = 1.1;
+
+    EXPECT_TRUE(p_fchetest->checkconverge(apply_sigma_yf, spinors, 2, 2, tmax, tmin, 0.2));
+    EXPECT_TRUE(p_fchetest->checkconverge(apply_sigma_yf, spinors + 2, 2, 2, tmax, tmin, 0.2));
+    EXPECT_NEAR(tmin, -1.1, 1e-6);
+    EXPECT_NEAR(tmax, 1.1, 1e-6);
+    delete[] spinors;
+    delete p_fchetest;
+}
+#endif
+
+// Test driver. MPI is initialised unconditionally because the fixture's SetUp
+// calls MPI_Comm_rank/MPI_Comm_split regardless of the __MPI macro; guarding
+// only MPI_Init/MPI_Finalize would let those calls run without an MPI runtime.
+int main(int argc, char** argv)
+{
+    MPI_Init(&argc, &argv);
+    testing::InitGoogleTest(&argc, argv);
+    const int result = RUN_ALL_TESTS();
+    // Finalize only after all tests ran; the fixtures use communicators until then.
+    MPI_Finalize();
+    return result;
+}
diff --git a/source/module_basis/module_pw/module_fft/fft_cpu.cpp b/source/module_basis/module_pw/module_fft/fft_cpu.cpp
index be920d4ae2..5c4783d83d 100644
--- a/source/module_basis/module_pw/module_fft/fft_cpu.cpp
+++ b/source/module_basis/module_pw/module_fft/fft_cpu.cpp
@@ -347,11 +347,14 @@ void FFT_CPU<double>::fftxyfor(std::complex<double>* in, std::complex<double>* o
     int npy = this->nplane * this->ny;
     if (this->xprime)
     {
+        
         fftw_execute_dft(this->planxfor1, (fftw_complex*)in, (fftw_complex*)out);
+        #pragma omp parallel for
         for (int i = 0; i < this->lixy + 1; ++i)
         {
             fftw_execute_dft(this->planyfor, (fftw_complex*)&in[i * npy], (fftw_complex*)&out[i * npy]);
         }
+        #pragma omp parallel for
         for (int i = rixy; i < this->nx; ++i)
         {
             fftw_execute_dft(this->planyfor, (fftw_complex*)&in[i * npy], (fftw_complex*)&out[i * npy]);
@@ -359,6 +362,7 @@ void FFT_CPU<double>::fftxyfor(std::complex<double>* in, std::complex<double>* o
     }
     else
     {
+        #pragma omp parallel for
         for (int i = 0; i < this->nx; ++i)
         {
             fftw_execute_dft(this->planyfor, (fftw_complex*)&in[i * npy], (fftw_complex*)&out[i * npy]);
@@ -374,10 +378,12 @@ void FFT_CPU<double>::fftxybac(std::complex<double>* in,std::complex<double>* ou
     int npy = this->nplane * this->ny;
     if (this->xprime)
     {
+        #pragma omp parallel for
         for (int i = 0; i < this->lixy + 1; ++i)
         {
             fftw_execute_dft(this->planybac, (fftw_complex*)&in[i * npy], (fftw_complex*)&out[i * npy]);
         }
+        #pragma omp parallel for
         for (int i = rixy; i < this->nx; ++i)
         {
             fftw_execute_dft(this->planybac, (fftw_complex*)&in[i * npy], (fftw_complex*)&out[i * npy]);
@@ -388,6 +394,7 @@ void FFT_CPU<double>::fftxybac(std::complex<double>* in,std::complex<double>* ou
     {
         fftw_execute_dft(this->planxbac1, (fftw_complex*)in, (fftw_complex*)out);
         fftw_execute_dft(this->planxbac2, (fftw_complex*)&in[rixy * nplane], (fftw_complex*)&out[rixy * nplane]);
+        #pragma omp parallel for
         for (int i = 0; i < this->nx; ++i)
         {
             fftw_execute_dft(this->planybac, (fftw_complex*)&in[i * npy], (fftw_complex*)&out[i * npy]);
@@ -414,6 +421,7 @@ void FFT_CPU<double>::fftxyr2c(double* in, std::complex<double>* out) const
     if (this->xprime)
     {
         fftw_execute_dft_r2c(this->planxr2c, in, (fftw_complex*)out);
+        #pragma omp parallel for
         for (int i = 0; i < this->lixy + 1; ++i)
         {
             fftw_execute_dft(this->planyfor, (fftw_complex*)&out[i * npy], (fftw_complex*)&out[i * npy]);
@@ -421,6 +429,7 @@ void FFT_CPU<double>::fftxyr2c(double* in, std::complex<double>* out) const
     }
     else
     {
+        #pragma omp parallel for
         for (int i = 0; i < this->nx; ++i)
         {
             fftw_execute_dft_r2c(this->planyr2c, &in[i * npy], (fftw_complex*)&out[i * npy]);
@@ -435,6 +444,7 @@ void FFT_CPU<double>::fftxyc2r(std::complex<double> *in,double *out) const
     int npy = this->nplane * this->ny;
     if (this->xprime)
     {
+        #pragma omp parallel for
         for (int i = 0; i < this->lixy + 1; ++i)
         {
             fftw_execute_dft(this->planybac, (fftw_complex*)&in[i * npy], (fftw_complex*)&in[i * npy]);
@@ -444,6 +454,7 @@ void FFT_CPU<double>::fftxyc2r(std::complex<double> *in,double *out) const
     else
     {
         fftw_execute_dft(this->planxbac1, (fftw_complex*)in, (fftw_complex*)in);
+        #pragma omp parallel for
         for (int i = 0; i < this->nx; ++i)
         {
             fftw_execute_dft_c2r(this->planyc2r, (fftw_complex*)&in[i * npy], &out[i * npy]);
diff --git a/source/module_basis/module_pw/pw_basis.cpp b/source/module_basis/module_pw/pw_basis.cpp
index 034b1b49a3..f4f7abf1dd 100644
--- a/source/module_basis/module_pw/pw_basis.cpp
+++ b/source/module_basis/module_pw/pw_basis.cpp
@@ -17,7 +17,7 @@ PW_Basis::PW_Basis(std::string device_, std::string precision_) : device(std::mo
     classname="PW_Basis";
     this->fft_bundle.setfft("cpu",this->precision);
     this->double_data_ = (this->precision == "double") || (this->precision == "mixing");
-    this->float_data_ = (this->precision == "single") || (this->precision == "mixing");
+    this->float_data_ = (this->precision == "single")  || (this->precision == "mixing");
 }
 
 PW_Basis:: ~PW_Basis()
diff --git a/source/module_basis/module_pw/pw_basis_k.cpp b/source/module_basis/module_pw/pw_basis_k.cpp
index 91343d61a4..a4689ab2d2 100644
--- a/source/module_basis/module_pw/pw_basis_k.cpp
+++ b/source/module_basis/module_pw/pw_basis_k.cpp
@@ -203,11 +203,11 @@ void PW_Basis_K::setuptransform()
     this->getstartgr();
     this->setupIndGk();
     this->fft_bundle.clear();
+    std::string fft_device = this->device;
 #if defined(__DSP)
-    this->fft_bundle.setfft("dsp", this->precision);
-#else
-    this->fft_bundle.setfft(this->device, this->precision);
+    fft_device = "dsp";
 #endif
+    this->fft_bundle.setfft(fft_device, this->precision);
     if (this->xprime)
     {
         this->fft_bundle.initfft(this->nx,
diff --git a/source/module_basis/module_pw/pw_gatherscatter.h b/source/module_basis/module_pw/pw_gatherscatter.h
index 9279ce3723..97be6e5c23 100644
--- a/source/module_basis/module_pw/pw_gatherscatter.h
+++ b/source/module_basis/module_pw/pw_gatherscatter.h
@@ -98,8 +98,7 @@ void PW_Basis::gatherp_scatters(std::complex<T>* in, std::complex<T>* out) const
 template <typename T>
 void PW_Basis::gathers_scatterp(std::complex<T>* in, std::complex<T>* out) const
 {
-    //ModuleBase::timer::tick(this->classname, "gathers_scatterp");
-    
+    // ModuleBase::timer::tick(this->classname, "gathers_scatterp");
     if(this->poolnproc == 1) //In this case nrxx=fftnx*fftny*nz, nst = nstot, 
     {
 #ifdef _OPENMP
@@ -183,7 +182,7 @@ void PW_Basis::gathers_scatterp(std::complex<T>* in, std::complex<T>* out) const
 		}
     }
 #endif
-    //ModuleBase::timer::tick(this->classname, "gathers_scatterp");
+    // ModuleBase::timer::tick(this->classname, "gathers_scatterp");
     return;
 }
 
diff --git a/source/module_basis/module_pw/pw_transform.cpp b/source/module_basis/module_pw/pw_transform.cpp
index 4f34221775..9d83d57e3c 100644
--- a/source/module_basis/module_pw/pw_transform.cpp
+++ b/source/module_basis/module_pw/pw_transform.cpp
@@ -210,7 +210,7 @@ void PW_Basis::recip2real(const std::complex<FPTYPE>* in, FPTYPE* out, const boo
 #endif
     for (int i = 0; i < this->nst * this->nz; ++i)
     {
-        fft_bundle.get_auxg_data<FPTYPE>()[i] = std::complex<double>(0, 0);
+        fft_bundle.get_auxg_data<FPTYPE>()[i] = std::complex<FPTYPE>(0, 0);
     }
 
 #ifdef _OPENMP
diff --git a/source/module_basis/module_pw/pw_transform_k.cpp b/source/module_basis/module_pw/pw_transform_k.cpp
index a709b60429..61fb2892c2 100644
--- a/source/module_basis/module_pw/pw_transform_k.cpp
+++ b/source/module_basis/module_pw/pw_transform_k.cpp
@@ -187,7 +187,6 @@ void PW_Basis_K::recip2real(const std::complex<FPTYPE>* in,
     this->gathers_scatterp(this->fft_bundle.get_auxg_data<FPTYPE>(), this->fft_bundle.get_auxr_data<FPTYPE>());
 
     this->fft_bundle.fftxybac(fft_bundle.get_auxr_data<FPTYPE>(), fft_bundle.get_auxr_data<FPTYPE>());
-
     auto* auxr = this->fft_bundle.get_auxr_data<FPTYPE>();
     if (add)
     {
diff --git a/source/module_basis/module_pw/test/pw_test.cpp b/source/module_basis/module_pw/test/pw_test.cpp
index b8d7203f45..0377802c43 100644
--- a/source/module_basis/module_pw/test/pw_test.cpp
+++ b/source/module_basis/module_pw/test/pw_test.cpp
@@ -36,11 +36,10 @@ class TestEnv : public testing::Environment
 
 int main(int argc, char **argv) 
 {
-    
     int kpar;
     kpar = 1;
 #ifdef __ENABLE_FLOAT_FFTW
-    precision_flag = "single";
+    precision_flag = "mixing";
 #else
     precision_flag = "double";
 #endif
diff --git a/source/module_basis/module_pw/test_serial/pw_basis_k_test.cpp b/source/module_basis/module_pw/test_serial/pw_basis_k_test.cpp
index 153d46302d..2b4b4fc6ce 100644
--- a/source/module_basis/module_pw/test_serial/pw_basis_k_test.cpp
+++ b/source/module_basis/module_pw/test_serial/pw_basis_k_test.cpp
@@ -48,6 +48,7 @@ TEST_F(PWBasisKTEST,Constructor)
 	EXPECT_EQ(basis_k2.precision,"double");
 	EXPECT_EQ(basis_k2.fft_bundle.precision,"double");
 	ModulePW::PW_Basis_K basis_k3(device_flag, precision_single);
+	EXPECT_EQ(basis_k3.precision,"single");
 	EXPECT_EQ(basis_k3.fft_bundle.precision,"single");
 }
 
diff --git a/source/module_esolver/esolver_fp.cpp b/source/module_esolver/esolver_fp.cpp
index 7861195579..cf6d8a888f 100644
--- a/source/module_esolver/esolver_fp.cpp
+++ b/source/module_esolver/esolver_fp.cpp
@@ -23,45 +23,60 @@ namespace ModuleESolver
 
 ESolver_FP::ESolver_FP()
 {
-    std::string fft_device = PARAM.inp.device;
+}
+
+/// Releases the plane-wave bases and the electronic state held by the solver.
+ESolver_FP::~ESolver_FP()
+{
+    // pw_rho is only deleted once pw_rho_flag has been raised.
+    if (pw_rho_flag)
+    {
+        delete this->pw_rho;
+        this->pw_rho_flag = false;
+    }
+    // On a double grid pw_rhod is a separate object; otherwise it aliases pw_rho.
+    if (PARAM.globalv.double_grid) { delete pw_rhod; }
+    delete this->pelec;
+}
 
+void ESolver_FP::before_all_runners(UnitCell& ucell, const Input_para& inp)
+{
+    ModuleBase::TITLE("ESolver_FP", "before_all_runners");
+    std::string fft_device = PARAM.inp.device;
+    std::string fft_precison = PARAM.inp.precision;
     // LCAO basis doesn't support GPU acceleration on FFT currently
     if(PARAM.inp.basis_type == "lcao")
     {
         fft_device = "cpu";
     }
-
-    pw_rho = new ModulePW::PW_Basis_Big(fft_device, PARAM.inp.precision);
+    if ((PARAM.inp.precision=="single") || (PARAM.inp.precision=="mixing"))
+    {
+        fft_precison = "mixing";
+    }
+    else if (PARAM.inp.precision=="double")
+    {
+        fft_precison = "double";
+    }
+    // GPU (CUDA or ROCm) builds compiled without __ENABLE_FLOAT_FFTW fall back
+    // to the double-precision FFT, regardless of the requested precision.
+    // The ROCm macro is spelled __ROCM, matching the other guards in the code base.
+    #if (not defined(__ENABLE_FLOAT_FFTW) and (defined(__CUDA) || defined(__ROCM)))
+        if (fft_device == "gpu") { fft_precison = "double"; }
+    #endif
+    pw_rho = new ModulePW::PW_Basis_Big(fft_device, fft_precison);
+    pw_rho_flag = true;
     if (PARAM.globalv.double_grid)
     {
-        pw_rhod = new ModulePW::PW_Basis_Big(fft_device, PARAM.inp.precision);
+        pw_rhod = new ModulePW::PW_Basis_Big(fft_device, fft_precison);
     }
     else
     {
         pw_rhod = pw_rho;
     }
-
-    // temporary, it will be removed
     pw_big = static_cast<ModulePW::PW_Basis_Big*>(pw_rhod);
     pw_big->setbxyz(PARAM.inp.bx, PARAM.inp.by, PARAM.inp.bz);
     sf.set(pw_rhod, PARAM.inp.nbspline);
 
-}
-
-ESolver_FP::~ESolver_FP()
-{
-    delete pw_rho;
-    if ( PARAM.globalv.double_grid)
-    {
-        delete pw_rhod;
-    }
-    delete this->pelec;
-}
-
-void ESolver_FP::before_all_runners(UnitCell& ucell, const Input_para& inp)
-{
-    ModuleBase::TITLE("ESolver_FP", "before_all_runners");
-
     //! 1) read pseudopotentials
     if (!PARAM.inp.use_paw)
     {
diff --git a/source/module_esolver/esolver_fp.h b/source/module_esolver/esolver_fp.h
index 3634c63be5..9cbdcc7362 100644
--- a/source/module_esolver/esolver_fp.h
+++ b/source/module_esolver/esolver_fp.h
@@ -95,6 +95,9 @@ class ESolver_FP: public ESolver
 
     //! solvent model
     surchem solvent;
+
+    bool pw_rho_flag = false; ///< true once pw_rho has been allocated and must be deleted by this object
+
 };
 } // namespace ModuleESolver
 
diff --git a/source/module_esolver/esolver_ks.cpp b/source/module_esolver/esolver_ks.cpp
index a1f622ef52..708db2823f 100644
--- a/source/module_esolver/esolver_ks.cpp
+++ b/source/module_esolver/esolver_ks.cpp
@@ -36,6 +36,27 @@ namespace ModuleESolver
 template <typename T, typename Device>
 ESolver_KS<T, Device>::ESolver_KS()
 {
+}
+
+
+template <typename T, typename Device>
+ESolver_KS<T, Device>::~ESolver_KS() // deletes the objects held through psi, pw_wfc, p_hamilt and p_chgmix
+{
+    delete this->psi;
+    delete this->pw_wfc;
+    delete this->p_hamilt;
+    delete this->p_chgmix;
+    this->ppcell.release_memory(); // NOTE(review): presumably frees ppcell's internal buffers - confirm in its class
+}
+
+
+template <typename T, typename Device>
+void ESolver_KS<T, Device>::before_all_runners(UnitCell& ucell, const Input_para& inp)
+{
+    ModuleBase::TITLE("ESolver_KS", "before_all_runners");
+    //! 1) initialize "before_all_runners" in ESolver_FP
+    ESolver_FP::before_all_runners(ucell, inp);
+    
     classname = "ESolver_KS";
     basisname = "PLEASE ADD BASISNAME FOR CURRENT ESOLVER.";
 
@@ -75,27 +96,8 @@ ESolver_KS<T, Device>::ESolver_KS()
 
     // cell_factor
     this->ppcell.cell_factor = PARAM.inp.cell_factor;
-}
-
-
-template <typename T, typename Device>
-ESolver_KS<T, Device>::~ESolver_KS()
-{
-    delete this->psi;
-    delete this->pw_wfc;
-    delete this->p_hamilt;
-    delete this->p_chgmix;
-    this->ppcell.release_memory();
-}
 
 
-template <typename T, typename Device>
-void ESolver_KS<T, Device>::before_all_runners(UnitCell& ucell, const Input_para& inp)
-{
-    ModuleBase::TITLE("ESolver_KS", "before_all_runners");
-
-    //! 1) initialize "before_all_runniers" in ESolver_FP
-    ESolver_FP::before_all_runners(ucell, inp);
 
     /// PAW Section
 #ifdef USE_PAW
diff --git a/source/module_hamilt_pw/hamilt_pwdft/structure_factor.cpp b/source/module_hamilt_pw/hamilt_pwdft/structure_factor.cpp
index 09d0b56a05..c751b91cab 100644
--- a/source/module_hamilt_pw/hamilt_pwdft/structure_factor.cpp
+++ b/source/module_hamilt_pw/hamilt_pwdft/structure_factor.cpp
@@ -68,8 +68,7 @@ void Structure_Factor::setup_structure_factor(const UnitCell* Ucell, const Paral
 //	std::ofstream ofs( outstr.c_str() ) ;
     bool usebspline;
     if(nbspline > 0) {   usebspline = true;
-    } else {    usebspline = false;
-}
+    } else {    usebspline = false;}
     
     if(usebspline)
     {
@@ -147,6 +146,7 @@ void Structure_Factor::setup_structure_factor(const UnitCell* Ucell, const Paral
             inat++;
         }
     }
+    
     if (device == "gpu") {
         if (PARAM.globalv.has_float_data) {
             resmem_cd_op()(this->c_eigts1, Ucell->nat * (2 * rho_basis->nx + 1));
diff --git a/source/module_hamilt_pw/hamilt_pwdft/test/CMakeLists.txt b/source/module_hamilt_pw/hamilt_pwdft/test/CMakeLists.txt
index f4f6ff247c..963db3e5cb 100644
--- a/source/module_hamilt_pw/hamilt_pwdft/test/CMakeLists.txt
+++ b/source/module_hamilt_pw/hamilt_pwdft/test/CMakeLists.txt
@@ -2,6 +2,7 @@ remove_definitions(-D__DEEPKS)
 remove_definitions(-D__CUDA)
 remove_definitions(-D__ROCM)
 remove_definitions(-D__EXX)
+remove_definitions(-DUSE_PAW)
 
 AddTest(
   TARGET pwdft_soc
@@ -26,4 +27,31 @@ AddTest(
 	TARGET radial_proj_test
 	LIBS parameter  base device ${math_libs}
 	SOURCES radial_proj_test.cpp ../radial_proj.cpp
+)
+
+AddTest(
+	TARGET structure_factor_test
+	LIBS parameter ${math_libs} base device planewave 
+	SOURCES structure_factor_test.cpp ../structure_factor.cpp ../parallel_grid.cpp
+	../../../module_cell/unitcell.cpp
+	../../../module_io/output.cpp
+	../../../module_cell/update_cell.cpp
+	../../../module_cell/bcast_cell.cpp
+	../../../module_cell/print_cell.cpp
+	../../../module_cell/atom_spec.cpp
+	../../../module_cell/atom_pseudo.cpp
+	../../../module_cell/pseudo.cpp
+	../../../module_cell/read_stru.cpp
+	../../../module_cell/read_atom_species.cpp
+	../../../module_cell/read_atoms.cpp
+	../../../module_cell/read_pp.cpp
+	../../../module_cell/read_pp_complete.cpp
+	../../../module_cell/read_pp_upf100.cpp
+	../../../module_cell/read_pp_upf201.cpp
+	../../../module_cell/read_pp_vwr.cpp
+	../../../module_cell/read_pp_blps.cpp
+	../../../module_elecstate/read_pseudo.cpp
+	../../../module_elecstate/cal_wfc.cpp
+	../../../module_elecstate/cal_nelec_nband.cpp
+	../../../module_elecstate/read_orb.cpp
 )
\ No newline at end of file
diff --git a/source/module_hamilt_pw/hamilt_pwdft/test/structure_factor_test.cpp b/source/module_hamilt_pw/hamilt_pwdft/test/structure_factor_test.cpp
new file mode 100644
index 0000000000..e2231754ee
--- /dev/null
+++ b/source/module_hamilt_pw/hamilt_pwdft/test/structure_factor_test.cpp
@@ -0,0 +1,128 @@
+#include "gtest/gtest.h"
+#include "gmock/gmock.h"
+#include <string>
+#include <cmath>
+#include <complex>
+#include "module_cell/unitcell.h"
+#include "module_elecstate/module_dm/test/prepare_unitcell.h"
+#define private public
+#include "module_parameter/parameter.h"
+#include "module_hamilt_pw/hamilt_pwdft/structure_factor.h"
+#undef private
+/************************************************
+ *  unit test of class Structure_Factor
+ ***********************************************/
+
+/**
+ * - Tested Functions:
+ *   - Structure_Factor::set, called with a plane-wave basis pointer and nbspline
+ *   - Structure_Factor::setup_structure_factor
+ *     - double-precision tables z_eigts1/2/3
+ *     - single-precision tables c_eigts1/2/3 (has_float_data = true)
+ *     In both cases every table entry is expected to be 1 + 0i.
+*/
+
+// Empty definitions of the InfoNonlocal and Magnetism constructors and
+// destructors, supplied by this test file itself.
+// NOTE(review): presumably the real implementation files are not part of
+// this test target, so these definitions keep the link step satisfied;
+// confirm against the source list in the test's CMakeLists.txt.
+// --- InfoNonlocal ---
+InfoNonlocal::InfoNonlocal() {}
+
+InfoNonlocal::~InfoNonlocal() {}
+
+// --- Magnetism ---
+Magnetism::Magnetism() {}
+
+Magnetism::~Magnetism() {}
+
+// Fixture providing a Structure_Factor, the "Si" test cell and a PW_Basis with npw = 10.
+class StructureFactorTest : public testing::Test
+{
+protected:
+    Structure_Factor SF;
+    std::string output;
+    ModulePW::PW_Basis* rho_basis = nullptr;
+    UnitCell* ucell = nullptr;
+    UcellTestPrepare utp = UcellTestLib["Si"];
+    Parallel_Grid* pgrid = nullptr;
+    std::vector<int> nw = {13};
+    int nlocal = 0;
+
+    // NOTE(review): nothing allocated in SetUp is released; no TearDown is defined.
+    void SetUp() override
+    {
+        rho_basis = new ModulePW::PW_Basis;
+        ucell = utp.SetUcellInfo(nw, nlocal);
+        ucell->set_iat2iwt(1);
+        pgrid = new Parallel_Grid;
+        rho_basis->npw = 10;
+        // The gcar entries keep whatever Vector3's default constructor gives them.
+        rho_basis->gcar = new ModuleBase::Vector3<double>[10];
+    }
+};
+
+TEST_F(StructureFactorTest, set)
+{
+    // Use the fixture's allocated basis rather than an indeterminate pointer value.
+    const int nbspline_in = 10;
+    SF.set(rho_basis, nbspline_in);
+    EXPECT_EQ(SF.nbspline, nbspline_in);
+}
+
+
+// Double-precision tables: every entry of z_eigts1/2/3 is expected to be exactly 1 + 0i.
+TEST_F(StructureFactorTest, setup_structure_factor_double)
+{
+    rho_basis->npw = 10;
+    SF.setup_structure_factor(ucell, *pgrid, rho_basis);
+    const auto count_x = ucell->nat * (2 * rho_basis->nx + 1);
+    for (int idx = 0; idx < count_x; ++idx) {
+        EXPECT_EQ(SF.z_eigts1[idx].real(), 1);
+        EXPECT_EQ(SF.z_eigts1[idx].imag(), 0);
+    }
+
+    const auto count_y = ucell->nat * (2 * rho_basis->ny + 1);
+    for (int idx = 0; idx < count_y; ++idx) {
+        EXPECT_EQ(SF.z_eigts2[idx].real(), 1);
+        EXPECT_EQ(SF.z_eigts2[idx].imag(), 0);
+    }
+
+    const auto count_z = ucell->nat * (2 * rho_basis->nz + 1);
+    for (int idx = 0; idx < count_z; ++idx) {
+        EXPECT_EQ(SF.z_eigts3[idx].real(), 1);
+        EXPECT_EQ(SF.z_eigts3[idx].imag(), 0);
+    }
+}
+
+// Single-precision tables: with has_float_data set, every entry of c_eigts1/2/3
+// is expected to be exactly 1 + 0i. The global flag is restored afterwards so
+// that tests running later in the same process see the value they started with.
+TEST_F(StructureFactorTest, setup_structure_factor_float)
+{
+    const auto saved_has_float_data = PARAM.sys.has_float_data;
+    PARAM.sys.has_float_data = true;
+    rho_basis->npw = 10;
+    SF.setup_structure_factor(ucell, *pgrid, rho_basis);
+    PARAM.sys.has_float_data = saved_has_float_data;
+
+    for (int i = 0; i < ucell->nat * (2 * rho_basis->nx + 1); ++i) {
+        EXPECT_EQ(SF.c_eigts1[i].real(), 1);
+        EXPECT_EQ(SF.c_eigts1[i].imag(), 0);
+    }
+    for (int i = 0; i < ucell->nat * (2 * rho_basis->ny + 1); ++i) {
+        EXPECT_EQ(SF.c_eigts2[i].real(), 1);
+        EXPECT_EQ(SF.c_eigts2[i].imag(), 0);
+    }
+    for (int i = 0; i < ucell->nat * (2 * rho_basis->nz + 1); ++i) {
+        EXPECT_EQ(SF.c_eigts3[i].real(), 1);
+        EXPECT_EQ(SF.c_eigts3[i].imag(), 0);
+    }
+}
+
+int main(int argc, char** argv)
+{
+    testing::InitGoogleTest(&argc, argv); // forward --gtest_* flags such as --gtest_filter
+    return RUN_ALL_TESTS();
+}
\ No newline at end of file
diff --git a/source/module_hamilt_pw/hamilt_stodft/test/test_sto_tool.cpp b/source/module_hamilt_pw/hamilt_stodft/test/test_sto_tool.cpp
index 465fd9c27f..a0654e1e7f 100644
--- a/source/module_hamilt_pw/hamilt_stodft/test/test_sto_tool.cpp
+++ b/source/module_hamilt_pw/hamilt_stodft/test/test_sto_tool.cpp
@@ -33,9 +33,13 @@ void hamilt::HamiltSdftPW<T, Device>::hPsi_norm(const T* psi_in, T* hpsi, const
 
 template class hamilt::HamiltPW<std::complex<double>, base_device::DEVICE_CPU>;
 template class hamilt::HamiltSdftPW<std::complex<double>, base_device::DEVICE_CPU>;
+template class hamilt::HamiltPW<std::complex<float>, base_device::DEVICE_CPU>;
+template class hamilt::HamiltSdftPW<std::complex<float>, base_device::DEVICE_CPU>;
 #if ((defined __CUDA) || (defined __ROCM))
 template class hamilt::HamiltPW<std::complex<double>, base_device::DEVICE_GPU>;
 template class hamilt::HamiltSdftPW<std::complex<double>, base_device::DEVICE_GPU>;
+template class hamilt::HamiltPW<std::complex<float>, base_device::DEVICE_GPU>;
+template class hamilt::HamiltSdftPW<std::complex<float>, base_device::DEVICE_GPU>;
 #endif
 
 /**
diff --git a/source/module_io/read_set_globalv.cpp b/source/module_io/read_set_globalv.cpp
index 83bde5c62d..1cc133988a 100644
--- a/source/module_io/read_set_globalv.cpp
+++ b/source/module_io/read_set_globalv.cpp
@@ -72,7 +72,7 @@ void ReadInput::set_globalv(const Input_para& inp, System_para& sys)
     bool float_cond = false;
 #endif
     sys.has_double_data = (inp.precision == "double") || (inp.precision == "mixing") || float_cond;
-    sys.has_float_data = (inp.precision == "float") || (inp.precision == "mixing") || float_cond;
+    sys.has_float_data = (inp.precision == "single") || (inp.precision == "mixing") || float_cond;
 }
 
 /// @note Here para.inp has not been synchronized of all ranks.
diff --git a/source/module_lr/esolver_lrtd_lcao.cpp b/source/module_lr/esolver_lrtd_lcao.cpp
index 6dd3abe29f..1db10b5caf 100644
--- a/source/module_lr/esolver_lrtd_lcao.cpp
+++ b/source/module_lr/esolver_lrtd_lcao.cpp
@@ -257,7 +257,11 @@ LR::ESolver_LR<T, TR>::ESolver_LR(ModuleESolver::ESolver_KS_LCAO<T, TR>&& ks_sol
     this->gint_->reset_DMRGint(1);
 
     // move pw basis
-    delete this->pw_rho;    // newed in ESolver_FP::ESolver_FP
+    if (this->pw_rho_flag)
+    {
+        delete this->pw_rho; // release the basis this object allocated itself, if any
+    }
+    this->pw_rho_flag = true; // the basis moved in from ks_sol just below is owned here from now on
     this->pw_rho = ks_sol.pw_rho;
     ks_sol.pw_rho = nullptr;
     //init potential and calculate kernels using ground state charge
diff --git a/tests/01_PW/111_PW_CG_float/INPUT b/tests/01_PW/111_PW_CG_float/INPUT
new file mode 100644
index 0000000000..c1a7ad556f
--- /dev/null
+++ b/tests/01_PW/111_PW_CG_float/INPUT
@@ -0,0 +1,29 @@
+INPUT_PARAMETERS
+#Parameters (1.General)
+suffix			autotest
+calculation     scf
+
+nbands			6
+symmetry		1
+pseudo_dir	../../PP_ORB
+
+#Parameters (2.Iteration)
+ecutwfc			20
+scf_thr				1e-8
+scf_nmax			100
+
+
+#Parameters (3.Basis)
+basis_type		pw
+
+#Parameters (4.Smearing)
+smearing_method		gauss
+smearing_sigma			0.002
+
+#Parameters (5.Mixing)
+mixing_type		plain
+mixing_beta		0.5
+
+ks_solver      cg
+device         cpu
+precision      single
\ No newline at end of file
diff --git a/tests/01_PW/111_PW_CG_float/KPT b/tests/01_PW/111_PW_CG_float/KPT
new file mode 100644
index 0000000000..c289c0158a
--- /dev/null
+++ b/tests/01_PW/111_PW_CG_float/KPT
@@ -0,0 +1,4 @@
+K_POINTS
+0
+Gamma
+1 1 1 0 0 0
diff --git a/tests/01_PW/111_PW_CG_float/README b/tests/01_PW/111_PW_CG_float/README
new file mode 100644
index 0000000000..41587e05b8
--- /dev/null
+++ b/tests/01_PW/111_PW_CG_float/README
@@ -0,0 +1 @@
+Test an SCF calculation of bulk Si in the PW basis using the CG solver with single (float) precision on the CPU
diff --git a/tests/01_PW/111_PW_CG_float/STRU b/tests/01_PW/111_PW_CG_float/STRU
new file mode 100644
index 0000000000..9b42a124ca
--- /dev/null
+++ b/tests/01_PW/111_PW_CG_float/STRU
@@ -0,0 +1,19 @@
+ATOMIC_SPECIES
+Si 14 Si.pz-vbc.UPF
+
+LATTICE_CONSTANT
+10.2  // add lattice constant
+
+LATTICE_VECTORS
+0.0 0.5 0.5
+0.5 0.0 0.5
+0.5 0.5 0.0
+
+ATOMIC_POSITIONS
+Direct 
+
+Si	// Element type	
+0.0	// magnetism
+2	
+0.00 0.00 0.00 1 1 1
+0.25 0.25 0.25 1 1 1
diff --git a/tests/01_PW/111_PW_CG_float/result.ref b/tests/01_PW/111_PW_CG_float/result.ref
new file mode 100644
index 0000000000..10063ce3c3
--- /dev/null
+++ b/tests/01_PW/111_PW_CG_float/result.ref
@@ -0,0 +1,6 @@
+etotref -198.22383283
+etotperatomref -99.11191642
+pointgroupref T_d
+spacegroupref O_h
+nksibzref 1
+totaltimeref 
diff --git a/tests/01_PW/111_PW_CG_float/threshold b/tests/01_PW/111_PW_CG_float/threshold
new file mode 100644
index 0000000000..cc0ad91b67
--- /dev/null
+++ b/tests/01_PW/111_PW_CG_float/threshold
@@ -0,0 +1,5 @@
+# The float type has different precision from the double type.
+# This integration test exercises the float (single-precision) code path
+# within the plane-wave (pw) basis.
+threshold 0.00001
+fatal_threshold 1
diff --git a/tests/01_PW/CASES_CPU.txt b/tests/01_PW/CASES_CPU.txt
index e34d75ec22..d8b269d929 100644
--- a/tests/01_PW/CASES_CPU.txt
+++ b/tests/01_PW/CASES_CPU.txt
@@ -105,6 +105,7 @@
 108_PW_MD_2O
 109_PW_PBE0
 110_PW_ONCV_skip
+111_PW_CG_float
 801_PW_LT_sc
 802_PW_LT_fcc
 803_PW_LT_bcc
diff --git a/tests/11_PW_GPU/005_PW_CG_GPU_float/INPUT b/tests/11_PW_GPU/005_PW_CG_GPU_float/INPUT
new file mode 100644
index 0000000000..3a22fa5fb9
--- /dev/null
+++ b/tests/11_PW_GPU/005_PW_CG_GPU_float/INPUT
@@ -0,0 +1,35 @@
+INPUT_PARAMETERS
+#Parameters	(General)
+suffix              autotest
+pseudo_dir          ../../PP_ORB
+
+gamma_only          0
+calculation         scf
+symmetry            1
+relax_nmax          1
+out_level           ie
+smearing_method     gaussian
+smearing_sigma      0.02
+
+#Parameters (3.PW)
+ecutwfc             40
+scf_thr             1e-7
+scf_nmax            100
+
+#Parameters (Basis, solver and device)
+basis_type          pw
+ks_solver           cg
+device              gpu
+precision           single
+chg_extrap          second-order
+out_dm              0
+pw_diag_thr         0.00001
+
+cal_force           1
+#test_force         1
+cal_stress          1
+#test_stress        1
+
+mixing_type         broyden
+mixing_beta         0.4
+mixing_gg0          1.5
diff --git a/tests/11_PW_GPU/005_PW_CG_GPU_float/KPT b/tests/11_PW_GPU/005_PW_CG_GPU_float/KPT
new file mode 100644
index 0000000000..28006d5e2d
--- /dev/null
+++ b/tests/11_PW_GPU/005_PW_CG_GPU_float/KPT
@@ -0,0 +1,4 @@
+K_POINTS
+0
+Gamma
+2 2 2  0 0 0
diff --git a/tests/11_PW_GPU/005_PW_CG_GPU_float/README b/tests/11_PW_GPU/005_PW_CG_GPU_float/README
new file mode 100644
index 0000000000..f578f9dfbd
--- /dev/null
+++ b/tests/11_PW_GPU/005_PW_CG_GPU_float/README
@@ -0,0 +1 @@
+Test of a GaAs deformation simulation with base parameters, using the CG method with float (single) precision on the GPU device.
diff --git a/tests/11_PW_GPU/005_PW_CG_GPU_float/STRU b/tests/11_PW_GPU/005_PW_CG_GPU_float/STRU
new file mode 100644
index 0000000000..b03baadd25
--- /dev/null
+++ b/tests/11_PW_GPU/005_PW_CG_GPU_float/STRU
@@ -0,0 +1,23 @@
+ATOMIC_SPECIES
+As 1   As_dojo.upf upf201
+Ga 1   Ga_dojo.upf upf201
+
+LATTICE_CONSTANT
+1  // add lattice constant, 10.58 ang
+
+LATTICE_VECTORS
+5.33 5.33  0.0
+0.0  5.33 5.33
+5.33  0.0  5.33
+ATOMIC_POSITIONS
+Direct //Cartesian or Direct coordinate.
+
+As
+0
+1
+0.300000          0.3300000          0.27000000     0 0 0
+
+Ga              //Element Label
+0
+1              //number of atom
+0.00000          0.00000          0.000000     0 0 0
diff --git a/tests/11_PW_GPU/005_PW_CG_GPU_float/result.ref b/tests/11_PW_GPU/005_PW_CG_GPU_float/result.ref
new file mode 100644
index 0000000000..e8e006ec72
--- /dev/null
+++ b/tests/11_PW_GPU/005_PW_CG_GPU_float/result.ref
@@ -0,0 +1,7 @@
+etotref -4869.7470519303351466
+etotperatomref -2434.8735259652
+totalforceref 5.195370
+totalstressref 37241.38404200
+pointgroupref C_1
+spacegroupref C_1
+nksibzref 8
diff --git a/tests/11_PW_GPU/005_PW_CG_GPU_float/threshold b/tests/11_PW_GPU/005_PW_CG_GPU_float/threshold
new file mode 100644
index 0000000000..b0cedcec2e
--- /dev/null
+++ b/tests/11_PW_GPU/005_PW_CG_GPU_float/threshold
@@ -0,0 +1,4 @@
+threshold 1
+force_threshold 1 
+stress_threshold 2
+fatal_threshold 2
diff --git a/tests/11_PW_GPU/CASES_GPU.txt b/tests/11_PW_GPU/CASES_GPU.txt
index 3b1710189d..be5e66e229 100644
--- a/tests/11_PW_GPU/CASES_GPU.txt
+++ b/tests/11_PW_GPU/CASES_GPU.txt
@@ -2,3 +2,4 @@
 002_PW_CG_GPU
 003_PW_DA_GPU
 004_PW_OW_GPU
+005_PW_CG_GPU_float
\ No newline at end of file
diff --git a/tests/integrate/102_PW_CG/README b/tests/integrate/102_PW_CG/README
new file mode 100644
index 0000000000..be4d177fb5
--- /dev/null
+++ b/tests/integrate/102_PW_CG/README
@@ -0,0 +1 @@
+This test is for the silicon diamond structure using the gamma point, a smearing method, the CG solver, and double precision.
\ No newline at end of file
diff --git a/tests/performance/P000_si16_pw/INPUT b/tests/performance/P000_si16_pw/INPUT
index 0b669f4e03..a195787ad0 100644
--- a/tests/performance/P000_si16_pw/INPUT
+++ b/tests/performance/P000_si16_pw/INPUT
@@ -22,4 +22,4 @@ smearing_sigma			0.002
 #Parameters (5.Mixing)
 mixing_type		broyden
 mixing_beta		0.3
-ks_solver dav
+ks_solver dav
\ No newline at end of file
diff --git a/tests/performance/P002_si64_pw/INPUT b/tests/performance/P002_si64_pw/INPUT
index 783c0cb3dd..5a6727692b 100644
--- a/tests/performance/P002_si64_pw/INPUT
+++ b/tests/performance/P002_si64_pw/INPUT
@@ -21,4 +21,4 @@ smearing_sigma			0.002
 
 #Parameters (5.Mixing)
 mixing_type		broyden
-mixing_beta		0.3
+mixing_beta		0.3
\ No newline at end of file