diff --git a/docs/advanced/input_files/input-main.md b/docs/advanced/input_files/input-main.md
index 40edbf5538..8e15de5b6b 100644
--- a/docs/advanced/input_files/input-main.md
+++ b/docs/advanced/input_files/input-main.md
@@ -423,6 +423,7 @@
     - [pexsi\_zero\_thr](#pexsi_zero_thr)
   - [Linear Response TDDFT](#linear-response-tddft)
     - [xc\_kernel](#xc_kernel)
+    - [lr\_init\_xc\_kernel](#lr_init_xc_kernel)
     - [lr\_solver](#lr_solver)
     - [lr\_thr](#lr_thr)
     - [nocc](#nocc)
@@ -3943,6 +3944,15 @@ These parameters are used to solve the excited states using. e.g. LR-TDDFT.
 Currently supported: `RPA`, `LDA`, `PBE`, `HSE`, `HF`.
 - **Default**: LDA
 
+### lr_init_xc_kernel
+
+- **Type**: String
+- **Description**: The method to initalize the xc kernel. 
+  - "default": Calculate xc kerenel ($f_\text{xc}$) from the ground-state charge density.
+  - "file": Read the xc kernel $f_\text{xc}$ on grid from the provided files. The following words should be the paths of ".cube" files, where the first 1 (*[nspin](#nspin)==1*) or 3 (*[nspin](#nspin)==2*, namely spin-aa, spin-ab and spin-bb) will be read in. The parameter [xc_kernel](#xc_kernel) will be invalid. Now only LDA-type kernel is supproted as the potential will be calculated by directly multiplying the transition density.
+  - "from_charge_file": Calculate fxc from the charge density read from the provided files. The following words should be the paths of ".cube" files, where the first [nspin]($nspin) files will be read in. 
+- **Default**: "default"
+
 ### lr_solver
 
 - **Type**: String
diff --git a/source/Makefile.Objects b/source/Makefile.Objects
index 351c8c340a..d34d08242e 100644
--- a/source/Makefile.Objects
+++ b/source/Makefile.Objects
@@ -731,7 +731,7 @@ OBJS_TENSOR=tensor.o\
     dmr_complex.o\
     operator_lr_hxc.o\
     operator_lr_exx.o\
-    kernel_xc.o\
+    xc_kernel.o\
     pot_hxc_lrtd.o\
     lr_spectrum.o\
     hamilt_casida.o\
diff --git a/source/module_io/read_input_item_tddft.cpp b/source/module_io/read_input_item_tddft.cpp
index ccd3190c54..bfd0542729 100644
--- a/source/module_io/read_input_item_tddft.cpp
+++ b/source/module_io/read_input_item_tddft.cpp
@@ -309,6 +309,20 @@ void ReadInput::item_lr_tddft()
         read_sync_string(input.xc_kernel);
         this->add_item(item);
     }
+    {
+        Input_Item item("lr_init_xc_kernel");
+        item.annotation = "The method to initalize the xc kernel";
+        item.read_value = [](const Input_Item& item, Parameter& para) {
+            size_t count = item.get_size();
+            auto& ifxc = para.input.lr_init_xc_kernel;
+            for (int i = 0; i < count; i++) { ifxc.push_back(item.str_values[i]); }
+            };
+        item.reset_value = [](const Input_Item& item, Parameter& para) {
+            if (para.input.lr_init_xc_kernel.empty()) { para.input.lr_init_xc_kernel.push_back("default"); }
+            };
+        sync_stringvec(input.lr_init_xc_kernel, para.input.lr_init_xc_kernel.size(), "default");
+        this->add_item(item);
+    }
     {
         Input_Item item("lr_solver");
         item.annotation = "the eigensolver for LR-TDDFT";
diff --git a/source/module_io/test/read_input_ptest.cpp b/source/module_io/test/read_input_ptest.cpp
index 69d6d13a1c..7e17e2ae37 100644
--- a/source/module_io/test/read_input_ptest.cpp
+++ b/source/module_io/test/read_input_ptest.cpp
@@ -420,6 +420,7 @@ TEST_F(InputParaTest, ParaRead)
     EXPECT_EQ(param.inp.nocc, param.inp.nbands);
     EXPECT_EQ(param.inp.nvirt, 1);
     EXPECT_EQ(param.inp.xc_kernel, "LDA");
+    EXPECT_EQ(param.inp.lr_init_xc_kernel[0], "default");
     EXPECT_EQ(param.inp.lr_solver, "dav");
     EXPECT_DOUBLE_EQ(param.inp.lr_thr, 1e-2);
     EXPECT_FALSE(param.inp.lr_unrestricted);
diff --git a/source/module_lr/CMakeLists.txt b/source/module_lr/CMakeLists.txt
index 4b816f09b3..a672e05c92 100644
--- a/source/module_lr/CMakeLists.txt
+++ b/source/module_lr/CMakeLists.txt
@@ -17,13 +17,8 @@ if(ENABLE_LCAO)
     potentials/pot_hxc_lrtd.cpp
     lr_spectrum.cpp
     hamilt_casida.cpp
-    esolver_lrtd_lcao.cpp)
-
-    if(ENABLE_LIBXC)
-      list(APPEND objects
-        potentials/kernel_xc.cpp
-      )
-    endif()
+    esolver_lrtd_lcao.cpp
+    potentials/xc_kernel.cpp)
 
 add_library(
     lr
diff --git a/source/module_lr/esolver_lrtd_lcao.cpp b/source/module_lr/esolver_lrtd_lcao.cpp
index aca88c38ae..6eecea6b4d 100644
--- a/source/module_lr/esolver_lrtd_lcao.cpp
+++ b/source/module_lr/esolver_lrtd_lcao.cpp
@@ -65,8 +65,8 @@ inline int cal_nupdown_form_occ(const ModuleBase::matrix& wg)
 template<typename T, typename TR>
 void LR::ESolver_LR<T, TR>::parameter_check()const
 {
-    std::set<std::string> lr_solvers = { "dav", "lapack" , "spectrum", "dav_subspace", "cg" };
-    std::set<std::string> xc_kernels = { "rpa", "lda", "pbe", "hf" , "hse" };
+    const std::set<std::string> lr_solvers = { "dav", "lapack" , "spectrum", "dav_subspace", "cg" };
+    const std::set<std::string> xc_kernels = { "rpa", "lda", "pwlda", "pbe", "hf" , "hse" };
     if (lr_solvers.find(this->input.lr_solver) == lr_solvers.end()) {
         throw std::invalid_argument("ESolver_LR: unknown type of lr_solver");
 }
@@ -104,7 +104,13 @@ void LR::ESolver_LR<T, TR>::set_dimension()
     GlobalV::ofs_running << "number of excited states to be solved: " << this->nstates << std::endl;
     if (input.ri_hartree_benchmark == "aims" && !input.aims_nbasis.empty())
     {
-        this->nbasis = [&]() -> int { int nbas = 0; for (int it = 0;it < ucell.ntype;++it) { nbas += ucell.atoms[it].na * input.aims_nbasis[it]; };return nbas;}();
+        // calculate total number of basis funcs, see https://en.cppreference.com/w/cpp/algorithm/inner_product
+        this->nbasis = std::inner_product(input.aims_nbasis.begin(), /* iterator1.begin */
+                                          input.aims_nbasis.end(),  /* iterator1.end */
+                                          ucell.atoms,  /* iterator2.begin */
+                                          0,  /* init value */
+                                          std::plus<int>(), /* iter op1 */
+                                          [](const int& a, const Atom& b) { return a * b.na; }); /* iter op2 */
         std::cout << "nbasis from aims: " << this->nbasis << std::endl;
     }
 }
@@ -592,11 +598,11 @@ void LR::ESolver_LR<T, TR>::init_pot(const Charge& chg_gs)
     {
         using ST = PotHxcLR::SpinType;
     case 1:
-        this->pot[0] = std::make_shared<PotHxcLR>(xc_kernel, this->pw_rho, &ucell, &chg_gs, ST::S1);
+        this->pot[0] = std::make_shared<PotHxcLR>(xc_kernel, *this->pw_rho, ucell, chg_gs, GlobalC::Pgrid, ST::S1, input.lr_init_xc_kernel);
         break;
     case 2:
-        this->pot[0] = std::make_shared<PotHxcLR>(xc_kernel, this->pw_rho, &ucell, &chg_gs, openshell ? ST::S2_updown : ST::S2_singlet);
-        this->pot[1] = std::make_shared<PotHxcLR>(xc_kernel, this->pw_rho, &ucell, &chg_gs, openshell ? ST::S2_updown : ST::S2_triplet);
+        this->pot[0] = std::make_shared<PotHxcLR>(xc_kernel, *this->pw_rho, ucell, chg_gs, GlobalC::Pgrid, openshell ? ST::S2_updown : ST::S2_singlet, input.lr_init_xc_kernel);
+        this->pot[1] = std::make_shared<PotHxcLR>(xc_kernel, *this->pw_rho, ucell, chg_gs, GlobalC::Pgrid, openshell ? ST::S2_updown : ST::S2_triplet, input.lr_init_xc_kernel);
         break;
     default:
         throw std::invalid_argument("ESolver_LR: nspin must be 1 or 2");
diff --git a/source/module_lr/lr_spectrum.cpp b/source/module_lr/lr_spectrum.cpp
index 5cd9acfdb3..e38d81609a 100644
--- a/source/module_lr/lr_spectrum.cpp
+++ b/source/module_lr/lr_spectrum.cpp
@@ -53,7 +53,7 @@ void LR::LR_Spectrum<double>::oscillator_strength(Grid_Driver& gd, const std::ve
 
             // 2. transition density
             double** rho_trans;
-            LR_Util::new_p2(rho_trans, 1, this->rho_basis.nrxx);
+            LR_Util::_allocate_2order_nested_ptr(rho_trans, 1, this->rho_basis.nrxx);
             this->cal_gint_rho(rho_trans, this->rho_basis.nrxx);
 
             // 3. transition dipole moment
@@ -67,7 +67,7 @@ void LR::LR_Spectrum<double>::oscillator_strength(Grid_Driver& gd, const std::ve
                 ModuleBase::Vector3<double> rc = rd * ucell.latvec * ucell.lat0; // real coordinate
                 transition_dipole_[istate] += rc * rho_trans[0][ir];
             }
-            LR_Util::delete_p2(rho_trans, 1);
+            LR_Util::_deallocate_2order_nested_ptr(rho_trans, 1);
         }
         transition_dipole_[istate] *= (ucell.omega / static_cast<double>(gint->get_ncxyz()));   // dv
         Parallel_Reduce::reduce_all(transition_dipole_[istate].x);
@@ -114,8 +114,8 @@ void LR::LR_Spectrum<std::complex<double>>::oscillator_strength(Grid_Driver& gd,
             // 2. transition density
             double** rho_trans_real;
             double** rho_trans_imag;
-            LR_Util::new_p2(rho_trans_real, 1, this->rho_basis.nrxx);
-            LR_Util::new_p2(rho_trans_imag, 1, this->rho_basis.nrxx);
+            LR_Util::_allocate_2order_nested_ptr(rho_trans_real, 1, this->rho_basis.nrxx);
+            LR_Util::_allocate_2order_nested_ptr(rho_trans_imag, 1, this->rho_basis.nrxx);
             // real part
             LR_Util::get_DMR_real_imag_part(DM_trans, DM_trans_real_imag, ucell.nat, 'R');
             this->gint->transfer_DM2DtoGrid(DM_trans_real_imag.get_DMR_vector());
@@ -140,8 +140,8 @@ void LR::LR_Spectrum<std::complex<double>>::oscillator_strength(Grid_Driver& gd,
                 ModuleBase::Vector3<std::complex<double>> rc_complex(rc.x, rc.y, rc.z);
                 transition_dipole_[istate] += rc_complex * std::complex<double>(rho_trans_real[0][ir], rho_trans_imag[0][ir]);
             }
-            LR_Util::delete_p2(rho_trans_real, 1);
-            LR_Util::delete_p2(rho_trans_imag, 1);
+            LR_Util::_deallocate_2order_nested_ptr(rho_trans_real, 1);
+            LR_Util::_deallocate_2order_nested_ptr(rho_trans_imag, 1);
         }
         transition_dipole_[istate] *= (ucell.omega / static_cast<double>(gint->get_ncxyz()));   // dv
         Parallel_Reduce::reduce_all(transition_dipole_[istate].x);
diff --git a/source/module_lr/operator_casida/operator_lr_hxc.cpp b/source/module_lr/operator_casida/operator_lr_hxc.cpp
index 31e64a9acf..890b03398a 100644
--- a/source/module_lr/operator_casida/operator_lr_hxc.cpp
+++ b/source/module_lr/operator_casida/operator_lr_hxc.cpp
@@ -61,15 +61,15 @@ namespace LR
         // \f[ \tilde{\rho}(r)=\sum_{\mu_j, \mu_b}\tilde{\rho}_{\mu_j,\mu_b}\phi_{\mu_b}(r)\phi_{\mu_j}(r) \f]
         double** rho_trans;
         const int& nrxx = this->pot.lock()->nrxx;
-        LR_Util::new_p2(rho_trans, 1, nrxx); // currently gint_kernel_rho uses PARAM.inp.nspin, it needs refactor
+        LR_Util::_allocate_2order_nested_ptr(rho_trans, 1, nrxx); // currently gint_kernel_rho uses PARAM.inp.nspin, it needs refactor
         ModuleBase::GlobalFunc::ZEROS(rho_trans[0], nrxx);
         Gint_inout inout_rho(rho_trans, Gint_Tools::job_type::rho, 1, false);
         this->gint->cal_gint(&inout_rho);
 
         // 3. v_hxc = f_hxc * rho_trans
         ModuleBase::matrix vr_hxc(1, nrxx);   //grid
-        this->pot.lock()->cal_v_eff(rho_trans, &GlobalC::ucell, vr_hxc, ispin_ks);
-        LR_Util::delete_p2(rho_trans, 1);
+        this->pot.lock()->cal_v_eff(rho_trans, GlobalC::ucell, vr_hxc, ispin_ks);
+        LR_Util::_deallocate_2order_nested_ptr(rho_trans, 1);
 
         // 4. V^{Hxc}_{\mu,\nu}=\int{dr} \phi_\mu(r) v_{Hxc}(r) \phi_\mu(r)
         Gint_inout inout_vlocal(vr_hxc.c, 0, Gint_Tools::job_type::vlocal);
@@ -103,7 +103,7 @@ namespace LR
                 double** rho_trans;
                 const int& nrxx = this->pot.lock()->nrxx;
 
-                LR_Util::new_p2(rho_trans, 1, nrxx); // nspin=1 for transition density
+                LR_Util::_allocate_2order_nested_ptr(rho_trans, 1, nrxx); // nspin=1 for transition density
                 ModuleBase::GlobalFunc::ZEROS(rho_trans[0], nrxx);
                 Gint_inout inout_rho(rho_trans, Gint_Tools::job_type::rho, 1, false);
                 this->gint->cal_gint(&inout_rho);
@@ -111,10 +111,10 @@ namespace LR
 
                 // 3. v_hxc = f_hxc * rho_trans
                 ModuleBase::matrix vr_hxc(1, nrxx);   //grid
-                this->pot.lock()->cal_v_eff(rho_trans, &GlobalC::ucell, vr_hxc, ispin_ks);
+                this->pot.lock()->cal_v_eff(rho_trans, GlobalC::ucell, vr_hxc, ispin_ks);
                 // print_grid_nonzero(vr_hxc.c, this->poticab->nrxx, 10, "vr_hxc");
 
-                LR_Util::delete_p2(rho_trans, 1);
+                LR_Util::_deallocate_2order_nested_ptr(rho_trans, 1);
 
                 // 4. V^{Hxc}_{\mu,\nu}=\int{dr} \phi_\mu(r) v_{Hxc}(r) \phi_\mu(r)
                 Gint_inout inout_vlocal(vr_hxc.c, 0, Gint_Tools::job_type::vlocal);
diff --git a/source/module_lr/potentials/kernel.h b/source/module_lr/potentials/kernel.h
deleted file mode 100644
index 67ab8d0868..0000000000
--- a/source/module_lr/potentials/kernel.h
+++ /dev/null
@@ -1,30 +0,0 @@
-#pragma once
-#include "module_basis/module_pw/pw_basis.h"
-#include "module_elecstate/module_charge/charge.h"
-#include "module_cell/unitcell.h"
-// #include <ATen/tensor.h>
-
-namespace LR
-{
-    class KernelXC
-    {
-    public:
-        KernelXC() {};
-        ~KernelXC() {};
-
-        // xc kernel for LR-TDDFT
-#ifdef USE_LIBXC
-        void f_xc_libxc(const int& nspin, const double& omega, const double& tpiba, const Charge* chg_gs);
-#endif
-
-        const std::vector<double>& get_kernel(const std::string& name) { return kernel_set_[name]; }
-        const std::vector<ModuleBase::Vector3<double>>& get_grad_kernel(const std::string& name) { return grad_kernel_set_[name]; }
-
-    protected:
-
-        const ModulePW::PW_Basis* rho_basis_ = nullptr;
-        std::map<std::string, std::vector<double>> kernel_set_; // [kernel_type][nrxx][nspin]
-        std::map<std::string, std::vector<ModuleBase::Vector3<double>>> grad_kernel_set_;// [kernel_type][nrxx][nspin],  intermediate terms for GGA
-    };
-}
-
diff --git a/source/module_lr/potentials/kernel_xc.cpp b/source/module_lr/potentials/kernel_xc.cpp
deleted file mode 100644
index dee51ef3c5..0000000000
--- a/source/module_lr/potentials/kernel_xc.cpp
+++ /dev/null
@@ -1,234 +0,0 @@
-#include "kernel.h"
-#include "module_hamilt_general/module_xc/xc_functional.h"
-#include "module_parameter/parameter.h"
-#include "module_base/timer.h"
-#include "module_lr/utils/lr_util.h"
-#ifdef USE_LIBXC
-#include <xc.h>
-#include "module_hamilt_general/module_xc/xc_functional_libxc.h"
-
-void LR::KernelXC::f_xc_libxc(const int& nspin, const double& omega, const double& tpiba, const Charge* chg_gs)
-{
-    ModuleBase::TITLE("XC_Functional", "f_xc_libxc");
-    ModuleBase::timer::tick("XC_Functional", "f_xc_libxc");
-    // https://www.tddft.org/programs/libxc/manual/libxc-5.1.x/
-
-    std::vector<xc_func_type> funcs = XC_Functional_Libxc::init_func(
-        XC_Functional::get_func_id(), 
-        (1 == nspin) ? XC_UNPOLARIZED : XC_POLARIZED);
-    int nrxx = chg_gs->nrxx;
-
-    // converting rho (extract it as a subfuntion in the future)
-    // -----------------------------------------------------------------------------------
-    std::vector<double> rho(nspin * nrxx);    // r major / spin contigous
-    std::vector<double> amag;
-    if (1 == nspin || 2 == nspin)
-    {
-#ifdef _OPENMP
-#pragma omp parallel for collapse(2) schedule(static, 1024)
-#endif
-        for (int is = 0; is < nspin; ++is)
-            for (int ir = 0; ir < nrxx; ++ir)
-                rho[ir * nspin + is] = chg_gs->rho[is][ir] + 1.0 / nspin * chg_gs->rho_core[ir];
-    }
-    else
-    {
-        amag.resize(nrxx);
-#ifdef _OPENMP
-#pragma omp parallel for
-#endif
-        for (int ir = 0; ir < nrxx; ++ir)
-        {
-            const double arhox = std::abs(chg_gs->rho[0][ir] + chg_gs->rho_core[ir]);
-            amag[ir] = std::sqrt(std::pow(chg_gs->rho[1][ir], 2) + std::pow(chg_gs->rho[2][ir], 2) + std::pow(chg_gs->rho[3][ir], 2));
-            const double amag_clip = (amag[ir] < arhox) ? amag[ir] : arhox;
-            rho[ir * nspin + 0] = (arhox + amag_clip) / 2.0;
-            rho[ir * nspin + 1] = (arhox - amag_clip) / 2.0;
-        }
-    }
-
-    // -----------------------------------------------------------------------------------
-    // for GGA
-    const bool is_gga = [&funcs]()
-        {
-            for (xc_func_type& func : funcs)
-            {
-                switch (func.info->family)
-                {
-                case XC_FAMILY_GGA:
-                case XC_FAMILY_HYB_GGA:
-                    return true;
-                }
-            }
-            return false;
-        }();
-        std::vector<std::vector<ModuleBase::Vector3<double>>> gdr;  // \nabla \rho
-        std::vector<double> sigma;  // |\nabla\rho|^2
-        std::vector<double> sgn;        // sgn for threshold mask
-        if (is_gga)
-        {
-            // 0. set up sgn for threshold mask
-            // in the case of GGA correlation for polarized case,
-            // a cutoff for grho is required to ensure that libxc gives reasonable results
-            
-            // 1. \nabla \rho
-            gdr.resize(nspin);
-            for (int is = 0; is < nspin; ++is)
-            {
-                std::vector<double> rhor(nrxx);
-#ifdef _OPENMP
-#pragma omp parallel for schedule(static, 1024)
-#endif
-                for (int ir = 0; ir < nrxx; ++ir) rhor[ir] = rho[ir * nspin + is];
-                gdr[is].resize(nrxx);
-                LR_Util::grad(rhor.data(), gdr[is].data(), *(chg_gs->rhopw), tpiba);
-            }
-            // 2. |\nabla\rho|^2
-            sigma.resize(nrxx * ((1 == nspin) ? 1 : 3));
-            if (1 == nspin)
-            {
-#ifdef _OPENMP
-#pragma omp parallel for schedule(static, 1024)
-#endif
-                for (int ir = 0; ir < nrxx; ++ir)
-                    sigma[ir] = gdr[0][ir] * gdr[0][ir];
-            }
-            else
-            {
-#ifdef _OPENMP
-#pragma omp parallel for schedule(static, 256)
-#endif
-                for (int ir = 0; ir < nrxx; ++ir)
-                {
-                    sigma[ir * 3] = gdr[0][ir] * gdr[0][ir];
-                    sigma[ir * 3 + 1] = gdr[0][ir] * gdr[1][ir];
-                    sigma[ir * 3 + 2] = gdr[1][ir] * gdr[1][ir];
-                }
-            }
-        }
-        // -----------------------------------------------------------------------------------
-        //==================== XC Kernels (f_xc)=============================
-        this->kernel_set_.emplace("vrho", std::vector<double>(nspin * nrxx));
-        this->kernel_set_.emplace("v2rho2", std::vector<double>(((1 == nspin) ? 1 : 3) * nrxx));//(nrxx* ((1 == nspin) ? 1 : 3)): 00, 01, 11
-        if (is_gga)
-        {
-            this->kernel_set_.emplace("vsigma", std::vector<double>(((1 == nspin) ? 1 : 3) * nrxx)); //(nrxx*): 2 for rho * 3 for sigma: 00, 01, 02, 10, 11, 12
-            this->kernel_set_.emplace("v2rhosigma", std::vector<double>(((1 == nspin) ? 1 : 6) * nrxx)); //(nrxx*): 2 for rho * 3 for sigma: 00, 01, 02, 10, 11, 12
-            this->kernel_set_.emplace("v2sigma2", std::vector<double>(((1 == nspin) ? 1 : 6) * nrxx));   //(nrxx* ((1 == nspin) ? 1 : 6)): 00, 01, 02, 11, 12, 22
-        }
-        //MetaGGA ...
-
-        for (xc_func_type& func : funcs)
-        {
-            const double rho_threshold = 1E-6;
-            const double grho_threshold = 1E-10;
-
-            xc_func_set_dens_threshold(&func, rho_threshold);
-
-            //cut off grho if not LDA (future subfunc)
-
-            switch (func.info->family)
-            {
-            case XC_FAMILY_LDA:
-                xc_lda_vxc(&func, nrxx, rho.data(), this->kernel_set_["vrho"].data());
-                xc_lda_fxc(&func, nrxx, rho.data(), this->kernel_set_["v2rho2"].data());
-                break;
-            case XC_FAMILY_GGA:
-            case XC_FAMILY_HYB_GGA:
-                xc_gga_vxc(&func, nrxx, rho.data(), sigma.data(),
-                    this->kernel_set_["vrho"].data(), this->kernel_set_["vsigma"].data());
-                xc_gga_fxc(&func, nrxx, rho.data(), sigma.data(),
-                    this->kernel_set_["v2rho2"].data(), this->kernel_set_["v2rhosigma"].data(), this->kernel_set_["v2sigma2"].data());
-                break;
-            default:
-                throw std::domain_error("func.info->family =" + std::to_string(func.info->family)
-                    + " unfinished in " + std::string(__FILE__) + " line " + std::to_string(__LINE__));
-                break;
-            }
-            // some formulas for GGA
-            if (func.info->family == XC_FAMILY_GGA || func.info->family == XC_FAMILY_HYB_GGA)
-            {
-                const std::vector<double>& v2r2 = this->kernel_set_["v2rho2"];
-                const std::vector<double>& v2rs = this->kernel_set_["v2rhosigma"];
-                const std::vector<double>& v2s2 = this->kernel_set_["v2sigma2"];
-                const std::vector<double>& vs = this->kernel_set_["vsigma"];
-                const double tpiba2 = tpiba * tpiba;
-
-                if (1 == nspin)
-                {
-                    // 0. drho
-                    this->grad_kernel_set_.emplace("drho_gs", std::vector<ModuleBase::Vector3<double>>(nrxx));
-                    for (int ir = 0; ir < nrxx; ++ir)this->grad_kernel_set_["drho_gs"][ir] = gdr[0][ir];
-                    // 1. $2f^{\rho\sigma}*\nabla\rho$
-                    this->grad_kernel_set_.emplace("2_v2rhosigma_drho", std::vector<ModuleBase::Vector3<double>>(nrxx));
-                    for (int ir = 0; ir < nrxx; ++ir)this->grad_kernel_set_["2_v2rhosigma_drho"][ir] = gdr[0][ir] * v2rs.at(ir) * 2.;
-                    // 2. $4f^{\sigma\sigma}*\nabla\rho$
-                    this->grad_kernel_set_.emplace("4_v2sigma2_drho", std::vector<ModuleBase::Vector3<double>>(nrxx));
-                    for (int ir = 0; ir < nrxx; ++ir)this->grad_kernel_set_["4_v2sigma2_drho"][ir] = sigma.at(ir) * v2s2.at(ir) * 4.;
-                }
-                // else if (2 == nspin)    // wrong, to be fixed
-                // {
-                //     // 1. $\nabla\cdot(f^{\rho\sigma}*\nabla\rho)$
-                //     std::vector<double> div_v2rhosigma_gdrho_r(3 * nrxx);
-                //     std::vector<ModuleBase::Vector3<double>> v2rhosigma_gdrho_r(3 * nrxx);
-                //     for (int ir = 0; ir < nrxx; ++ir)
-                //     {
-                //         v2rhosigma_gdrho_r[ir] = gdr[0][ir] * v2rs.at(ir * 6) * 4.0
-                //             + gdr[1][ir] * v2rs.at(ir * 6 + 1) * 2.0;   //up-up
-                //         v2rhosigma_gdrho_r[nrxx + ir] = gdr[0][ir] * (v2rs.at(ir * 6 + 3) * 2.0 + v2rs.at(ir * 6 + 1))
-                //             + gdr[1][ir] * (v2rs.at(ir * 6 + 2) * 2.0 + v2rs.at(ir * 6 + 4));   //up-down
-                //         v2rhosigma_gdrho_r[2 * nrxx + ir] = gdr[1][ir] * v2rs.at(ir * 6 + 5) * 4.0
-                //             + gdr[0][ir] * v2rs.at(ir * 6 + 4) * 2.0;   //down-down
-                //     }
-                //     for (int isig = 0;isig < 3;++isig)
-                //         XC_Functional::grad_dot(v2rhosigma_gdrho_r.data() + isig * nrxx, div_v2rhosigma_gdrho_r.data() + isig * nrxx, chg_gs->rhopw, tpiba);
-                //     // 2. $\nabla^2(f^{\sigma\sigma}*\sigma)$
-                //     std::vector<double> v2sigma2_sigma_r(3 * nrxx);
-                //     for (int ir = 0; ir < nrxx; ++ir)
-                //     {
-                //         v2sigma2_sigma_r[ir] = v2s2.at(ir * 6) * sigma[ir * 3] * 4.0
-                //             + v2s2.at(ir * 6 + 1) * sigma[ir * 3 + 1] * 4.0
-                //             + v2s2.at(ir * 6 + 3) * sigma[ir * 3 + 2];   //up-up
-                //         v2sigma2_sigma_r[nrxx + ir] = v2s2.at(ir * 6 + 1) * sigma[ir * 3] * 2.0
-                //             + v2s2.at(ir * 6 + 4) * sigma[ir * 3 + 2] * 2.0
-                //             + (v2s2.at(ir * 6 + 2) * 4.0 + v2s2.at(ir * 6 + 3)) * sigma[ir * 3 + 1];   //up-down
-                //         v2sigma2_sigma_r[2 * nrxx + ir] = v2s2.at(ir * 6 + 5) * sigma[ir * 3 + 2] * 4.0
-                //             + v2s2.at(ir * 6 + 4) * sigma[ir * 3 + 1] * 4.0
-                //             + v2s2.at(ir * 6 + 3) * sigma[ir * 3];   //down-down
-                //     }
-                //     for (int isig = 0;isig < 3;++isig)
-                //         LR_Util::laplace(v2sigma2_sigma_r.data() + isig * nrxx, v2sigma2_sigma_r.data() + isig * nrxx, *(chg_gs->rhopw), tpiba2);
-                //     // 3. $\nabla^2(v^\sigma)$
-                //     std::vector<double> lap_vsigma(3 * nrxx);
-                //     for (int ir = 0;ir < nrxx;++ir)
-                //     {
-                //         lap_vsigma[ir] = vs.at(ir * 3) * 2.0;
-                //         lap_vsigma[nrxx + ir] = vs.at(ir * 3 + 1);
-                //         lap_vsigma[2 * nrxx + ir] = vs.at(ir * 3 + 2) * 2.0;
-                //     }
-                //     for (int isig = 0;isig < 3;++isig)
-                //         LR_Util::laplace(lap_vsigma.data() + isig * nrxx, lap_vsigma.data() + isig * nrxx, *(chg_gs->rhopw), tpiba2);
-                //     // add to v2rho2
-                //     BlasConnector::axpy(3 * nrxx, 1.0, v2r2.data(), 1, to_mul_rho_.data(), 1);
-                //     BlasConnector::axpy(3 * nrxx, -1.0, div_v2rhosigma_gdrho_r.data(), 1, to_mul_rho_.data(), 1);
-                //     BlasConnector::axpy(3 * nrxx, 1.0, v2sigma2_sigma_r.data(), 1, to_mul_rho_.data(), 1);
-                //     BlasConnector::axpy(3 * nrxx, 1.0, lap_vsigma.data(), 1, to_mul_rho_.data(), 1);
-                // }
-                else
-                {
-                    throw std::domain_error("nspin =" + std::to_string(nspin)
-                        + " unfinished in " + std::string(__FILE__) + " line " + std::to_string(__LINE__));
-                }
-            }
-        } // end for( xc_func_type &func : funcs )
-        XC_Functional_Libxc::finish_func(funcs);
-
-        if (1 == PARAM.inp.nspin || 2 == PARAM.inp.nspin) return;
-        // else if (4 == PARAM.inp.nspin)
-        else//NSPIN != 1,2,4 is not supported
-        {
-            throw std::domain_error("PARAM.inp.nspin =" + std::to_string(PARAM.inp.nspin)
-                + " unfinished in " + std::string(__FILE__) + " line " + std::to_string(__LINE__));
-        }
-}
-#endif
\ No newline at end of file
diff --git a/source/module_lr/potentials/pot_hxc_lrtd.cpp b/source/module_lr/potentials/pot_hxc_lrtd.cpp
index ebdb8747c5..1508b87301 100644
--- a/source/module_lr/potentials/pot_hxc_lrtd.cpp
+++ b/source/module_lr/potentials/pot_hxc_lrtd.cpp
@@ -1,40 +1,29 @@
 #include "pot_hxc_lrtd.h"
-#include "module_elecstate/potentials/pot_base.h"
 #include "module_parameter/parameter.h"
 #include "module_elecstate/potentials/H_Hartree_pw.h"
 #include "module_base/timer.h"
 #include "module_hamilt_general/module_xc/xc_functional.h"
 #include <set>
 #include "module_lr/utils/lr_util.h"
-
+#include "module_lr/utils/lr_util_xc.hpp"
+#include "module_hamilt_pw/hamilt_pwdft/global.h"    // tmp, for pgrid
 #define FXC_PARA_TYPE const double* const rho, ModuleBase::matrix& v_eff, const std::vector<int>& ispin_op = { 0,0 }
 namespace LR
 {
     // constructor for exchange-correlation kernel
-    PotHxcLR::PotHxcLR(const std::string& xc_kernel_in, const ModulePW::PW_Basis* rho_basis_in, const UnitCell* ucell_in,
-        const Charge* chg_gs/*ground state*/, const SpinType& st_in)
-        :xc_kernel(xc_kernel_in), tpiba_(ucell_in->tpiba), spin_type_(st_in)
+    PotHxcLR::PotHxcLR(const std::string& xc_kernel, const ModulePW::PW_Basis& rho_basis, const UnitCell& ucell,
+        const Charge& chg_gs/*ground state*/, const Parallel_Grid& pgrid,
+        const SpinType& st, const std::vector<std::string>& lr_init_xc_kernel)
+        :xc_kernel_(xc_kernel), tpiba_(ucell.tpiba), spin_type_(st), rho_basis_(rho_basis), nrxx_(chg_gs.nrxx),
+        nspin_(PARAM.inp.nspin == 1 || (PARAM.inp.nspin == 4 && !PARAM.globalv.domag && !PARAM.globalv.domag_z) ? 1 : 2),
+        pot_hartree_(LR_Util::make_unique<elecstate::PotHartree>(&rho_basis)),
+        xc_kernel_components_(rho_basis, ucell, chg_gs, pgrid, nspin_, xc_kernel, lr_init_xc_kernel), //call XC_Functional::set_func_type and libxc
+        xc_type_(XCType(XC_Functional::get_func_type()))
     {
-        this->rho_basis_ = rho_basis_in;
-        this->nrxx = chg_gs->nrxx;
-        this->nspin = (PARAM.inp.nspin == 1 || (PARAM.inp.nspin == 4 && !PARAM.globalv.domag && !PARAM.globalv.domag_z)) ? 1 : 2;
-
-        this->pot_hartree = LR_Util::make_unique<elecstate::PotHartree>(this->rho_basis_);
-        std::set<std::string> local_xc = { "lda", "pbe", "hse" };
-        if (local_xc.find(this->xc_kernel) != local_xc.end())
-        {
-            XC_Functional::set_xc_type(this->xc_kernel);    // for hse, (1-alpha) and omega are set here
-            this->xc_type_ = XCType(XC_Functional::get_func_type());
-#ifdef USE_LIBXC
-            this->set_integral_func(this->spin_type_, this->xc_type_);
-            this->xc_kernel_components_.f_xc_libxc(nspin, ucell_in->omega, ucell_in->tpiba, chg_gs);
-#else 
-            ModuleBase::WARNING_QUIT("KernelXC", "to calculate xc-kernel in LR-TDDFT, compile with LIBXC");
-#endif
-        }
+        if (std::set<std::string>({ "lda", "pwlda", "pbe", "hse" }).count(xc_kernel)) { this->set_integral_func(this->spin_type_, this->xc_type_); }
     }
 
-    void PotHxcLR::cal_v_eff(double** rho, const UnitCell* ucell, ModuleBase::matrix& v_eff, const std::vector<int>& ispin_op)
+    void PotHxcLR::cal_v_eff(double** rho, const UnitCell& ucell, ModuleBase::matrix& v_eff, const std::vector<int>& ispin_op)
     {
         ModuleBase::TITLE("PotHxcLR", "cal_v_eff");
         ModuleBase::timer::tick("PotHxcLR", "cal_v_eff");
@@ -44,16 +33,16 @@ namespace LR
         switch (this->spin_type_)
         {
         case SpinType::S1: case SpinType::S2_updown:
-            v_eff += elecstate::H_Hartree_pw::v_hartree(*ucell, const_cast<ModulePW::PW_Basis*>(this->rho_basis_), 1, rho);
+            v_eff += elecstate::H_Hartree_pw::v_hartree(ucell, const_cast<ModulePW::PW_Basis*>(&this->rho_basis_), 1, rho);
             break;
         case SpinType::S2_singlet:
-            v_eff += 2 * elecstate::H_Hartree_pw::v_hartree(*ucell, const_cast<ModulePW::PW_Basis*>(this->rho_basis_), 1, rho);
+            v_eff += 2 * elecstate::H_Hartree_pw::v_hartree(ucell, const_cast<ModulePW::PW_Basis*>(&this->rho_basis_), 1, rho);
             break;
         default:
             break;
         }
         // XC
-        if (xc_kernel == "rpa" || xc_kernel == "hf") { return; }    // no xc
+        if (this->xc_kernel_ == "rpa" || this->xc_kernel_ == "hf") { return; }    // no xc
 #ifdef USE_LIBXC
         this->kernel_to_potential_[spin_type_](rho[0], v_eff, ispin_op);
 #else
@@ -63,17 +52,16 @@ namespace LR
         ModuleBase::timer::tick("PotHxcLR", "cal_v_eff");
     }
 
-#ifdef USE_LIBXC
     void PotHxcLR::set_integral_func(const SpinType& s, const XCType& xc)
     {
         auto& funcs = this->kernel_to_potential_;
         auto& fxc = this->xc_kernel_components_;
-        if (xc == XCType::LDA) switch (s)
+        if (xc == XCType::LDA) { switch (s)
         {
         case SpinType::S1:
             funcs[s] = [this, &fxc](FXC_PARA_TYPE)->void
                 {
-                    for (int ir = 0;ir < nrxx;++ir) { v_eff(0, ir) += ModuleBase::e2 * fxc.get_kernel("v2rho2").at(ir) * rho[ir]; }
+                    for (int ir = 0;ir < nrxx;++ir) { v_eff(0, ir) += ModuleBase::e2 * fxc.v2rho2.at(ir) * rho[ir]; }
                 };
             break;
         case SpinType::S2_singlet:
@@ -83,7 +71,7 @@ namespace LR
                     {
                         const int irs0 = 3 * ir;
                         const int irs1 = irs0 + 1;
-                        v_eff(0, ir) += ModuleBase::e2 * (fxc.get_kernel("v2rho2").at(irs0) + fxc.get_kernel("v2rho2").at(irs1)) * rho[ir];
+                        v_eff(0, ir) += ModuleBase::e2 * (fxc.v2rho2.at(irs0) + fxc.v2rho2.at(irs1)) * rho[ir];
                     }
                 };
             break;
@@ -94,7 +82,7 @@ namespace LR
                     {
                         const int irs0 = 3 * ir;
                         const int irs1 = irs0 + 1;
-                        v_eff(0, ir) += ModuleBase::e2 * (fxc.get_kernel("v2rho2").at(irs0) - fxc.get_kernel("v2rho2").at(irs1)) * rho[ir];
+                        v_eff(0, ir) += ModuleBase::e2 * (fxc.v2rho2.at(irs0) - fxc.v2rho2.at(irs1)) * rho[ir];
                     }
                 };
             break;
@@ -103,7 +91,7 @@ namespace LR
                 {
                     assert(ispin_op.size() >= 2);
                     const int is = ispin_op[0] + ispin_op[1];
-                    for (int ir = 0;ir < nrxx;++ir) { v_eff(0, ir) += ModuleBase::e2 * fxc.get_kernel("v2rho2").at(3 * ir + is) * rho[ir]; }
+                    for (int ir = 0;ir < nrxx;++ir) { v_eff(0, ir) += ModuleBase::e2 * fxc.v2rho2.at(3 * ir + is) * rho[ir]; }
                 };
             break;
         default:
@@ -111,7 +99,7 @@ namespace LR
                 + " unfinished in " + std::string(__FILE__) + " line " + std::to_string(__LINE__));
             break;
         }
-        else if (xc == XCType::GGA || xc == XCType::HYB_GGA) switch (s)
+        } else if (xc == XCType::GGA || xc == XCType::HYB_GGA) { switch (s)
         {
         case SpinType::S1:
             funcs[s] = [this, &fxc](FXC_PARA_TYPE)->void
@@ -130,7 +118,7 @@ namespace LR
                     //     std::cout << std::endl;};
 
                     std::vector<ModuleBase::Vector3<double>> drho(nrxx);    // transition density gradient
-                    LR_Util::grad(rho, drho.data(), *(this->rho_basis_), this->tpiba_);
+                    LR_Util::grad(rho, drho.data(), this->rho_basis_, this->tpiba_);
 
                     std::vector<double> vxc_tmp(nrxx, 0.0);
 
@@ -138,17 +126,17 @@ namespace LR
                     std::vector<ModuleBase::Vector3<double>> e_drho(nrxx);
                     for (int ir = 0;ir < nrxx;++ir)
                     {
-                        e_drho[ir] = -(fxc.get_grad_kernel("2_v2rhosigma_drho").at(ir) * rho[ir]
-                            + fxc.get_grad_kernel("4_v2sigma2_drho").at(ir) * (fxc.get_grad_kernel("drho_gs").at(ir) * drho.at(ir))
-                            + drho.at(ir) * fxc.get_kernel("vsigma").at(ir) * 2.);
+                        e_drho[ir] = -(fxc.v2rhosigma_2drho.at(ir) * rho[ir]
+                            + fxc.v2sigma2_4drho.at(ir) * (fxc.drho_gs.at(ir) * drho.at(ir))
+                            + drho.at(ir) * fxc.vsigma.at(ir) * 2.);
                     }
-                    XC_Functional::grad_dot(e_drho.data(), vxc_tmp.data(), this->rho_basis_, this->tpiba_);
+                    XC_Functional::grad_dot(e_drho.data(), vxc_tmp.data(), &this->rho_basis_, this->tpiba_);
 
                     // 2. $f^{\rho\rho}\rho_1+2f^{\rho\sigma}\nabla\rho\cdot\nabla\rho_1$
                     for (int ir = 0;ir < nrxx;++ir)
                     {
-                        vxc_tmp[ir] += (fxc.get_kernel("v2rho2").at(ir) * rho[ir]
-                            + fxc.get_grad_kernel("2_v2rhosigma_drho").at(ir) * drho.at(ir));
+                        vxc_tmp[ir] += (fxc.v2rho2.at(ir) * rho[ir]
+                            + fxc.v2rhosigma_2drho.at(ir) * drho.at(ir));
                     }
                     BlasConnector::axpy(nrxx, ModuleBase::e2, vxc_tmp.data(), 1, v_eff.c, 1);
                 };
@@ -162,11 +150,10 @@ namespace LR
                 + std::string(__FILE__) + " line " + std::to_string(__LINE__));
             break;
         }
-        else
+        } else
         {
             throw std::domain_error("GlobalV::XC_Functional::get_func_type() =" + std::to_string(XC_Functional::get_func_type())
                 + " unfinished in " + std::string(__FILE__) + " line " + std::to_string(__LINE__));
         }
     }
-#endif
 } // namespace LR
diff --git a/source/module_lr/potentials/pot_hxc_lrtd.h b/source/module_lr/potentials/pot_hxc_lrtd.h
index a04ab42bb9..c165999ffb 100644
--- a/source/module_lr/potentials/pot_hxc_lrtd.h
+++ b/source/module_lr/potentials/pot_hxc_lrtd.h
@@ -1,35 +1,39 @@
 #pragma once
-#include "module_elecstate/potentials/pot_base.h"
 #include "module_elecstate/potentials/H_Hartree_pw.h"
-#include "kernel.h"
+#include "xc_kernel.h"
 #include <unordered_map>
 #include <memory>
+
 namespace LR
 {
-    class PotHxcLR : public elecstate::PotBase
+    class PotHxcLR
     {
     public:
         /// S1: K^Hartree + K^xc
         /// S2_singlet: 2*K^Hartree + K^xc_{upup} + K^xc_{updown}
         /// S2_triplet: K^xc_{upup} - K^xc_{updown}
+        /// S2_updown: K^Hartree + (K^xc_{upup}, K^xc_{updown},  K^xc_{downup} or K^xc_{downdown}), according to `ispin_op` (for spin-polarized systems)
         enum SpinType { S1 = 0, S2_singlet = 1, S2_triplet = 2, S2_updown = 3 };
+        /// XCType here is to determin the method of integration from kernel to potential, not the way calculating the kernel
         enum XCType { None = 0, LDA = 1, GGA = 2, HYB_GGA = 4 };
         /// constructor for exchange-correlation kernel
-        PotHxcLR(const std::string& xc_kernel_in, const ModulePW::PW_Basis* rho_basis_in, const UnitCell* ucell_in, const Charge* chg_gs/*ground state*/,
-            const SpinType& st_in = SpinType::S1);
+        PotHxcLR(const std::string& xc_kernel, const ModulePW::PW_Basis& rho_basis,
+            const UnitCell& ucell, const Charge& chg_gs/*ground state*/, const Parallel_Grid& pgrid,
+            const SpinType& st = SpinType::S1, const std::vector<std::string>& lr_init_xc_kernel = { "default" });
         ~PotHxcLR() {}
-        void cal_v_eff(const Charge* chg/*excited state*/, const UnitCell* ucell, ModuleBase::matrix& v_eff) override {};
-        void cal_v_eff(double** rho, const UnitCell* ucell, ModuleBase::matrix& v_eff, const std::vector<int>& ispin_op = { 0,0 });
-        int nrxx;
+        void cal_v_eff(double** rho, const UnitCell& ucell, ModuleBase::matrix& v_eff, const std::vector<int>& ispin_op = { 0,0 });
+        const int& nrxx = nrxx_;
     private:
-        int nspin;
-        std::unique_ptr<elecstate::PotHartree> pot_hartree;
+        const ModulePW::PW_Basis& rho_basis_;
+        const int nspin_ = 1;
+        const int nrxx_ = 1;
+        std::unique_ptr<elecstate::PotHartree> pot_hartree_;
         /// different components of local and semi-local xc kernels:
         /// LDA: v2rho2
         /// GGA: v2rho2, v2rhosigma, v2sigma2
         /// meta-GGA: v2rho2, v2rhosigma, v2sigma2, v2rholap, v2rhotau, v2sigmalap, v2sigmatau, v2laptau, v2lap2, v2tau2
-        KernelXC xc_kernel_components_;
-        const std::string xc_kernel;
+        const KernelXC xc_kernel_components_;
+        const std::string xc_kernel_;
         const double& tpiba_;
         const SpinType spin_type_ = SpinType::S1;
         XCType xc_type_ = XCType::None;
diff --git a/source/module_lr/potentials/xc_kernel.cpp b/source/module_lr/potentials/xc_kernel.cpp
new file mode 100644
index 0000000000..f5fa4f8fc8
--- /dev/null
+++ b/source/module_lr/potentials/xc_kernel.cpp
@@ -0,0 +1,270 @@
+#include "xc_kernel.h"
+#include "module_hamilt_general/module_xc/xc_functional.h"
+#include "module_parameter/parameter.h"
+#include "module_base/timer.h"
+#include "module_lr/utils/lr_util.h"
+#include "module_lr/utils/lr_util_xc.hpp"
+#include <set>
+#include "module_io/cube_io.h"
+#ifdef USE_LIBXC
+#include <xc.h>
+#include "module_hamilt_general/module_xc/xc_functional_libxc.h"
+#endif
+
+LR::KernelXC::KernelXC(const ModulePW::PW_Basis& rho_basis,
+    const UnitCell& ucell,
+    const Charge& chg_gs,
+    const Parallel_Grid& pgrid,
+    const int& nspin,
+    const std::string& kernel_name,
+    const std::vector<std::string>& lr_init_xc_kernel) :rho_basis_(rho_basis)
+{
+    if (!std::set<std::string>({ "lda", "pwlda", "pbe", "hse" }).count(kernel_name)) { return; }
+    XC_Functional::set_xc_type(kernel_name);    // for hse, (1-alpha) and omega are set here
+
+    const int& nrxx = rho_basis.nrxx;
+    if (lr_init_xc_kernel[0] == "file")
+    {
+        const std::set<std::string> lda_xc = { "lda", "pwlda" };
+        assert(lda_xc.count(kernel_name));
+        const int n_component = (1 == nspin) ? 1 : 3;   // spin components of fxc: (uu, ud=du, dd) when nspin=2
+        this->v2rho2_.resize(n_component * nrxx);
+        // read fxc adn add to xc_kernel_components
+        assert(lr_init_xc_kernel.size() >= n_component + 1);
+        for (int is = 0;is < n_component;++is)
+        {
+            double ef = 0.0;
+            int prenspin = 1;
+            std::vector<double> v2rho2_tmp(nrxx);
+            ModuleIO::read_vdata_palgrid(pgrid, GlobalV::MY_RANK, GlobalV::ofs_running, lr_init_xc_kernel[is + 1],
+                v2rho2_tmp.data(), ucell.nat);
+            for (int ir = 0;ir < nrxx;++ir) { this->v2rho2_[ir * n_component + is] = v2rho2_tmp[ir]; }
+        }
+        return;
+    }
+
+#ifdef USE_LIBXC
+    if (lr_init_xc_kernel[0] == "from_charge_file")
+    {
+        assert(lr_init_xc_kernel.size() >= 2);
+        double** rho_for_fxc;
+        LR_Util::_allocate_2order_nested_ptr(rho_for_fxc, nspin, nrxx);
+        double ef = 0.0;
+        int prenspin = 1;
+        for (int is = 0;is < nspin;++is)
+        {
+            const std::string file = lr_init_xc_kernel[lr_init_xc_kernel.size() > nspin ? 1 + is : 1];
+            ModuleIO::read_vdata_palgrid(pgrid, GlobalV::MY_RANK, GlobalV::ofs_running, file,
+                rho_for_fxc[is], ucell.nat);
+        }
+        this->f_xc_libxc(nspin, ucell.omega, ucell.tpiba, rho_for_fxc, chg_gs.rho_core);
+        LR_Util::_deallocate_2order_nested_ptr(rho_for_fxc, nspin);
+    }
+    else { this->f_xc_libxc(nspin, ucell.omega, ucell.tpiba, chg_gs.rho, chg_gs.rho_core); }
+#else 
+    ModuleBase::WARNING_QUIT("KernelXC", "to calculate xc-kernel in LR-TDDFT, compile with LIBXC");
+#endif
+}
+
+#ifdef USE_LIBXC
+void LR::KernelXC::f_xc_libxc(const int& nspin, const double& omega, const double& tpiba, const double* const* const rho_gs, const double* const rho_core)
+{
+    ModuleBase::TITLE("XC_Functional", "f_xc_libxc");
+    ModuleBase::timer::tick("XC_Functional", "f_xc_libxc");
+    // https://www.tddft.org/programs/libxc/manual/libxc-5.1.x/
+
+    assert(nspin == 1 || nspin == 2);
+
+    std::vector<xc_func_type> funcs = XC_Functional_Libxc::init_func(
+        XC_Functional::get_func_id(),
+        (1 == nspin) ? XC_UNPOLARIZED : XC_POLARIZED);
+    const int& nrxx = rho_basis_.nrxx;
+
+    // converting rho (extract it as a subfuntion in the future)
+    // -----------------------------------------------------------------------------------
+    std::vector<double> rho(nspin * nrxx);    // r major / spin contigous
+
+#ifdef _OPENMP
+#pragma omp parallel for collapse(2) schedule(static, 1024)
+#endif
+    for (int is = 0; is < nspin; ++is) { for (int ir = 0; ir < nrxx; ++ir) { rho[ir * nspin + is] = rho_gs[is][ir]; } }
+    if (rho_core)
+    {
+        const double fac = 1.0 / nspin;
+        for (int is = 0; is < nspin; ++is) { for (int ir = 0; ir < nrxx; ++ir) { rho[ir * nspin + is] += fac * rho_core[ir]; } }
+    }
+
+    // -----------------------------------------------------------------------------------
+    // for GGA
+    const bool is_gga = std::any_of(funcs.begin(), funcs.end(), [](const xc_func_type& f) { return f.info->family == XC_FAMILY_GGA || f.info->family == XC_FAMILY_HYB_GGA; });
+
+    std::vector<std::vector<ModuleBase::Vector3<double>>> gradrho;  // \nabla \rho
+    std::vector<double> sigma;  // |\nabla\rho|^2
+    std::vector<double> sgn;        // sgn for threshold mask
+    if (is_gga)
+    {
+        // 0. set up sgn for threshold mask
+        // in the case of GGA correlation for polarized case,
+        // a cutoff for grho is required to ensure that libxc gives reasonable results
+
+        // 1. \nabla \rho
+        gradrho.resize(nspin);
+        for (int is = 0; is < nspin; ++is)
+        {
+            std::vector<double> rhor(nrxx);
+#ifdef _OPENMP
+#pragma omp parallel for schedule(static, 1024)
+#endif
+            for (int ir = 0; ir < nrxx; ++ir) rhor[ir] = rho[ir * nspin + is];
+            gradrho[is].resize(nrxx);
+            LR_Util::grad(rhor.data(), gradrho[is].data(), rho_basis_, tpiba);
+        }
+        // 2. |\nabla\rho|^2
+        sigma.resize(nrxx * ((1 == nspin) ? 1 : 3));
+        if (1 == nspin)
+        {
+#ifdef _OPENMP
+#pragma omp parallel for schedule(static, 1024)
+#endif
+            for (int ir = 0; ir < nrxx; ++ir)
+                sigma[ir] = gradrho[0][ir] * gradrho[0][ir];
+        }
+        else
+        {
+#ifdef _OPENMP
+#pragma omp parallel for schedule(static, 256)
+#endif
+            for (int ir = 0; ir < nrxx; ++ir)
+            {
+                sigma[ir * 3] = gradrho[0][ir] * gradrho[0][ir];
+                sigma[ir * 3 + 1] = gradrho[0][ir] * gradrho[1][ir];
+                sigma[ir * 3 + 2] = gradrho[1][ir] * gradrho[1][ir];
+            }
+        }
+    }
+    // -----------------------------------------------------------------------------------
+    //==================== XC Kernels (f_xc)=============================
+    this->vrho_.resize(nspin * nrxx);
+    this->v2rho2_.resize(((1 == nspin) ? 1 : 3) * nrxx);//(nrxx* ((1 == nspin) ? 1 : 3)): 00, 01, 11
+    if (is_gga)
+    {
+        this->vsigma_.resize(((1 == nspin) ? 1 : 3) * nrxx);//(nrxx*): 2 for rho * 3 for sigma: 00, 01, 02, 10, 11, 12
+        this->v2rhosigma_.resize(((1 == nspin) ? 1 : 6) * nrxx); //(nrxx*): 2 for rho * 3 for sigma: 00, 01, 02, 10, 11, 12
+        this->v2sigma2_.resize(((1 == nspin) ? 1 : 6) * nrxx);   //(nrxx* ((1 == nspin) ? 1 : 6)): 00, 01, 02, 11, 12, 22
+    }
+    //MetaGGA ...
+
+    for (xc_func_type& func : funcs)
+    {
+        const double rho_threshold = 1E-6;
+        const double grho_threshold = 1E-10;
+
+        xc_func_set_dens_threshold(&func, rho_threshold);
+
+        //cut off grho if not LDA (future subfunc)
+
+        switch (func.info->family)
+        {
+        case XC_FAMILY_LDA:
+            xc_lda_vxc(&func, nrxx, rho.data(), vrho_.data());
+            xc_lda_fxc(&func, nrxx, rho.data(), v2rho2_.data());
+            break;
+        case XC_FAMILY_GGA:
+        case XC_FAMILY_HYB_GGA:
+            xc_gga_vxc(&func, nrxx, rho.data(), sigma.data(), vrho_.data(), vsigma_.data());
+            xc_gga_fxc(&func, nrxx, rho.data(), sigma.data(), v2rho2_.data(), v2rhosigma_.data(), v2sigma2_.data());
+            break;
+        default:
+            throw std::domain_error("func.info->family =" + std::to_string(func.info->family)
+                + " unfinished in " + std::string(__FILE__) + " line " + std::to_string(__LINE__));
+            break;
+        }
+        // some formulas for GGA
+        if (func.info->family == XC_FAMILY_GGA || func.info->family == XC_FAMILY_HYB_GGA)
+        {
+            const std::vector<double>& v2r2 = this->v2rho2_;
+            const std::vector<double>& v2rs = this->v2rhosigma_;
+            const std::vector<double>& v2s2 = this->v2sigma2_;
+            const std::vector<double>& vs = this->vsigma_;
+            const double tpiba2 = tpiba * tpiba;
+
+            if (1 == nspin)
+            {
+                using V3 = ModuleBase::Vector3<double>;
+                // 0. drho
+                this->drho_gs_ = gradrho[0];
+                // 1. $2f^{\rho\sigma}*\nabla\rho$
+                this->v2rhosigma_2drho_.resize(nrxx);
+                std::transform(gradrho[0].begin(), gradrho[0].end(), v2rs.begin(), this->v2rhosigma_2drho_.begin(),
+                    [](const V3& a, const V3& b) {return a * b * 2.; });
+                // 2. $4f^{\sigma\sigma}*\nabla\rho$
+                this->v2sigma2_4drho_.resize(nrxx);
+                std::transform(sigma.begin(), sigma.end(), v2s2.begin(), v2sigma2_4drho_.begin(),
+                    [](const V3& a, const V3& b) {return a * b * 4.; });
+            }
+            // else if (2 == nspin)    // wrong, to be fixed
+            // {
+            //     // 1. $\nabla\cdot(f^{\rho\sigma}*\nabla\rho)$
+            //     std::vector<double> div_v2rhosigma_gdrho_r(3 * nrxx);
+            //     std::vector<ModuleBase::Vector3<double>> v2rhosigma_gdrho_r(3 * nrxx);
+            //     for (int ir = 0; ir < nrxx; ++ir)
+            //     {
+            //         v2rhosigma_gdrho_r[ir] = gradrho[0][ir] * v2rs.at(ir * 6) * 4.0
+            //             + gradrho[1][ir] * v2rs.at(ir * 6 + 1) * 2.0;   //up-up
+            //         v2rhosigma_gdrho_r[nrxx + ir] = gradrho[0][ir] * (v2rs.at(ir * 6 + 3) * 2.0 + v2rs.at(ir * 6 + 1))
+            //             + gradrho[1][ir] * (v2rs.at(ir * 6 + 2) * 2.0 + v2rs.at(ir * 6 + 4));   //up-down
+            //         v2rhosigma_gdrho_r[2 * nrxx + ir] = gradrho[1][ir] * v2rs.at(ir * 6 + 5) * 4.0
+            //             + gradrho[0][ir] * v2rs.at(ir * 6 + 4) * 2.0;   //down-down
+            //     }
+            //     for (int isig = 0;isig < 3;++isig)
+            //         XC_Functional::grad_dot(v2rhosigma_gdrho_r.data() + isig * nrxx, div_v2rhosigma_gdrho_r.data() + isig * nrxx, chg_gs.rhopw, tpiba);
+            //     // 2. $\nabla^2(f^{\sigma\sigma}*\sigma)$
+            //     std::vector<double> v2sigma2_sigma_r(3 * nrxx);
+            //     for (int ir = 0; ir < nrxx; ++ir)
+            //     {
+            //         v2sigma2_sigma_r[ir] = v2s2.at(ir * 6) * sigma[ir * 3] * 4.0
+            //             + v2s2.at(ir * 6 + 1) * sigma[ir * 3 + 1] * 4.0
+            //             + v2s2.at(ir * 6 + 3) * sigma[ir * 3 + 2];   //up-up
+            //         v2sigma2_sigma_r[nrxx + ir] = v2s2.at(ir * 6 + 1) * sigma[ir * 3] * 2.0
+            //             + v2s2.at(ir * 6 + 4) * sigma[ir * 3 + 2] * 2.0
+            //             + (v2s2.at(ir * 6 + 2) * 4.0 + v2s2.at(ir * 6 + 3)) * sigma[ir * 3 + 1];   //up-down
+            //         v2sigma2_sigma_r[2 * nrxx + ir] = v2s2.at(ir * 6 + 5) * sigma[ir * 3 + 2] * 4.0
+            //             + v2s2.at(ir * 6 + 4) * sigma[ir * 3 + 1] * 4.0
+            //             + v2s2.at(ir * 6 + 3) * sigma[ir * 3];   //down-down
+            //     }
+            //     for (int isig = 0;isig < 3;++isig)
+            //         LR_Util::lapl(v2sigma2_sigma_r.data() + isig * nrxx, v2sigma2_sigma_r.data() + isig * nrxx, *(chg_gs.rhopw), tpiba2);
+            //     // 3. $\nabla^2(v^\sigma)$
+            //     std::vector<double> lap_vsigma(3 * nrxx);
+            //     for (int ir = 0;ir < nrxx;++ir)
+            //     {
+            //         lap_vsigma[ir] = vs.at(ir * 3) * 2.0;
+            //         lap_vsigma[nrxx + ir] = vs.at(ir * 3 + 1);
+            //         lap_vsigma[2 * nrxx + ir] = vs.at(ir * 3 + 2) * 2.0;
+            //     }
+            //     for (int isig = 0;isig < 3;++isig)
+            //         LR_Util::lapl(lap_vsigma.data() + isig * nrxx, lap_vsigma.data() + isig * nrxx, *(chg_gs.rhopw), tpiba2);
+            //     // add to v2rho2_
+            //     BlasConnector::axpy(3 * nrxx, 1.0, v2r2.data(), 1, to_mul_rho_.data(), 1);
+            //     BlasConnector::axpy(3 * nrxx, -1.0, div_v2rhosigma_gdrho_r.data(), 1, to_mul_rho_.data(), 1);
+            //     BlasConnector::axpy(3 * nrxx, 1.0, v2sigma2_sigma_r.data(), 1, to_mul_rho_.data(), 1);
+            //     BlasConnector::axpy(3 * nrxx, 1.0, lap_vsigma.data(), 1, to_mul_rho_.data(), 1);
+            // }
+            else
+            {
+                throw std::domain_error("nspin =" + std::to_string(nspin)
+                    + " unfinished in " + std::string(__FILE__) + " line " + std::to_string(__LINE__));
+            }
+        }
+    } // end for( xc_func_type &func : funcs )
+    XC_Functional_Libxc::finish_func(funcs);
+
+    if (1 == PARAM.inp.nspin || 2 == PARAM.inp.nspin) return;
+    // else if (4 == PARAM.inp.nspin)
+    else//NSPIN != 1,2,4 is not supported
+    {
+        throw std::domain_error("PARAM.inp.nspin =" + std::to_string(PARAM.inp.nspin)
+            + " unfinished in " + std::string(__FILE__) + " line " + std::to_string(__LINE__));
+    }
+}
+#endif
\ No newline at end of file
diff --git a/source/module_lr/potentials/xc_kernel.h b/source/module_lr/potentials/xc_kernel.h
new file mode 100644
index 0000000000..0db7f3d616
--- /dev/null
+++ b/source/module_lr/potentials/xc_kernel.h
@@ -0,0 +1,47 @@
+#pragma once
+#include "module_basis/module_pw/pw_basis.h"
+#include "module_cell/unitcell.h"
+#include "module_hamilt_pw/hamilt_pwdft/parallel_grid.h"
+#include "module_elecstate/module_charge/charge.h"
+#define CREF(x) const std::vector<double>& x = x##_;
+#define CREF3(x) const std::vector<ModuleBase::Vector3<double>>& x = x##_;
+namespace LR
+{
+    /// @brief Calculate the exchange-correlation (XC) kernel ($f_{xc}=\delta^2E_xc/\delta\rho^2$) and store its components.
+    class KernelXC
+    {
+    public:
+        KernelXC(const ModulePW::PW_Basis& rho_basis,
+            const UnitCell& ucell,
+            const Charge& chg_gs,
+            const Parallel_Grid& pgrid,
+            const int& nspin,
+            const std::string& kernel_name,
+            const std::vector<std::string>& lr_init_xc_kernel);
+        ~KernelXC() {};
+
+        // const references
+        CREF(vrho);CREF(vsigma); CREF(v2rho2); CREF(v2rhosigma); CREF(v2sigma2);
+        CREF3(drho_gs); CREF3(v2rhosigma_2drho); CREF3(v2sigma2_4drho);
+
+    private:
+#ifdef USE_LIBXC
+        /// @brief Calculate the XC kernel using libxc.
+        void f_xc_libxc(const int& nspin, const double& omega, const double& tpiba, const double* const* const rho_gs, const double* const rho_core = nullptr);
+#endif
+        // See https://libxc.gitlab.io/manual/libxc-5.1.x/ for the naming convention of the following members.
+        // std::map<std::string, std::vector<double>> kernel_set_; // [kernel_type][nrxx][nspin]
+        std::vector<double> vrho_;
+        std::vector<double> vsigma_;
+        std::vector<double> v2rho2_;
+        std::vector<double> v2rhosigma_;
+        std::vector<double> v2sigma2_;
+        // std::map<std::string, std::vector<ModuleBase::Vector3<double>>> grad_kernel_set_;// [kernel_type][nrxx][nspin],  intermediate terms for GGA
+        std::vector<ModuleBase::Vector3<double>> drho_gs_;
+        std::vector<ModuleBase::Vector3<double>> v2rhosigma_2drho_;  ///< $f^{\rho\sigma}*\nabla\rho *2$
+        std::vector<ModuleBase::Vector3<double>> v2sigma2_4drho_; ///< $f^{\sigma\sigma}*\nabla\rho *4$
+
+        const ModulePW::PW_Basis& rho_basis_;
+    };
+}
+
diff --git a/source/module_lr/utils/gint_move.hpp b/source/module_lr/utils/gint_move.hpp
index 7d21ca6037..8b31a09450 100644
--- a/source/module_lr/utils/gint_move.hpp
+++ b/source/module_lr/utils/gint_move.hpp
@@ -12,11 +12,11 @@ using D2 = void(*) (T**, size_t);
 // template <typename T>
 // using D3 = void(*) (T***, size_t, size_t);
 // template <typename T>
-// D2<T> d2 = LR_Util::delete_p2<T>;
+// D2<T> d2 = LR_Util::_deallocate_2order_nested_ptr<T>;
 // template <typename T>
 // D3<T> d3 = LR_Util::delete_p3<T>;
 // Change to C++ 11
-D2<double> d2 = LR_Util::delete_p2<double>;
+D2<double> d2 = LR_Util::_deallocate_2order_nested_ptr<double>;
 // D3<double> d3 = LR_Util::delete_p3<double>;
 
 
diff --git a/source/module_lr/utils/lr_util.cpp b/source/module_lr/utils/lr_util.cpp
index eb5ec2bc43..1418764126 100644
--- a/source/module_lr/utils/lr_util.cpp
+++ b/source/module_lr/utils/lr_util.cpp
@@ -62,7 +62,7 @@ namespace LR_Util
     template<>
     void matsym<double>(const double* in, const int n, const Parallel_2D& pmat, double* out)
     {
-        for (int i = 0;i < pmat.get_local_size();++i) { out[i] = in[i]; }
+        std::copy(in, in + pmat.get_local_size(), out);
         const double alpha = 0.5, beta = 0.5;
         const int i1 = 1;
         pdtran_(&n, &n, &alpha, in, &i1, &i1, pmat.desc, &beta, out, &i1, &i1, pmat.desc);
@@ -79,7 +79,7 @@ namespace LR_Util
     template<>
     void matsym<std::complex<double>>(const std::complex<double>* in, const int n, const Parallel_2D& pmat, std::complex<double>* out)
     {
-        for (int i = 0;i < pmat.get_local_size();++i) { out[i] = in[i]; }
+        std::copy(in, in + pmat.get_local_size(), out);
         const std::complex<double> alpha(0.5, 0.0), beta(0.5, 0.0);
         const int i1 = 1;
         pztranc_(&n, &n, &alpha, in, &i1, &i1, pmat.desc, &beta, out, &i1, &i1, pmat.desc);
@@ -94,93 +94,6 @@ namespace LR_Util
         pztranc_(&n, &n, &alpha, tmp.data(), &i1, &i1, pmat.desc, &beta, inout, &i1, &i1, pmat.desc);
     }
 #endif
-    container::Tensor mat2ten_double(ModuleBase::matrix& m)
-    {
-        container::Tensor t(DAT::DT_DOUBLE, DEV::CpuDevice, { m.nr, m.nc });
-        for (int i = 0;i < t.NumElements();++i) {t.data<double>()[i] = m.c[i];
-}
-        return t;
-    }
-    std::vector<container::Tensor> mat2ten_double(std::vector<ModuleBase::matrix>& m)
-    {
-        std::vector<container::Tensor> t;
-        for (int i = 0;i < m.size();++i) { t.push_back(mat2ten_double(m[i]));
-}
-        return t;
-    }
-    ModuleBase::matrix ten2mat_double(container::Tensor& t)
-    {
-        ModuleBase::matrix m(t.shape().dims()[0], t.shape().dims()[1]);
-        for (int i = 0;i < t.NumElements();++i) {m.c[i] = t.data<double>()[i];
-}
-        return m;
-    }
-    std::vector<ModuleBase::matrix> ten2mat_double(std::vector<container::Tensor>& t)
-    {
-        std::vector<ModuleBase::matrix> m;
-        for (int i = 0;i < t.size();++i) { m.push_back(ten2mat_double(t[i]));
-}
-        return m;
-    }
-    container::Tensor mat2ten_complex(ModuleBase::ComplexMatrix& m)
-    {
-        container::Tensor t(DAT::DT_COMPLEX_DOUBLE, DEV::CpuDevice, { m.nr, m.nc });
-        for (int i = 0;i < t.NumElements();++i) {t.data<std::complex<double>>()[i] = m.c[i];
-}
-        return t;
-    }
-    std::vector<container::Tensor> mat2ten_complex(std::vector<ModuleBase::ComplexMatrix>& m)
-    {
-        std::vector<container::Tensor> t;
-        for (int i = 0;i < m.size();++i) { t.push_back(mat2ten_complex(m[i]));
-}
-        return t;
-    }
-    ModuleBase::ComplexMatrix ten2mat_complex(container::Tensor& t)
-    {
-        ModuleBase::ComplexMatrix m(t.shape().dims()[0], t.shape().dims()[1]);
-        for (int i = 0;i < t.NumElements();++i) {m.c[i] = t.data<std::complex<double>>()[i];
-}
-        return m;
-    }
-    std::vector<ModuleBase::ComplexMatrix> ten2mat_complex(std::vector<container::Tensor>& t)
-    {
-        std::vector<ModuleBase::ComplexMatrix> m;
-        for (int i = 0;i < t.size();++i) { m.push_back(ten2mat_complex(t[i]));
-}
-        return m;
-    }
-
-    ModuleBase::matrix vec2mat(const std::vector<double>& v, const int nr, const int nc)
-    {
-        assert(v.size() == nr * nc);
-        ModuleBase::matrix m(nr, nc, false);
-        for (int i = 0;i < v.size();++i) { m.c[i] = v[i];
-}
-        return m;
-    }
-    ModuleBase::ComplexMatrix vec2mat(const std::vector<std::complex<double>>& v, const int nr, const int nc)
-    {
-        assert(v.size() == nr * nc);
-        ModuleBase::ComplexMatrix m(nr, nc, false);
-        for (int i = 0;i < v.size();++i) { m.c[i] = v[i];
-}
-        return m;
-    }
-    std::vector<ModuleBase::matrix> vec2mat(const std::vector<std::vector<double>>& v, const int nr, const int nc)
-    {
-        std::vector<ModuleBase::matrix> m(v.size());
-        for (int i = 0;i < v.size();++i) { m[i] = vec2mat(v[i], nr, nc);
-}
-        return m;
-    }
-    std::vector<ModuleBase::ComplexMatrix> vec2mat(const std::vector<std::vector<std::complex<double>>>& v, const int nr, const int nc)
-    {
-        std::vector<ModuleBase::ComplexMatrix> m(v.size());
-        for (int i = 0;i < v.size();++i) { m[i] = vec2mat(v[i], nr, nc);
-}
-        return m;
-    }
 
     // for the first matrix in the commutator
     void setup_2d_division(Parallel_2D& pv, int nb, int gr, int gc)
@@ -266,34 +179,4 @@ namespace LR_Util
             vl.data(), &ldvl, vr.data(), &ldvr, work2.data(), &lwork, rwork.data(), &info);
         if (info) { std::cout << "ERROR: Lapack solver zgeev, info=" << info << std::endl; }
     }
-#ifdef USE_LIBXC
-    void grad(const double* rhor,
-        ModuleBase::Vector3<double>* gdr,
-        const ModulePW::PW_Basis& rho_basis,
-        const double& tpiba)
-    {
-        std::vector<std::complex<double>> rhog(rho_basis.npw);
-        rho_basis.real2recip(rhor, rhog.data());
-        XC_Functional::grad_rho(rhog.data(), gdr, &rho_basis, tpiba);
-    }
-    void laplace(const double* rhor, double* lapn,
-        const ModulePW::PW_Basis& rho_basis,
-        const double& tpiba2)
-    {
-        ModuleBase::GlobalFunc::ZEROS(lapn, rho_basis.nrxx);
-        std::vector<std::complex<double>> rhog(rho_basis.npw);
-        std::vector<double> tmp_rhor(rho_basis.nrxx);
-        rho_basis.real2recip(rhor, rhog.data());
-        for (int i = 0;i < 3;++i)
-        {
-            for (int ig = 0; ig < rho_basis.npw; ig++) {
-                rhog[ig] *= pow(rho_basis.gcar[ig][i], 2);
-}
-            rho_basis.recip2real(rhog.data(), tmp_rhor.data());
-            for (int ir = 0; ir < rho_basis.nrxx; ir++) {
-                lapn[ir] -= tmp_rhor[ir] * tpiba2;
-}
-        }
-    }
-#endif
-}
+}
\ No newline at end of file
diff --git a/source/module_lr/utils/lr_util.h b/source/module_lr/utils/lr_util.h
index 93f26be2a6..7eed4f3062 100644
--- a/source/module_lr/utils/lr_util.h
+++ b/source/module_lr/utils/lr_util.h
@@ -12,6 +12,15 @@
 using DAT = container::DataType;
 using DEV = container::DeviceType;
 
+#ifndef TO_COMPLEX_H
+#define TO_COMPLEX_H
+template <typename T> struct ToComplex;
+template <> struct ToComplex<double> { using type = std::complex<double>; };
+template <> struct ToComplex<std::complex<double>> { using type = std::complex<double>; };
+template <> struct ToComplex<float> { using type = std::complex<float>; };
+template <> struct ToComplex<std::complex<float>> { using type = std::complex<float>; };
+#endif
+
 namespace LR_Util
 {
     /// =====================PHYSICS====================
@@ -36,26 +45,16 @@ namespace LR_Util
     std::pair<ModuleBase::matrix, std::vector<std::pair<int, int>>>
         set_ix_map_diagonal(bool mode, int nc, int nv);
 
-#ifdef  USE_LIBXC
-    /// operators to calculate XC kernels
-    void grad(const double* rhor,
-        ModuleBase::Vector3<double>* gdr,
-        const ModulePW::PW_Basis& rho_basis,
-        const double& tpiba);
-    void laplace(const double* rhor,
-        double* lapn,
-        const ModulePW::PW_Basis& rho_basis,
-        const double& tpiba2);
-#endif
+    // Operators to calculate xc kernel have been moved into lr_util_xc.hpp.
     /// =================ALGORITHM====================
 
     //====== newers and deleters========
     /// @brief  delete 2d pointer  
     template <typename T>
-    void delete_p2(T** p2, size_t size);
+    void _deallocate_2order_nested_ptr(T** p2, size_t size);
     /// @brief  new 2d pointer  
     template <typename T>
-    void new_p2(T**& p2, size_t size1, size_t size2);
+    void _allocate_2order_nested_ptr(T**& p2, size_t size1, size_t size2);
 
     template<typename T> ct::Tensor newTensor(const ct::TensorShape& shape)
     {
@@ -75,20 +74,6 @@ namespace LR_Util
     template<typename T>
     void matsym(T* inout, const int n, const Parallel_2D& pmat);
 #endif
-    ///======== Tensor - Matrix transformer==========
-    container::Tensor mat2ten_double(ModuleBase::matrix& m);
-    std::vector<container::Tensor> mat2ten_double(std::vector<ModuleBase::matrix>& m);
-    ModuleBase::matrix ten2mat_double(container::Tensor& t);
-    std::vector<ModuleBase::matrix> ten2mat_double(std::vector<container::Tensor>& t);
-    container::Tensor mat2ten_complex(ModuleBase::ComplexMatrix& m);
-    std::vector<container::Tensor> mat2ten_complex(std::vector<ModuleBase::ComplexMatrix>& m);
-    ModuleBase::ComplexMatrix ten2mat_complex(container::Tensor& t);
-    std::vector<ModuleBase::ComplexMatrix> ten2mat_complex(std::vector<container::Tensor>& t);
-
-    ModuleBase::matrix vec2mat(const std::vector<double>& v, const int nr, const int nc);
-    ModuleBase::ComplexMatrix vec2mat(const std::vector<std::complex<double>>& v, const int nr, const int nc);
-    std::vector<ModuleBase::matrix> vec2mat(const std::vector<std::vector<double>>& v, const int nr, const int nc);
-    std::vector<ModuleBase::ComplexMatrix> vec2mat(const std::vector<std::vector<std::complex<double>>>& v, const int nr, const int nc);
 
     ///===================Psi wrapper=================
     /// psi(nk=1, nbands=nb, nk * nbasis) -> psi(nb, nk, nbasis) without memory copy
diff --git a/source/module_lr/utils/lr_util.hpp b/source/module_lr/utils/lr_util.hpp
index 97a5113eb7..9fda5ccb8d 100644
--- a/source/module_lr/utils/lr_util.hpp
+++ b/source/module_lr/utils/lr_util.hpp
@@ -32,7 +32,7 @@ namespace LR_Util
     /// @param size1
     /// @param size2
     template <typename T>
-    void new_p2(T**& p2, size_t size1, size_t size2)
+    void _allocate_2order_nested_ptr(T**& p2, size_t size1, size_t size2)
     {
         p2 = new T * [size1];
         for (size_t i = 0; i < size1; ++i)
@@ -46,7 +46,7 @@ namespace LR_Util
     /// @param p2 
     /// @param size 
     template <typename T>
-    void delete_p2(T** p2, size_t size)
+    void _deallocate_2order_nested_ptr(T** p2, size_t size)
     {
         if (p2 != nullptr)
         {
diff --git a/source/module_lr/utils/lr_util_xc.hpp b/source/module_lr/utils/lr_util_xc.hpp
new file mode 100644
index 0000000000..a270482ba7
--- /dev/null
+++ b/source/module_lr/utils/lr_util_xc.hpp
@@ -0,0 +1,48 @@
+#pragma once
+#include "lr_util.h"
+namespace LR_Util
+{
+    template<typename T>
+    void grad(const T* rhor,
+        ModuleBase::Vector3<T>* gradrho,
+        const ModulePW::PW_Basis& rho_basis,
+        const double& tpiba)
+    {
+        std::vector<typename ToComplex<T>::type> rhog(rho_basis.npw);
+        rho_basis.real2recip(rhor, rhog.data());
+        XC_Functional::grad_rho(rhog.data(), gradrho, &rho_basis, tpiba);
+    }
+    template<typename T>
+    void grad(const std::vector<T>& rhor,
+        std::vector<ModuleBase::Vector3<T>>& gradrho,
+        const ModulePW::PW_Basis& rho_basis,
+        const double& tpiba)
+    {
+        grad(rhor.data(), gradrho.data(), rho_basis, tpiba);
+    }
+
+    template<typename T>
+    void lapl(const T* rhor, T* lapn,
+        const ModulePW::PW_Basis& rho_basis,
+        const double& tpiba2)
+    {
+        ModuleBase::GlobalFunc::ZEROS(lapn, rho_basis.nrxx);
+        std::vector<typename ToComplex<T>::type> rhog(rho_basis.npw);
+        std::vector<T> tmp_rhor(rho_basis.nrxx);
+        rho_basis.real2recip(rhor, rhog.data());
+        for (int i = 0;i < 3;++i)
+        {
+            for (int ig = 0; ig < rho_basis.npw; ig++) { rhog[ig] *= pow(rho_basis.gcar[ig][i], 2); }
+            rho_basis.recip2real(rhog.data(), tmp_rhor.data());
+            for (int ir = 0; ir < rho_basis.nrxx; ir++) { lapn[ir] -= tmp_rhor[ir] * tpiba2; }
+        }
+    }
+    template<typename T>
+    void lapl(const std::vector<T>& rhor,
+        std::vector<T>& lapn,
+        const ModulePW::PW_Basis& rho_basis,
+        const double& tpiba2)
+    {
+        lapl(rhor.data(), lapn.data(), rho_basis, tpiba2);
+    }
+}
diff --git a/source/module_lr/utils/test/CMakeLists.txt b/source/module_lr/utils/test/CMakeLists.txt
index 53d1db607e..fde01fa69b 100644
--- a/source/module_lr/utils/test/CMakeLists.txt
+++ b/source/module_lr/utils/test/CMakeLists.txt
@@ -1,7 +1,7 @@
 remove_definitions(-DUSE_LIBXC)
 AddTest(
   TARGET lr_util_phys_test
-  LIBS parameter base ${math_libs} device container
+  LIBS parameter base ${math_libs} device container planewave #for FFT
   SOURCES lr_util_physics_test.cpp  ../lr_util.cpp
   ../../../module_basis/module_ao/parallel_2d.cpp
   ../../../module_io/orb_io.cpp
@@ -9,7 +9,7 @@ AddTest(
 
 AddTest(
   TARGET lr_util_algo_test
-  LIBS parameter base ${math_libs} device psi container
+  LIBS parameter base ${math_libs} device psi container planewave #for FFT
   SOURCES lr_util_algorithms_test.cpp ../lr_util.cpp 
   ../../../module_basis/module_ao/parallel_2d.cpp
 )
\ No newline at end of file
diff --git a/source/module_parameter/input_parameter.h b/source/module_parameter/input_parameter.h
index bfd9ae91cb..ac4d74fca7 100644
--- a/source/module_parameter/input_parameter.h
+++ b/source/module_parameter/input_parameter.h
@@ -299,6 +299,7 @@ struct Input_para
 
     // ==============   #Parameters (10.lr-tddft) ===========================
     int lr_nstates = 1; ///< the number of 2-particle states to be solved
+    std::vector<std::string> lr_init_xc_kernel = {};    ///< The method to initalize the xc kernel
     int nocc = -1;      ///< the number of occupied orbitals to form the 2-particle basis
     int nvirt = 1;      ///< the number of virtual orbitals to form the 2-particle basis (nocc + nvirt <= nbands)
     std::string xc_kernel = "LDA"; ///< exchange correlation (XC) kernel for LR-TDDFT