A-006
diff --git a/‎docs/advanced/input_files/input-main.md‎
Lines changed: 15 additions & 3 deletions b/‎docs/advanced/input_files/input-main.md‎
Lines changed: 15 additions & 3 deletions
diff --git a/‎source/module_esolver/esolver_ks_pw.cpp‎
Lines changed: 4 additions & 5 deletions b/‎source/module_esolver/esolver_ks_pw.cpp‎
Lines changed: 4 additions & 5 deletions
diff --git a/‎source/module_io/cal_ldos.cpp‎
Lines changed: 267 additions & 49 deletions b/‎source/module_io/cal_ldos.cpp‎
Lines changed: 267 additions & 49 deletions
@@ -177,6 +177,7 @@
     - [dos\_emax\_ev](#dos_emax_ev)
     - [dos\_nche](#dos_nche)
     - [stm\_bias](#stm_bias)
+    - [ldos\_line](#ldos_line)
   - [NAOs](#naos)
     - [bessel\_nao\_ecut](#bessel_nao_ecut)
     - [bessel\_nao\_tolerence](#bessel_nao_tolerence)
@@ -1705,9 +1706,13 @@ These variables are used to control the output of properties.
 
 ### out_ldos
 
-- **Type**: Boolean
-- **Description**: Whether to output the local density of states for given bias in cube file format, which is controlled by [stm_bias](#stm_bias). 
-- **Default**: False
+- **Type**: Integer
+- **Description**: Whether to output the local density of states (LDOS), optionally output precision can be set by a second parameter, default is 3.
+  - 0: no output
+  - 1: output the partial charge density for given bias (controlled by [stm_bias](#stm_bias)) in cube file format, which can be used to plot scanning tunneling spectroscopys to mimick STM images using the Python script [plot.py](../../../tools/stm/plot.py).
+  - 2: output LDOS along a line in real space (controlled by [ldos_line](#ldos_line)). Parameters used to control DOS output are also valid for LDOS.
+  - 3: output both two LDOS modes above.
+- **Default**: 0
 
 ### out_band
 
@@ -1986,6 +1991,13 @@ These variables are used to control the calculation of DOS. [Detailed introducti
 - **Default**: 1.0
 - **Unit**: V
 
+### ldos_line
+
+- **Type**: Real*6 Integer(optional)
+- **Description**: Specify the path of the three-dimensional space and display LDOS in the form of a two-dimensional color chart, see details in [out_ldos](#out_ldos). The first three paramenters are the direct coordinates of the start point, the next three paramenters are the direct coordinates of the end point, and the final one is the number of points along the path, whose default is 100.
+- **Default**: 0.0 0.0 0.0 0.0 0.0 1.0 100
+
+
 [back to top](#full-list-of-input-keywords)
 
 ## NAOs
 
@@ -947,11 +947,10 @@ void ESolver_KS_PW<T, Device>::after_all_runners(UnitCell& ucell)
     //----------------------------------------------------------
     if (PARAM.inp.out_ldos[0])
     {
-        ModuleIO::Cal_ldos<std::complex<double>>::cal_ldos_pw(
-            reinterpret_cast<elecstate::ElecStatePW<std::complex<double>>*>(this->pelec),
-            this->psi[0],
-            this->Pgrid,
-            ucell);
+        ModuleIO::cal_ldos_pw(reinterpret_cast<elecstate::ElecStatePW<std::complex<double>>*>(this->pelec),
+                              this->psi[0],
+                              this->Pgrid,
+                              ucell);
     }
 
     //----------------------------------------------------------
 
@@ -1,5 +1,6 @@
 #include "cal_ldos.h"
 
+#include "cal_dos.h"
 #include "cube_io.h"
 #include "module_elecstate/module_dm/cal_dm_psi.h"
 #include "module_hamilt_lcao/module_gint/temp_gint/gint_interface.h"
@@ -8,54 +9,6 @@
 
 namespace ModuleIO
 {
-template <typename T>
-void Cal_ldos<T>::cal_ldos_pw(const elecstate::ElecStatePW<std::complex<double>>* pelec,
-                              const psi::Psi<std::complex<double>>& psi,
-                              const Parallel_Grid& pgrid,
-                              const UnitCell& ucell)
-{
-    for (int ie = 0; ie < PARAM.inp.stm_bias[2]; ie++)
-    {
-        // energy range for ldos (efermi as reference)
-        const double en = PARAM.inp.stm_bias[0] + ie * PARAM.inp.stm_bias[1];
-        const double emin = en < 0 ? en : 0;
-        const double emax = en > 0 ? en : 0;
-
-        std::vector<double> ldos(pelec->charge->nrxx);
-        std::vector<std::complex<double>> wfcr(pelec->basis->nrxx);
-
-        for (int ik = 0; ik < pelec->klist->get_nks(); ++ik)
-        {
-            psi.fix_k(ik);
-            const double efermi = pelec->eferm.get_efval(pelec->klist->isk[ik]);
-            int nbands = psi.get_nbands();
-
-            for (int ib = 0; ib < nbands; ib++)
-            {
-                pelec->basis->recip2real(&psi(ib, 0), wfcr.data(), ik);
-
-                const double eigenval = (pelec->ekb(ik, ib) - efermi) * ModuleBase::Ry_to_eV;
-                double weight = en > 0 ? pelec->klist->wk[ik] - pelec->wg(ik, ib) : pelec->wg(ik, ib);
-                weight /= ucell.omega;
-
-                if (eigenval >= emin && eigenval <= emax)
-                {
-                    for (int ir = 0; ir < pelec->basis->nrxx; ir++)
-                    {
-                        ldos[ir] += weight * norm(wfcr[ir]);
-                    }
-                }
-            }
-        }
-
-        std::stringstream fn;
-        fn << PARAM.globalv.global_out_dir << "LDOS_" << en << "eV"
-           << ".cube";
-
-        const int precision = PARAM.inp.out_ldos[1];
-        ModuleIO::write_vdata_palgrid(pgrid, ldos.data(), 0, PARAM.inp.nspin, 0, fn.str(), 0, &ucell, precision, 0);
-    }
-}
 
 #ifdef __LCAO
 template <typename T>
@@ -147,4 +100,269 @@ void Cal_ldos<T>::cal_ldos_lcao(const elecstate::ElecStateLCAO<T>* pelec,
 
 template class Cal_ldos<double>;               // Gamma_only case
 template class Cal_ldos<std::complex<double>>; // multi-k case
-} // namespace ModuleIO 
+
+// pw case
+void cal_ldos_pw(const elecstate::ElecStatePW<std::complex<double>>* pelec,
+                 const psi::Psi<std::complex<double>>& psi,
+                 const Parallel_Grid& pgrid,
+                 const UnitCell& ucell)
+{
+    if (PARAM.inp.out_ldos[0] == 1 || PARAM.inp.out_ldos[0] == 3)
+    {
+        ModuleIO::stm_mode_pw(pelec, psi, pgrid, ucell);
+    }
+    if (PARAM.inp.out_ldos[0] == 2 || PARAM.inp.out_ldos[0] == 3)
+    {
+        ModuleIO::ldos_mode_pw(pelec, psi, pgrid, ucell);
+    }
+}
+
+void stm_mode_pw(const elecstate::ElecStatePW<std::complex<double>>* pelec,
+                 const psi::Psi<std::complex<double>>& psi,
+                 const Parallel_Grid& pgrid,
+                 const UnitCell& ucell)
+{
+    for (int ie = 0; ie < PARAM.inp.stm_bias[2]; ie++)
+    {
+        // energy range for ldos (efermi as reference)
+        const double en = PARAM.inp.stm_bias[0] + ie * PARAM.inp.stm_bias[1];
+        const double emin = en < 0 ? en : 0;
+        const double emax = en > 0 ? en : 0;
+
+        std::vector<double> ldos(pelec->charge->nrxx);
+        std::vector<std::complex<double>> wfcr(pelec->basis->nrxx);
+
+        for (int ik = 0; ik < pelec->klist->get_nks(); ++ik)
+        {
+            psi.fix_k(ik);
+            const double efermi = pelec->eferm.get_efval(pelec->klist->isk[ik]);
+            const int nbands = psi.get_nbands();
+
+            for (int ib = 0; ib < nbands; ib++)
+            {
+                pelec->basis->recip2real(&psi(ib, 0), wfcr.data(), ik);
+
+                const double eigenval = (pelec->ekb(ik, ib) - efermi) * ModuleBase::Ry_to_eV;
+                double weight = en > 0 ? pelec->klist->wk[ik] - pelec->wg(ik, ib) : pelec->wg(ik, ib);
+                weight /= ucell.omega;
+
+                if (eigenval >= emin && eigenval <= emax)
+                {
+                    for (int ir = 0; ir < pelec->basis->nrxx; ir++)
+                    {
+                        ldos[ir] += weight * norm(wfcr[ir]);
+                    }
+                }
+            }
+        }
+
+        std::stringstream fn;
+        fn << PARAM.globalv.global_out_dir << "LDOS_" << en << "eV"
+           << ".cube";
+
+        const int precision = PARAM.inp.out_ldos[1];
+        ModuleIO::write_vdata_palgrid(pgrid, ldos.data(), 0, PARAM.inp.nspin, 0, fn.str(), 0, &ucell, precision, 0);
+    }
+}
+
+void ldos_mode_pw(const elecstate::ElecStatePW<std::complex<double>>* pelec,
+                  const psi::Psi<std::complex<double>>& psi,
+                  const Parallel_Grid& pgrid,
+                  const UnitCell& ucell)
+{
+    double emax = 0.0;
+    double emin = 0.0;
+
+    prepare_dos(GlobalV::ofs_running,
+                pelec->eferm,
+                pelec->ekb,
+                pelec->klist->get_nks(),
+                PARAM.inp.nbands,
+                PARAM.inp.dos_edelta_ev,
+                PARAM.inp.dos_scale,
+                emax,
+                emin);
+
+    const int ndata = static_cast<int>((emax - emin) / PARAM.inp.dos_edelta_ev) + 1;
+    const double sigma = sqrt(2.0) * PARAM.inp.dos_sigma;
+    const double sigma2 = sigma * sigma;
+    const double sigma_PI = sqrt(ModuleBase::PI) * sigma;
+
+    std::vector<double> start = {PARAM.inp.ldos_line[0], PARAM.inp.ldos_line[1], PARAM.inp.ldos_line[2]};
+    std::vector<double> end = {PARAM.inp.ldos_line[3], PARAM.inp.ldos_line[4], PARAM.inp.ldos_line[5]};
+    const int npoints = PARAM.inp.ldos_line[6];
+
+    // calculate grid points
+    std::vector<std::vector<int>> points(npoints, std::vector<int>(3, 0));
+    std::vector<std::vector<double>> shifts(npoints, std::vector<double>(3, 0));
+    get_grid_points(start, end, npoints, pgrid.nx, pgrid.ny, pgrid.nz, points, shifts);
+
+    std::vector<std::vector<double>> ldos(npoints, std::vector<double>(ndata, 0));
+
+    // calculate ldos
+    std::vector<double> tmp(pelec->charge->nrxx);
+    std::vector<std::complex<double>> wfcr(pelec->basis->nrxx);
+    for (int ik = 0; ik < pelec->klist->get_nks(); ++ik)
+    {
+        psi.fix_k(ik);
+        const double efermi = pelec->eferm.get_efval(pelec->klist->isk[ik]);
+        const int nbands = psi.get_nbands();
+
+        for (int ib = 0; ib < nbands; ib++)
+        {
+            pelec->basis->recip2real(&psi(ib, 0), wfcr.data(), ik);
+            const double weight = pelec->klist->wk[ik] / ucell.omega;
+
+            for (int ir = 0; ir < pelec->basis->nrxx; ir++)
+            {
+                tmp[ir] += weight * norm(wfcr[ir]);
+            }
+
+            std::vector<double> results(npoints, 0);
+            trilinear_interpolate(points, shifts, pgrid, tmp, results);
+
+            const double eigenval = pelec->ekb(ik, ib) * ModuleBase::Ry_to_eV;
+
+            for (int ie = 0; ie < ndata; ++ie)
+            {
+                const double en = emin + ie * PARAM.inp.dos_edelta_ev;
+                const double de = en - eigenval;
+                const double de2 = de * de;
+                const double gauss = exp(-de2 / sigma2) / sigma_PI;
+                for (int ip = 0; ip < npoints; ++ip)
+                {
+                    ldos[ip][ie] += results[ip] * gauss;
+                }
+            }
+        }
+    }
+
+    std::ofstream ofs_ldos;
+    std::stringstream fn;
+    fn << PARAM.globalv.global_out_dir << "LDOS.txt";
+    if (GlobalV::MY_RANK == 0)
+    {
+        ofs_ldos.open(fn.str().c_str());
+
+        for (int ip = 0; ip < npoints; ++ip)
+        {
+            for (int ie = 0; ie < ndata; ++ie)
+            {
+                ofs_ldos << ldos[ip][ie] << "  ";
+            }
+            ofs_ldos << std::endl;
+        }
+        ofs_ldos.close();
+    }
+}
+
+void get_grid_points(const std::vector<double>& start,
+                     const std::vector<double>& end,
+                     const int& npoints,
+                     const int& nx,
+                     const int& ny,
+                     const int& nz,
+                     std::vector<std::vector<int>>& points,
+                     std::vector<std::vector<double>>& shifts)
+{
+    std::vector<int> ndim = {nx, ny, nz};
+    auto grid_points = [](const std::vector<double>& coor,
+                          const std::vector<int>& ndim,
+                          std::vector<int>& points,
+                          std::vector<double>& shift) {
+        for (int i = 0; i < 3; i++)
+        {
+            shift[i] = coor[i] * ndim[i];
+            while (shift[i] >= ndim[i])
+            {
+                shift[i] -= ndim[i];
+            }
+            while (shift[i] < 0)
+            {
+                shift[i] += ndim[i];
+            }
+            points[i] = static_cast<int>(shift[i]);
+            shift[i] -= points[i];
+        }
+    };
+
+    if (npoints == 1)
+    {
+        grid_points(start, ndim, points[0], shifts[0]);
+    }
+    else
+    {
+        std::vector<double> delta = {end[0] - start[0], end[1] - start[1], end[2] - start[2]};
+        for (int i = 0; i < npoints; i++)
+        {
+            const double ratio = static_cast<double>(i) / (npoints - 1);
+            std::vector<double> current = {0, 0, 0};
+            for (int j = 0; j < 3; j++)
+            {
+                current[j] = start[j] + ratio * delta[j];
+            }
+            grid_points(current, ndim, points[i], shifts[i]);
+        }
+    }
+}
+
+void trilinear_interpolate(const std::vector<std::vector<int>>& points,
+                           const std::vector<std::vector<double>>& shifts,
+                           const Parallel_Grid& pgrid,
+                           const std::vector<double>& data,
+                           std::vector<double>& results)
+{
+    const int nx = pgrid.nx;
+    const int ny = pgrid.ny;
+    const int nz = pgrid.nz;
+    const int nyz = ny * nz;
+    const int nxyz = nx * ny * nz;
+
+    // reduce
+    std::vector<double> data_full(nxyz);
+#ifdef __MPI
+    if (GlobalV::MY_POOL == 0 && GlobalV::MY_BNDGROUP == 0)
+    {
+        pgrid.reduce(data_full.data(), data.data());
+    }
+    MPI_Barrier(MPI_COMM_WORLD);
+#else
+    std::memcpy(data_full.data(), data.data(), nxyz * sizeof(double));
+#endif
+
+    auto grid_points = [&data_full, &nyz, &nz](const int& ix, const int& iy, const int& iz) {
+        return data_full[ix * nyz + iy * nz + iz];
+    };
+
+    // trilinear interpolation
+    const int npoints = points.size();
+    results.resize(npoints, 0.0);
+    if (GlobalV::MY_RANK == 0)
+    {
+        for (int l = 0; l < npoints; ++l)
+        {
+            for (int i = 0; i < 2; ++i)
+            {
+                double weight = (i * shifts[l][0] + (1 - i) * (1 - shifts[l][0]));
+                for (int j = 0; j < 2; ++j)
+                {
+                    weight *= (j * shifts[l][1] + (1 - j) * (1 - shifts[l][1]));
+                    for (int k = 0; k < 2; ++k)
+                    {
+                        weight *= (k * shifts[l][2] + (1 - k) * (1 - shifts[l][2]));
+
+                        const int ix = points[l][0] + i;
+                        const int iy = points[l][1] + j;
+                        const int iz = points[l][2] + k;
+                        results[l] += weight * grid_points(ix, iy, iz);
+                    }
+                }
+            }
+        }
+    }
+#ifdef __MPI
+    MPI_Bcast(results.data(), npoints, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+#endif
+}
+
+} // namespace ModuleIO