Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/advanced/input_files/input-main.md
Original file line number Diff line number Diff line change
Expand Up @@ -4231,6 +4231,7 @@ Currently supported: `RPA`, `LDA`, `PBE`, `HSE`, `HF`.
- **Description**: The method to solve the Casida equation $AX=\Omega X$ in LR-TDDFT under Tamm-Dancoff approximation (TDA), where $A_{ai,bj}=(\epsilon_a-\epsilon_i)\delta_{ij}\delta_{ab}+(ai|f_{Hxc}|bj)+\alpha_{EX}(ab|ij)$ is the particle-hole excitation matrix and $X$ is the transition amplitude.
- `dav`/`dav_subspace`/ `cg`: Construct $AX$ and diagonalize the Hamiltonian matrix iteratively with Davidson/Non-ortho-Davidson/CG algorithm.
- `lapack`: Construct the full $A$ matrix and directly diagonalize with LAPACK.
- `scalapack`: Construct the full $A$ matrix and directly diagonalize with ScaLAPACK (needs MPI).
- `spectrum`: Calculate absorption spectrum only without solving Casida equation. The `OUT.${suffix}/` directory should contain the
files for LR-TDDFT eigenstates and eigenvalues, i.e. `Excitation_Energy.dat` and `Excitation_Amplitude_${processor_rank}.dat`
output by setting `out_wfc_lr` to true.
Expand Down
39 changes: 33 additions & 6 deletions source/module_base/scalapack_connector.h
Original file line number Diff line number Diff line change
Expand Up @@ -99,8 +99,30 @@ extern "C"
const float* abstol, int* m, int* nz, float* w, const float*orfac, std::complex<float>* Z, const int* iz, const int* jz, const int*descz,
std::complex<float>* work, int* lwork, float* rwork, int* lrwork, int*iwork, int*liwork, int* ifail, int*iclustr, float*gap, int* info);

void pdsyev_(const char* jobz, const char* uplo, const int* n,
double* a, const int* ia, const int* ja, const int* desca,
double* w, double* z, const int* iz, const int* jz, const int* descz,
double* work, const int* lwork, int* info);

void pzgetri_(
void pzheev_(const char* jobz, const char* uplo, const int* n,
std::complex<double>* a, const int* ia, const int* ja, const int* desca,
double* w, std::complex<double>* z, const int* iz, const int* jz, const int* descz,
std::complex<double>* work, const int* lwork, double* rwork, const int* lrwork, int* info);

void pzheevd_(const char* jobz, const char* uplo, const int* n,
std::complex<double>* a, const int* ia, const int* ja, const int* desca,
double* w, std::complex<double>* z, const int* iz, const int* jz, const int* descz,
std::complex<double>* work, const int* lwork, double* rwork, const int* lrwork, int* iwork, const int* liwork, int* info);

void pzheevx_(const char* jobz, const char* range, const char* uplo, const int* n,
std::complex<double>* a, const int* ia, const int* ja, const int* desca,
const double* vl, const double* vu, const int* il, const int* iu, const double* abstol,
int* m, int* nz, double* w, const double* orfac,
std::complex<double>* z, const int* iz, const int* jz, const int* descz,
std::complex<double>* work, const int* lwork, double* rwork, const int* lrwork, int* iwork, const int* liwork,
int* ifail, int* iclustr, double* gap, int* info);

void pzgetri_(
const int *n,
const std::complex<double> *A, const int *ia, const int *ja, const int *desca,
int *ipiv, const std::complex<double> *work, const int *lwork, const int *iwork, const int *liwork, const int *info);
Expand Down Expand Up @@ -171,11 +193,16 @@ extern "C"
template <typename T>
typename std::enable_if<block2d_data_type<T>::value,void>::type Cpxgemr2d(int M, int N, T *A, int IA, int JA, int *DESCA, T *B, int IB, int JB, int *DESCB, int ICTXT)
{
if (std::is_same<T,double>::value) Cpdgemr2d(M, N, reinterpret_cast<double*>(A),IA, JA, DESCA,reinterpret_cast<double*>(B),IB,JB, DESCB,ICTXT);
if (std::is_same<T,std::complex<double>>::value) Cpzgemr2d(M, N, reinterpret_cast<std::complex<double>*>(A),IA, JA, DESCA,reinterpret_cast<std::complex<double>*>(B),IB,JB, DESCB,ICTXT);
if (std::is_same<T,float>::value) Cpsgemr2d(M, N, reinterpret_cast<float*>(A),IA, JA, DESCA,reinterpret_cast<float*>(B),IB,JB, DESCB,ICTXT);
if (std::is_same<T,std::complex<float>>::value) Cpcgemr2d(M, N, reinterpret_cast<std::complex<float>*>(A),IA, JA, DESCA,reinterpret_cast<std::complex<float>*>(B),IB,JB, DESCB,ICTXT);
if (std::is_same<T,int>::value) Cpigemr2d(M, N, reinterpret_cast<int*>(A),IA, JA, DESCA,reinterpret_cast<int*>(B),IB,JB, DESCB,ICTXT);
if (std::is_same<T,double>::value) { Cpdgemr2d(M, N, reinterpret_cast<double*>(A),IA, JA, DESCA,reinterpret_cast<double*>(B),IB,JB, DESCB,ICTXT);
}
if (std::is_same<T,std::complex<double>>::value) { Cpzgemr2d(M, N, reinterpret_cast<std::complex<double>*>(A),IA, JA, DESCA,reinterpret_cast<std::complex<double>*>(B),IB,JB, DESCB,ICTXT);
}
if (std::is_same<T,float>::value) { Cpsgemr2d(M, N, reinterpret_cast<float*>(A),IA, JA, DESCA,reinterpret_cast<float*>(B),IB,JB, DESCB,ICTXT);
}
if (std::is_same<T,std::complex<float>>::value) { Cpcgemr2d(M, N, reinterpret_cast<std::complex<float>*>(A),IA, JA, DESCA,reinterpret_cast<std::complex<float>*>(B),IB,JB, DESCB,ICTXT);
}
if (std::is_same<T,int>::value) { Cpigemr2d(M, N, reinterpret_cast<int*>(A),IA, JA, DESCA,reinterpret_cast<int*>(B),IB,JB, DESCB,ICTXT);
}
};


Expand Down
2 changes: 1 addition & 1 deletion source/module_lr/esolver_lrtd_lcao.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ inline void setup_2center_table(TwoCenterBundle& two_center_bundle, LCAO_Orbital
template<typename T, typename TR>
void LR::ESolver_LR<T, TR>::parameter_check()const
{
const std::set<std::string> lr_solvers = { "dav", "lapack" , "spectrum", "dav_subspace", "cg" };
const std::set<std::string> lr_solvers = { "dav", "lapack" , "scalapack", "spectrum", "dav_subspace", "cg" };
const std::set<std::string> xc_kernels = { "rpa", "lda", "pwlda", "pbe", "hf" , "hse" };
if (lr_solvers.find(this->input.lr_solver) == lr_solvers.end()) {
throw std::invalid_argument("ESolver_LR: unknown type of lr_solver");
Expand Down
46 changes: 40 additions & 6 deletions source/module_lr/hsolver_lrtd.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,11 @@
#include "module_lr/utils/lr_util.h"
#include "module_lr/utils/lr_util_print.h"
#include "module_base/module_container/ATen/core/tensor_map.h"

#include <chrono>
namespace LR
{
template<typename T> using Real = typename GetTypeReal<T>::type;

using iclock = std::chrono::high_resolution_clock;
namespace HSolver
{
template<typename T>
Expand Down Expand Up @@ -54,6 +54,9 @@ namespace LR
std::vector<T> Amat_full = hm.matrix();
const int gdim = std::sqrt(Amat_full.size());
eigenvalue.resize(gdim);

iclock::time_point start_time = iclock::now();

if (hermitian) { LR_Util::diag_lapack(gdim, Amat_full.data(), eigenvalue.data()); }
else
{
Expand All @@ -64,7 +67,39 @@ namespace LR
}
// copy eigenvectors
hm.global2local(psi, Amat_full.data(), nband);

iclock::time_point end_time = iclock::now();
std::chrono::duration<double> elapsed_time
= std::chrono::duration_cast<std::chrono::duration<double>>(end_time - start_time);
std::cout << " Time elapsed diagonalizing A matrix: " << elapsed_time.count() << std::endl;

}
#ifdef __MPI
else if (method == "scalapack")
{
std::vector<T> Amat_full = hm.matrix();
const int gdim = std::sqrt(Amat_full.size());
eigenvalue.resize(gdim);
Parallel_2D para_cvk;
LR_Util::setup_2d_division(para_cvk, 1, gdim, gdim);
std::vector<T> Amat_local(para_cvk.get_local_size());
LR_Util::set_local_from_global(para_cvk, Amat_full.data(), Amat_local.data());
std::vector<T> eigvec_local_paracvk(para_cvk.get_local_size());

iclock::time_point start_time = iclock::now();

LR_Util::diag_scalapack(gdim, Amat_local.data(), eigenvalue.data(), eigvec_local_paracvk.data(), para_cvk.desc);
// copy eigenvectors
LR_Util::gather_2d_to_full(para_cvk, eigvec_local_paracvk.data(), Amat_full.data(), false, gdim, gdim);
hm.global2local(psi, Amat_full.data(), nband);

iclock::time_point end_time = iclock::now();
std::chrono::duration<double> elapsed_time
= std::chrono::duration_cast<std::chrono::duration<double>>(end_time - start_time);
std::cout << " Time elapsed diagonalizing A matrix: " << elapsed_time.count() << std::endl;

}
#endif
else
{
// 3. set maxiter and funcs
Expand Down Expand Up @@ -156,6 +191,9 @@ namespace LR
cg.diag(hpsi_func, spsi_func, psi_tensor, eigen_tensor, ethr_band, precon_tensor);
}
else { throw std::runtime_error("HSolverLR::solve: method not implemented"); }
// output iters
std::cout << "Average iterative diagonalization steps: " << hsolver::DiagoIterAssist<T>::avg_iter
<< " ; where current threshold is: " << hsolver::DiagoIterAssist<T>::PW_DIAG_THR << " . " << std::endl;
}

// 5. copy eigenvalues
Expand All @@ -180,10 +218,6 @@ namespace LR
// }
// std::cout << "state " << ist << ", norm2=" << norm2 << std::endl;
// }

// output iters
std::cout << "Average iterative diagonalization steps: " << hsolver::DiagoIterAssist<T>::avg_iter
<< " ; where current threshold is: " << hsolver::DiagoIterAssist<T>::PW_DIAG_THR << " . " << std::endl;
}
}
}
128 changes: 128 additions & 0 deletions source/module_lr/utils/lr_util.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,134 @@ namespace LR_Util
if (info) { std::cout << "ERROR: Lapack solver zgeev, info=" << info << std::endl; }
}

#ifdef __MPI
void diag_scalapack(const int& n, double* mat, double* eigval, double* eigvec, const int(&desc)[9])
{
ModuleBase::TITLE("LR_Util", "diag_scalapack<double>");
const char jobz = 'V', uplo = 'U';
const int minus_one = -1;
const int i1 = 1;
int info = 0;
double lwork_tmp = 0.0;
pdsyev_(&jobz, &uplo, &n,
mat, &i1, &i1, desc,
eigval, eigvec, &i1, &i1, desc,
&lwork_tmp, &minus_one, &info); // get the optimal size of work into lwork
const int lwork = lwork_tmp;
std::vector<double> work(lwork);
pdsyev_(&jobz, &uplo, &n,
mat, &i1, &i1, desc,
eigval, eigvec, &i1, &i1, desc,
work.data(), &lwork, &info);
if (info) { std::cout << "ERROR: Scalapack solver, info=" << info << std::endl; }
}
void diag_scalapack(const int& n, std::complex<double>* mat, double* eigval, std::complex<double>* eigvec, const int(&desc)[9])
{
ModuleBase::TITLE("LR_Util", "diag_lapack<complex<double>>");
const char jobz = 'V', uplo = 'U';
const int minus_one = -1;
const int i1 = 1;
int info = 0;
std::vector<std::complex<double>> work(1, 0.0);
std::vector<double>rwork(1, 0.0);
// pzheev_(&jobz, &uplo, &n,
// mat, &i1, &i1, desc,
// eigval, eigvec, &i1, &i1, desc,
// work.data(), &minus_one, rwork.data(), &minus_one, &info); // get the optimal workspace size
/// try pzheevd
// int liwork = 0;
// pzheevd_(&jobz, &uplo, &n,
// mat, &i1, &i1, desc,
// eigval, eigvec, &i1, &i1, desc,
// &lwork_tmp, &minus_one, &lrwork_tmp, &minus_one, &liwork, &minus_one, &info); // get the optimal workspace size

// try pzheevx
const char range = 'A';
const double zero = 0.0;
double abstol = 0.0;
int nz = n;
std::vector<int> iwork(1, 0);
std::vector<int> ifail(n, 0);
std::vector<int> iclustr(2 * GlobalV::DSIZE);
std::vector<double> gap(GlobalV::DSIZE);
pzheevx_(&jobz, &range, &uplo, &n,
mat, &i1, &i1, desc,
&zero, &zero, &i1, &i1, &zero,
&nz, &nz, eigval, &zero,
eigvec, &i1, &i1, desc,
work.data(), &minus_one, rwork.data(), &minus_one, iwork.data(), &minus_one,
ifail.data(), iclustr.data(), gap.data(), &info);

const int lwork = work.at(0).real();
work.resize(lwork);
const int lrwork = rwork.at(0);
rwork.resize(lrwork);
const int liwork = iwork.at(0);
iwork.resize(liwork);
// std::cout << "pzheevx: query result: lwork=" << work.at(0) << ", lrwork=" << rwork.at(0) << ", liwork=" << iwork.at(0) << std::endl;

// pzheev_(&jobz, &uplo, &n,
// mat, &i1, &i1, desc,
// eigval, eigvec, &i1, &i1, desc,
// work.data(), &lwork, rwork.data(), &lrwork, &info);
// std::vector<int> iwork(liwork);
// pzheevd_(&jobz, &uplo, &n,
// mat, &i1, &i1, desc,
// eigval, eigvec, &i1, &i1, desc,
// work.data(), &lwork, rwork.data(), &lrwork, iwork.data(), &liwork, &info);
pzheevx_(&jobz, &range, &uplo, &n,
mat, &i1, &i1, desc,
&zero, &zero, &i1, &i1, &zero,
&nz, &nz, eigval, &zero,
eigvec, &i1, &i1, desc,
work.data(), &lwork, rwork.data(), &lrwork, iwork.data(), &liwork,
ifail.data(), iclustr.data(), gap.data(), &info);
if (info) { std::cout << "ERROR: Scalapack solver, info=" << info << std::endl; }
}

void diag_scalapack(const int& n, std::complex<double>* hmat, std::complex<double>* const smat, double* eigval, std::complex<double>* eigvec, const int(&desc)[9])
{
ModuleBase::TITLE("LR_Util", "diag_lapack<complex<double>>");
const char jobz = 'V', uplo = 'U', range = 'A';
int minus_one = -1;
const int i1 = 1;
const double zero = 0.0;
int info = 0;
double abstol = 0.0;
int nz = n;
std::vector<std::complex<double>> work(1, 0.0);
std::vector<double>rwork(1, 0.0);
std::vector<int> iwork(1, 0);
std::vector<int> ifail(n, 0);
std::vector<int> iclustr(2 * GlobalV::DSIZE);
std::vector<double> gap(GlobalV::DSIZE);
pzhegvx_(&i1, &jobz, &range, &uplo, &n,
hmat, &i1, &i1, desc, smat, &i1, &i1, desc,
&zero, &zero, &i1, &i1, &zero,
&nz, &nz, eigval, &zero,
eigvec, &i1, &i1, desc,
work.data(), &minus_one, rwork.data(), &minus_one, iwork.data(), &minus_one,
ifail.data(), iclustr.data(), gap.data(), &info);

int lwork = work.at(0).real();
work.resize(lwork);
int lrwork = rwork.at(0);
rwork.resize(lrwork);
int liwork = iwork.at(0);
iwork.resize(liwork);
// std::cout << "pzhegvx: query result: lwork=" << work.at(0) << ", lrwork=" << rwork.at(0) << ", liwork=" << iwork.at(0) << std::endl;
pzhegvx_(&i1, &jobz, &range, &uplo, &n,
hmat, &i1, &i1, desc,
smat, &i1, &i1, desc,
&zero, &zero, &i1, &i1, &zero,
&nz, &nz, eigval, &zero,
eigvec, &i1, &i1, desc,
work.data(), &lwork, rwork.data(), &lrwork, iwork.data(), &liwork,
ifail.data(), iclustr.data(), gap.data(), &info);
if (info) { std::cout << "ERROR: Scalapack solver, info=" << info << std::endl; }
}
#endif

std::string tolower(const std::string& str)
{
std::string str_lower = str;
Expand Down
8 changes: 7 additions & 1 deletion source/module_lr/utils/lr_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,8 @@ namespace LR_Util
/// the defination of row and col is consistent with setup_2d_division
template <typename T>
void gather_2d_to_full(const Parallel_2D& pv, const T* submat, T* fullmat, bool col_first, int global_nrow, int global_ncol);
template <typename T>
void set_local_from_global(const Parallel_2D& pv, const T* global, T* local);
#endif

///=================diago-lapack====================
Expand All @@ -103,7 +105,11 @@ namespace LR_Util
/// @brief diagonalize a general matrix
void diag_lapack_nh(const int& n, double* mat, std::complex<double>* eig);
void diag_lapack_nh(const int& n, std::complex<double>* mat, std::complex<double>* eig);

#ifdef __MPI
void diag_scalapack(const int& n, double* mat, double* eigval, double* eigvec, const int(&desc)[9]);
void diag_scalapack(const int& n, std::complex<double>* mat, double* eigval, std::complex<double>* eigvec, const int(&desc)[9]);
void diag_scalapack(const int& n, std::complex<double>* hmat, std::complex<double>* smat, double* eigval, std::complex<double>* eigvec, const int(&desc)[9]);
#endif
///=================string option====================
std::string tolower(const std::string& str);
std::string toupper(const std::string& str);
Expand Down
12 changes: 12 additions & 0 deletions source/module_lr/utils/lr_util.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,18 @@ namespace LR_Util
//reduce to root
MPI_Allreduce(MPI_IN_PLACE, fullmat, global_nrow * global_ncol, get_mpi_datatype(), MPI_SUM, pv.comm());
};

template <typename T>
void set_local_from_global(const Parallel_2D& pv, const T* global, T* local)
{
for (int c = 0;c < pv.get_col_size();++c)
{
for (int r = 0;r < pv.get_row_size();++r)
{
local[c * pv.get_row_size() + r] = global[pv.local2global_col(c) * pv.get_global_row_size() + pv.local2global_row(r)];
}
}
}
#endif

}
2 changes: 1 addition & 1 deletion source/module_lr/utils/lr_util_print.h
Original file line number Diff line number Diff line change
Expand Up @@ -201,7 +201,7 @@ namespace LR_Util
const int& iat2 = tmp2.first.first;
const auto& R = tmp2.first.second;
auto& t = tmp2.second;
if (R != TC({ 0, 0, 0 })) {continue;} // for test
// if (R != TC({ 0, 0, 0 })) {continue;} // for test
std::cout << "iat1=" << iat1 << " iat2=" << iat2 << " R=(" << R[0] << " " << R[1] << " " << R[2] << ")\n";
if (t.shape.size() == 2)
{
Expand Down
Loading
Loading