Merged · Changes from 17 commits
14 changes: 13 additions & 1 deletion docs/advanced/input_files/input-main.md
@@ -39,6 +39,7 @@
- [pw\_diag\_thr](#pw_diag_thr)
- [pw\_diag\_nmax](#pw_diag_nmax)
- [pw\_diag\_ndim](#pw_diag_ndim)
- [diag\_subspace](#diag_subspace)
- [erf\_ecut](#erf_ecut)
- [fft\_mode](#fft_mode)
- [erf\_height](#erf_height)
@@ -787,7 +788,18 @@ These variables are used to control the plane wave related parameters.

- **Type**: Integer
- **Description**: Only used when `ks_solver = dav` or `ks_solver = dav_subspace`. It sets the dimension of the workspace (the number of wavefunction packets; at least 2 are needed) for the Davidson method. A larger value may reduce the number of iterations, but it uses more memory and more CPU time in the subspace diagonalization.
- **Default**: 4

### diag_subspace

- **Type**: Integer
- **Description**: The method used to diagonalize the subspace in the `dav_subspace` method (see the example `INPUT` fragment below). The available options are:
  - 0: LAPACK
  - 1: GenELPA
  - 2: ScaLAPACK

  LAPACK solves on a single core only, while GenELPA and ScaLAPACK solve in parallel. For small systems (e.g., fewer than 100 bands), LAPACK is recommended. For large systems run with MPI parallelism, GenELPA or ScaLAPACK is recommended, and GenELPA usually performs better. For GenELPA and ScaLAPACK, the block size can be set via [nb2d](#nb2d).

- **Default**: 0
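
For example, a minimal `INPUT` fragment that selects GenELPA for the subspace diagonalization could look like the following (the values are illustrative, not recommendations):

```
INPUT_PARAMETERS
ks_solver       dav_subspace
diag_subspace   1    # 0: LAPACK, 1: GenELPA, 2: ScaLAPACK
nb2d            16   # 2D block size for GenELPA/ScaLAPACK; 0 = automatic
```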

### erf_ecut

3 changes: 3 additions & 0 deletions python/pyabacus/src/hsolver/CMakeLists.txt
@@ -5,6 +5,9 @@ list(APPEND _diago
${HSOLVER_PATH}/diago_cg.cpp
${HSOLVER_PATH}/diag_const_nums.cpp
${HSOLVER_PATH}/diago_iter_assist.cpp
${HSOLVER_PATH}/diag_hs_para.cpp
${HSOLVER_PATH}/diago_pxxxgvx.cpp


${HSOLVER_PATH}/kernels/dngvd_op.cpp
${HSOLVER_PATH}/kernels/math_kernel_op.cpp
8 changes: 6 additions & 2 deletions python/pyabacus/src/hsolver/py_diago_dav_subspace.hpp
@@ -108,7 +108,9 @@ class PyDiagoDavSubspace
bool need_subspace,
std::vector<double>& diag_ethr,
bool scf_type,
hsolver::diag_comm_info comm_info
hsolver::diag_comm_info comm_info,
int diag_subspace,
int nb2d
) {
auto hpsi_func = [mm_op] (
std::complex<double> *psi_in,
@@ -138,7 +140,9 @@
tol,
max_iter,
need_subspace,
comm_info
comm_info,
diag_subspace,
nb2d
);

return obj->diag(hpsi_func, psi, nbasis, eigenvalue, diag_ethr, scf_type);
11 changes: 10 additions & 1 deletion python/pyabacus/src/hsolver/py_hsolver.cpp
@@ -67,6 +67,13 @@ void bind_hsolver(py::module& m)
where the initial precision of eigenvalue calculation can be coarse.
If false, it indicates a non-self-consistent field (non-SCF) calculation,
where high precision in eigenvalue calculation is required from the start.
comm_info : diag_comm_info
The communicator information.
diago_subspace : int
The method used to solve the generalized eigenvalue problem in the subspace.
0: LAPACK, 1: GenELPA, 2: ScaLAPACK.
nb2d : int
The block size of the 2D block-cyclic distribution when GenELPA or ScaLAPACK is used.
)pbdoc",
"mm_op"_a,
"precond_vec"_a,
@@ -76,7 +83,9 @@
"need_subspace"_a,
"diag_ethr"_a,
"scf_type"_a,
"comm_info"_a)
"comm_info"_a,
"diago_subspace"_a,
"nb2d"_a)
.def("set_psi", &py_hsolver::PyDiagoDavSubspace::set_psi, R"pbdoc(
Set the initial guess of the eigenvectors, i.e. the wave functions.
)pbdoc", "psi_in"_a)
13 changes: 11 additions & 2 deletions python/pyabacus/src/pyabacus/hsolver/_hsolver.py
@@ -34,7 +34,9 @@ def dav_subspace(
max_iter: int = 1000,
need_subspace: bool = False,
diag_ethr: Union[List[float], None] = None,
scf_type: bool = False
scf_type: bool = False,
diag_subspace: int = 0,
nb2d: int = 0
) -> Tuple[NDArray[np.float64], NDArray[np.complex128]]:
""" A function to diagonalize a matrix using the Davidson-Subspace method.

@@ -67,6 +69,11 @@
If True, the initial precision of eigenvalue calculation can be coarse.
If False, it indicates a non-self-consistent field (non-SCF) calculation,
where high precision in eigenvalue calculation is required from the start.
diag_subspace : int, optional
The method used to diagonalize the subspace, by default 0.
0: LAPACK, 1: GenELPA, 2: ScaLAPACK.
nb2d : int, optional
The block size of the 2D block-cyclic distribution, by default 0, in which case it is set automatically.

Returns
-------
@@ -101,7 +108,9 @@
need_subspace,
diag_ethr,
scf_type,
comm_info
comm_info,
diag_subspace,
nb2d
)

e = _diago_obj_dav_subspace.get_eigenvalue()
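A minimal usage sketch of the new keywords follows; the leading positional arguments (operator, initial guess, dimension, number of bands, preconditioner) and the import path are assumptions inferred from this diff, not a verified signature:

```python
import numpy as np
from pyabacus import hsolver  # assumed import path

n, nband = 200, 8
rng = np.random.default_rng(0)
A = rng.standard_normal((n, n)) + 1j * rng.standard_normal((n, n))
H = (A + A.conj().T) / 2  # Hermitian test matrix

def mm_op(x):
    # block matrix-vector product expected by dav_subspace
    return H @ x

v0 = np.eye(n, nband, dtype=np.complex128)   # initial guess vectors
precond = np.abs(np.diag(H).real) + 1.0      # crude diagonal preconditioner

e, v = hsolver.dav_subspace(
    mm_op, v0, n, nband, precond,
    diag_subspace=2,  # 2: solve the subspace problem with ScaLAPACK
    nb2d=0,           # 0: the 2D block size is chosen automatically
)
print(e)
```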
2 changes: 2 additions & 0 deletions source/Makefile.Objects
@@ -338,6 +338,8 @@ OBJS_HSOLVER=diago_cg.o\
math_kernel_op.o\
dngvd_op.o\
diag_const_nums.o\
diag_hs_para.o\
diago_pxxxgvx.o\

OBJS_HSOLVER_LCAO=hsolver_lcao.o\
diago_scalapack.o\
2 changes: 1 addition & 1 deletion source/module_base/blacs_connector.h
@@ -39,7 +39,7 @@ extern "C"
// Informational and Miscellaneous
void Cblacs_gridinfo(int icontxt, int* nprow, int *npcol, int *myprow, int *mypcol);
void Cblacs_gridinit(int* icontxt, char* layout, int nprow, int npcol);
void Cblacs_gridexit(int* icontxt);
void Cblacs_gridexit(int icontxt);
int Cblacs_pnum(int icontxt, int prow, int pcol);
void Cblacs_pcoord(int icontxt, int pnum, int *prow, int *pcol);
void Cblacs_exit(int icontxt);
14 changes: 14 additions & 0 deletions source/module_base/scalapack_connector.h
@@ -80,12 +80,26 @@ extern "C"
const double* vl, const double* vu, const int* il, const int* iu,
const double* abstol, int* m, int* nz, double* w, const double*orfac, double* Z, const int* iz, const int* jz, const int*descz,
double* work, int* lwork, int*iwork, int*liwork, int* ifail, int*iclustr, double*gap, int* info);

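// Generalized eigensolver expert drivers for the remaining type/precision
// combinations, mirroring pdsygvx_ above: pzhegvx_ (complex double),
// pssygvx_ (real single), pchegvx_ (complex single).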
void pzhegvx_(const int* itype, const char* jobz, const char* range, const char* uplo,
const int* n, std::complex<double>* A, const int* ia, const int* ja, const int*desca, std::complex<double>* B, const int* ib, const int* jb, const int*descb,
const double* vl, const double* vu, const int* il, const int* iu,
const double* abstol, int* m, int* nz, double* w, const double*orfac, std::complex<double>* Z, const int* iz, const int* jz, const int*descz,
std::complex<double>* work, int* lwork, double* rwork, int* lrwork, int*iwork, int*liwork, int* ifail, int*iclustr, double*gap, int* info);

void pssygvx_(const int* itype, const char* jobz, const char* range, const char* uplo,
const int* n, float* A, const int* ia, const int* ja, const int*desca, float* B, const int* ib, const int* jb, const int*descb,
const float* vl, const float* vu, const int* il, const int* iu,
const float* abstol, int* m, int* nz, float* w, const float*orfac, float* Z, const int* iz, const int* jz, const int*descz,
float* work, int* lwork, int*iwork, int*liwork, int* ifail, int*iclustr, float*gap, int* info);

void pchegvx_(const int* itype, const char* jobz, const char* range, const char* uplo,
const int* n, std::complex<float>* A, const int* ia, const int* ja, const int*desca, std::complex<float>* B, const int* ib, const int* jb, const int*descb,
const float* vl, const float* vu, const int* il, const int* iu,
const float* abstol, int* m, int* nz, float* w, const float*orfac, std::complex<float>* Z, const int* iz, const int* jz, const int*descz,
std::complex<float>* work, int* lwork, float* rwork, int* lrwork, int*iwork, int*liwork, int* ifail, int*iclustr, float*gap, int* info);


void pzgetri_(
const int *n,
const std::complex<double> *A, const int *ia, const int *ja, const int *desca,
3 changes: 3 additions & 0 deletions source/module_hsolver/CMakeLists.txt
@@ -9,6 +9,9 @@ list(APPEND objects
hsolver_pw_sdft.cpp
diago_iter_assist.cpp
hsolver.cpp
diago_pxxxgvx.cpp
diag_hs_para.cpp

)

if(ENABLE_LCAO)
205 changes: 205 additions & 0 deletions source/module_hsolver/diag_hs_para.cpp
@@ -0,0 +1,205 @@
#include "module_hsolver/diag_hs_para.h"

#include "module_base/scalapack_connector.h"
#include "module_base/parallel_2d.h"
#include "module_hsolver/diago_pxxxgvx.h"

#ifdef __ELPA
#include "module_hsolver/genelpa/elpa_solver.h"
#endif

#include <iostream>

namespace hsolver
{

#ifdef __ELPA
void elpa_diag(MPI_Comm comm,
const int nband,
std::complex<double>* h_local,
std::complex<double>* s_local,
double* ekb,
std::complex<double>* wfc_2d,
Parallel_2D& para2d_local)
{
int DecomposedState = 0;
ELPA_Solver es(false, comm, nband, para2d_local.get_row_size(), para2d_local.get_col_size(), para2d_local.desc);
es.generalized_eigenvector(h_local, s_local, DecomposedState, ekb, wfc_2d);
es.exit();
}

void elpa_diag(MPI_Comm comm,
const int nband,
double* h_local,
double* s_local,
double* ekb,
double* wfc_2d,
Parallel_2D& para2d_local)
{
int DecomposedState = 0;
ELPA_Solver es(true, comm, nband, para2d_local.get_row_size(), para2d_local.get_col_size(), para2d_local.desc);
es.generalized_eigenvector(h_local, s_local, DecomposedState, ekb, wfc_2d);
es.exit();
}

void elpa_diag(MPI_Comm comm,
const int nband,
std::complex<float>* h_local,
std::complex<float>* s_local,
float* ekb,
std::complex<float>* wfc_2d,
Parallel_2D& para2d_local)
{
std::cout << "Error: ELPA do not support single precision. " << std::endl;
exit(1);
}

void elpa_diag(MPI_Comm comm,
const int nband,
float* h_local,
float* s_local,
float* ekb,
float* wfc_2d,
Parallel_2D& para2d_local)
{
std::cout << "Error: ELPA do not support single precision. " << std::endl;
exit(1);
}

#endif

#ifdef __MPI

template <typename T>
void Diago_HS_para(T* h,
T* s,
const int lda,
const int nband,
typename GetTypeReal<T>::type* const ekb,
T* const wfc,
const MPI_Comm& comm,
const int diag_subspace,
const int block_size)
{
int myrank = 0;
MPI_Comm_rank(comm, &myrank);
Parallel_2D para2d_global;
Parallel_2D para2d_local;
para2d_global.init(lda, lda, lda, comm);
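// With block size = lda, this global layout stores the entire lda x lda
// matrix in a single block, owned by the process at grid coordinate (0, 0).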

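// Choose the 2D block-cyclic block size: take the caller's block_size if it
// is positive, otherwise fall back to a heuristic based on the problem size.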
int max_nb = block_size;
if (block_size == 0)
{
if (nband > 500)
{
max_nb = 32;
}
else
{
max_nb = 16;
}
}
else if (block_size < 0)
{
std::cout << "Error: block_size in diago_subspace should be a positive integer. " << std::endl;
exit(1);
}

// For GenELPA, a block size so large that some cores hold no local data causes an error, so cap it here.
if (diag_subspace == 1)
{
if (max_nb * (std::max(para2d_global.dim0, para2d_global.dim1) - 1) >= lda)
{
max_nb = lda / std::max(para2d_global.dim0, para2d_global.dim1);
}
}

para2d_local.init(lda, lda, max_nb, comm);
std::vector<T> h_local(para2d_local.get_col_size() * para2d_local.get_row_size());
std::vector<T> s_local(para2d_local.get_col_size() * para2d_local.get_row_size());
std::vector<T> wfc_2d(para2d_local.get_col_size() * para2d_local.get_row_size());

// redistribute h and s from the global single-block layout to the 2D block-cyclic layout
Cpxgemr2d(lda, lda, h, 1, 1, para2d_global.desc, h_local.data(), 1, 1, para2d_local.desc, para2d_local.blacs_ctxt);
Cpxgemr2d(lda, lda, s, 1, 1, para2d_global.desc, s_local.data(), 1, 1, para2d_local.desc, para2d_local.blacs_ctxt);

if (diag_subspace == 1)
{
#ifdef __ELPA
elpa_diag(comm, nband, h_local.data(), s_local.data(), ekb, wfc_2d.data(), para2d_local);
#else
std::cout << "ERROR: try to use ELPA to solve the generalized eigenvalue problem, but ELPA is not compiled. "
<< std::endl;
exit(1);
#endif
}
else if (diag_subspace == 2)
{
hsolver::pxxxgvx_diag(para2d_local.desc,
para2d_local.get_row_size(),
para2d_local.get_col_size(),
nband,
h_local.data(),
s_local.data(),
ekb,
wfc_2d.data());
}
else
{
std::cout << "Error: parallel diagonalization method is not supported. " << "diag_subspace = " << diag_subspace
<< std::endl;
exit(1);
}

// gather the 2D-distributed eigenvectors back into the global layout
Cpxgemr2d(lda, lda, wfc_2d.data(), 1, 1, para2d_local.desc, wfc, 1, 1, para2d_global.desc, para2d_local.blacs_ctxt);

// free the context
Cblacs_gridexit(para2d_local.blacs_ctxt);
Cblacs_gridexit(para2d_global.blacs_ctxt);
}

// template instantiation
template void Diago_HS_para<double>(double* h,
double* s,
const int lda,
const int nband,
typename GetTypeReal<double>::type* const ekb,
double* const wfc,
const MPI_Comm& comm,
const int diag_subspace,
const int block_size);

template void Diago_HS_para<std::complex<double>>(std::complex<double>* h,
std::complex<double>* s,
const int lda,
const int nband,
typename GetTypeReal<std::complex<double>>::type* const ekb,
std::complex<double>* const wfc,
const MPI_Comm& comm,
const int diag_subspace,
const int block_size);

template void Diago_HS_para<float>(float* h,
float* s,
const int lda,
const int nband,
typename GetTypeReal<float>::type* const ekb,
float* const wfc,
const MPI_Comm& comm,
const int diag_subspace,
const int block_size);

template void Diago_HS_para<std::complex<float>>(std::complex<float>* h,
std::complex<float>* s,
const int lda,
const int nband,
typename GetTypeReal<std::complex<float>>::type* const ekb,
std::complex<float>* const wfc,
const MPI_Comm& comm,
const int diag_subspace,
const int block_size);

#endif

} // namespace hsolver