diff --git a/source/source_base/gather_math_lib_info.cpp b/source/source_base/gather_math_lib_info.cpp index 825aaa3163..3cd89a94eb 100644 --- a/source/source_base/gather_math_lib_info.cpp +++ b/source/source_base/gather_math_lib_info.cpp @@ -60,7 +60,7 @@ void zhegvx_i(const int *itype, const int *il, const int *iu, const double *abstol, - const int *m, + int *m, double *w, std::complex *z, const int *ldz, diff --git a/source/source_base/global_function.h b/source/source_base/global_function.h index 7981fb79bd..3d6f1e35be 100644 --- a/source/source_base/global_function.h +++ b/source/source_base/global_function.h @@ -182,17 +182,17 @@ inline void DCOPY(const T* a, T* b, const int& dim) { } template -inline void COPYARRAY(const T* a, T* b, const long dim); +inline void COPYARRAY(const T* a, T* b, const int dim); template <> -inline void COPYARRAY(const std::complex* a, std::complex* b, const long dim) +inline void COPYARRAY(const std::complex* a, std::complex* b, const int dim) { const int one = 1; zcopy_(&dim, a, &one, b, &one); } template <> -inline void COPYARRAY(const double* a, double* b, const long dim) +inline void COPYARRAY(const double* a, double* b, const int dim) { const int one = 1; dcopy_(&dim, a, &one, b, &one); diff --git a/source/source_base/module_container/base/third_party/blas.h b/source/source_base/module_container/base/third_party/blas.h index 5c73032e05..b36df5a39b 100644 --- a/source/source_base/module_container/base/third_party/blas.h +++ b/source/source_base/module_container/base/third_party/blas.h @@ -25,8 +25,8 @@ void daxpy_(const int *N, const double *alpha, const double *x, const int *incx, void caxpy_(const int *N, const std::complex *alpha, const std::complex *x, const int *incx, std::complex *y, const int *incy); void zaxpy_(const int *N, const std::complex *alpha, const std::complex *x, const int *incx, std::complex *y, const int *incy); -void dcopy_(long const *n, const double *a, int const *incx, double *b, int const *incy); -void zcopy_(long const *n, const std::complex *a, int const *incx, std::complex *b, int const *incy); +void dcopy_(const int *n, const double *a, const int *incx, double *b, const int *incy); +void zcopy_(const int *n, const std::complex *a, const int *incx, std::complex *b, const int *incy); //reason for passing results as argument instead of returning it: //see https://www.numbercrunch.de/blog/2014/07/lost-in-translation/ @@ -107,14 +107,26 @@ void dsymm_(const char *side, const char *uplo, const int *m, const int *n, const double *alpha, const double *a, const int *lda, const double *b, const int *ldb, const double *beta, double *c, const int *ldc); //a is hermitian -void zhemm_(char *side, char *uplo, int *m, int *n,std::complex *alpha, - std::complex *a, int *lda, std::complex *b, int *ldb, std::complex *beta, std::complex *c, int *ldc); +void zhemm_(const char *side, const char *uplo, + const int *m, const int *n, + const std::complex *alpha, + const std::complex *a, const int *lda, + const std::complex *b, const int *ldb, + const std::complex *beta, + std::complex *c, const int *ldc); //solving triangular matrix with multiple right hand sides -void dtrsm_(char *side, char* uplo, char *transa, char *diag, int *m, int *n, - double* alpha, double* a, int *lda, double*b, int *ldb); -void ztrsm_(char *side, char* uplo, char *transa, char *diag, int *m, int *n, - std::complex* alpha, std::complex* a, int *lda, std::complex*b, int *ldb); +void dtrsm_(const char *side, const char *uplo, const char *transa, const char *diag, + const int *m, const int *n, + const double *alpha, + const double *a, const int *lda, + double *b, const int *ldb); + +void ztrsm_(const char *side, const char *uplo, const char *transa, const char *diag, + const int *m, const int *n, + const std::complex *alpha, + const std::complex *a, const int *lda, + std::complex *b, const int *ldb); } @@ -339,12 +351,12 @@ double nrm2( const int n, const std::complex *x, const int incx ) // copies a into b static inline -void copy(const long n, const double *a, const int incx, double *b, const int incy) +void copy(const int n, const double *a, const int incx, double *b, const int incy) { dcopy_(&n, a, &incx, b, &incy); } static inline -void copy(const long n, const std::complex *a, const int incx, std::complex *b, const int incy) +void copy(const int n, const std::complex *a, const int incx, std::complex *b, const int incy) { zcopy_(&n, a, &incx, b, &incy); } diff --git a/source/source_base/module_container/base/third_party/lapack.h b/source/source_base/module_container/base/third_party/lapack.h index 6e272d2b29..bfb19eaca4 100644 --- a/source/source_base/module_container/base/third_party/lapack.h +++ b/source/source_base/module_container/base/third_party/lapack.h @@ -41,124 +41,159 @@ extern "C" { -int ilaenv_(int* ispec,const char* name,const char* opts, - const int* n1,const int* n2,const int* n3,const int* n4); +// ILAENV - environment inquiry +int ilaenv_(const int* ispec, const char* name, const char* opts, + const int* n1, const int* n2, const int* n3, const int* n4); - -// solve the generalized eigenproblem Ax=eBx, where A is Hermitian and complex couble -// zhegv_ & zhegvd_ returns all eigenvalues while zhegvx_ returns selected ones +// Generalized symmetric-definite eigenproblems (divide-and-conquer) void ssygvd_(const int* itype, const char* jobz, const char* uplo, const int* n, float* a, const int* lda, - const float* b, const int* ldb, float* w, - float* work, int* lwork, - int* iwork, int* liwork, int* info); + float* b, const int* ldb, + float* w, + float* work, const int* lwork, + int* iwork, const int* liwork, + int* info); void dsygvd_(const int* itype, const char* jobz, const char* uplo, const int* n, double* a, const int* lda, - const double* b, const int* ldb, double* w, - double* work, int* lwork, - int* iwork, int* liwork, int* info); + double* b, const int* ldb, + double* w, + double* work, const int* lwork, + int* iwork, const int* liwork, + int* info); void chegvd_(const int* itype, const char* jobz, const char* uplo, const int* n, std::complex* a, const int* lda, - const std::complex* b, const int* ldb, float* w, - std::complex* work, int* lwork, float* rwork, int* lrwork, - int* iwork, int* liwork, int* info); + std::complex* b, const int* ldb, + float* w, + std::complex* work, const int* lwork, + float* rwork, const int* lrwork, + int* iwork, const int* liwork, + int* info); +void zhegvd_(const int* itype, const char* jobz, const char* uplo, const int* n, + std::complex* a, const int* lda, + std::complex* b, const int* ldb, + double* w, + std::complex* work, const int* lwork, + double* rwork, const int* lrwork, + int* iwork, const int* liwork, + int* info); + +// Generalized symmetric-definite eigenproblems (selected eigenvalues/vectors) void ssygvx_(const int* itype, const char* jobz, const char* range, const char* uplo, const int* n, float* A, const int* lda, float* B, const int* ldb, const float* vl, const float* vu, const int* il, const int* iu, - const float* abstol, const int* m, float* w, float* Z, const int* ldz, + const float* abstol, int* m, float* w, float* Z, const int* ldz, float* work, const int* lwork, int* iwork, int* ifail, int* info); + void dsygvx_(const int* itype, const char* jobz, const char* range, const char* uplo, const int* n, double* A, const int* lda, double* B, const int* ldb, const double* vl, const double* vu, const int* il, const int* iu, - const double* abstol, const int* m, double* w, double* Z, const int* ldz, + const double* abstol, int* m, double* w, double* Z, const int* ldz, double* work, const int* lwork, int* iwork, int* ifail, int* info); + void chegvx_(const int* itype, const char* jobz, const char* range, const char* uplo, const int* n, std::complex* A, const int* lda, std::complex* B, const int* ldb, const float* vl, const float* vu, const int* il, const int* iu, - const float* abstol, const int* m, float* w, std::complex* Z, const int* ldz, + const float* abstol, int* m, float* w, std::complex* Z, const int* ldz, std::complex* work, const int* lwork, float* rwork, int* iwork, int* ifail, int* info); + void zhegvx_(const int* itype, const char* jobz, const char* range, const char* uplo, const int* n, std::complex* A, const int* lda, std::complex* B, const int* ldb, const double* vl, const double* vu, const int* il, const int* iu, - const double* abstol, const int* m, double* w, std::complex* Z, const int* ldz, + const double* abstol, int* m, double* w, std::complex* Z, const int* ldz, std::complex* work, const int* lwork, double* rwork, int* iwork, int* ifail, int* info); -void zhegvd_(const int* itype, const char* jobz, const char* uplo, const int* n, - std::complex* a, const int* lda, - const std::complex* b, const int* ldb, double* w, - std::complex* work, int* lwork, double* rwork, int* lrwork, - int* iwork, int* liwork, int* info); - +// Standard symmetric eigenproblems (selected) void ssyevx_(const char* jobz, const char* range, const char* uplo, const int* n, - float *a, const int* lda, - const float* vl, const float* vu, const int* il, const int* iu, const float* abstol, - const int* m, float* w, float *z, const int *ldz, - float *work, const int* lwork, float* rwork, int* iwork, int* ifail, int* info); + float* a, const int* lda, + const float* vl, const float* vu, const int* il, const int* iu, + const float* abstol, int* m, float* w, float* z, const int* ldz, + float* work, const int* lwork, float* rwork, int* iwork, int* ifail, int* info); + void dsyevx_(const char* jobz, const char* range, const char* uplo, const int* n, - double *a, const int* lda, - const double* vl, const double* vu, const int* il, const int* iu, const double* abstol, - const int* m, double* w, double *z, const int *ldz, - double *work, const int* lwork, double* rwork, int* iwork, int* ifail, int* info); + double* a, const int* lda, + const double* vl, const double* vu, const int* il, const int* iu, + const double* abstol, int* m, double* w, double* z, const int* ldz, + double* work, const int* lwork, int* iwork, int* ifail, int* info); + void cheevx_(const char* jobz, const char* range, const char* uplo, const int* n, - std::complex *a, const int* lda, - const float* vl, const float* vu, const int* il, const int* iu, const float* abstol, - const int* m, float* w, std::complex *z, const int *ldz, - std::complex *work, const int* lwork, float* rwork, int* iwork, int* ifail, int* info); + std::complex* a, const int* lda, + const float* vl, const float* vu, const int* il, const int* iu, + const float* abstol, int* m, float* w, std::complex* z, const int* ldz, + std::complex* work, const int* lwork, float* rwork, int* iwork, int* ifail, int* info); + void zheevx_(const char* jobz, const char* range, const char* uplo, const int* n, - std::complex *a, const int* lda, - const double* vl, const double* vu, const int* il, const int* iu, const double* abstol, - const int* m, double* w, std::complex *z, const int *ldz, - std::complex *work, const int* lwork, double* rwork, int* iwork, int* ifail, int* info); - -void ssyevd_(const char *jobz, const char *uplo, const int *n, - float *a, const int *lda, float *w, - float *work, int *lwork, - int *iwork, int *liwork, int *info); -void dsyevd_(const char *jobz, const char *uplo, const int *n, - double *a, const int *lda, double *w, - double *work, int *lwork, - int *iwork, int *liwork, int *info); -void cheevd_(const char *jobz, const char *uplo, const int *n, - std::complex *a, const int *lda, float *w, - std::complex *work, int *lwork, float *rwork, int *lrwork, - int *iwork, int *liwork, int *info); -void zheevd_(const char *jobz, const char *uplo, const int *n, - std::complex *a, const int *lda, double *w, - std::complex *work, int *lwork, double *rwork, int *lrwork, - int *iwork, int *liwork, int *info); - -void spotrf_(const char*const uplo, const int*const n, float*const A, const int*const lda, int*const info); -void dpotrf_(const char*const uplo, const int*const n, double*const A, const int*const lda, int*const info); -void cpotrf_(const char*const uplo, const int*const n, std::complex*const A, const int*const lda, int*const info); -void zpotrf_(const char*const uplo, const int*const n, std::complex*const A, const int*const lda, int*const info); - -void spotri_(const char*const uplo, const int*const n, float*const A, const int*const lda, int*const info); -void dpotri_(const char*const uplo, const int*const n, double*const A, const int*const lda, int*const info); -void cpotri_(const char*const uplo, const int*const n, std::complex*const A, const int*const lda, int*const info); -void zpotri_(const char*const uplo, const int*const n, std::complex*const A, const int*const lda, int*const info); + std::complex* a, const int* lda, + const double* vl, const double* vu, const int* il, const int* iu, + const double* abstol, int* m, double* w, std::complex* z, const int* ldz, + std::complex* work, const int* lwork, double* rwork, int* iwork, int* ifail, int* info); +// Standard symmetric eigenproblems (divide-and-conquer) +void ssyevd_(const char* jobz, const char* uplo, const int* n, + float* a, const int* lda, float* w, + float* work, const int* lwork, + int* iwork, const int* liwork, int* info); + +void dsyevd_(const char* jobz, const char* uplo, const int* n, + double* a, const int* lda, double* w, + double* work, const int* lwork, + int* iwork, const int* liwork, int* info); + +void cheevd_(const char* jobz, const char* uplo, const int* n, + std::complex* a, const int* lda, float* w, + std::complex* work, const int* lwork, float* rwork, const int* lrwork, + int* iwork, const int* liwork, int* info); + +void zheevd_(const char* jobz, const char* uplo, const int* n, + std::complex* a, const int* lda, double* w, + std::complex* work, const int* lwork, double* rwork, const int* lrwork, + int* iwork, const int* liwork, int* info); + +// Cholesky factorization +void spotrf_(const char* uplo, const int* n, float* A, const int* lda, int* info); +void dpotrf_(const char* uplo, const int* n, double* A, const int* lda, int* info); +void cpotrf_(const char* uplo, const int* n, std::complex* A, const int* lda, int* info); +void zpotrf_(const char* uplo, const int* n, std::complex* A, const int* lda, int* info); + +// Inverse using Cholesky factorization +void spotri_(const char* uplo, const int* n, float* A, const int* lda, int* info); +void dpotri_(const char* uplo, const int* n, double* A, const int* lda, int* info); +void cpotri_(const char* uplo, const int* n, std::complex* A, const int* lda, int* info); +void zpotri_(const char* uplo, const int* n, std::complex* A, const int* lda, int* info); + +// Inverse of triangular matrix void strtri_(const char* uplo, const char* diag, const int* n, float* a, const int* lda, int* info); void dtrtri_(const char* uplo, const char* diag, const int* n, double* a, const int* lda, int* info); void ctrtri_(const char* uplo, const char* diag, const int* n, std::complex* a, const int* lda, int* info); void ztrtri_(const char* uplo, const char* diag, const int* n, std::complex* a, const int* lda, int* info); +// LU factorization void sgetrf_(const int* m, const int* n, float* a, const int* lda, int* ipiv, int* info); void dgetrf_(const int* m, const int* n, double* a, const int* lda, int* ipiv, int* info); void cgetrf_(const int* m, const int* n, std::complex* a, const int* lda, int* ipiv, int* info); void zgetrf_(const int* m, const int* n, std::complex* a, const int* lda, int* ipiv, int* info); +// Inverse using LU factorization void sgetri_(const int* n, float* A, const int* lda, const int* ipiv, float* work, const int* lwork, int* info); void dgetri_(const int* n, double* A, const int* lda, const int* ipiv, double* work, const int* lwork, int* info); void cgetri_(const int* n, std::complex* A, const int* lda, const int* ipiv, std::complex* work, const int* lwork, int* info); void zgetri_(const int* n, std::complex* A, const int* lda, const int* ipiv, std::complex* work, const int* lwork, int* info); -void sgetrs_(const char* trans, const int* n, const int* nrhs, const float* A, const int* lda, const int* ipiv, float* B, const int* ldb, int* info); -void dgetrs_(const char* trans, const int* n, const int* nrhs, const double* A, const int* lda, const int* ipiv, double* B, const int* ldb, int* info); -void cgetrs_(const char* trans, const int* n, const int* nrhs, const std::complex* A, const int* lda, const int* ipiv, std::complex* B, const int* ldb, int* info); -void zgetrs_(const char* trans, const int* n, const int* nrhs, const std::complex* A, const int* lda, const int* ipiv, std::complex* B, const int* ldb, int* info); +// Solve linear system using LU factorization +void sgetrs_(const char* trans, const int* n, const int* nrhs, + const float* A, const int* lda, const int* ipiv, + float* B, const int* ldb, int* info); +void dgetrs_(const char* trans, const int* n, const int* nrhs, + const double* A, const int* lda, const int* ipiv, + double* B, const int* ldb, int* info); +void cgetrs_(const char* trans, const int* n, const int* nrhs, + const std::complex* A, const int* lda, const int* ipiv, + std::complex* B, const int* ldb, int* info); +void zgetrs_(const char* trans, const int* n, const int* nrhs, + const std::complex* A, const int* lda, const int* ipiv, + std::complex* B, const int* ldb, int* info); } // Class LapackConnector provide the connector to fortran lapack routine. @@ -178,7 +213,7 @@ int ilaenv( int ispec, const char *name,const char *opts,const int n1,const int static inline void hegvd(const int itype, const char jobz, const char uplo, const int n, float* a, const int lda, - const float* b, const int ldb, float* w, + float* b, const int ldb, float* w, float* work, int lwork, float* rwork, int lrwork, int* iwork, int liwork, int info) { @@ -192,7 +227,7 @@ void hegvd(const int itype, const char jobz, const char uplo, const int n, static inline void hegvd(const int itype, const char jobz, const char uplo, const int n, double* a, const int lda, - const double* b, const int ldb, double* w, + double* b, const int ldb, double* w, double* work, int lwork, double* rwork, int lrwork, int* iwork, int liwork, int info) { @@ -205,7 +240,7 @@ void hegvd(const int itype, const char jobz, const char uplo, const int n, static inline void hegvd(const int itype, const char jobz, const char uplo, const int n, std::complex* a, const int lda, - const std::complex* b, const int ldb, float* w, + std::complex* b, const int ldb, float* w, std::complex* work, int lwork, float* rwork, int lrwork, int* iwork, int liwork, int info) { @@ -219,7 +254,7 @@ void hegvd(const int itype, const char jobz, const char uplo, const int n, static inline void hegvd(const int itype, const char jobz, const char uplo, const int n, std::complex* a, const int lda, - const std::complex* b, const int ldb, double* w, + std::complex* b, const int ldb, double* w, std::complex* work, int lwork, double* rwork, int lrwork, int* iwork, int liwork, int info) { @@ -239,7 +274,7 @@ static inline void hegvx(const int itype, const char jobz, const char range, const char uplo, const int n, float* a, const int lda, float* b, const int ldb, const float vl, const float vu, const int il, const int iu, const float abstol, - const int m, float* w, float* z, const int ldz, + int m, float* w, float* z, const int ldz, float* work, const int lwork, float* rwork, int* iwork, int* ifail, int& info) { ssygvx_(&itype, &jobz, &range, &uplo, &n, @@ -253,7 +288,7 @@ static inline void hegvx(const int itype, const char jobz, const char range, const char uplo, const int n, double* a, const int lda, double* b, const int ldb, const double vl, const double vu, const int il, const int iu, const double abstol, - const int m, double* w, double* z, const int ldz, + int m, double* w, double* z, const int ldz, double* work, const int lwork, double* rwork, int* iwork, int* ifail, int& info) { dsygvx_(&itype, &jobz, &range, &uplo, &n, @@ -267,7 +302,7 @@ static inline void hegvx(const int itype, const char jobz, const char range, const char uplo, const int n, std::complex* a, const int lda, std::complex* b, const int ldb, const float vl, const float vu, const int il, const int iu, const float abstol, - const int m, float* w, std::complex* z, const int ldz, + int m, float* w, std::complex* z, const int ldz, std::complex* work, const int lwork, float* rwork, int* iwork, int* ifail, int& info) { chegvx_(&itype, &jobz, &range, &uplo, &n, @@ -281,7 +316,7 @@ static inline void hegvx(const int itype, const char jobz, const char range, const char uplo, const int n, std::complex* a, const int lda, std::complex* b, const int ldb, const double vl, const double vu, const int il, const int iu, const double abstol, - const int m, double* w, std::complex* z, const int ldz, + int m, double* w, std::complex* z, const int ldz, std::complex* work, const int lwork, double* rwork, int* iwork, int* ifail, int& info) { zhegvx_(&itype, &jobz, &range, &uplo, &n, @@ -297,7 +332,7 @@ static inline void heevx(const char jobz, const char range, const char uplo, const int n, float* a, const int lda, const float vl, const float vu, const int il, const int iu, const float abstol, - const int m, float* w, float* z, const int ldz, + int m, float* w, float* z, const int ldz, float* work, const int lwork, float* rwork, int* iwork, int* ifail, int info) { ssyevx_(&jobz, &range, &uplo, &n, @@ -310,19 +345,19 @@ static inline void heevx(const char jobz, const char range, const char uplo, const int n, double* a, const int lda, const double vl, const double vu, const int il, const int iu, const double abstol, - const int m, double* w, double* z, const int ldz, + int m, double* w, double* z, const int ldz, double* work, const int lwork, double* rwork, int* iwork, int* ifail, int info) { dsyevx_(&jobz, &range, &uplo, &n, a, &lda, &vl, &vu, &il, &iu, &abstol, &m, w, z, &ldz, - work, &lwork, rwork, iwork, ifail, &info); + work, &lwork, iwork, ifail, &info); } static inline void heevx(const char jobz, const char range, const char uplo, const int n, std::complex* a, const int lda, const float vl, const float vu, const int il, const int iu, const float abstol, - const int m, float* w, std::complex* z, const int ldz, + int m, float* w, std::complex* z, const int ldz, std::complex* work, const int lwork, float* rwork, int* iwork, int* ifail, int info) { cheevx_(&jobz, &range, &uplo, &n, @@ -335,7 +370,7 @@ static inline void heevx(const char jobz, const char range, const char uplo, const int n, std::complex* a, const int lda, const double vl, const double vu, const int il, const int iu, const double abstol, - const int m, double* w, std::complex* z, const int ldz, + int m, double* w, std::complex* z, const int ldz, std::complex* work, const int lwork, double* rwork, int* iwork, int* ifail, int info) { zheevx_(&jobz, &range, &uplo, &n, diff --git a/source/source_base/module_external/blas_connector.h b/source/source_base/module_external/blas_connector.h index ea6834ce2d..920731b3f6 100644 --- a/source/source_base/module_external/blas_connector.h +++ b/source/source_base/module_external/blas_connector.h @@ -8,138 +8,192 @@ // These still need to be linked in the header file // Because quite a lot of code will directly use the original cblas kernels. +// The following declarations cover only a subset of BLAS routines. +// If you need a BLAS function that is not included here, feel free to add its declaration as needed. extern "C" { - // level 1: std::vector-std::vector operations, O(n) data and O(n) work. - - // Peize Lin add ?scal 2016-08-04, to compute x=a*x - void sscal_(const int *N, const float *alpha, float *X, const int *incX); - void dscal_(const int *N, const double *alpha, double *X, const int *incX); - void cscal_(const int *N, const std::complex *alpha, std::complex *X, const int *incX); - void zscal_(const int *N, const std::complex *alpha, std::complex *X, const int *incX); - - // Peize Lin add ?axpy 2016-08-04, to compute y=a*x+y - void saxpy_(const int *N, const float *alpha, const float *X, const int *incX, float *Y, const int *incY); - void daxpy_(const int *N, const double *alpha, const double *X, const int *incX, double *Y, const int *incY); - void caxpy_(const int *N, const std::complex *alpha, const std::complex *X, const int *incX, std::complex *Y, const int *incY); - void zaxpy_(const int *N, const std::complex *alpha, const std::complex *X, const int *incX, std::complex *Y, const int *incY); - - void scopy_(long const *n, const float *a, int const *incx, float *b, int const *incy); - void dcopy_(long const *n, const double *a, int const *incx, double *b, int const *incy); - void ccopy_(long const *n, const std::complex *a, int const *incx, std::complex *b, int const *incy); - void zcopy_(long const *n, const std::complex *a, int const *incx, std::complex *b, int const *incy); - - //reason for passing results as argument instead of returning it: - //see https://www.numbercrunch.de/blog/2014/07/lost-in-translation/ - // void zdotc_(std::complex *result, const int *n, const std::complex *zx, - // const int *incx, const std::complex *zy, const int *incy); - // Peize Lin add ?dot 2017-10-27, to compute d=x*y - float sdot_(const int *N, const float *X, const int *incX, const float *Y, const int *incY); - double ddot_(const int *N, const double *X, const int *incX, const double *Y, const int *incY); - - // Peize Lin add ?nrm2 2018-06-12, to compute out = ||x||_2 = \sqrt{ \sum_i x_i**2 } - float snrm2_( const int *n, const float *X, const int *incX ); - double dnrm2_( const int *n, const double *X, const int *incX ); - double dznrm2_( const int *n, const std::complex *X, const int *incX ); - - // symmetric rank-k update - void dsyrk_( - const char* uplo, - const char* trans, - const int* n, - const int* k, - const double* alpha, - const double* a, - const int* lda, - const double* beta, - double* c, - const int* ldc - ); - - // level 2: matrix-std::vector operations, O(n^2) data and O(n^2) work. - void sgemv_(const char*const transa, const int*const m, const int*const n, - const float*const alpha, const float*const a, const int*const lda, const float*const x, const int*const incx, - const float*const beta, float*const y, const int*const incy); - void dgemv_(const char*const transa, const int*const m, const int*const n, - const double*const alpha, const double*const a, const int*const lda, const double*const x, const int*const incx, - const double*const beta, double*const y, const int*const incy); - - void cgemv_(const char *trans, const int *m, const int *n, const std::complex *alpha, - const std::complex *a, const int *lda, const std::complex *x, const int *incx, - const std::complex *beta, std::complex *y, const int *incy); - - void zgemv_(const char *trans, const int *m, const int *n, const std::complex *alpha, - const std::complex *a, const int *lda, const std::complex *x, const int *incx, - const std::complex *beta, std::complex *y, const int *incy); - - void dsymv_(const char *uplo, const int *n, - const double *alpha, const double *a, const int *lda, - const double *x, const int *incx, - const double *beta, double *y, const int *incy); - - // A := alpha x * y.T + A - void dger_(const int* m, - const int* n, - const double* alpha, - const double* x, - const int* incx, - const double* y, - const int* incy, - double* a, - const int* lda); - void zgerc_(const int* m, - const int* n, - const std::complex* alpha, - const std::complex* x, - const int* incx, - const std::complex* y, - const int* incy, - std::complex* a, - const int* lda); - - // level 3: matrix-matrix operations, O(n^2) data and O(n^3) work. - - // Peize Lin add ?gemm 2017-10-27, to compute C = a * A.? * B.? + b * C - // A is general - void sgemm_(const char *transa, const char *transb, const int *m, const int *n, const int *k, - const float *alpha, const float *a, const int *lda, const float *b, const int *ldb, - const float *beta, float *c, const int *ldc); - void dgemm_(const char *transa, const char *transb, const int *m, const int *n, const int *k, - const double *alpha, const double *a, const int *lda, const double *b, const int *ldb, - const double *beta, double *c, const int *ldc); - void cgemm_(const char *transa, const char *transb, const int *m, const int *n, const int *k, - const std::complex *alpha, const std::complex *a, const int *lda, const std::complex *b, const int *ldb, - const std::complex *beta, std::complex *c, const int *ldc); - void zgemm_(const char *transa, const char *transb, const int *m, const int *n, const int *k, - const std::complex *alpha, const std::complex *a, const int *lda, const std::complex *b, const int *ldb, - const std::complex *beta, std::complex *c, const int *ldc); - - // A is symmetric. C = a * A.? * B.? + b * C - void ssymm_(const char *side, const char *uplo, const int *m, const int *n, - const float *alpha, const float *a, const int *lda, const float *b, const int *ldb, - const float *beta, float *c, const int *ldc); - void dsymm_(const char *side, const char *uplo, const int *m, const int *n, - const double *alpha, const double *a, const int *lda, const double *b, const int *ldb, - const double *beta, double *c, const int *ldc); - void csymm_(const char *side, const char *uplo, const int *m, const int *n, - const std::complex *alpha, const std::complex *a, const int *lda, const std::complex *b, const int *ldb, - const std::complex *beta, std::complex *c, const int *ldc); - void zsymm_(const char *side, const char *uplo, const int *m, const int *n, - const std::complex *alpha, const std::complex *a, const int *lda, const std::complex *b, const int *ldb, - const std::complex *beta, std::complex *c, const int *ldc); - - // A is hermitian. C = a * A.? * B.? + b * C - void chemm_(char *side, char *uplo, int *m, int *n,std::complex *alpha, - std::complex *a, int *lda, std::complex *b, int *ldb, std::complex *beta, std::complex *c, int *ldc); - void zhemm_(char *side, char *uplo, int *m, int *n,std::complex *alpha, - std::complex *a, int *lda, std::complex *b, int *ldb, std::complex *beta, std::complex *c, int *ldc); - - //solving triangular matrix with multiple right hand sides - void dtrsm_(char *side, char* uplo, char *transa, char *diag, int *m, int *n, - double* alpha, double* a, int *lda, double*b, int *ldb); - void ztrsm_(char *side, char* uplo, char *transa, char *diag, int *m, int *n, - std::complex* alpha, std::complex* a, int *lda, std::complex*b, int *ldb); - +// Level 1 BLAS +void sscal_(const int *N, const float *alpha, float *X, const int *incX); +void dscal_(const int *N, const double *alpha, double *X, const int *incX); +void cscal_(const int *N, const std::complex *alpha, std::complex *X, const int *incX); +void zscal_(const int *N, const std::complex *alpha, std::complex *X, const int *incX); + +void saxpy_(const int *N, const float *alpha, const float *X, const int *incX, float *Y, const int *incY); +void daxpy_(const int *N, const double *alpha, const double *X, const int *incX, double *Y, const int *incY); +void caxpy_(const int *N, const std::complex *alpha, const std::complex *X, const int *incX, std::complex *Y, const int *incY); +void zaxpy_(const int *N, const std::complex *alpha, const std::complex *X, const int *incX, std::complex *Y, const int *incY); + +void scopy_(const int *n, const float *a, const int *incx, float *b, const int *incy); +void dcopy_(const int *n, const double *a, const int *incx, double *b, const int *incy); +void ccopy_(const int *n, const std::complex *a, const int *incx, std::complex *b, const int *incy); +void zcopy_(const int *n, const std::complex *a, const int *incx, std::complex *b, const int *incy); + +// Note: sdot_/ddot_ return by value +float sdot_(const int *N, const float *X, const int *incX, const float *Y, const int *incY); +double ddot_(const int *N, const double *X, const int *incX, const double *Y, const int *incY); + +float snrm2_(const int *n, const float *X, const int *incX); +double dnrm2_(const int *n, const double *X, const int *incX); +float scnrm2_(const int *n, const std::complex *X, const int *incX); +double dznrm2_(const int *n, const std::complex *X, const int *incX); + +// Level 2 BLAS + +void sgemv_(const char *transa, const int *m, const int *n, + const float *alpha, const float *a, const int *lda, + const float *x, const int *incx, + const float *beta, float *y, const int *incy); + +void dgemv_(const char *transa, const int *m, const int *n, + const double *alpha, const double *a, const int *lda, + const double *x, const int *incx, + const double *beta, double *y, const int *incy); + +void cgemv_(const char *trans, const int *m, const int *n, + const std::complex *alpha, + const std::complex *a, const int *lda, + const std::complex *x, const int *incx, + const std::complex *beta, + std::complex *y, const int *incy); + +void zgemv_(const char *trans, const int *m, const int *n, + const std::complex *alpha, + const std::complex *a, const int *lda, + const std::complex *x, const int *incx, + const std::complex *beta, + std::complex *y, const int *incy); + +void dsymv_(const char *uplo, const int *n, + const double *alpha, const double *a, const int *lda, + const double *x, const int *incx, + const double *beta, double *y, const int *incy); + +void dger_(const int *m, const int *n, + const double *alpha, + const double *x, const int *incx, + const double *y, const int *incy, + double *a, const int *lda); + +void zgerc_(const int *m, const int *n, + const std::complex *alpha, + const std::complex *x, const int *incx, + const std::complex *y, const int *incy, + std::complex *a, const int *lda); + +// Level 3 BLAS + +void sgemm_(const char *transa, const char *transb, + const int *m, const int *n, const int *k, + const float *alpha, + const float *a, const int *lda, + const float *b, const int *ldb, + const float *beta, + float *c, const int *ldc); + +void dgemm_(const char *transa, const char *transb, + const int *m, const int *n, const int *k, + const double *alpha, + const double *a, const int *lda, + const double *b, const int *ldb, + const double *beta, + double *c, const int *ldc); + +void cgemm_(const char *transa, const char *transb, + const int *m, const int *n, const int *k, + const std::complex *alpha, + const std::complex *a, const int *lda, + const std::complex *b, const int *ldb, + const std::complex *beta, + std::complex *c, const int *ldc); + +void zgemm_(const char *transa, const char *transb, + const int *m, const int *n, const int *k, + const std::complex *alpha, + const std::complex *a, const int *lda, + const std::complex *b, const int *ldb, + const std::complex *beta, + std::complex *c, const int *ldc); + +void ssymm_(const char *side, const char *uplo, + const int *m, const int *n, + const float *alpha, + const float *a, const int *lda, + const float *b, const int *ldb, + const float *beta, + float *c, const int *ldc); + +void dsymm_(const char *side, const char *uplo, + const int *m, const int *n, + const double *alpha, + const double *a, const int *lda, + const double *b, const int *ldb, + const double *beta, + double *c, const int *ldc); + +void csymm_(const char *side, const char *uplo, + const int *m, const int *n, + const std::complex *alpha, + const std::complex *a, const int *lda, + const std::complex *b, const int *ldb, + const std::complex *beta, + std::complex *c, const int *ldc); + +void zsymm_(const char *side, const char *uplo, + const int *m, const int *n, + const std::complex *alpha, + const std::complex *a, const int *lda, + const std::complex *b, const int *ldb, + const std::complex *beta, + std::complex *c, const int *ldc); + +void chemm_(const char *side, const char *uplo, + const int *m, const int *n, + const std::complex *alpha, + const std::complex *a, const int *lda, + const std::complex *b, const int *ldb, + const std::complex *beta, + std::complex *c, const int *ldc); + +void zhemm_(const char *side, const char *uplo, + const int *m, const int *n, + const std::complex *alpha, + const std::complex *a, const int *lda, + const std::complex *b, const int *ldb, + const std::complex *beta, + std::complex *c, const int *ldc); + +void dtrsm_(const char *side, const char *uplo, const char *transa, const char *diag, + const int *m, const int *n, + const double *alpha, + const double *a, const int *lda, + double *b, const int *ldb); + +void ztrsm_(const char *side, const char *uplo, const char *transa, const char *diag, + const int *m, const int *n, + const std::complex *alpha, + const std::complex *a, const int *lda, + std::complex *b, const int *ldb); + +// === Hermitian rank-k update === +void cherk_(const char* uplo, const char* trans, const int* n, const int* k, + const float* alpha, + const std::complex* a, const int* lda, + const float* beta, + std::complex* c, const int* ldc); + +void zherk_(const char* uplo, const char* trans, const int* n, const int* k, + const double* alpha, + const std::complex* a, const int* lda, + const double* beta, + std::complex* c, const int* ldc); + +// === Symmetric rank-k update === +void dsyrk_(const char* uplo, const char* trans, const int* n, const int* k, + const double* alpha, + const double* a, const int* lda, + const double* beta, + double* c, + const int* ldc); } // Class BlasConnector provide the connector to fortran lapack routine. @@ -340,16 +394,16 @@ class BlasConnector // copies a into b static - void copy(const long n, const double *a, const int incx, double *b, const int incy, base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); + void copy(const int n, const double *a, const int incx, double *b, const int incy, base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); static - void copy(const long n, const float *a, const int incx, float *b, const int incy, base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); + void copy(const int n, const float *a, const int incx, float *b, const int incy, base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); static - void copy(const long n, const std::complex *a, const int incx, std::complex *b, const int incy, base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); + void copy(const int n, const std::complex *a, const int incx, std::complex *b, const int incy, base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); static - void copy(const long n, const std::complex *a, const int incx, std::complex *b, const int incy, base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); + void copy(const int n, const std::complex *a, const int incx, std::complex *b, const int incy, base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice); // There is some other operators needed, so implemented manually here template diff --git a/source/source_base/module_external/blas_connector_vector.cpp b/source/source_base/module_external/blas_connector_vector.cpp index 8f8091ba7d..64116d5f3c 100644 --- a/source/source_base/module_external/blas_connector_vector.cpp +++ b/source/source_base/module_external/blas_connector_vector.cpp @@ -322,7 +322,7 @@ double BlasConnector::nrm2( const int n, const std::complex *X, const in } // copies a into b -void BlasConnector::copy(const long n, const float *a, const int incx, float *b, const int incy, base_device::AbacusDevice_t device_type) +void BlasConnector::copy(const int n, const float *a, const int incx, float *b, const int incy, base_device::AbacusDevice_t device_type) { if (device_type == base_device::AbacusDevice_t::CpuDevice) { scopy_(&n, a, &incx, b, &incy); @@ -332,7 +332,7 @@ void BlasConnector::copy(const long n, const float *a, const int incx, float *b, } } -void BlasConnector::copy(const long n, const double *a, const int incx, double *b, const int incy, base_device::AbacusDevice_t device_type) +void BlasConnector::copy(const int n, const double *a, const int incx, double *b, const int incy, base_device::AbacusDevice_t device_type) { if (device_type == base_device::AbacusDevice_t::CpuDevice) { dcopy_(&n, a, &incx, b, &incy); @@ -342,7 +342,7 @@ void BlasConnector::copy(const long n, const double *a, const int incx, double * } } -void BlasConnector::copy(const long n, const std::complex *a, const int incx, std::complex *b, const int incy, base_device::AbacusDevice_t device_type) +void BlasConnector::copy(const int n, const std::complex *a, const int incx, std::complex *b, const int incy, base_device::AbacusDevice_t device_type) { if (device_type == base_device::AbacusDevice_t::CpuDevice) { ccopy_(&n, a, &incx, b, &incy); @@ -352,7 +352,7 @@ void BlasConnector::copy(const long n, const std::complex *a, const int i } } -void BlasConnector::copy(const long n, const std::complex *a, const int incx, std::complex *b, const int incy, base_device::AbacusDevice_t device_type) +void BlasConnector::copy(const int n, const std::complex *a, const int incx, std::complex *b, const int incy, base_device::AbacusDevice_t device_type) { if (device_type == base_device::AbacusDevice_t::CpuDevice) { zcopy_(&n, a, &incx, b, &incy); diff --git a/source/source_base/module_external/lapack_connector.h b/source/source_base/module_external/lapack_connector.h index ebc558e409..abd29c107c 100644 --- a/source/source_base/module_external/lapack_connector.h +++ b/source/source_base/module_external/lapack_connector.h @@ -42,173 +42,253 @@ //"xxx" specifies the type of problem, for example: // - gv stands for generalized eigenvalue +// The following declarations cover only a subset of LAPACK routines. +// If you need a LAPACK function that is not included here, feel free to add its declaration as needed. extern "C" { -// solve the generalized eigenproblem Ax=eBx, where A is Hermitian and complex couble - // zhegv_ & zhegvd_ returns all eigenvalues while zhegvx_ returns selected ones - void dsygvd_(const int* itype, const char* jobz, const char* uplo, const int* n, - double* a, const int* lda, - const double* b, const int* ldb, double* w, - double* work, int* lwork, - int* iwork, int* liwork, int* info); - - void chegvd_(const int* itype, const char* jobz, const char* uplo, const int* n, +// === Generalized Hermitian-definite eigenproblems === +void dsygvd_(const int* itype, const char* jobz, const char* uplo, const int* n, + double* a, const int* lda, + double* b, const int* ldb, + double* w, + double* work, const int* lwork, + int* iwork, const int* liwork, + int* info); + +void chegvd_(const int* itype, const char* jobz, const char* uplo, const int* n, std::complex* a, const int* lda, - const std::complex* b, const int* ldb, float* w, - std::complex* work, int* lwork, float* rwork, int* lrwork, - int* iwork, int* liwork, int* info); - - void zhegvd_(const int* itype, const char* jobz, const char* uplo, const int* n, - std::complex* a, const int* lda, - const std::complex* b, const int* ldb, double* w, - std::complex* work, int* lwork, double* rwork, int* lrwork, - int* iwork, int* liwork, int* info); - - void dsyevx_(const char* jobz, const char* range, const char* uplo, const int* n, - double* a, const int* lda, - const double* vl, const double* vu, const int* il, const int* iu, const double* abstol, - const int* m, double* w, double* z, const int* ldz, - double* work, const int* lwork, double* rwork, int* iwork, int* ifail, int* info); - - void cheevx_(const char* jobz, const char* range, const char* uplo, const int* n, - std::complex *a, const int* lda, - const float* vl, const float* vu, const int* il, const int* iu, const float* abstol, - const int* m, float* w, std::complex *z, const int *ldz, - std::complex *work, const int* lwork, float* rwork, int* iwork, int* ifail, int* info); - - void zheevx_(const char* jobz, const char* range, const char* uplo, const int* n, - std::complex *a, const int* lda, - const double* vl, const double* vu, const int* il, const int* iu, const double* abstol, - const int* m, double* w, std::complex *z, const int *ldz, - std::complex *work, const int* lwork, double* rwork, int* iwork, int* ifail, int* info); - - - void dsygvx_(const int* itype, const char* jobz, const char* range, const char* uplo, - const int* n, double* A, const int* lda, double* B, const int* ldb, - const double* vl, const double* vu, const int* il, const int* iu, - const double* abstol, const int* m, double* w, double* Z, const int* ldz, - double* work, const int* lwork, int* iwork, int* ifail, int* info); - - void chegvx_(const int* itype,const char* jobz,const char* range,const char* uplo, - const int* n,std::complex *a,const int* lda,std::complex *b, - const int* ldb,const float* vl,const float* vu,const int* il, - const int* iu,const float* abstol,const int* m,float* w, - std::complex *z,const int *ldz,std::complex *work,const int* lwork, - float* rwork,int* iwork,int* ifail,int* info); - - void zhegvx_(const int* itype,const char* jobz,const char* range,const char* uplo, - const int* n,std::complex *a,const int* lda,std::complex *b, - const int* ldb,const double* vl,const double* vu,const int* il, - const int* iu,const double* abstol,const int* m,double* w, - std::complex *z,const int *ldz,std::complex *work,const int* lwork, - double* rwork,int* iwork,int* ifail,int* info); - - void zhegv_(const int* itype,const char* jobz,const char* uplo,const int* n, - std::complex* a,const int* lda,std::complex* b,const int* ldb, - double* w,std::complex* work,int* lwork,double* rwork,int* info); - void chegv_(const int* itype,const char* jobz,const char* uplo,const int* n, - std::complex* a,const int* lda,std::complex* b,const int* ldb, - float* w,std::complex* work,int* lwork,float* rwork,int* info); - void dsygv_(const int* itype, const char* jobz,const char* uplo, const int* n, - double* a,const int* lda,double* b,const int* ldb, - double* w,double* work,int* lwork,int* info); - - // solve the eigenproblem Ax=ex, where A is Hermitian and complex couble - // zheev_ returns all eigenvalues while zheevx_ returns selected ones - void zheev_(const char* jobz,const char* uplo,const int* n,std::complex *a, - const int* lda,double* w,std::complex* work,const int* lwork, - double* rwork,int* info); - void cheev_(const char* jobz,const char* uplo,const int* n,std::complex *a, - const int* lda,float* w,std::complex* work,const int* lwork, - float* rwork,int* info); - void dsyev_(const char* jobz,const char* uplo,const int* n,double *a, - const int* lda,double* w,double* work,const int* lwork, int* info); - - // solve the eigenproblem Ax=ex, where A is a general matrix - void dgeev_(const char* jobvl, const char* jobvr, const int* n, double* a, const int* lda, - double* wr, double* wi, double* vl, const int* ldvl, double* vr, const int* ldvr, - double* work, const int* lwork, int* info); - void zgeev_(const char* jobvl, const char* jobvr, const int* n, std::complex* a, const int* lda, - std::complex* w, std::complex* vl, const int* ldvl, std::complex* vr, const int* ldvr, - std::complex* work, const int* lwork, double* rwork, int* info); - // liuyu add 2023-10-03 - // dgetri and dgetrf computes the inverse of a n*n real matrix - void dgetri_(const int* n, double* a, const int* lda, const int* ipiv, double* work, const int* lwork, int* info); - void dgetrf_(const int* m, const int* n, double* a, const int* lda, int* ipiv, int* info); - - // dsytrf_ computes the Bunch-Kaufman factorization of a double precision - // symmetric matrix, while dsytri takes its output to perform martrix inversion - void dsytrf_(const char* uplo, const int* n, double * a, const int* lda, - int *ipiv,double *work, const int* lwork ,int *info); - void dsytri_(const char* uplo,const int* n,double *a, const int *lda, - int *ipiv, double * work,int *info); - // Peize Lin add dsptrf and dsptri 2016-06-21, to compute inverse real symmetry indefinit matrix. - // dpotrf computes the Cholesky factorization of a real symmetric positive definite matrix - // while dpotri taks its output to perform matrix inversion - void spotrf_(const char*const uplo, const int*const n, float*const A, const int*const lda, int*const info); - void dpotrf_(const char*const uplo, const int*const n, double*const A, const int*const lda, int*const info); - void cpotrf_(const char*const uplo, const int*const n, std::complex*const A, const int*const lda, int*const info); - void zpotrf_(const char*const uplo, const int*const n, std::complex*const A, const int*const lda, int*const info); - void spotri_(const char*const uplo, const int*const n, float*const A, const int*const lda, int*const info); - void dpotri_(const char*const uplo, const int*const n, double*const A, const int*const lda, int*const info); - void cpotri_(const char*const uplo, const int*const n, std::complex*const A, const int*const lda, int*const info); - void zpotri_(const char*const uplo, const int*const n, std::complex*const A, const int*const lda, int*const info); - - // zgetrf computes the LU factorization of a general matrix - // while zgetri takes its output to perform matrix inversion - void zgetrf_(const int* m, const int *n, std::complex *A, const int *lda, int *ipiv, int* info); - void zgetri_(const int* n, std::complex* A, const int* lda, const int* ipiv, std::complex* work, const int* lwork, int* info); - - // if trans=='N': C = alpha * A * A.H + beta * C - // if trans=='C': C = alpha * A.H * A + beta * C - void zherk_(const char *uplo, const char *trans, const int *n, const int *k, - const double *alpha, const std::complex *A, const int *lda, - const double *beta, std::complex *C, const int *ldc); - void cherk_(const char* uplo, const char* trans, const int* n, const int* k, - const float* alpha, const std::complex* A, const int* lda, - const float* beta, std::complex* C, const int* ldc); - - // computes all eigenvalues of a symmetric tridiagonal matrix - // using the Pal-Walker-Kahan variant of the QL or QR algorithm. - void dsterf_(int *n, double *d, double *e, int *info); - // computes the eigenvectors of a real symmetric tridiagonal - // matrix T corresponding to specified eigenvalues - void dstein_(int *n, double* d, double *e, int *m, double *w, - int* block, int* isplit, double* z, int *lda, double *work, - int* iwork, int* ifail, int *info); - // computes the eigenvectors of a complex symmetric tridiagonal - // matrix T corresponding to specified eigenvalues - void zstein_(int *n, double* d, double *e, int *m, double *w, - int* block, int* isplit, std::complex* z, int *lda, double *work, - int* iwork, int* ifail, int *info); - - // computes the Cholesky factorization of a symmetric - // positive definite matrix A. - void dpotf2_(char *uplo, int *n, double *a, int *lda, int *info); - void zpotf2_(char *uplo,int *n,std::complex *a, int *lda, int *info); - - // reduces a symmetric definite generalized eigenproblem to standard form - // using the factorization results obtained from spotrf - void dsygs2_(int *itype, char *uplo, int *n, double *a, int *lda, double *b, int *ldb, int *info); - void zhegs2_(int *itype, char *uplo, int *n, std::complex *a, int *lda, std::complex *b, int *ldb, int *info); - - // copies a into b - void dlacpy_(char *uplo, int *m, int *n, double* a, int *lda, double *b, int *ldb); - void zlacpy_(char *uplo, int *m, int *n, std::complex* a, int *lda, std::complex *b, int *ldb); - - // generates a real elementary reflector H of order n, such that - // H * ( alpha ) = ( beta ), H is unitary. - // ( x ) ( 0 ) - void dlarfg_(int *n, double *alpha, double *x, int *incx, double *tau); - void zlarfg_(int *n, std::complex *alpha, std::complex *x, int *incx, std::complex *tau); - - // solve a tridiagonal linear system - void dgtsv_(int* N, int* NRHS, double* DL, double* D, double* DU, double* B, int* LDB, int* INFO); - - // solve Ax = b - void dsysv_(const char* uplo, const int* n, const int* m, double * a, const int* lda, - int *ipiv, double * b, const int* ldb, double *work, const int* lwork ,int *info); -} + std::complex* b, const int* ldb, + float* w, + std::complex* work, const int* lwork, + float* rwork, const int* lrwork, + int* iwork, const int* liwork, + int* info); + +void zhegvd_(const int* itype, const char* jobz, const char* uplo, const int* n, + std::complex* a, const int* lda, + std::complex* b, const int* ldb, + double* w, + std::complex* work, const int* lwork, + double* rwork, const int* lrwork, + int* iwork, const int* liwork, + int* info); + +// === Selected eigenvalues/vectors: standard Hermitian === + +void dsyevx_(const char* jobz, const char* range, const char* uplo, const int* n, + double* a, const int* lda, + const double* vl, const double* vu, + const int* il, const int* iu, + const double* abstol, + int* m, double* w, double* z, const int* ldz, + double* work, const int* lwork, + int* iwork, int* ifail, + int* info); + +void cheevx_(const char* jobz, const char* range, const char* uplo, const int* n, + std::complex* a, const int* lda, + const float* vl, const float* vu, + const int* il, const int* iu, + const float* abstol, + int* m, float* w, std::complex* z, const int* ldz, + std::complex* work, const int* lwork, + float* rwork, int* iwork, int* ifail, + int* info); + +void zheevx_(const char* jobz, const char* range, const char* uplo, const int* n, + std::complex* a, const int* lda, + const double* vl, const double* vu, + const int* il, const int* iu, + const double* abstol, + int* m, double* w, std::complex* z, const int* ldz, + std::complex* work, const int* lwork, + double* rwork, int* iwork, int* ifail, + int* info); + +// === Selected eigenvalues/vectors: generalized Hermitian === + +void dsygvx_(const int* itype, const char* jobz, const char* range, const char* uplo, + const int* n, + double* a, const int* lda, + double* b, const int* ldb, + const double* vl, const double* vu, + const int* il, const int* iu, + const double* abstol, + int* m, double* w, double* z, const int* ldz, + double* work, const int* lwork, + int* iwork, int* ifail, + int* info); + +void chegvx_(const int* itype, const char* jobz, const char* range, const char* uplo, + const int* n, + std::complex* a, const int* lda, + std::complex* b, const int* ldb, + const float* vl, const float* vu, + const int* il, const int* iu, + const float* abstol, + int* m, float* w, std::complex* z, const int* ldz, + std::complex* work, const int* lwork, + float* rwork, int* iwork, int* ifail, + int* info); + +void zhegvx_(const int* itype, const char* jobz, const char* range, const char* uplo, + const int* n, + std::complex* a, const int* lda, + std::complex* b, const int* ldb, + const double* vl, const double* vu, + const int* il, const int* iu, + const double* abstol, + int* m, double* w, std::complex* z, const int* ldz, + std::complex* work, const int* lwork, + double* rwork, int* iwork, int* ifail, + int* info); + +// === Generalized Hermitian: all eigenvalues (simple driver) === + +void dsygv_(const int* itype, const char* jobz, const char* uplo, const int* n, + double* a, const int* lda, + double* b, const int* ldb, + double* w, + double* work, const int* lwork, + int* info); + +void chegv_(const int* itype, const char* jobz, const char* uplo, const int* n, + std::complex* a, const int* lda, + std::complex* b, const int* ldb, + float* w, + std::complex* work, const int* lwork, + float* rwork, + int* info); + +void zhegv_(const int* itype, const char* jobz, const char* uplo, const int* n, + std::complex* a, const int* lda, + std::complex* b, const int* ldb, + double* w, + std::complex* work, const int* lwork, + double* rwork, + int* info); + +// === Standard Hermitian eigenproblem === + +void dsyev_(const char* jobz, const char* uplo, const int* n, + double* a, const int* lda, + double* w, + double* work, const int* lwork, + int* info); + +void cheev_(const char* jobz, const char* uplo, const int* n, + std::complex* a, const int* lda, + float* w, + std::complex* work, const int* lwork, + float* rwork, + int* info); + +void zheev_(const char* jobz, const char* uplo, const int* n, + std::complex* a, const int* lda, + double* w, + std::complex* work, const int* lwork, + double* rwork, + int* info); + +// === General (non-Hermitian) eigenproblem === + +void dgeev_(const char* jobvl, const char* jobvr, const int* n, + double* a, const int* lda, + double* wr, double* wi, + double* vl, const int* ldvl, + double* vr, const int* ldvr, + double* work, const int* lwork, + int* info); + +void zgeev_(const char* jobvl, const char* jobvr, const int* n, + std::complex* a, const int* lda, + std::complex* w, + std::complex* vl, const int* ldvl, + std::complex* vr, const int* ldvr, + std::complex* work, const int* lwork, + double* rwork, + int* info); + +// === Matrix inversion (LU) === + +void dgetrf_(const int* m, const int* n, double* a, const int* lda, + int* ipiv, int* info); + +void dgetri_(const int* n, double* a, const int* lda, + const int* ipiv, + double* work, const int* lwork, + int* info); + +// === Symmetric indefinite inversion (Bunch-Kaufman) === + +void dsytrf_(const char* uplo, const int* n, double* a, const int* lda, + int* ipiv, double* work, const int* lwork, int* info); + +void dsytri_(const char* uplo, const int* n, double* a, const int* lda, + const int* ipiv, double* work, int* info); + +// === Cholesky factorization & inversion === + +void spotrf_(const char* uplo, const int* n, float* a, const int* lda, int* info); +void dpotrf_(const char* uplo, const int* n, double* a, const int* lda, int* info); +void cpotrf_(const char* uplo, const int* n, std::complex* a, const int* lda, int* info); +void zpotrf_(const char* uplo, const int* n, std::complex* a, const int* lda, int* info); + +void spotri_(const char* uplo, const int* n, float* a, const int* lda, int* info); +void dpotri_(const char* uplo, const int* n, double* a, const int* lda, int* info); +void cpotri_(const char* uplo, const int* n, std::complex* a, const int* lda, int* info); +void zpotri_(const char* uplo, const int* n, std::complex* a, const int* lda, int* info); + +// === Complex LU inversion === + +void zgetrf_(const int* m, const int* n, std::complex* a, const int* lda, + int* ipiv, int* info); + +void zgetri_(const int* n, std::complex* a, const int* lda, + const int* ipiv, + std::complex* work, const int* lwork, + int* info); + + +// === Tridiagonal eigen solvers === + +void dsterf_(const int* n, double* d, double* e, int* info); + +void dstein_(const int* n, const double* d, const double* e, + const int* m, const double* w, + const int* iblock, const int* isplit, + double* z, const int* ldz, + double* work, int* iwork, int* ifail, + int* info); + +void zstein_(const int* n, const double* d, const double* e, + const int* m, const double* w, + const int* iblock, const int* isplit, + std::complex* z, const int* ldz, + double* work, int* iwork, int* ifail, + int* info); + +// === Unblocked Cholesky (level 2 BLAS) === + +void dpotf2_(const char* uplo, const int* n, double* a, const int* lda, int* info); +void zpotf2_(const char* uplo, const int* n, std::complex* a, const int* lda, int* info); + +// === Tridiagonal solver === + +void dgtsv_(const int* n, const int* nrhs, + double* dl, double* d, double* du, + double* b, const int* ldb, + int* info); + +// === Symmetric indefinite linear solver === + +void dsysv_(const char* uplo, const int* n, const int* nrhs, + double* a, const int* lda, + int* ipiv, + double* b, const int* ldb, + double* work, const int* lwork, + int* info); +} // extern "C" #ifdef GATHER_INFO #define zhegvx_ zhegvx_i @@ -226,7 +306,7 @@ void zhegvx_i(const int* itype, const int* il, const int* iu, const double* abstol, - const int* m, + int* m, double* w, std::complex* z, const int* ldz, diff --git a/source/source_base/test/blas_connector_test.cpp b/source/source_base/test/blas_connector_test.cpp index 87a2adbcef..21de7ef2e2 100644 --- a/source/source_base/test/blas_connector_test.cpp +++ b/source/source_base/test/blas_connector_test.cpp @@ -226,7 +226,7 @@ TEST(blas_connector, AxpyGpu) { TEST(blas_connector, dcopy_) { typedef double T; - long const size = 8; + int const size = 8; int const incx = 1; int const incy = 1; std::array x_const, result, answer; @@ -241,7 +241,7 @@ TEST(blas_connector, dcopy_) { TEST(blas_connector, zcopy_) { typedef std::complex T; - long const size = 8; + int const size = 8; int const incx = 1; int const incy = 1; std::array x_const, result, answer; @@ -259,7 +259,7 @@ TEST(blas_connector, zcopy_) { } TEST(blas_connector, copy) { - long const size = 8; + int const size = 8; int const incx = 1; int const incy = 1; std::complex result[8], answer[8]; diff --git a/source/source_estate/module_dm/cal_edm_tddft.cpp b/source/source_estate/module_dm/cal_edm_tddft.cpp index dd934d8a00..9a90ee4cf7 100644 --- a/source/source_estate/module_dm/cal_edm_tddft.cpp +++ b/source/source_estate/module_dm/cal_edm_tddft.cpp @@ -29,7 +29,7 @@ void cal_edm_tddft(Parallel_Orbitals& pv, // mohan add 2024-03-27 //! be careful, the type of nloc is 'long' //! whether the long type is safe, needs more discussion - const long nloc = pv.nloc; + const int nloc = pv.nloc; const int ncol = pv.ncol; const int nrow = pv.nrow; diff --git a/source/source_hsolver/diago_iter_assist.cpp b/source/source_hsolver/diago_iter_assist.cpp index dc7ce63b43..fb87ad2350 100644 --- a/source/source_hsolver/diago_iter_assist.cpp +++ b/source/source_hsolver/diago_iter_assist.cpp @@ -422,7 +422,7 @@ template void DiagoIterAssist::diag_hegvd(const int nstart, const int nbands, const T *hcc, - const T *scc, + T *scc, const int ldh, // nstart Real *e, // always in CPU T *vcc) @@ -540,7 +540,7 @@ void DiagoIterAssist::cal_hs_subspace(const hamilt::Hamilt template void DiagoIterAssist::diag_responce( const T* hcc, - const T* scc, + T* scc, const int nbands, const T* mat_in, // [out] target matrix to be multiplied T* mat_out, @@ -583,7 +583,7 @@ void DiagoIterAssist::diag_responce( const T* hcc, template void DiagoIterAssist::diag_subspace_psi(const T* hcc, - const T* scc, + T* scc, const int dim_subspace, psi::Psi& evc, Real* en diff --git a/source/source_hsolver/diago_iter_assist.h b/source/source_hsolver/diago_iter_assist.h index 7978321147..2867b50b5c 100644 --- a/source/source_hsolver/diago_iter_assist.h +++ b/source/source_hsolver/diago_iter_assist.h @@ -80,7 +80,7 @@ class DiagoIterAssist static void diag_hegvd(const int nstart, const int nbands, const T *hcc, - const T *sc, + T *sc, const int ldh, // nstart Real *e, T *vcc); @@ -104,7 +104,7 @@ class DiagoIterAssist /// @param mat_col : number of columns of target matrix /// @param en : eigenvalues static void diag_responce(const T* hcc, - const T* scc, + T* scc, const int nbands, const T* mat_in, T* mat_out, @@ -113,7 +113,7 @@ class DiagoIterAssist /// @brief calculate the response wavefunction psi from rotation matrix solved by diagonalization of H and S matrix static void diag_subspace_psi(const T* hcc, - const T* scc, + T* scc, const int dim_subspace, psi::Psi& evc, Real* en); diff --git a/source/source_hsolver/kernels/cuda/hegvd_op.cu b/source/source_hsolver/kernels/cuda/hegvd_op.cu index f4d03a7522..1a90d981b9 100644 --- a/source/source_hsolver/kernels/cuda/hegvd_op.cu +++ b/source/source_hsolver/kernels/cuda/hegvd_op.cu @@ -212,7 +212,7 @@ struct hegvd_op const int nstart, const int ldh, const T* A, // hcc - const T* B, // scc + T* B, // scc Real* W, // eigenvalue T* V) { diff --git a/source/source_hsolver/kernels/hegvd_op.cpp b/source/source_hsolver/kernels/hegvd_op.cpp index aa9d56239b..e9ecb10388 100644 --- a/source/source_hsolver/kernels/hegvd_op.cpp +++ b/source/source_hsolver/kernels/hegvd_op.cpp @@ -17,7 +17,7 @@ struct hegvd_op const int nstart, const int ldh, const T* hcc, - const T* scc, + T* scc, Real* eigenvalue, T* vcc) { diff --git a/source/source_hsolver/kernels/hegvd_op.h b/source/source_hsolver/kernels/hegvd_op.h index 5381f97415..dfe1aaf287 100644 --- a/source/source_hsolver/kernels/hegvd_op.h +++ b/source/source_hsolver/kernels/hegvd_op.h @@ -64,7 +64,7 @@ struct hegvd_op /// Output Parameter /// @param W : calculated eigenvalues /// @param V : calculated eigenvectors (col major) - void operator()(const Device* d, const int nstart, const int ldh, const T* A, const T* B, Real* W, T* V); + void operator()(const Device* d, const int nstart, const int ldh, const T* A, T* B, Real* W, T* V); }; // template diff --git a/source/source_hsolver/kernels/rocm/hegvd_op.hip.cu b/source/source_hsolver/kernels/rocm/hegvd_op.hip.cu index 121180de85..93b3457af6 100644 --- a/source/source_hsolver/kernels/rocm/hegvd_op.hip.cu +++ b/source/source_hsolver/kernels/rocm/hegvd_op.hip.cu @@ -32,7 +32,7 @@ void hegvd_op::operator()(const base_device::DE const int nstart, const int ldh, const double* _hcc, - const double* _scc, + double* _scc, double* _eigenvalue, double* _vcc) {