From 2c9b933c81737d2eab77505fbaaafb5c49102e5e Mon Sep 17 00:00:00 2001 From: Critsium-xy Date: Tue, 29 Apr 2025 16:47:34 +0800 Subject: [PATCH 1/4] INITIAL COMMIT --- source/module_base/CMakeLists.txt | 5 +- source/module_base/blas_connector.h | 21 +- source/module_base/blas_connector_base.cpp | 79 +++ source/module_base/blas_connector_matrix.cpp | 575 +++++++++++++++++++ source/module_base/blas_connector_vector.cpp | 473 +++++++++++++++ 5 files changed, 1150 insertions(+), 3 deletions(-) create mode 100644 source/module_base/blas_connector_base.cpp create mode 100644 source/module_base/blas_connector_matrix.cpp create mode 100644 source/module_base/blas_connector_vector.cpp diff --git a/source/module_base/CMakeLists.txt b/source/module_base/CMakeLists.txt index e6b016b311..3fb5015188 100644 --- a/source/module_base/CMakeLists.txt +++ b/source/module_base/CMakeLists.txt @@ -10,7 +10,10 @@ add_library( base OBJECT assoc_laguerre.cpp - blas_connector.cpp + #blas_connector.cpp + blas_connector_base.cpp + blas_connector_vector.cpp + blas_connector_matrix.cpp clebsch_gordan_coeff.cpp complexarray.cpp complexmatrix.cpp diff --git a/source/module_base/blas_connector.h b/source/module_base/blas_connector.h index b6398bced3..7e988fd1bb 100644 --- a/source/module_base/blas_connector.h +++ b/source/module_base/blas_connector.h @@ -368,9 +368,26 @@ class BlasConnector #ifdef __CUDA +#include +#include "cublas_v2.h" + +// If you want to use cublas, you need these functions to create and destroy the cublas/hipblas handle. +// You also need to use these functions to translate the transpose parameter into cublas/hipblas datatype. + namespace BlasUtils{ - void createGpuBlasHandle(); - void destoryBLAShandle(); + + static cublasHandle_t cublas_handle = nullptr; + + void createGpuBlasHandle(); // Create a cublas/hipblas handle. + + void destoryBLAShandle(); // Destroy the cublas/hipblas handle. Do this when the software is about to end. + + cublasOperation_t judge_trans(bool is_complex, const char& trans, const char* name); // Translate a normal transpose parameter to a cublas/hipblas type. + + cublasSideMode_t judge_side(const char& trans); // Translate a normal side parameter to a cublas/hipblas type. + + cublasFillMode_t judge_fill(const char& trans); // Translate a normal fill parameter to a cublas/hipblas type. + } #endif diff --git a/source/module_base/blas_connector_base.cpp b/source/module_base/blas_connector_base.cpp new file mode 100644 index 0000000000..4b9080a561 --- /dev/null +++ b/source/module_base/blas_connector_base.cpp @@ -0,0 +1,79 @@ +#include "blas_connector.h" +#include "macros.h" + +#ifdef __CUDA +#include +#include +#include "cublas_v2.h" +#include "module_base/kernels/math_kernel_op.h" +#include "module_base/module_device/memory_op.h" + + +namespace BlasUtils{ + + static cublasHandle_t cublas_handle = nullptr; + + void createGpuBlasHandle(){ + if (cublas_handle == nullptr) { + cublasErrcheck(cublasCreate(&cublas_handle)); + } + } + + void destoryBLAShandle(){ + if (cublas_handle != nullptr) { + cublasErrcheck(cublasDestroy(cublas_handle)); + cublas_handle = nullptr; + } + } + + + cublasOperation_t judge_trans(bool is_complex, const char& trans, const char* name) + { + if (trans == 'N') + { + return CUBLAS_OP_N; + } + else if(trans == 'T') + { + return CUBLAS_OP_T; + } + else if(is_complex && trans == 'C') + { + return CUBLAS_OP_C; + } + return CUBLAS_OP_N; + } + + cublasSideMode_t judge_side(const char& trans) + { + if (trans == 'L') + { + return CUBLAS_SIDE_LEFT; + } + else if (trans == 'R') + { + return CUBLAS_SIDE_RIGHT; + } + return CUBLAS_SIDE_LEFT; + } + + cublasFillMode_t judge_fill(const char& trans) + { + if (trans == 'F') + { + return CUBLAS_FILL_MODE_FULL; + } + else if (trans == 'U') + { + return CUBLAS_FILL_MODE_UPPER; + } + else if (trans == 'D') + { + return CUBLAS_FILL_MODE_LOWER; + } + return CUBLAS_FILL_MODE_FULL; + } + +} // namespace BlasUtils + +#endif \ No newline at end of file diff --git a/source/module_base/blas_connector_matrix.cpp b/source/module_base/blas_connector_matrix.cpp new file mode 100644 index 0000000000..9beb7a7d59 --- /dev/null +++ b/source/module_base/blas_connector_matrix.cpp @@ -0,0 +1,575 @@ +#include "blas_connector.h" +#include "macros.h" + +#ifdef __DSP +#include "module_base/kernels/dsp/dsp_connector.h" +#include "module_base/global_variable.h" +#endif + +#ifdef __CUDA +#include +#include +#include "cublas_v2.h" +#include "module_base/kernels/math_kernel_op.h" +#include "module_base/module_device/memory_op.h" +#endif + + +// C = a * A.? * B.? + b * C +// Row-Major part +void BlasConnector::gemm(const char transa, const char transb, const int m, const int n, const int k, + const float alpha, const float *a, const int lda, const float *b, const int ldb, + const float beta, float *c, const int ldc, base_device::AbacusDevice_t device_type) +{ + if (device_type == base_device::AbacusDevice_t::CpuDevice) { + sgemm_(&transb, &transa, &n, &m, &k, + &alpha, b, &ldb, a, &lda, + &beta, c, &ldc); + } +#ifdef __DSP + else if (device_type == base_device::AbacusDevice_t::DspDevice){ + mtfunc::sgemm_mth_(&transb, &transa, &n, &m, &k, + &alpha, b, &ldb, a, &lda, + &beta, c, &ldc, GlobalV::MY_RANK); + } +#endif +#ifdef __CUDA + else if (device_type == base_device::AbacusDevice_t::GpuDevice) { + cublasOperation_t cutransA = BlasUtils::judge_trans(false, transa, "gemm_op"); + cublasOperation_t cutransB = BlasUtils::judge_trans(false, transb, "gemm_op"); + cublasErrcheck(cublasSgemm(BlasUtils::cublas_handle, cutransA, cutransB, n, m, k, &alpha, b, ldb, a, lda, &beta, c, ldc)); + } +#endif + else { + throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); + } +} + +void BlasConnector::gemm(const char transa, + const char transb, + const int m, + const int n, + const int k, + const double alpha, + const double* a, + const int lda, + const double* b, + const int ldb, + const double beta, + double* c, + const int ldc, + base_device::AbacusDevice_t device_type) +{ + if (device_type == base_device::AbacusDevice_t::CpuDevice) + { + dgemm_(&transb, &transa, &n, &m, &k, &alpha, b, &ldb, a, &lda, &beta, c, &ldc); + } +#ifdef __DSP + else if (device_type == base_device::AbacusDevice_t::DspDevice) + { + mtfunc::dgemm_mth_(&transb, &transa, &n, &m, &k, &alpha, b, &ldb, a, &lda, &beta, c, &ldc, GlobalV::MY_RANK); + } +#endif + else if (device_type == base_device::AbacusDevice_t::GpuDevice) + { +#ifdef __CUDA + cublasOperation_t cutransA = BlasUtils::judge_trans(false, transa, "gemm_op"); + cublasOperation_t cutransB = BlasUtils::judge_trans(false, transb, "gemm_op"); + cublasErrcheck( + cublasDgemm(BlasUtils::cublas_handle, cutransA, cutransB, n, m, k, &alpha, b, ldb, a, lda, &beta, c, ldc)); +#endif + } + else { + throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); + } +} + +void BlasConnector::gemm(const char transa, + const char transb, + const int m, + const int n, + const int k, + const std::complex alpha, + const std::complex* a, + const int lda, + const std::complex* b, + const int ldb, + const std::complex beta, + std::complex* c, + const int ldc, + base_device::AbacusDevice_t device_type) +{ + if (device_type == base_device::AbacusDevice_t::CpuDevice) + { + cgemm_(&transb, &transa, &n, &m, &k, &alpha, b, &ldb, a, &lda, &beta, c, &ldc); + } +#ifdef __DSP + else if (device_type == base_device::AbacusDevice_t::DspDevice) + { + mtfunc::cgemm_mth_(&transb, &transa, &n, &m, &k, &alpha, b, &ldb, a, &lda, &beta, c, &ldc, GlobalV::MY_RANK); + } +#endif + else if (device_type == base_device::AbacusDevice_t::GpuDevice) + { +#ifdef __CUDA + cublasOperation_t cutransA = BlasUtils::judge_trans(false, transa, "gemm_op"); + cublasOperation_t cutransB = BlasUtils::judge_trans(false, transb, "gemm_op"); + cublasErrcheck(cublasCgemm(BlasUtils::cublas_handle, + cutransA, + cutransB, + n, + m, + k, + (float2*)&alpha, + (float2*)b, + ldb, + (float2*)a, + lda, + (float2*)&beta, + (float2*)c, + ldc)); +#endif + } + else { + throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); + } +} + +void BlasConnector::gemm(const char transa, + const char transb, + const int m, + const int n, + const int k, + const std::complex alpha, + const std::complex* a, + const int lda, + const std::complex* b, + const int ldb, + const std::complex beta, + std::complex* c, + const int ldc, + base_device::AbacusDevice_t device_type) +{ + if (device_type == base_device::AbacusDevice_t::CpuDevice) + { + zgemm_(&transb, &transa, &n, &m, &k, &alpha, b, &ldb, a, &lda, &beta, c, &ldc); + } +#ifdef __DSP + else if (device_type == base_device::AbacusDevice_t::DspDevice) + { + mtfunc::zgemm_mth_(&transb, &transa, &n, &m, &k, &alpha, b, &ldb, a, &lda, &beta, c, &ldc, GlobalV::MY_RANK); + } +#endif + else if (device_type == base_device::AbacusDevice_t::GpuDevice) + { +#ifdef __CUDA + cublasOperation_t cutransA = BlasUtils::judge_trans(false, transa, "gemm_op"); + cublasOperation_t cutransB = BlasUtils::judge_trans(false, transb, "gemm_op"); + cublasErrcheck(cublasZgemm(BlasUtils::cublas_handle, + cutransA, + cutransB, + n, + m, + k, + (double2*)&alpha, + (double2*)b, + ldb, + (double2*)a, + lda, + (double2*)&beta, + (double2*)c, + ldc)); +#endif + } + else { + throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); + } +} + +// Col-Major part +void BlasConnector::gemm_cm(const char transa, const char transb, const int m, const int n, const int k, + const float alpha, const float *a, const int lda, const float *b, const int ldb, + const float beta, float *c, const int ldc, base_device::AbacusDevice_t device_type) +{ + if (device_type == base_device::AbacusDevice_t::CpuDevice) { + sgemm_(&transa, &transb, &m, &n, &k, + &alpha, a, &lda, b, &ldb, + &beta, c, &ldc); + } +#ifdef __DSP + else if (device_type == base_device::AbacusDevice_t::DspDevice){ + mtfunc::sgemm_mth_(&transb, &transa, &m, &n, &k, + &alpha, a, &lda, b, &ldb, + &beta, c, &ldc, GlobalV::MY_RANK); + } +#endif +#ifdef __CUDA + else if (device_type == base_device::AbacusDevice_t::GpuDevice) { + cublasOperation_t cutransA = BlasUtils::judge_trans(false, transa, "gemm_op"); + cublasOperation_t cutransB = BlasUtils::judge_trans(false, transb, "gemm_op"); + cublasErrcheck(cublasSgemm(BlasUtils::cublas_handle, cutransA, cutransB, m, n, k, &alpha, a, lda, b, ldb, &beta, c, ldc)); + } +#endif + else { + throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); + } +} + +void BlasConnector::gemm_cm(const char transa, + const char transb, + const int m, + const int n, + const int k, + const double alpha, + const double* a, + const int lda, + const double* b, + const int ldb, + const double beta, + double* c, + const int ldc, + base_device::AbacusDevice_t device_type) +{ + if (device_type == base_device::AbacusDevice_t::CpuDevice) + { + dgemm_(&transa, &transb, &m, &n, &k, &alpha, a, &lda, b, &ldb, &beta, c, &ldc); + } +#ifdef __DSP + else if (device_type == base_device::AbacusDevice_t::DspDevice) + { + mtfunc::dgemm_mth_(&transa, &transb, &m, &n, &k, &alpha, a, &lda, b, &ldb, &beta, c, &ldc, GlobalV::MY_RANK); + } +#endif +#ifdef __CUDA + else if (device_type == base_device::AbacusDevice_t::GpuDevice) + { + cublasOperation_t cutransA = BlasUtils::judge_trans(false, transa, "gemm_op"); + cublasOperation_t cutransB = BlasUtils::judge_trans(false, transb, "gemm_op"); + cublasErrcheck( + cublasDgemm(BlasUtils::cublas_handle, cutransA, cutransB, m, n, k, &alpha, a, lda, b, ldb, &beta, c, ldc)); + } +#endif + else { + throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); + } +} + +void BlasConnector::gemm_cm(const char transa, + const char transb, + const int m, + const int n, + const int k, + const std::complex alpha, + const std::complex* a, + const int lda, + const std::complex* b, + const int ldb, + const std::complex beta, + std::complex* c, + const int ldc, + base_device::AbacusDevice_t device_type) +{ + if (device_type == base_device::AbacusDevice_t::CpuDevice) + { + cgemm_(&transa, &transb, &m, &n, &k, &alpha, a, &lda, b, &ldb, &beta, c, &ldc); + } +#ifdef __DSP + else if (device_type == base_device::AbacusDevice_t::DspDevice) + { + mtfunc::cgemm_mth_(&transa, &transb, &m, &n, &k, &alpha, a, &lda, b, &ldb, &beta, c, &ldc, GlobalV::MY_RANK); + } +#endif +#ifdef __CUDA + else if (device_type == base_device::AbacusDevice_t::GpuDevice) + { + cublasOperation_t cutransA = BlasUtils::judge_trans(false, transa, "gemm_op"); + cublasOperation_t cutransB = BlasUtils::judge_trans(false, transb, "gemm_op"); + cublasErrcheck(cublasCgemm(BlasUtils::cublas_handle, + cutransA, + cutransB, + m, + n, + k, + (float2*)&alpha, + (float2*)a, + lda, + (float2*)b, + ldb, + (float2*)&beta, + (float2*)c, + ldc)); + } +#endif + else { + throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); + } +} + +void BlasConnector::gemm_cm(const char transa, + const char transb, + const int m, + const int n, + const int k, + const std::complex alpha, + const std::complex* a, + const int lda, + const std::complex* b, + const int ldb, + const std::complex beta, + std::complex* c, + const int ldc, + base_device::AbacusDevice_t device_type) +{ + if (device_type == base_device::AbacusDevice_t::CpuDevice) + { + zgemm_(&transa, &transb, &m, &n, &k, &alpha, a, &lda, b, &ldb, &beta, c, &ldc); + } +#ifdef __DSP + else if (device_type == base_device::AbacusDevice_t::DspDevice) + { + mtfunc::zgemm_mth_(&transa, &transb, &m, &n, &k, &alpha, a, &lda, b, &ldb, &beta, c, &ldc, GlobalV::MY_RANK); + } +#endif +#ifdef __CUDA + else if (device_type == base_device::AbacusDevice_t::GpuDevice) + { + cublasOperation_t cutransA = BlasUtils::judge_trans(false, transa, "gemm_op"); + cublasOperation_t cutransB = BlasUtils::judge_trans(false, transb, "gemm_op"); + cublasErrcheck(cublasZgemm(BlasUtils::cublas_handle, + cutransA, + cutransB, + m, + n, + k, + (double2*)&alpha, + (double2*)a, + lda, + (double2*)b, + ldb, + (double2*)&beta, + (double2*)c, + ldc)); + } +#endif + else { + throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); + } +} + +// Symm and Hemm part. Only col-major is supported. + +void BlasConnector::symm_cm(const char side, const char uplo, const int m, const int n, + const float alpha, const float *a, const int lda, const float *b, const int ldb, + const float beta, float *c, const int ldc, base_device::AbacusDevice_t device_type) +{ + if (device_type == base_device::AbacusDevice_t::CpuDevice) { + ssymm_(&side, &uplo, &m, &n, + &alpha, a, &lda, b, &ldb, + &beta, c, &ldc); + } +#ifdef __CUDA + else if (device_type == base_device::AbacusDevice_t::GpuDevice) { + cublasSideMode_t sideMode = BlasUtils::judge_side(side); + cublasFillMode_t fillMode = BlasUtils::judge_fill(uplo); + cublasErrcheck(cublasSsymm(BlasUtils::cublas_handle, sideMode, fillMode, m, n, &alpha, a, lda, b, ldb, &beta, c, ldc)); + } +#endif + else { + throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); + } +} + +void BlasConnector::symm_cm(const char side, const char uplo, const int m, const int n, + const double alpha, const double *a, const int lda, const double *b, const int ldb, + const double beta, double *c, const int ldc, base_device::AbacusDevice_t device_type) +{ + if (device_type == base_device::AbacusDevice_t::CpuDevice) { + dsymm_(&side, &uplo, &m, &n, + &alpha, a, &lda, b, &ldb, + &beta, c, &ldc); + } +#ifdef __CUDA + else if (device_type == base_device::AbacusDevice_t::GpuDevice) { + cublasSideMode_t sideMode = BlasUtils::judge_side(side); + cublasFillMode_t fillMode = BlasUtils::judge_fill(uplo); + cublasErrcheck(cublasDsymm(BlasUtils::cublas_handle, sideMode, fillMode, m, n, &alpha, a, lda, b, ldb, &beta, c, ldc)); + } +#endif + else { + throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); + } +} + +void BlasConnector::symm_cm(const char side, const char uplo, const int m, const int n, + const std::complex alpha, const std::complex *a, const int lda, const std::complex *b, const int ldb, + const std::complex beta, std::complex *c, const int ldc, base_device::AbacusDevice_t device_type) +{ + if (device_type == base_device::AbacusDevice_t::CpuDevice) { + csymm_(&side, &uplo, &m, &n, + &alpha, a, &lda, b, &ldb, + &beta, c, &ldc); + } +#ifdef __CUDA + else if (device_type == base_device::AbacusDevice_t::GpuDevice) { + cublasSideMode_t sideMode = BlasUtils::judge_side(side); + cublasFillMode_t fillMode = BlasUtils::judge_fill(uplo); + cublasErrcheck(cublasCsymm(BlasUtils::cublas_handle, sideMode, fillMode, m, n, (float2*)&alpha, (float2*)a, lda, (float2*)b, ldb, (float2*)&beta, (float2*)c, ldc)); + } +#endif + else { + throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); + } +} + +void BlasConnector::symm_cm(const char side, const char uplo, const int m, const int n, + const std::complex alpha, const std::complex *a, const int lda, const std::complex *b, const int ldb, + const std::complex beta, std::complex *c, const int ldc, base_device::AbacusDevice_t device_type) +{ + if (device_type == base_device::AbacusDevice_t::CpuDevice) { + zsymm_(&side, &uplo, &m, &n, + &alpha, a, &lda, b, &ldb, + &beta, c, &ldc); + } +#ifdef __CUDA + else if (device_type == base_device::AbacusDevice_t::GpuDevice) { + cublasSideMode_t sideMode = BlasUtils::judge_side(side); + cublasFillMode_t fillMode = BlasUtils::judge_fill(uplo); + cublasErrcheck(cublasZsymm(BlasUtils::cublas_handle, sideMode, fillMode, m, n, (double2*)&alpha, (double2*)a, lda, (double2*)b, ldb, (double2*)&beta, (double2*)c, ldc)); + } +#endif + else { + throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); + } +} + +void BlasConnector::hemm_cm(const char side, const char uplo, const int m, const int n, + const float alpha, const float *a, const int lda, const float *b, const int ldb, + const float beta, float *c, const int ldc, base_device::AbacusDevice_t device_type) +{ + symm_cm(side, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc, device_type); +} + +void BlasConnector::hemm_cm(const char side, const char uplo, const int m, const int n, + const double alpha, const double *a, const int lda, const double *b, const int ldb, + const double beta, double *c, const int ldc, base_device::AbacusDevice_t device_type) +{ + symm_cm(side, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc, device_type); +} + +void BlasConnector::hemm_cm(char side, char uplo, int m, int n, + std::complex alpha, std::complex *a, int lda, std::complex *b, int ldb, + std::complex beta, std::complex *c, int ldc, base_device::AbacusDevice_t device_type) +{ + if (device_type == base_device::AbacusDevice_t::CpuDevice) { + chemm_(&side, &uplo, &m, &n, + &alpha, a, &lda, b, &ldb, + &beta, c, &ldc); + } +#ifdef __CUDA + else if (device_type == base_device::AbacusDevice_t::GpuDevice) { + cublasSideMode_t sideMode = BlasUtils::judge_side(side); + cublasFillMode_t fillMode = BlasUtils::judge_fill(uplo); + cublasErrcheck(cublasChemm(BlasUtils::cublas_handle, sideMode, fillMode, m, n, (float2*)&alpha, (float2*)a, lda, (float2*)b, ldb, (float2*)&beta, (float2*)c, ldc)); + } +#endif + else { + throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); + } +} + +void BlasConnector::hemm_cm(char side, char uplo, int m, int n, + std::complex alpha, std::complex *a, int lda, std::complex *b, int ldb, + std::complex beta, std::complex *c, int ldc, base_device::AbacusDevice_t device_type) +{ + if (device_type == base_device::AbacusDevice_t::CpuDevice) { + zhemm_(&side, &uplo, &m, &n, + &alpha, a, &lda, b, &ldb, + &beta, c, &ldc); + } +#ifdef __CUDA + else if (device_type == base_device::AbacusDevice_t::GpuDevice) { + cublasSideMode_t sideMode = BlasUtils::judge_side(side); + cublasFillMode_t fillMode = BlasUtils::judge_fill(uplo); + cublasErrcheck(cublasZhemm(BlasUtils::cublas_handle, sideMode, fillMode, m, n, (double2*)&alpha, (double2*)a, lda, (double2*)b, ldb, (double2*)&beta, (double2*)c, ldc)); + } +#endif + else { + throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); + } +} + +void BlasConnector::gemv(const char trans, const int m, const int n, + const float alpha, const float* A, const int lda, const float* X, const int incx, + const float beta, float* Y, const int incy, base_device::AbacusDevice_t device_type) +{ + if (device_type == base_device::AbacusDevice_t::CpuDevice) { + sgemv_(&trans, &m, &n, &alpha, A, &lda, X, &incx, &beta, Y, &incy); + } +#ifdef __CUDA + else if (device_type == base_device::AbacusDevice_t::GpuDevice) { + cublasOperation_t cutransA = BlasUtils::judge_trans(false, trans, "gemv_op"); + cublasErrcheck(cublasSgemv(BlasUtils::cublas_handle, cutransA, m, n, &alpha, A, lda, X, incx, &beta, Y, incy)); + } +#endif + else { + throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); + } +} + +void BlasConnector::gemv(const char trans, const int m, const int n, + const double alpha, const double* A, const int lda, const double* X, const int incx, + const double beta, double* Y, const int incy, base_device::AbacusDevice_t device_type) +{ + if (device_type == base_device::AbacusDevice_t::CpuDevice) { + dgemv_(&trans, &m, &n, &alpha, A, &lda, X, &incx, &beta, Y, &incy); + } +#ifdef __CUDA + else if (device_type == base_device::AbacusDevice_t::GpuDevice) { + cublasOperation_t cutransA = BlasUtils::judge_trans(false, trans, "gemv_op"); + cublasErrcheck(cublasDgemv(BlasUtils::cublas_handle, cutransA, m, n, &alpha, A, lda, X, incx, &beta, Y, incy)); + } +#endif + else { + throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); + } +} + +void BlasConnector::gemv(const char trans, const int m, const int n, + const std::complex alpha, const std::complex *A, const int lda, const std::complex *X, const int incx, + const std::complex beta, std::complex *Y, const int incy, base_device::AbacusDevice_t device_type) +{ + if (device_type == base_device::AbacusDevice_t::CpuDevice) { + cgemv_(&trans, &m, &n, &alpha, A, &lda, X, &incx, &beta, Y, &incy); + } +#ifdef __CUDA + else if (device_type == base_device::AbacusDevice_t::GpuDevice) { + cuFloatComplex alpha_cu = make_cuFloatComplex(alpha.real(), alpha.imag()); + cuFloatComplex beta_cu = make_cuFloatComplex(beta.real(), beta.imag()); + cublasOperation_t cutransA = BlasUtils::judge_trans(true, trans, "gemv_op"); + cublasErrcheck(cublasCgemv(BlasUtils::cublas_handle, cutransA, m, n, &alpha_cu, (cuFloatComplex*)A, lda, (cuFloatComplex*)X, incx, &beta_cu, (cuFloatComplex*)Y, incy)); + } +#endif + else { + throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); + } +} + +void BlasConnector::gemv(const char trans, const int m, const int n, + const std::complex alpha, const std::complex *A, const int lda, const std::complex *X, const int incx, + const std::complex beta, std::complex *Y, const int incy, base_device::AbacusDevice_t device_type) +{ + if (device_type == base_device::AbacusDevice_t::CpuDevice) { + zgemv_(&trans, &m, &n, &alpha, A, &lda, X, &incx, &beta, Y, &incy); + } +#ifdef __CUDA + else if (device_type == base_device::AbacusDevice_t::GpuDevice) { + cuDoubleComplex alpha_cu = make_cuDoubleComplex(alpha.real(), alpha.imag()); + cuDoubleComplex beta_cu = make_cuDoubleComplex(beta.real(), beta.imag()); + cublasOperation_t cutransA = BlasUtils::judge_trans(true, trans, "gemv_op"); + cublasErrcheck(cublasZgemv(BlasUtils::cublas_handle, cutransA, m, n, &alpha_cu, (cuDoubleComplex*)A, lda, (cuDoubleComplex*)X, incx, &beta_cu, (cuDoubleComplex*)Y, incy)); + } +#endif + else { + throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); + } +} \ No newline at end of file diff --git a/source/module_base/blas_connector_vector.cpp b/source/module_base/blas_connector_vector.cpp new file mode 100644 index 0000000000..e87394b652 --- /dev/null +++ b/source/module_base/blas_connector_vector.cpp @@ -0,0 +1,473 @@ +#include "blas_connector.h" +#include "macros.h" + +#ifdef __DSP +#include "module_base/kernels/dsp/dsp_connector.h" +#include "module_base/global_variable.h" +#endif + +#ifdef __CUDA +#include +#include +#include "cublas_v2.h" +#include "module_base/kernels/math_kernel_op.h" +#include "module_base/module_device/memory_op.h" +#endif + + +void BlasConnector::axpy( const int n, const float alpha, const float *X, const int incX, float *Y, const int incY, base_device::AbacusDevice_t device_type) +{ + if (device_type == base_device::AbacusDevice_t::CpuDevice) { + saxpy_(&n, &alpha, X, &incX, Y, &incY); + } +#ifdef __CUDA + else if (device_type == base_device::AbacusDevice_t::GpuDevice) { + cublasErrcheck(cublasSaxpy(BlasUtils::cublas_handle, n, &alpha, X, incX, Y, incY)); + } +#endif + else { + throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); + } +} + +void BlasConnector::axpy( const int n, const double alpha, const double *X, const int incX, double *Y, const int incY, base_device::AbacusDevice_t device_type) +{ + if (device_type == base_device::AbacusDevice_t::CpuDevice) { + daxpy_(&n, &alpha, X, &incX, Y, &incY); + } +#ifdef __CUDA + else if (device_type == base_device::AbacusDevice_t::GpuDevice) { + cublasErrcheck(cublasDaxpy(BlasUtils::cublas_handle, n, &alpha, X, incX, Y, incY)); + } +#endif + else { + throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); + } +} + +void BlasConnector::axpy( const int n, const std::complex alpha, const std::complex *X, const int incX, std::complex *Y, const int incY, base_device::AbacusDevice_t device_type) +{ + if (device_type == base_device::AbacusDevice_t::CpuDevice) { + caxpy_(&n, &alpha, X, &incX, Y, &incY); + } +#ifdef __CUDA + else if (device_type == base_device::AbacusDevice_t::GpuDevice) { + cublasErrcheck(cublasCaxpy(BlasUtils::cublas_handle, n, (float2*)&alpha, (float2*)X, incX, (float2*)Y, incY)); + } +#endif + else { + throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); + } +} + +void BlasConnector::axpy( const int n, const std::complex alpha, const std::complex *X, const int incX, std::complex *Y, const int incY, base_device::AbacusDevice_t device_type) +{ + if (device_type == base_device::AbacusDevice_t::CpuDevice) { + zaxpy_(&n, &alpha, X, &incX, Y, &incY); + } +#ifdef __CUDA + else if (device_type == base_device::AbacusDevice_t::GpuDevice) { + cublasErrcheck(cublasZaxpy(BlasUtils::cublas_handle, n, (double2*)&alpha, (double2*)X, incX, (double2*)Y, incY)); + } +#endif + else { + throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); + } +} + + +// x=a*x +void BlasConnector::scal( const int n, const float alpha, float *X, const int incX, base_device::AbacusDevice_t device_type) +{ + if (device_type == base_device::AbacusDevice_t::CpuDevice) { + sscal_(&n, &alpha, X, &incX); + } +#ifdef __CUDA + else if (device_type == base_device::AbacusDevice_t::GpuDevice) { + cublasErrcheck(cublasSscal(BlasUtils::cublas_handle, n, &alpha, X, incX)); + } +#endif + else { + throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); + } +} + +void BlasConnector::scal( const int n, const double alpha, double *X, const int incX, base_device::AbacusDevice_t device_type) +{ + if (device_type == base_device::AbacusDevice_t::CpuDevice) { + dscal_(&n, &alpha, X, &incX); + } +#ifdef __CUDA + else if (device_type == base_device::AbacusDevice_t::GpuDevice) { + cublasErrcheck(cublasDscal(BlasUtils::cublas_handle, n, &alpha, X, incX)); + } +#endif + else { + throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); + } +} + +void BlasConnector::scal( const int n, const std::complex alpha, std::complex *X, const int incX, base_device::AbacusDevice_t device_type) +{ + if (device_type == base_device::AbacusDevice_t::CpuDevice) { + cscal_(&n, &alpha, X, &incX); + } +#ifdef __CUDA + else if (device_type == base_device::AbacusDevice_t::GpuDevice) { + cublasErrcheck(cublasCscal(BlasUtils::cublas_handle, n, (float2*)&alpha, (float2*)X, incX)); + } +#endif + else { + throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); + } +} + +void BlasConnector::scal( const int n, const std::complex alpha, std::complex *X, const int incX, base_device::AbacusDevice_t device_type) +{ + if (device_type == base_device::AbacusDevice_t::CpuDevice) { + zscal_(&n, &alpha, X, &incX); + } +#ifdef __CUDA + else if (device_type == base_device::AbacusDevice_t::GpuDevice) { + cublasErrcheck(cublasZscal(BlasUtils::cublas_handle, n, (double2*)&alpha, (double2*)X, incX)); + } +#endif + else { + throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); + } +} + + +// d=x*y +float BlasConnector::dot( const int n, const float*const X, const int incX, const float*const Y, const int incY, base_device::AbacusDevice_t device_type) +{ + if (device_type == base_device::AbacusDevice_t::CpuDevice) { + return sdot_(&n, X, &incX, Y, &incY); + } +#ifdef __CUDA + else if (device_type == base_device::AbacusDevice_t::GpuDevice){ + float result = 0.0; + cublasErrcheck(cublasSdot(BlasUtils::cublas_handle, n, X, incX, Y, incY, &result)); + return result; + } +#endif + else { + throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); + } +} + +double BlasConnector::dot( const int n, const double*const X, const int incX, const double*const Y, const int incY, base_device::AbacusDevice_t device_type) +{ + if (device_type == base_device::AbacusDevice_t::CpuDevice) { + return ddot_(&n, X, &incX, Y, &incY); + } +#ifdef __CUDA + else if (device_type == base_device::AbacusDevice_t::GpuDevice){ + double result = 0.0; + cublasErrcheck(cublasDdot(BlasUtils::cublas_handle, n, X, incX, Y, incY, &result)); + return result; + } +#endif + else { + throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); + } +} + +// d=x*y +float BlasConnector::dotu(const int n, const float*const X, const int incX, const float*const Y, const int incY, base_device::AbacusDevice_t device_type) +{ + return BlasConnector::dot(n, X, incX, Y, incY, device_type); +} + +double BlasConnector::dotu(const int n, const double*const X, const int incX, const double*const Y, const int incY, base_device::AbacusDevice_t device_type) +{ + return BlasConnector::dot(n, X, incX, Y, incY, device_type); +} + +std::complex BlasConnector::dotu(const int n, const std::complex*const X, const int incX, const std::complex*const Y, const int incY, base_device::AbacusDevice_t device_type) +{ + if (device_type == base_device::AbacusDevice_t::CpuDevice) { + const int incX2 = 2 * incX; + const int incY2 = 2 * incY; + const float*const x = reinterpret_cast(X); + const float*const y = reinterpret_cast(Y); + //Re(result)=Re(x)*Re(y)-Im(x)*Im(y) + //Im(result)=Re(x)*Im(y)+Im(x)*Re(y) + return std::complex( + BlasConnector::dot(n, x, incX2, y, incY2, device_type) - dot(n, x+1, incX2, y+1, incY2, device_type), + BlasConnector::dot(n, x, incX2, y+1, incY2, device_type) + dot(n, x+1, incX2, y, incY2, device_type)); + } + else { + throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); + } +} + +std::complex BlasConnector::dotu(const int n, const std::complex*const X, const int incX, const std::complex*const Y, const int incY, base_device::AbacusDevice_t device_type) +{ + if (device_type == base_device::AbacusDevice_t::CpuDevice) { + const int incX2 = 2 * incX; + const int incY2 = 2 * incY; + const double*const x = reinterpret_cast(X); + const double*const y = reinterpret_cast(Y); + //Re(result)=Re(x)*Re(y)-Im(x)*Im(y) + //Im(result)=Re(x)*Im(y)+Im(x)*Re(y) + return std::complex( + BlasConnector::dot(n, x, incX2, y, incY2, device_type) - dot(n, x+1, incX2, y+1, incY2, device_type), + BlasConnector::dot(n, x, incX2, y+1, incY2, device_type) + dot(n, x+1, incX2, y, incY2, device_type)); + } + else { + throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); + } +} + +// d = x.conj() * Vy +float BlasConnector::dotc(const int n, const float*const X, const int incX, const float*const Y, const int incY, base_device::AbacusDevice_t device_type) +{ + return BlasConnector::dot(n, X, incX, Y, incY, device_type); +} + +double BlasConnector::dotc(const int n, const double*const X, const int incX, const double*const Y, const int incY, base_device::AbacusDevice_t device_type) +{ + return BlasConnector::dot(n, X, incX, Y, incY, device_type); +} + +std::complex BlasConnector::dotc(const int n, const std::complex*const X, const int incX, const std::complex*const Y, const int incY, base_device::AbacusDevice_t device_type) +{ + if (device_type == base_device::AbacusDevice_t::CpuDevice) { + const int incX2 = 2 * incX; + const int incY2 = 2 * incY; + const float*const x = reinterpret_cast(X); + const float*const y = reinterpret_cast(Y); + // Re(result)=Re(X)*Re(Y)+Im(X)*Im(Y) + // Im(result)=Re(X)*Im(Y)-Im(X)*Re(Y) + return std::complex( + BlasConnector::dot(n, x, incX2, y, incY2, device_type) + dot(n, x+1, incX2, y+1, incY2, device_type), + BlasConnector::dot(n, x, incX2, y+1, incY2, device_type) - dot(n, x+1, incX2, y, incY2, device_type)); + } + else { + throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); + } +} + +std::complex BlasConnector::dotc(const int n, const std::complex*const X, const int incX, const std::complex*const Y, const int incY, base_device::AbacusDevice_t device_type) +{ + if (device_type == base_device::AbacusDevice_t::CpuDevice) { + const int incX2 = 2 * incX; + const int incY2 = 2 * incY; + const double*const x = reinterpret_cast(X); + const double*const y = reinterpret_cast(Y); + // Re(result)=Re(X)*Re(Y)+Im(X)*Im(Y) + // Im(result)=Re(X)*Im(Y)-Im(X)*Re(Y) + return std::complex( + BlasConnector::dot(n, x, incX2, y, incY2, device_type) + dot(n, x+1, incX2, y+1, incY2, device_type), + BlasConnector::dot(n, x, incX2, y+1, incY2, device_type) - dot(n, x+1, incX2, y, incY2, device_type)); + } + else { + throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); + } +} + +// out = ||x||_2 +float BlasConnector::nrm2( const int n, const float *X, const int incX, base_device::AbacusDevice_t device_type ) +{ + if (device_type == base_device::AbacusDevice_t::CpuDevice) { + return snrm2_( &n, X, &incX ); + } +#ifdef __CUDA + else if (device_type == base_device::AbacusDevice_t::GpuDevice){ + float result = 0.0; + cublasErrcheck(cublasSnrm2(BlasUtils::cublas_handle, n, X, incX, &result)); + return result; + } +#endif + else { + throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); + } +} + + +double BlasConnector::nrm2( const int n, const double *X, const int incX, base_device::AbacusDevice_t device_type ) +{ + if (device_type == base_device::AbacusDevice_t::CpuDevice) { + return dnrm2_( &n, X, &incX ); + } +#ifdef __CUDA + else if (device_type == base_device::AbacusDevice_t::GpuDevice){ + double result = 0.0; + cublasErrcheck(cublasDnrm2(BlasUtils::cublas_handle, n, X, incX, &result)); + return result; + } +#endif + else { + throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); + } +} + + +double BlasConnector::nrm2( const int n, const std::complex *X, const int incX, base_device::AbacusDevice_t device_type ) +{ + if (device_type == base_device::AbacusDevice_t::CpuDevice) { + return dznrm2_( &n, X, &incX ); + } +#ifdef __CUDA + else if (device_type == base_device::AbacusDevice_t::GpuDevice){ + double result = 0.0; + cublasErrcheck(cublasDznrm2(BlasUtils::cublas_handle, n, (double2*)X, incX, &result)); + return result; + } +#endif + else { + throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); + } +} + +// copies a into b +void BlasConnector::copy(const long n, const double *a, const int incx, double *b, const int incy, base_device::AbacusDevice_t device_type) +{ + if (device_type == base_device::AbacusDevice_t::CpuDevice) { + dcopy_(&n, a, &incx, b, &incy); + } + else { + throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); + } +} + +void BlasConnector::copy(const long n, const std::complex *a, const int incx, std::complex *b, const int incy, base_device::AbacusDevice_t device_type) +{ + if (device_type == base_device::AbacusDevice_t::CpuDevice) { + zcopy_(&n, a, &incx, b, &incy); + } + else { + throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); + } +} + + +template +void vector_mul_vector(const int& dim, T* result, const T* vector1, const T* vector2, base_device::AbacusDevice_t device_type){ + using Real = typename GetTypeReal::type; + if (device_type == base_device::AbacusDevice_t::CpuDevice) { +#ifdef _OPENMP +#pragma omp parallel for schedule(static, 4096 / sizeof(Real)) +#endif + for (int i = 0; i < dim; i++) + { + result[i] = vector1[i] * vector2[i]; + } + } +#ifdef __CUDA + else if (device_type == base_device::AbacusDevice_t::GpuDevice) { + ModuleBase::vector_mul_vector_op()(dim, result, vector1, vector2); + } +#endif + else { + throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); + } +} + + +template +void vector_div_vector(const int& dim, T* result, const T* vector1, const T* vector2, base_device::AbacusDevice_t device_type){ + using Real = typename GetTypeReal::type; + if (device_type == base_device::AbacusDevice_t::CpuDevice) { +#ifdef _OPENMP +#pragma omp parallel for schedule(static, 4096 / sizeof(Real)) +#endif + for (int i = 0; i < dim; i++) + { + result[i] = vector1[i] / vector2[i]; + } + } +#ifdef __CUDA + else if (device_type == base_device::AbacusDevice_t::GpuDevice) { + ModuleBase::vector_div_vector_op()(dim, result, vector1, vector2); + } +#endif + else { + throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); + } +} + +void vector_add_vector(const int& dim, float *result, const float *vector1, const float constant1, const float *vector2, const float constant2, base_device::AbacusDevice_t device_type) +{ + if (device_type == base_device::CpuDevice){ +#ifdef _OPENMP +#pragma omp parallel for schedule(static, 8192 / sizeof(float)) +#endif + for (int i = 0; i < dim; i++) + { + result[i] = vector1[i] * constant1 + vector2[i] * constant2; + } + } +#ifdef __CUDA + else if (device_type == base_device::GpuDevice) { + ModuleBase::vector_add_vector_op()(dim, result, vector1, constant1, vector2, constant2); + } +#endif + else { + throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); + } +} + +void vector_add_vector(const int& dim, double *result, const double *vector1, const double constant1, const double *vector2, const double constant2, base_device::AbacusDevice_t device_type) +{ + if (device_type == base_device::CpuDevice){ +#ifdef _OPENMP +#pragma omp parallel for schedule(static, 8192 / sizeof(double)) +#endif + for (int i = 0; i < dim; i++) + { + result[i] = vector1[i] * constant1 + vector2[i] * constant2; + } + } +#ifdef __CUDA + else if (device_type == base_device::GpuDevice) { + ModuleBase::vector_add_vector_op()(dim, result, vector1, constant1, vector2, constant2); + } +#endif + else { + throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); + } +} + +void vector_add_vector(const int& dim, std::complex *result, const std::complex *vector1, const float constant1, const std::complex *vector2, const float constant2, base_device::AbacusDevice_t device_type) +{ + if (device_type == base_device::CpuDevice){ +#ifdef _OPENMP +#pragma omp parallel for schedule(static, 8192 / sizeof(std::complex)) +#endif + for (int i = 0; i < dim; i++) + { + result[i] = vector1[i] * constant1 + vector2[i] * constant2; + } + } +#ifdef __CUDA + else if (device_type == base_device::GpuDevice) { + ModuleBase::vector_add_vector_op, base_device::DEVICE_GPU>()(dim, result, vector1, constant1, vector2, constant2); + } +#endif + else { + throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); + } +} + +void vector_add_vector(const int& dim, std::complex *result, const std::complex *vector1, const double constant1, const std::complex *vector2, const double constant2, base_device::AbacusDevice_t device_type) +{ + if (device_type == base_device::CpuDevice){ +#ifdef _OPENMP +#pragma omp parallel for schedule(static, 8192 / sizeof(std::complex)) +#endif + for (int i = 0; i < dim; i++) + { + result[i] = vector1[i] * constant1 + vector2[i] * constant2; + } + } +#ifdef __CUDA + else if (device_type == base_device::GpuDevice) { + ModuleBase::vector_add_vector_op, base_device::DEVICE_GPU>()(dim, result, vector1, constant1, vector2, constant2); + } +#endif + else { + throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); + } +} \ No newline at end of file From 52e797385441fe3447dff410ae5770c762123a1c Mon Sep 17 00:00:00 2001 From: Critsium-xy Date: Tue, 29 Apr 2025 17:10:33 +0800 Subject: [PATCH 2/4] MODIFY UNIT TESTS --- source/module_base/blas_connector_base.cpp | 2 -- source/module_basis/module_ao/test/CMakeLists.txt | 4 +++- source/module_basis/module_pw/kernels/test/CMakeLists.txt | 3 ++- source/module_basis/module_pw/test/CMakeLists.txt | 3 ++- source/module_hamilt_general/module_xc/test/CMakeLists.txt | 4 ++-- source/module_hamilt_pw/hamilt_pwdft/test/CMakeLists.txt | 2 +- source/module_md/test/CMakeLists.txt | 4 +++- source/module_relax/relax_new/test/CMakeLists.txt | 3 ++- 8 files changed, 15 insertions(+), 10 deletions(-) diff --git a/source/module_base/blas_connector_base.cpp b/source/module_base/blas_connector_base.cpp index 4b9080a561..1d1a81db94 100644 --- a/source/module_base/blas_connector_base.cpp +++ b/source/module_base/blas_connector_base.cpp @@ -11,8 +11,6 @@ namespace BlasUtils{ - static cublasHandle_t cublas_handle = nullptr; - void createGpuBlasHandle(){ if (cublas_handle == nullptr) { cublasErrcheck(cublasCreate(&cublas_handle)); diff --git a/source/module_basis/module_ao/test/CMakeLists.txt b/source/module_basis/module_ao/test/CMakeLists.txt index 054ddb52c0..1cde85d51e 100644 --- a/source/module_basis/module_ao/test/CMakeLists.txt +++ b/source/module_basis/module_ao/test/CMakeLists.txt @@ -7,7 +7,9 @@ list(APPEND depend_files ../../../module_base/math_ylmreal.cpp ../../../module_base/ylm.cpp ../../../module_base/memory.cpp - ../../../module_base/blas_connector.cpp + ../../../module_base/blas_connector_base.cpp + ../../../module_base/blas_connector_vector.cpp + ../../../module_base/blas_connector_matrix.cpp ../../../module_base/complexarray.cpp ../../../module_base/complexmatrix.cpp ../../../module_base/matrix.cpp diff --git a/source/module_basis/module_pw/kernels/test/CMakeLists.txt b/source/module_basis/module_pw/kernels/test/CMakeLists.txt index c190ded73d..b0c35b294a 100644 --- a/source/module_basis/module_pw/kernels/test/CMakeLists.txt +++ b/source/module_basis/module_pw/kernels/test/CMakeLists.txt @@ -8,5 +8,6 @@ AddTest( ../../../../module_base/parallel_global.cpp ../../../../module_base/parallel_reduce.cpp ../../../../module_base/parallel_comm.cpp ../../../../module_base/complexmatrix.cpp ../../../../module_base/matrix.cpp ../../../../module_base/memory.cpp - ../../../../module_base/libm/branred.cpp ../../../../module_base/libm/sincos.cpp ../../../../module_base/blas_connector.cpp + ../../../../module_base/libm/branred.cpp ../../../../module_base/libm/sincos.cpp + ../../../../module_base/blas_connector_base.cpp ../../../../module_base/blas_connector_vector.cpp ../../../../module_base/blas_connector_matrix.cpp ) \ No newline at end of file diff --git a/source/module_basis/module_pw/test/CMakeLists.txt b/source/module_basis/module_pw/test/CMakeLists.txt index f0464477df..8b1038af93 100644 --- a/source/module_basis/module_pw/test/CMakeLists.txt +++ b/source/module_basis/module_pw/test/CMakeLists.txt @@ -3,7 +3,8 @@ AddTest( TARGET pw_test LIBS parameter ${math_libs} planewave device SOURCES ../../../module_base/matrix.cpp ../../../module_base/complexmatrix.cpp ../../../module_base/matrix3.cpp ../../../module_base/tool_quit.cpp - ../../../module_base/mymath.cpp ../../../module_base/timer.cpp ../../../module_base/memory.cpp ../../../module_base/blas_connector.cpp + ../../../module_base/mymath.cpp ../../../module_base/timer.cpp ../../../module_base/memory.cpp + ../../../module_base/blas_connector_base.cpp ../../../module_base/blas_connector_vector.cpp ../../../module_base/blas_connector_matrix.cpp ../../../module_base/libm/branred.cpp ../../../module_base/libm/sincos.cpp ../../../module_base/module_device/memory_op.cpp depend_mock.cpp pw_test.cpp test1-1-1.cpp test1-1-2.cpp test1-2.cpp test1-3.cpp test1-4.cpp test1-5.cpp diff --git a/source/module_hamilt_general/module_xc/test/CMakeLists.txt b/source/module_hamilt_general/module_xc/test/CMakeLists.txt index 0dda934ac6..8212efe463 100644 --- a/source/module_hamilt_general/module_xc/test/CMakeLists.txt +++ b/source/module_hamilt_general/module_xc/test/CMakeLists.txt @@ -42,7 +42,7 @@ AddTest( ../../../module_base/memory.cpp ../../../module_base/libm/branred.cpp ../../../module_base/libm/sincos.cpp - ../../../module_base/blas_connector.cpp + ../../../module_base/blas_connector_base.cpp ../../../module_base/blas_connector_vector.cpp ../../../module_base/blas_connector_matrix.cpp ../../../module_basis/module_pw/module_fft/fft_bundle.cpp ../../../module_basis/module_pw/module_fft/fft_cpu.cpp ${FFT_SRC} @@ -75,7 +75,7 @@ AddTest( ../xc_functional_vxc.cpp ../xc_functional_libxc_vxc.cpp ../xc_functional_libxc_tools.cpp - ../../../module_base/blas_connector.cpp + ../../../module_base/blas_connector_base.cpp ../../../module_base/blas_connector_vector.cpp ../../../module_base/blas_connector_matrix.cpp ../../../module_base/matrix.cpp ../../../module_base/memory.cpp ../../../module_base/timer.cpp diff --git a/source/module_hamilt_pw/hamilt_pwdft/test/CMakeLists.txt b/source/module_hamilt_pw/hamilt_pwdft/test/CMakeLists.txt index c1b56517b7..f4f6ff247c 100644 --- a/source/module_hamilt_pw/hamilt_pwdft/test/CMakeLists.txt +++ b/source/module_hamilt_pw/hamilt_pwdft/test/CMakeLists.txt @@ -15,7 +15,7 @@ AddTest( ../../../module_base/global_file.cpp ../../../module_base/memory.cpp ../../../module_base/timer.cpp - ../../../module_base/blas_connector.cpp + ../../../module_base/blas_connector_base.cpp ../../../module_base/blas_connector_vector.cpp ../../../module_base/blas_connector_matrix.cpp ../../../module_base/parallel_global.cpp ../../../module_base/parallel_comm.cpp ../../../module_base/parallel_common.cpp diff --git a/source/module_md/test/CMakeLists.txt b/source/module_md/test/CMakeLists.txt index 66476fcf88..861b6fcbc2 100644 --- a/source/module_md/test/CMakeLists.txt +++ b/source/module_md/test/CMakeLists.txt @@ -23,7 +23,9 @@ list(APPEND depend_files ../../module_base/matrix3.cpp ../../module_base/matrix.cpp ../../module_base/timer.cpp - ../../module_base/blas_connector.cpp + ../../module_base/blas_connector_base.cpp + ../../module_base/blas_connector_matrix.cpp + ../../module_base/blas_connector_vector.cpp ../../module_base/memory.cpp ../../module_base/global_variable.cpp ../../module_base/global_function.cpp diff --git a/source/module_relax/relax_new/test/CMakeLists.txt b/source/module_relax/relax_new/test/CMakeLists.txt index bc0240e104..fe81c8ef0a 100644 --- a/source/module_relax/relax_new/test/CMakeLists.txt +++ b/source/module_relax/relax_new/test/CMakeLists.txt @@ -17,7 +17,8 @@ AddTest( SOURCES relax_test.cpp ../relax.cpp ../line_search.cpp ../../../module_base/tool_quit.cpp ../../../module_base/global_variable.cpp ../../../module_base/global_file.cpp ../../../module_base/memory.cpp ../../../module_base/timer.cpp ../../../module_base/matrix3.cpp ../../../module_base/intarray.cpp ../../../module_base/tool_title.cpp ../../../module_base/global_function.cpp ../../../module_base/complexmatrix.cpp ../../../module_base/matrix.cpp - ../../../module_base/complexarray.cpp ../../../module_base/tool_quit.cpp ../../../module_base/realarray.cpp ../../../module_base/blas_connector.cpp + ../../../module_base/complexarray.cpp ../../../module_base/tool_quit.cpp ../../../module_base/realarray.cpp + ../../../module_base/blas_connector_base.cpp ../../../module_base/blas_connector_vector.cpp ../../../module_base/blas_connector_matrix.cpp ../../../module_cell/update_cell.cpp ../../../module_cell/print_cell.cpp ../../../module_cell/bcast_cell.cpp ../../../module_io/output.cpp LIBS parameter ${math_libs} ) From 040be92f77ae63ff75c4951b07e9fd7d7bf32eca Mon Sep 17 00:00:00 2001 From: Critsium-xy Date: Wed, 30 Apr 2025 11:46:41 +0800 Subject: [PATCH 3/4] REMOVE BLAS_CONNECTOR.CPP --- source/module_base/CMakeLists.txt | 1 - source/module_base/blas_connector.cpp | 1100 ------------------------- 2 files changed, 1101 deletions(-) delete mode 100644 source/module_base/blas_connector.cpp diff --git a/source/module_base/CMakeLists.txt b/source/module_base/CMakeLists.txt index 3fb5015188..0d5088c28e 100644 --- a/source/module_base/CMakeLists.txt +++ b/source/module_base/CMakeLists.txt @@ -10,7 +10,6 @@ add_library( base OBJECT assoc_laguerre.cpp - #blas_connector.cpp blas_connector_base.cpp blas_connector_vector.cpp blas_connector_matrix.cpp diff --git a/source/module_base/blas_connector.cpp b/source/module_base/blas_connector.cpp deleted file mode 100644 index d3b4cd4d6e..0000000000 --- a/source/module_base/blas_connector.cpp +++ /dev/null @@ -1,1100 +0,0 @@ -#include "blas_connector.h" -#include "macros.h" - -#ifdef __DSP -#include "module_base/kernels/dsp/dsp_connector.h" -#include "module_base/global_variable.h" -#endif - -#ifdef __CUDA -#include -#include -#include "cublas_v2.h" -#include "module_base/kernels/math_kernel_op.h" -#include "module_base/module_device/memory_op.h" - - -namespace BlasUtils{ - - static cublasHandle_t cublas_handle = nullptr; - - void createGpuBlasHandle(){ - if (cublas_handle == nullptr) { - cublasErrcheck(cublasCreate(&cublas_handle)); - } - } - - void destoryBLAShandle(){ - if (cublas_handle != nullptr) { - cublasErrcheck(cublasDestroy(cublas_handle)); - cublas_handle = nullptr; - } - } - - - cublasOperation_t judge_trans(bool is_complex, const char& trans, const char* name) - { - if (trans == 'N') - { - return CUBLAS_OP_N; - } - else if(trans == 'T') - { - return CUBLAS_OP_T; - } - else if(is_complex && trans == 'C') - { - return CUBLAS_OP_C; - } - return CUBLAS_OP_N; - } - - cublasSideMode_t judge_side(const char& trans) - { - if (trans == 'L') - { - return CUBLAS_SIDE_LEFT; - } - else if (trans == 'R') - { - return CUBLAS_SIDE_RIGHT; - } - return CUBLAS_SIDE_LEFT; - } - - cublasFillMode_t judge_fill(const char& trans) - { - if (trans == 'F') - { - return CUBLAS_FILL_MODE_FULL; - } - else if (trans == 'U') - { - return CUBLAS_FILL_MODE_UPPER; - } - else if (trans == 'D') - { - return CUBLAS_FILL_MODE_LOWER; - } - return CUBLAS_FILL_MODE_FULL; - } - -} // namespace BlasUtils - -#endif - -void BlasConnector::axpy( const int n, const float alpha, const float *X, const int incX, float *Y, const int incY, base_device::AbacusDevice_t device_type) -{ - if (device_type == base_device::AbacusDevice_t::CpuDevice) { - saxpy_(&n, &alpha, X, &incX, Y, &incY); - } -#ifdef __CUDA - else if (device_type == base_device::AbacusDevice_t::GpuDevice) { - cublasErrcheck(cublasSaxpy(BlasUtils::cublas_handle, n, &alpha, X, incX, Y, incY)); - } -#endif - else { - throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); - } -} - -void BlasConnector::axpy( const int n, const double alpha, const double *X, const int incX, double *Y, const int incY, base_device::AbacusDevice_t device_type) -{ - if (device_type == base_device::AbacusDevice_t::CpuDevice) { - daxpy_(&n, &alpha, X, &incX, Y, &incY); - } -#ifdef __CUDA - else if (device_type == base_device::AbacusDevice_t::GpuDevice) { - cublasErrcheck(cublasDaxpy(BlasUtils::cublas_handle, n, &alpha, X, incX, Y, incY)); - } -#endif - else { - throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); - } -} - -void BlasConnector::axpy( const int n, const std::complex alpha, const std::complex *X, const int incX, std::complex *Y, const int incY, base_device::AbacusDevice_t device_type) -{ - if (device_type == base_device::AbacusDevice_t::CpuDevice) { - caxpy_(&n, &alpha, X, &incX, Y, &incY); - } -#ifdef __CUDA - else if (device_type == base_device::AbacusDevice_t::GpuDevice) { - cublasErrcheck(cublasCaxpy(BlasUtils::cublas_handle, n, (float2*)&alpha, (float2*)X, incX, (float2*)Y, incY)); - } -#endif - else { - throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); - } -} - -void BlasConnector::axpy( const int n, const std::complex alpha, const std::complex *X, const int incX, std::complex *Y, const int incY, base_device::AbacusDevice_t device_type) -{ - if (device_type == base_device::AbacusDevice_t::CpuDevice) { - zaxpy_(&n, &alpha, X, &incX, Y, &incY); - } -#ifdef __CUDA - else if (device_type == base_device::AbacusDevice_t::GpuDevice) { - cublasErrcheck(cublasZaxpy(BlasUtils::cublas_handle, n, (double2*)&alpha, (double2*)X, incX, (double2*)Y, incY)); - } -#endif - else { - throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); - } -} - - -// x=a*x -void BlasConnector::scal( const int n, const float alpha, float *X, const int incX, base_device::AbacusDevice_t device_type) -{ - if (device_type == base_device::AbacusDevice_t::CpuDevice) { - sscal_(&n, &alpha, X, &incX); - } -#ifdef __CUDA - else if (device_type == base_device::AbacusDevice_t::GpuDevice) { - cublasErrcheck(cublasSscal(BlasUtils::cublas_handle, n, &alpha, X, incX)); - } -#endif - else { - throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); - } -} - -void BlasConnector::scal( const int n, const double alpha, double *X, const int incX, base_device::AbacusDevice_t device_type) -{ - if (device_type == base_device::AbacusDevice_t::CpuDevice) { - dscal_(&n, &alpha, X, &incX); - } -#ifdef __CUDA - else if (device_type == base_device::AbacusDevice_t::GpuDevice) { - cublasErrcheck(cublasDscal(BlasUtils::cublas_handle, n, &alpha, X, incX)); - } -#endif - else { - throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); - } -} - -void BlasConnector::scal( const int n, const std::complex alpha, std::complex *X, const int incX, base_device::AbacusDevice_t device_type) -{ - if (device_type == base_device::AbacusDevice_t::CpuDevice) { - cscal_(&n, &alpha, X, &incX); - } -#ifdef __CUDA - else if (device_type == base_device::AbacusDevice_t::GpuDevice) { - cublasErrcheck(cublasCscal(BlasUtils::cublas_handle, n, (float2*)&alpha, (float2*)X, incX)); - } -#endif - else { - throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); - } -} - -void BlasConnector::scal( const int n, const std::complex alpha, std::complex *X, const int incX, base_device::AbacusDevice_t device_type) -{ - if (device_type == base_device::AbacusDevice_t::CpuDevice) { - zscal_(&n, &alpha, X, &incX); - } -#ifdef __CUDA - else if (device_type == base_device::AbacusDevice_t::GpuDevice) { - cublasErrcheck(cublasZscal(BlasUtils::cublas_handle, n, (double2*)&alpha, (double2*)X, incX)); - } -#endif - else { - throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); - } -} - - -// d=x*y -float BlasConnector::dot( const int n, const float*const X, const int incX, const float*const Y, const int incY, base_device::AbacusDevice_t device_type) -{ - if (device_type == base_device::AbacusDevice_t::CpuDevice) { - return sdot_(&n, X, &incX, Y, &incY); - } -#ifdef __CUDA - else if (device_type == base_device::AbacusDevice_t::GpuDevice){ - float result = 0.0; - cublasErrcheck(cublasSdot(BlasUtils::cublas_handle, n, X, incX, Y, incY, &result)); - return result; - } -#endif - else { - throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); - } -} - -double BlasConnector::dot( const int n, const double*const X, const int incX, const double*const Y, const int incY, base_device::AbacusDevice_t device_type) -{ - if (device_type == base_device::AbacusDevice_t::CpuDevice) { - return ddot_(&n, X, &incX, Y, &incY); - } -#ifdef __CUDA - else if (device_type == base_device::AbacusDevice_t::GpuDevice){ - double result = 0.0; - cublasErrcheck(cublasDdot(BlasUtils::cublas_handle, n, X, incX, Y, incY, &result)); - return result; - } -#endif - else { - throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); - } -} - -// d=x*y -float BlasConnector::dotu(const int n, const float*const X, const int incX, const float*const Y, const int incY, base_device::AbacusDevice_t device_type) -{ - return BlasConnector::dot(n, X, incX, Y, incY, device_type); -} - -double BlasConnector::dotu(const int n, const double*const X, const int incX, const double*const Y, const int incY, base_device::AbacusDevice_t device_type) -{ - return BlasConnector::dot(n, X, incX, Y, incY, device_type); -} - -std::complex BlasConnector::dotu(const int n, const std::complex*const X, const int incX, const std::complex*const Y, const int incY, base_device::AbacusDevice_t device_type) -{ - if (device_type == base_device::AbacusDevice_t::CpuDevice) { - const int incX2 = 2 * incX; - const int incY2 = 2 * incY; - const float*const x = reinterpret_cast(X); - const float*const y = reinterpret_cast(Y); - //Re(result)=Re(x)*Re(y)-Im(x)*Im(y) - //Im(result)=Re(x)*Im(y)+Im(x)*Re(y) - return std::complex( - BlasConnector::dot(n, x, incX2, y, incY2, device_type) - dot(n, x+1, incX2, y+1, incY2, device_type), - BlasConnector::dot(n, x, incX2, y+1, incY2, device_type) + dot(n, x+1, incX2, y, incY2, device_type)); - } - else { - throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); - } -} - -std::complex BlasConnector::dotu(const int n, const std::complex*const X, const int incX, const std::complex*const Y, const int incY, base_device::AbacusDevice_t device_type) -{ - if (device_type == base_device::AbacusDevice_t::CpuDevice) { - const int incX2 = 2 * incX; - const int incY2 = 2 * incY; - const double*const x = reinterpret_cast(X); - const double*const y = reinterpret_cast(Y); - //Re(result)=Re(x)*Re(y)-Im(x)*Im(y) - //Im(result)=Re(x)*Im(y)+Im(x)*Re(y) - return std::complex( - BlasConnector::dot(n, x, incX2, y, incY2, device_type) - dot(n, x+1, incX2, y+1, incY2, device_type), - BlasConnector::dot(n, x, incX2, y+1, incY2, device_type) + dot(n, x+1, incX2, y, incY2, device_type)); - } - else { - throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); - } -} - -// d = x.conj() * Vy -float BlasConnector::dotc(const int n, const float*const X, const int incX, const float*const Y, const int incY, base_device::AbacusDevice_t device_type) -{ - return BlasConnector::dot(n, X, incX, Y, incY, device_type); -} - -double BlasConnector::dotc(const int n, const double*const X, const int incX, const double*const Y, const int incY, base_device::AbacusDevice_t device_type) -{ - return BlasConnector::dot(n, X, incX, Y, incY, device_type); -} - -std::complex BlasConnector::dotc(const int n, const std::complex*const X, const int incX, const std::complex*const Y, const int incY, base_device::AbacusDevice_t device_type) -{ - if (device_type == base_device::AbacusDevice_t::CpuDevice) { - const int incX2 = 2 * incX; - const int incY2 = 2 * incY; - const float*const x = reinterpret_cast(X); - const float*const y = reinterpret_cast(Y); - // Re(result)=Re(X)*Re(Y)+Im(X)*Im(Y) - // Im(result)=Re(X)*Im(Y)-Im(X)*Re(Y) - return std::complex( - BlasConnector::dot(n, x, incX2, y, incY2, device_type) + dot(n, x+1, incX2, y+1, incY2, device_type), - BlasConnector::dot(n, x, incX2, y+1, incY2, device_type) - dot(n, x+1, incX2, y, incY2, device_type)); - } - else { - throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); - } -} - -std::complex BlasConnector::dotc(const int n, const std::complex*const X, const int incX, const std::complex*const Y, const int incY, base_device::AbacusDevice_t device_type) -{ - if (device_type == base_device::AbacusDevice_t::CpuDevice) { - const int incX2 = 2 * incX; - const int incY2 = 2 * incY; - const double*const x = reinterpret_cast(X); - const double*const y = reinterpret_cast(Y); - // Re(result)=Re(X)*Re(Y)+Im(X)*Im(Y) - // Im(result)=Re(X)*Im(Y)-Im(X)*Re(Y) - return std::complex( - BlasConnector::dot(n, x, incX2, y, incY2, device_type) + dot(n, x+1, incX2, y+1, incY2, device_type), - BlasConnector::dot(n, x, incX2, y+1, incY2, device_type) - dot(n, x+1, incX2, y, incY2, device_type)); - } - else { - throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); - } -} - -// C = a * A.? * B.? + b * C -// Row-Major part -void BlasConnector::gemm(const char transa, const char transb, const int m, const int n, const int k, - const float alpha, const float *a, const int lda, const float *b, const int ldb, - const float beta, float *c, const int ldc, base_device::AbacusDevice_t device_type) -{ - if (device_type == base_device::AbacusDevice_t::CpuDevice) { - sgemm_(&transb, &transa, &n, &m, &k, - &alpha, b, &ldb, a, &lda, - &beta, c, &ldc); - } -#ifdef __DSP - else if (device_type == base_device::AbacusDevice_t::DspDevice){ - mtfunc::sgemm_mth_(&transb, &transa, &n, &m, &k, - &alpha, b, &ldb, a, &lda, - &beta, c, &ldc, GlobalV::MY_RANK); - } -#endif -#ifdef __CUDA - else if (device_type == base_device::AbacusDevice_t::GpuDevice) { - cublasOperation_t cutransA = BlasUtils::judge_trans(false, transa, "gemm_op"); - cublasOperation_t cutransB = BlasUtils::judge_trans(false, transb, "gemm_op"); - cublasErrcheck(cublasSgemm(BlasUtils::cublas_handle, cutransA, cutransB, n, m, k, &alpha, b, ldb, a, lda, &beta, c, ldc)); - } -#endif - else { - throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); - } -} - -void BlasConnector::gemm(const char transa, - const char transb, - const int m, - const int n, - const int k, - const double alpha, - const double* a, - const int lda, - const double* b, - const int ldb, - const double beta, - double* c, - const int ldc, - base_device::AbacusDevice_t device_type) -{ - if (device_type == base_device::AbacusDevice_t::CpuDevice) - { - dgemm_(&transb, &transa, &n, &m, &k, &alpha, b, &ldb, a, &lda, &beta, c, &ldc); - } -#ifdef __DSP - else if (device_type == base_device::AbacusDevice_t::DspDevice) - { - mtfunc::dgemm_mth_(&transb, &transa, &n, &m, &k, &alpha, b, &ldb, a, &lda, &beta, c, &ldc, GlobalV::MY_RANK); - } -#endif - else if (device_type == base_device::AbacusDevice_t::GpuDevice) - { -#ifdef __CUDA - cublasOperation_t cutransA = BlasUtils::judge_trans(false, transa, "gemm_op"); - cublasOperation_t cutransB = BlasUtils::judge_trans(false, transb, "gemm_op"); - cublasErrcheck( - cublasDgemm(BlasUtils::cublas_handle, cutransA, cutransB, n, m, k, &alpha, b, ldb, a, lda, &beta, c, ldc)); -#endif - } - else { - throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); - } -} - -void BlasConnector::gemm(const char transa, - const char transb, - const int m, - const int n, - const int k, - const std::complex alpha, - const std::complex* a, - const int lda, - const std::complex* b, - const int ldb, - const std::complex beta, - std::complex* c, - const int ldc, - base_device::AbacusDevice_t device_type) -{ - if (device_type == base_device::AbacusDevice_t::CpuDevice) - { - cgemm_(&transb, &transa, &n, &m, &k, &alpha, b, &ldb, a, &lda, &beta, c, &ldc); - } -#ifdef __DSP - else if (device_type == base_device::AbacusDevice_t::DspDevice) - { - mtfunc::cgemm_mth_(&transb, &transa, &n, &m, &k, &alpha, b, &ldb, a, &lda, &beta, c, &ldc, GlobalV::MY_RANK); - } -#endif - else if (device_type == base_device::AbacusDevice_t::GpuDevice) - { -#ifdef __CUDA - cublasOperation_t cutransA = BlasUtils::judge_trans(false, transa, "gemm_op"); - cublasOperation_t cutransB = BlasUtils::judge_trans(false, transb, "gemm_op"); - cublasErrcheck(cublasCgemm(BlasUtils::cublas_handle, - cutransA, - cutransB, - n, - m, - k, - (float2*)&alpha, - (float2*)b, - ldb, - (float2*)a, - lda, - (float2*)&beta, - (float2*)c, - ldc)); -#endif - } - else { - throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); - } -} - -void BlasConnector::gemm(const char transa, - const char transb, - const int m, - const int n, - const int k, - const std::complex alpha, - const std::complex* a, - const int lda, - const std::complex* b, - const int ldb, - const std::complex beta, - std::complex* c, - const int ldc, - base_device::AbacusDevice_t device_type) -{ - if (device_type == base_device::AbacusDevice_t::CpuDevice) - { - zgemm_(&transb, &transa, &n, &m, &k, &alpha, b, &ldb, a, &lda, &beta, c, &ldc); - } -#ifdef __DSP - else if (device_type == base_device::AbacusDevice_t::DspDevice) - { - mtfunc::zgemm_mth_(&transb, &transa, &n, &m, &k, &alpha, b, &ldb, a, &lda, &beta, c, &ldc, GlobalV::MY_RANK); - } -#endif - else if (device_type == base_device::AbacusDevice_t::GpuDevice) - { -#ifdef __CUDA - cublasOperation_t cutransA = BlasUtils::judge_trans(false, transa, "gemm_op"); - cublasOperation_t cutransB = BlasUtils::judge_trans(false, transb, "gemm_op"); - cublasErrcheck(cublasZgemm(BlasUtils::cublas_handle, - cutransA, - cutransB, - n, - m, - k, - (double2*)&alpha, - (double2*)b, - ldb, - (double2*)a, - lda, - (double2*)&beta, - (double2*)c, - ldc)); -#endif - } - else { - throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); - } -} - -// Col-Major part -void BlasConnector::gemm_cm(const char transa, const char transb, const int m, const int n, const int k, - const float alpha, const float *a, const int lda, const float *b, const int ldb, - const float beta, float *c, const int ldc, base_device::AbacusDevice_t device_type) -{ - if (device_type == base_device::AbacusDevice_t::CpuDevice) { - sgemm_(&transa, &transb, &m, &n, &k, - &alpha, a, &lda, b, &ldb, - &beta, c, &ldc); - } -#ifdef __DSP - else if (device_type == base_device::AbacusDevice_t::DspDevice){ - mtfunc::sgemm_mth_(&transb, &transa, &m, &n, &k, - &alpha, a, &lda, b, &ldb, - &beta, c, &ldc, GlobalV::MY_RANK); - } -#endif -#ifdef __CUDA - else if (device_type == base_device::AbacusDevice_t::GpuDevice) { - cublasOperation_t cutransA = BlasUtils::judge_trans(false, transa, "gemm_op"); - cublasOperation_t cutransB = BlasUtils::judge_trans(false, transb, "gemm_op"); - cublasErrcheck(cublasSgemm(BlasUtils::cublas_handle, cutransA, cutransB, m, n, k, &alpha, a, lda, b, ldb, &beta, c, ldc)); - } -#endif - else { - throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); - } -} - -void BlasConnector::gemm_cm(const char transa, - const char transb, - const int m, - const int n, - const int k, - const double alpha, - const double* a, - const int lda, - const double* b, - const int ldb, - const double beta, - double* c, - const int ldc, - base_device::AbacusDevice_t device_type) -{ - if (device_type == base_device::AbacusDevice_t::CpuDevice) - { - dgemm_(&transa, &transb, &m, &n, &k, &alpha, a, &lda, b, &ldb, &beta, c, &ldc); - } -#ifdef __DSP - else if (device_type == base_device::AbacusDevice_t::DspDevice) - { - mtfunc::dgemm_mth_(&transa, &transb, &m, &n, &k, &alpha, a, &lda, b, &ldb, &beta, c, &ldc, GlobalV::MY_RANK); - } -#endif -#ifdef __CUDA - else if (device_type == base_device::AbacusDevice_t::GpuDevice) - { - cublasOperation_t cutransA = BlasUtils::judge_trans(false, transa, "gemm_op"); - cublasOperation_t cutransB = BlasUtils::judge_trans(false, transb, "gemm_op"); - cublasErrcheck( - cublasDgemm(BlasUtils::cublas_handle, cutransA, cutransB, m, n, k, &alpha, a, lda, b, ldb, &beta, c, ldc)); - } -#endif - else { - throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); - } -} - -void BlasConnector::gemm_cm(const char transa, - const char transb, - const int m, - const int n, - const int k, - const std::complex alpha, - const std::complex* a, - const int lda, - const std::complex* b, - const int ldb, - const std::complex beta, - std::complex* c, - const int ldc, - base_device::AbacusDevice_t device_type) -{ - if (device_type == base_device::AbacusDevice_t::CpuDevice) - { - cgemm_(&transa, &transb, &m, &n, &k, &alpha, a, &lda, b, &ldb, &beta, c, &ldc); - } -#ifdef __DSP - else if (device_type == base_device::AbacusDevice_t::DspDevice) - { - mtfunc::cgemm_mth_(&transa, &transb, &m, &n, &k, &alpha, a, &lda, b, &ldb, &beta, c, &ldc, GlobalV::MY_RANK); - } -#endif -#ifdef __CUDA - else if (device_type == base_device::AbacusDevice_t::GpuDevice) - { - cublasOperation_t cutransA = BlasUtils::judge_trans(false, transa, "gemm_op"); - cublasOperation_t cutransB = BlasUtils::judge_trans(false, transb, "gemm_op"); - cublasErrcheck(cublasCgemm(BlasUtils::cublas_handle, - cutransA, - cutransB, - m, - n, - k, - (float2*)&alpha, - (float2*)a, - lda, - (float2*)b, - ldb, - (float2*)&beta, - (float2*)c, - ldc)); - } -#endif - else { - throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); - } -} - -void BlasConnector::gemm_cm(const char transa, - const char transb, - const int m, - const int n, - const int k, - const std::complex alpha, - const std::complex* a, - const int lda, - const std::complex* b, - const int ldb, - const std::complex beta, - std::complex* c, - const int ldc, - base_device::AbacusDevice_t device_type) -{ - if (device_type == base_device::AbacusDevice_t::CpuDevice) - { - zgemm_(&transa, &transb, &m, &n, &k, &alpha, a, &lda, b, &ldb, &beta, c, &ldc); - } -#ifdef __DSP - else if (device_type == base_device::AbacusDevice_t::DspDevice) - { - mtfunc::zgemm_mth_(&transa, &transb, &m, &n, &k, &alpha, a, &lda, b, &ldb, &beta, c, &ldc, GlobalV::MY_RANK); - } -#endif -#ifdef __CUDA - else if (device_type == base_device::AbacusDevice_t::GpuDevice) - { - cublasOperation_t cutransA = BlasUtils::judge_trans(false, transa, "gemm_op"); - cublasOperation_t cutransB = BlasUtils::judge_trans(false, transb, "gemm_op"); - cublasErrcheck(cublasZgemm(BlasUtils::cublas_handle, - cutransA, - cutransB, - m, - n, - k, - (double2*)&alpha, - (double2*)a, - lda, - (double2*)b, - ldb, - (double2*)&beta, - (double2*)c, - ldc)); - } -#endif - else { - throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); - } -} - -// Symm and Hemm part. Only col-major is supported. - -void BlasConnector::symm_cm(const char side, const char uplo, const int m, const int n, - const float alpha, const float *a, const int lda, const float *b, const int ldb, - const float beta, float *c, const int ldc, base_device::AbacusDevice_t device_type) -{ - if (device_type == base_device::AbacusDevice_t::CpuDevice) { - ssymm_(&side, &uplo, &m, &n, - &alpha, a, &lda, b, &ldb, - &beta, c, &ldc); - } -#ifdef __CUDA - else if (device_type == base_device::AbacusDevice_t::GpuDevice) { - cublasSideMode_t sideMode = BlasUtils::judge_side(side); - cublasFillMode_t fillMode = BlasUtils::judge_fill(uplo); - cublasErrcheck(cublasSsymm(BlasUtils::cublas_handle, sideMode, fillMode, m, n, &alpha, a, lda, b, ldb, &beta, c, ldc)); - } -#endif - else { - throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); - } -} - -void BlasConnector::symm_cm(const char side, const char uplo, const int m, const int n, - const double alpha, const double *a, const int lda, const double *b, const int ldb, - const double beta, double *c, const int ldc, base_device::AbacusDevice_t device_type) -{ - if (device_type == base_device::AbacusDevice_t::CpuDevice) { - dsymm_(&side, &uplo, &m, &n, - &alpha, a, &lda, b, &ldb, - &beta, c, &ldc); - } -#ifdef __CUDA - else if (device_type == base_device::AbacusDevice_t::GpuDevice) { - cublasSideMode_t sideMode = BlasUtils::judge_side(side); - cublasFillMode_t fillMode = BlasUtils::judge_fill(uplo); - cublasErrcheck(cublasDsymm(BlasUtils::cublas_handle, sideMode, fillMode, m, n, &alpha, a, lda, b, ldb, &beta, c, ldc)); - } -#endif - else { - throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); - } -} - -void BlasConnector::symm_cm(const char side, const char uplo, const int m, const int n, - const std::complex alpha, const std::complex *a, const int lda, const std::complex *b, const int ldb, - const std::complex beta, std::complex *c, const int ldc, base_device::AbacusDevice_t device_type) -{ - if (device_type == base_device::AbacusDevice_t::CpuDevice) { - csymm_(&side, &uplo, &m, &n, - &alpha, a, &lda, b, &ldb, - &beta, c, &ldc); - } -#ifdef __CUDA - else if (device_type == base_device::AbacusDevice_t::GpuDevice) { - cublasSideMode_t sideMode = BlasUtils::judge_side(side); - cublasFillMode_t fillMode = BlasUtils::judge_fill(uplo); - cublasErrcheck(cublasCsymm(BlasUtils::cublas_handle, sideMode, fillMode, m, n, (float2*)&alpha, (float2*)a, lda, (float2*)b, ldb, (float2*)&beta, (float2*)c, ldc)); - } -#endif - else { - throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); - } -} - -void BlasConnector::symm_cm(const char side, const char uplo, const int m, const int n, - const std::complex alpha, const std::complex *a, const int lda, const std::complex *b, const int ldb, - const std::complex beta, std::complex *c, const int ldc, base_device::AbacusDevice_t device_type) -{ - if (device_type == base_device::AbacusDevice_t::CpuDevice) { - zsymm_(&side, &uplo, &m, &n, - &alpha, a, &lda, b, &ldb, - &beta, c, &ldc); - } -#ifdef __CUDA - else if (device_type == base_device::AbacusDevice_t::GpuDevice) { - cublasSideMode_t sideMode = BlasUtils::judge_side(side); - cublasFillMode_t fillMode = BlasUtils::judge_fill(uplo); - cublasErrcheck(cublasZsymm(BlasUtils::cublas_handle, sideMode, fillMode, m, n, (double2*)&alpha, (double2*)a, lda, (double2*)b, ldb, (double2*)&beta, (double2*)c, ldc)); - } -#endif - else { - throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); - } -} - -void BlasConnector::hemm_cm(const char side, const char uplo, const int m, const int n, - const float alpha, const float *a, const int lda, const float *b, const int ldb, - const float beta, float *c, const int ldc, base_device::AbacusDevice_t device_type) -{ - symm_cm(side, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc, device_type); -} - -void BlasConnector::hemm_cm(const char side, const char uplo, const int m, const int n, - const double alpha, const double *a, const int lda, const double *b, const int ldb, - const double beta, double *c, const int ldc, base_device::AbacusDevice_t device_type) -{ - symm_cm(side, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc, device_type); -} - -void BlasConnector::hemm_cm(char side, char uplo, int m, int n, - std::complex alpha, std::complex *a, int lda, std::complex *b, int ldb, - std::complex beta, std::complex *c, int ldc, base_device::AbacusDevice_t device_type) -{ - if (device_type == base_device::AbacusDevice_t::CpuDevice) { - chemm_(&side, &uplo, &m, &n, - &alpha, a, &lda, b, &ldb, - &beta, c, &ldc); - } -#ifdef __CUDA - else if (device_type == base_device::AbacusDevice_t::GpuDevice) { - cublasSideMode_t sideMode = BlasUtils::judge_side(side); - cublasFillMode_t fillMode = BlasUtils::judge_fill(uplo); - cublasErrcheck(cublasChemm(BlasUtils::cublas_handle, sideMode, fillMode, m, n, (float2*)&alpha, (float2*)a, lda, (float2*)b, ldb, (float2*)&beta, (float2*)c, ldc)); - } -#endif - else { - throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); - } -} - -void BlasConnector::hemm_cm(char side, char uplo, int m, int n, - std::complex alpha, std::complex *a, int lda, std::complex *b, int ldb, - std::complex beta, std::complex *c, int ldc, base_device::AbacusDevice_t device_type) -{ - if (device_type == base_device::AbacusDevice_t::CpuDevice) { - zhemm_(&side, &uplo, &m, &n, - &alpha, a, &lda, b, &ldb, - &beta, c, &ldc); - } -#ifdef __CUDA - else if (device_type == base_device::AbacusDevice_t::GpuDevice) { - cublasSideMode_t sideMode = BlasUtils::judge_side(side); - cublasFillMode_t fillMode = BlasUtils::judge_fill(uplo); - cublasErrcheck(cublasZhemm(BlasUtils::cublas_handle, sideMode, fillMode, m, n, (double2*)&alpha, (double2*)a, lda, (double2*)b, ldb, (double2*)&beta, (double2*)c, ldc)); - } -#endif - else { - throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); - } -} - -void BlasConnector::gemv(const char trans, const int m, const int n, - const float alpha, const float* A, const int lda, const float* X, const int incx, - const float beta, float* Y, const int incy, base_device::AbacusDevice_t device_type) -{ - if (device_type == base_device::AbacusDevice_t::CpuDevice) { - sgemv_(&trans, &m, &n, &alpha, A, &lda, X, &incx, &beta, Y, &incy); - } -#ifdef __CUDA - else if (device_type == base_device::AbacusDevice_t::GpuDevice) { - cublasOperation_t cutransA = BlasUtils::judge_trans(false, trans, "gemv_op"); - cublasErrcheck(cublasSgemv(BlasUtils::cublas_handle, cutransA, m, n, &alpha, A, lda, X, incx, &beta, Y, incy)); - } -#endif - else { - throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); - } -} - -void BlasConnector::gemv(const char trans, const int m, const int n, - const double alpha, const double* A, const int lda, const double* X, const int incx, - const double beta, double* Y, const int incy, base_device::AbacusDevice_t device_type) -{ - if (device_type == base_device::AbacusDevice_t::CpuDevice) { - dgemv_(&trans, &m, &n, &alpha, A, &lda, X, &incx, &beta, Y, &incy); - } -#ifdef __CUDA - else if (device_type == base_device::AbacusDevice_t::GpuDevice) { - cublasOperation_t cutransA = BlasUtils::judge_trans(false, trans, "gemv_op"); - cublasErrcheck(cublasDgemv(BlasUtils::cublas_handle, cutransA, m, n, &alpha, A, lda, X, incx, &beta, Y, incy)); - } -#endif - else { - throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); - } -} - -void BlasConnector::gemv(const char trans, const int m, const int n, - const std::complex alpha, const std::complex *A, const int lda, const std::complex *X, const int incx, - const std::complex beta, std::complex *Y, const int incy, base_device::AbacusDevice_t device_type) -{ - if (device_type == base_device::AbacusDevice_t::CpuDevice) { - cgemv_(&trans, &m, &n, &alpha, A, &lda, X, &incx, &beta, Y, &incy); - } -#ifdef __CUDA - else if (device_type == base_device::AbacusDevice_t::GpuDevice) { - cuFloatComplex alpha_cu = make_cuFloatComplex(alpha.real(), alpha.imag()); - cuFloatComplex beta_cu = make_cuFloatComplex(beta.real(), beta.imag()); - cublasOperation_t cutransA = BlasUtils::judge_trans(true, trans, "gemv_op"); - cublasErrcheck(cublasCgemv(BlasUtils::cublas_handle, cutransA, m, n, &alpha_cu, (cuFloatComplex*)A, lda, (cuFloatComplex*)X, incx, &beta_cu, (cuFloatComplex*)Y, incy)); - } -#endif - else { - throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); - } -} - -void BlasConnector::gemv(const char trans, const int m, const int n, - const std::complex alpha, const std::complex *A, const int lda, const std::complex *X, const int incx, - const std::complex beta, std::complex *Y, const int incy, base_device::AbacusDevice_t device_type) -{ - if (device_type == base_device::AbacusDevice_t::CpuDevice) { - zgemv_(&trans, &m, &n, &alpha, A, &lda, X, &incx, &beta, Y, &incy); - } -#ifdef __CUDA - else if (device_type == base_device::AbacusDevice_t::GpuDevice) { - cuDoubleComplex alpha_cu = make_cuDoubleComplex(alpha.real(), alpha.imag()); - cuDoubleComplex beta_cu = make_cuDoubleComplex(beta.real(), beta.imag()); - cublasOperation_t cutransA = BlasUtils::judge_trans(true, trans, "gemv_op"); - cublasErrcheck(cublasZgemv(BlasUtils::cublas_handle, cutransA, m, n, &alpha_cu, (cuDoubleComplex*)A, lda, (cuDoubleComplex*)X, incx, &beta_cu, (cuDoubleComplex*)Y, incy)); - } -#endif - else { - throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); - } -} - -// out = ||x||_2 -float BlasConnector::nrm2( const int n, const float *X, const int incX, base_device::AbacusDevice_t device_type ) -{ - if (device_type == base_device::AbacusDevice_t::CpuDevice) { - return snrm2_( &n, X, &incX ); - } -#ifdef __CUDA - else if (device_type == base_device::AbacusDevice_t::GpuDevice){ - float result = 0.0; - cublasErrcheck(cublasSnrm2(BlasUtils::cublas_handle, n, X, incX, &result)); - return result; - } -#endif - else { - throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); - } -} - - -double BlasConnector::nrm2( const int n, const double *X, const int incX, base_device::AbacusDevice_t device_type ) -{ - if (device_type == base_device::AbacusDevice_t::CpuDevice) { - return dnrm2_( &n, X, &incX ); - } -#ifdef __CUDA - else if (device_type == base_device::AbacusDevice_t::GpuDevice){ - double result = 0.0; - cublasErrcheck(cublasDnrm2(BlasUtils::cublas_handle, n, X, incX, &result)); - return result; - } -#endif - else { - throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); - } -} - - -double BlasConnector::nrm2( const int n, const std::complex *X, const int incX, base_device::AbacusDevice_t device_type ) -{ - if (device_type == base_device::AbacusDevice_t::CpuDevice) { - return dznrm2_( &n, X, &incX ); - } -#ifdef __CUDA - else if (device_type == base_device::AbacusDevice_t::GpuDevice){ - double result = 0.0; - cublasErrcheck(cublasDznrm2(BlasUtils::cublas_handle, n, (double2*)X, incX, &result)); - return result; - } -#endif - else { - throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); - } -} - -// copies a into b -void BlasConnector::copy(const long n, const double *a, const int incx, double *b, const int incy, base_device::AbacusDevice_t device_type) -{ - if (device_type == base_device::AbacusDevice_t::CpuDevice) { - dcopy_(&n, a, &incx, b, &incy); - } - else { - throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); - } -} - -void BlasConnector::copy(const long n, const std::complex *a, const int incx, std::complex *b, const int incy, base_device::AbacusDevice_t device_type) -{ - if (device_type == base_device::AbacusDevice_t::CpuDevice) { - zcopy_(&n, a, &incx, b, &incy); - } - else { - throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); - } -} - - -template -void vector_mul_vector(const int& dim, T* result, const T* vector1, const T* vector2, base_device::AbacusDevice_t device_type){ - using Real = typename GetTypeReal::type; - if (device_type == base_device::AbacusDevice_t::CpuDevice) { -#ifdef _OPENMP -#pragma omp parallel for schedule(static, 4096 / sizeof(Real)) -#endif - for (int i = 0; i < dim; i++) - { - result[i] = vector1[i] * vector2[i]; - } - } -#ifdef __CUDA - else if (device_type == base_device::AbacusDevice_t::GpuDevice) { - ModuleBase::vector_mul_vector_op()(dim, result, vector1, vector2); - } -#endif - else { - throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); - } -} - - -template -void vector_div_vector(const int& dim, T* result, const T* vector1, const T* vector2, base_device::AbacusDevice_t device_type){ - using Real = typename GetTypeReal::type; - if (device_type == base_device::AbacusDevice_t::CpuDevice) { -#ifdef _OPENMP -#pragma omp parallel for schedule(static, 4096 / sizeof(Real)) -#endif - for (int i = 0; i < dim; i++) - { - result[i] = vector1[i] / vector2[i]; - } - } -#ifdef __CUDA - else if (device_type == base_device::AbacusDevice_t::GpuDevice) { - ModuleBase::vector_div_vector_op()(dim, result, vector1, vector2); - } -#endif - else { - throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); - } -} - -void vector_add_vector(const int& dim, float *result, const float *vector1, const float constant1, const float *vector2, const float constant2, base_device::AbacusDevice_t device_type) -{ - if (device_type == base_device::CpuDevice){ -#ifdef _OPENMP -#pragma omp parallel for schedule(static, 8192 / sizeof(float)) -#endif - for (int i = 0; i < dim; i++) - { - result[i] = vector1[i] * constant1 + vector2[i] * constant2; - } - } -#ifdef __CUDA - else if (device_type == base_device::GpuDevice) { - ModuleBase::vector_add_vector_op()(dim, result, vector1, constant1, vector2, constant2); - } -#endif - else { - throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); - } -} - -void vector_add_vector(const int& dim, double *result, const double *vector1, const double constant1, const double *vector2, const double constant2, base_device::AbacusDevice_t device_type) -{ - if (device_type == base_device::CpuDevice){ -#ifdef _OPENMP -#pragma omp parallel for schedule(static, 8192 / sizeof(double)) -#endif - for (int i = 0; i < dim; i++) - { - result[i] = vector1[i] * constant1 + vector2[i] * constant2; - } - } -#ifdef __CUDA - else if (device_type == base_device::GpuDevice) { - ModuleBase::vector_add_vector_op()(dim, result, vector1, constant1, vector2, constant2); - } -#endif - else { - throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); - } -} - -void vector_add_vector(const int& dim, std::complex *result, const std::complex *vector1, const float constant1, const std::complex *vector2, const float constant2, base_device::AbacusDevice_t device_type) -{ - if (device_type == base_device::CpuDevice){ -#ifdef _OPENMP -#pragma omp parallel for schedule(static, 8192 / sizeof(std::complex)) -#endif - for (int i = 0; i < dim; i++) - { - result[i] = vector1[i] * constant1 + vector2[i] * constant2; - } - } -#ifdef __CUDA - else if (device_type == base_device::GpuDevice) { - ModuleBase::vector_add_vector_op, base_device::DEVICE_GPU>()(dim, result, vector1, constant1, vector2, constant2); - } -#endif - else { - throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); - } -} - -void vector_add_vector(const int& dim, std::complex *result, const std::complex *vector1, const double constant1, const std::complex *vector2, const double constant2, base_device::AbacusDevice_t device_type) -{ - if (device_type == base_device::CpuDevice){ -#ifdef _OPENMP -#pragma omp parallel for schedule(static, 8192 / sizeof(std::complex)) -#endif - for (int i = 0; i < dim; i++) - { - result[i] = vector1[i] * constant1 + vector2[i] * constant2; - } - } -#ifdef __CUDA - else if (device_type == base_device::GpuDevice) { - ModuleBase::vector_add_vector_op, base_device::DEVICE_GPU>()(dim, result, vector1, constant1, vector2, constant2); - } -#endif - else { - throw std::invalid_argument("device_type = " + std::to_string(device_type) + " in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); - } -} \ No newline at end of file From a175d53c94833a2df5d2c3f5f427eaad3dedf8ae Mon Sep 17 00:00:00 2001 From: Critsium-xy Date: Wed, 30 Apr 2025 12:49:37 +0800 Subject: [PATCH 4/4] FIX MAKEFILE --- source/Makefile.Objects | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/source/Makefile.Objects b/source/Makefile.Objects index e034d476cb..fef10643cb 100644 --- a/source/Makefile.Objects +++ b/source/Makefile.Objects @@ -126,7 +126,9 @@ OBJS_MAIN=main.o\ OBJS_BASE=abfs-vector3_order.o\ assoc_laguerre.o\ - blas_connector.o\ + blas_connector_base.o\ + blas_connector_vector.o\ + blas_connector_matrix.o\ complexarray.o\ complexmatrix.o\ clebsch_gordan_coeff.o\