12 changes: 6 additions & 6 deletions source/module_base/blas_connector.cpp
@@ -668,7 +668,7 @@ void vector_mul_vector(const int& dim, T* result, const T* vector1, const T* vec
}
else if (device_type == base_device::AbacusDevice_t::GpuDevice){
#ifdef __CUDA
-ModuleBase::vector_mul_vector_op<T, base_device::DEVICE_GPU>()(gpu_ctx, dim, result, vector1, vector2);
+ModuleBase::vector_mul_vector_op<T, base_device::DEVICE_GPU>()(dim, result, vector1, vector2);
#endif
}
}
@@ -688,7 +688,7 @@ void vector_div_vector(const int& dim, T* result, const T* vector1, const T* vec
}
else if (device_type == base_device::AbacusDevice_t::GpuDevice){
#ifdef __CUDA
-ModuleBase::vector_div_vector_op<T, base_device::DEVICE_GPU>()(gpu_ctx, dim, result, vector1, vector2);
+ModuleBase::vector_div_vector_op<T, base_device::DEVICE_GPU>()(dim, result, vector1, vector2);
#endif
}
}
@@ -706,7 +706,7 @@ void vector_add_vector(const int& dim, float *result, const float *vector1, cons
}
else if (device_type == base_device::GpuDevice){
#ifdef __CUDA
-ModuleBase::constantvector_addORsub_constantVector_op<float, base_device::DEVICE_GPU>()(gpu_ctx, dim, result, vector1, constant1, vector2, constant2);
+ModuleBase::constantvector_addORsub_constantVector_op<float, base_device::DEVICE_GPU>()(dim, result, vector1, constant1, vector2, constant2);
#endif
}
}
@@ -724,7 +724,7 @@ void vector_add_vector(const int& dim, double *result, const double *vector1, co
}
else if (device_type == base_device::GpuDevice){
#ifdef __CUDA
-ModuleBase::constantvector_addORsub_constantVector_op<double, base_device::DEVICE_GPU>()(gpu_ctx, dim, result, vector1, constant1, vector2, constant2);
+ModuleBase::constantvector_addORsub_constantVector_op<double, base_device::DEVICE_GPU>()(dim, result, vector1, constant1, vector2, constant2);
#endif
}
}
@@ -742,7 +742,7 @@ void vector_add_vector(const int& dim, std::complex<float> *result, const std::c
}
else if (device_type == base_device::GpuDevice){
#ifdef __CUDA
-ModuleBase::constantvector_addORsub_constantVector_op<std::complex<float>, base_device::DEVICE_GPU>()(gpu_ctx, dim, result, vector1, constant1, vector2, constant2);
+ModuleBase::constantvector_addORsub_constantVector_op<std::complex<float>, base_device::DEVICE_GPU>()(dim, result, vector1, constant1, vector2, constant2);
#endif
}
}
@@ -760,7 +760,7 @@ void vector_add_vector(const int& dim, std::complex<double> *result, const std::
}
else if (device_type == base_device::GpuDevice){
#ifdef __CUDA
-ModuleBase::constantvector_addORsub_constantVector_op<std::complex<double>, base_device::DEVICE_GPU>()(gpu_ctx, dim, result, vector1, constant1, vector2, constant2);
+ModuleBase::constantvector_addORsub_constantVector_op<std::complex<double>, base_device::DEVICE_GPU>()(dim, result, vector1, constant1, vector2, constant2);
#endif
}
}
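
Every hunk in this file makes the same call-site change: the GPU branch drops the leading gpu_ctx argument, so each DEVICE_GPU functor is now invoked with data arguments only. A minimal sketch of one wrapper after the change (dispatch scaffolding paraphrased from the hunks above and assuming the repo's blas_connector.h declarations; the CpuDevice enumerator is assumed by symmetry with GpuDevice, and the CPU branch is simplified to an inline loop):

template <typename T>
void vector_mul_vector(const int& dim, T* result, const T* vector1, const T* vector2,
                       base_device::AbacusDevice_t device_type)
{
    if (device_type == base_device::AbacusDevice_t::CpuDevice) {
        // CPU path: plain elementwise product (the real code delegates to the
        // DEVICE_CPU functor shown in math_kernel_op.cpp below).
        for (int i = 0; i < dim; ++i) {
            result[i] = vector1[i] * vector2[i];
        }
    }
    else if (device_type == base_device::AbacusDevice_t::GpuDevice) {
#ifdef __CUDA
        // Post-PR call: no device-context pointer, only the problem data.
        ModuleBase::vector_mul_vector_op<T, base_device::DEVICE_GPU>()(dim, result, vector1, vector2);
#endif
    }
}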
111 changes: 38 additions & 73 deletions source/module_base/kernels/cuda/math_kernel_op.cu

Large diffs are not rendered by default.
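
This CUDA file's diff is collapsed, but the updated call sites above imply the mirror-image change on the device side: each DEVICE_GPU specialization presumably loses its const base_device::DEVICE_GPU* parameter. A hypothetical before/after for one specialization (signatures inferred from the call sites, not taken from the hidden diff):

template <typename T>
struct vector_mul_vector_op<T, base_device::DEVICE_GPU>
{
    using Real = typename GetTypeReal<T>::type;

    // Before (inferred): void operator()(const base_device::DEVICE_GPU* d,
    //                                    const int& dim, T* result,
    //                                    const T* vector1, const Real* vector2);

    // After: the context pointer is gone; the functor launches its kernel
    // directly from the data arguments.
    void operator()(const int& dim, T* result, const T* vector1, const Real* vector2);
};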

35 changes: 13 additions & 22 deletions source/module_base/kernels/math_kernel_op.cpp
@@ -110,8 +110,7 @@ struct calc_grad_with_block_op<T, base_device::DEVICE_CPU>
template <typename FPTYPE>
struct dot_real_op<FPTYPE, base_device::DEVICE_CPU>
{
-FPTYPE operator()(const base_device::DEVICE_CPU* d,
-const int& dim,
+FPTYPE operator()(const int& dim,
const FPTYPE* psi_L,
const FPTYPE* psi_R,
const bool reduce)
@@ -129,8 +128,7 @@ struct dot_real_op<FPTYPE, base_device::DEVICE_CPU>
template <typename FPTYPE>
struct dot_real_op<std::complex<FPTYPE>, base_device::DEVICE_CPU>
{
-FPTYPE operator()(const base_device::DEVICE_CPU* d,
-const int& dim,
+FPTYPE operator()(const int& dim,
const std::complex<FPTYPE>* psi_L,
const std::complex<FPTYPE>* psi_R,
const bool reduce)
@@ -153,7 +151,7 @@ template <typename T>
struct vector_div_constant_op<T, base_device::DEVICE_CPU>
{
using Real = typename GetTypeReal<T>::type;
-void operator()(const base_device::DEVICE_CPU* d, const int dim, T* result, const T* vector, const Real constant)
+void operator()(const int dim, T* result, const T* vector, const Real constant)
{
#ifdef _OPENMP
#pragma omp parallel for schedule(static, 4096 / sizeof(Real))
@@ -169,7 +167,7 @@ template <typename T>
struct vector_mul_vector_op<T, base_device::DEVICE_CPU>
{
using Real = typename GetTypeReal<T>::type;
-void operator()(const base_device::DEVICE_CPU* d, const int& dim, T* result, const T* vector1, const Real* vector2)
+void operator()(const int& dim, T* result, const T* vector1, const Real* vector2)
{
#ifdef _OPENMP
#pragma omp parallel for schedule(static, 4096 / sizeof(Real))
@@ -185,7 +183,7 @@ template <typename T>
struct vector_div_vector_op<T, base_device::DEVICE_CPU>
{
using Real = typename GetTypeReal<T>::type;
-void operator()(const base_device::DEVICE_CPU* d, const int& dim, T* result, const T* vector1, const Real* vector2)
+void operator()(const int& dim, T* result, const T* vector1, const Real* vector2)
{
#ifdef _OPENMP
#pragma omp parallel for schedule(static, 4096 / sizeof(Real))
@@ -201,8 +199,7 @@ template <typename T>
struct constantvector_addORsub_constantVector_op<T, base_device::DEVICE_CPU>
{
using Real = typename GetTypeReal<T>::type;
-void operator()(const base_device::DEVICE_CPU* d,
-const int& dim,
+void operator()(const int& dim,
T* result,
const T* vector1,
const Real constant1,
@@ -222,8 +219,7 @@ struct constantvector_addORsub_constantVector_op<T, base_device::DEVICE_CPU>
template <typename FPTYPE>
struct scal_op<FPTYPE, base_device::DEVICE_CPU>
{
-void operator()(const base_device::DEVICE_CPU* /*ctx*/,
-const int& N,
+void operator()(const int& N,
const std::complex<FPTYPE>* alpha,
std::complex<FPTYPE>* X,
const int& incx)
@@ -235,8 +231,7 @@ struct scal_op<FPTYPE, base_device::DEVICE_CPU>
template <typename T>
struct gemv_op<T, base_device::DEVICE_CPU>
{
-void operator()(const base_device::DEVICE_CPU* d,
-const char& trans,
+void operator()(const char& trans,
const int& m,
const int& n,
const T* alpha,
@@ -255,8 +250,7 @@ struct gemv_op<T, base_device::DEVICE_CPU>
template <typename T>
struct axpy_op<T, base_device::DEVICE_CPU>
{
-void operator()(const base_device::DEVICE_CPU* /*ctx*/,
-const int& dim,
+void operator()(const int& dim,
const T* alpha,
const T* X,
const int& incX,
@@ -270,8 +264,7 @@ struct axpy_op<T, base_device::DEVICE_CPU>
template <typename T>
struct gemm_op<T, base_device::DEVICE_CPU>
{
-void operator()(const base_device::DEVICE_CPU* /*ctx*/,
-const char& transa,
+void operator()(const char& transa,
const char& transb,
const int& m,
const int& n,
@@ -293,8 +286,7 @@ struct gemm_op<T, base_device::DEVICE_CPU>
template <typename T>
struct gemm_op_mt<T, base_device::DEVICE_CPU>
{
-void operator()(const base_device::DEVICE_CPU* /*ctx*/,
-const char& transa,
+void operator()(const char& transa,
const char& transb,
const int& m,
const int& n,
@@ -316,8 +308,7 @@ struct gemm_op_mt<T, base_device::DEVICE_CPU>
template <typename T>
struct matrixTranspose_op<T, base_device::DEVICE_CPU>
{
-void operator()(const base_device::DEVICE_CPU* d,
-const int& row,
+void operator()(const int& row,
const int& col,
const T* input_matrix,
T* output_matrix)
@@ -348,7 +339,7 @@ struct matrixTranspose_op<T, base_device::DEVICE_CPU>
template <typename T>
struct matrixSetToAnother<T, base_device::DEVICE_CPU>
{
-void operator()(const base_device::DEVICE_CPU* d, const int& n, const T* A, const int& LDA, T* B, const int& LDB)
+void operator()(const int& n, const T* A, const int& LDA, T* B, const int& LDB)
{
#ifdef _OPENMP
#pragma omp parallel for collapse(2) schedule(static, 8192 / sizeof(T))
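
Taken together, the CPU side of math_kernel_op.cpp now follows one uniform shape: a functor template specialized on a device tag, whose operator() receives only the problem data. A self-contained toy version of that pattern (device tag and functor names simplified; vector2 is T* here rather than the Real* obtained via GetTypeReal in the real code):

#include <cstdio>

// Stand-in for base_device::DEVICE_CPU; the real code also has a GPU tag.
struct DEVICE_CPU {};

// Primary template, specialized per device as in math_kernel_op.cpp.
template <typename T, typename Device>
struct vector_mul_vector_op;

template <typename T>
struct vector_mul_vector_op<T, DEVICE_CPU>
{
    // Post-PR signature: no device-context pointer, only the data.
    void operator()(const int& dim, T* result, const T* vector1, const T* vector2)
    {
        for (int i = 0; i < dim; ++i) {
            result[i] = vector1[i] * vector2[i];
        }
    }
};

int main()
{
    const double a[3] = {1.0, 2.0, 3.0};
    const double b[3] = {4.0, 5.0, 6.0};
    double r[3] = {};
    vector_mul_vector_op<double, DEVICE_CPU>()(3, r, a, b);
    std::printf("%g %g %g\n", r[0], r[1], r[2]); // prints: 4 10 18
    return 0;
}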