Skip to content

Commit d5e9b6c

Browse files
committed
Add initializer and GPU tests
1 parent bc8ed09 commit d5e9b6c

File tree

2 files changed

+168
-0
lines changed

2 files changed

+168
-0
lines changed

source/module_base/blas_connector.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -327,6 +327,15 @@ class BlasConnector
327327
void vector_add_vector(const int& dim, std::complex<double> *result, const std::complex<double> *vector1, const double constant1, const std::complex<double> *vector2, const double constant2, base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice);
328328
};
329329

330+
#ifdef __CUDA
331+
332+
/// Helpers for managing the GPU BLAS handle (only declared when __CUDA is defined).
namespace BlasUtils
{
    /// Sets up the GPU BLAS handle.
    /// NOTE(review): presumably must be called before any BlasConnector routine
    /// is invoked with AbacusDevice_t::GpuDevice — confirm against the implementation.
    void createGpuBlasHandle();

    /// Counterpart of createGpuBlasHandle(); presumably releases the handle — TODO confirm.
    /// The spelling "destory" is the declared public name; callers depend on it.
    void destoryBLAShandle();
} // namespace BlasUtils
336+
337+
#endif
338+
330339
// If GATHER_INFO is defined, the original function is replaced with an "i" suffix,
331340
// preventing changes on the original code.
332341
// The real function call is at gather_math_lib_info.cpp

source/module_base/test/blas_connector_test.cpp

Lines changed: 159 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,33 @@ TEST(blas_connector, Scal) {
9393
}
9494
}
9595

96+
#ifdef __CUDA
97+
98+
/// GPU check of BlasConnector::scal for complex<double>: uploads a random
/// vector, scales it on the device, downloads it and compares every element
/// with a host-side reference (result[i] * scale).
TEST(blas_connector, ScalGpu) {
    const int size = 8;
    const std::complex<double> scale = {2, 3};
    // incx is the spacing between elements of result
    const int incx = 1;
    std::complex<double> result[size], answer[size];
    std::complex<double>* result_gpu = nullptr;

    // NOTE(review): the device memory ops are assumed to take an ELEMENT
    // count and multiply by sizeof(T) internally — confirm against
    // memory_op.h. Under that contract, passing a byte count
    // (size * sizeof(T)) makes the copies below touch 16x more memory than
    // the host array holds, overrunning `result` on the device-to-host copy.
    resmem_zd_op()(gpu_ctx, result_gpu, size);

    for (int i = 0; i < size; i++) {
        result[i] = std::complex<double>{static_cast<double>(std::rand() / double(RAND_MAX)),
                                         static_cast<double>(std::rand() / double(RAND_MAX))};
    }
    // Host reference, computed before `result` is overwritten by the download.
    for (int i = 0; i < size; i++) {
        answer[i] = result[i] * scale;
    }

    syncmem_z2z_h2d_op()(gpu_ctx, cpu_ctx, result_gpu, result, size);
    BlasConnector::scal(size, scale, result_gpu, incx, base_device::AbacusDevice_t::GpuDevice);
    syncmem_z2z_d2h_op()(cpu_ctx, gpu_ctx, result, result_gpu, size);
    delmem_zd_op()(gpu_ctx, result_gpu);

    for (int i = 0; i < size; i++) {
        EXPECT_DOUBLE_EQ(answer[i].real(), result[i].real());
        EXPECT_DOUBLE_EQ(answer[i].imag(), result[i].imag());
    }
}
121+
122+
#endif
96123

97124
TEST(blas_connector, daxpy_) {
98125
typedef double T;
@@ -136,6 +163,67 @@ TEST(blas_connector, zaxpy_) {
136163
}
137164
}
138165

166+
/// CPU check of BlasConnector::axpy for complex<double>: after the call the
/// second vector must equal x * scale + (its previous contents).
TEST(blas_connector, Axpy) {
    using Complex = std::complex<double>;
    constexpr int n = 8;
    const Complex alpha = {2, 3};
    const int stride_x = 1;
    const int stride_y = 1;

    // Each draw consumes two std::rand() values: real part first, then imaginary.
    const auto draw = []() {
        const double re = static_cast<double>(std::rand() / double(RAND_MAX));
        const double im = static_cast<double>(std::rand() / double(RAND_MAX));
        return Complex{re, im};
    };

    std::array<Complex, n> x, y, expected;
    std::generate(x.begin(), x.end(), draw);
    std::generate(y.begin(), y.end(), draw);

    // Reference result, taken before axpy updates y in place.
    std::transform(x.begin(), x.end(), y.begin(), expected.begin(),
                   [&alpha](const Complex& xi, const Complex& yi) { return xi * alpha + yi; });

    BlasConnector::axpy(n, alpha, x.data(), stride_x, y.data(), stride_y);

    for (int idx = 0; idx < n; ++idx) {
        EXPECT_DOUBLE_EQ(expected[idx].real(), y[idx].real());
        EXPECT_DOUBLE_EQ(expected[idx].imag(), y[idx].imag());
    }
}
189+
190+
#ifdef __CUDA
191+
192+
/// GPU check of BlasConnector::axpy for complex<double>: uploads x and the
/// accumulator, runs axpy on the device, downloads the accumulator and
/// compares it with the host reference x * scale + result.
TEST(blas_connector, AxpyGpu) {
    typedef std::complex<double> T;
    const int size = 8;
    const T scale = {2, 3};
    const int incx = 1;
    const int incy = 1;
    std::array<T, size> x_const, result, answer;
    T* x_gpu = nullptr;
    T* result_gpu = nullptr;

    // NOTE(review): the device memory ops are assumed to take an ELEMENT
    // count and multiply by sizeof(T) internally — confirm against
    // memory_op.h. Under that contract, a byte count (size * sizeof(T))
    // would make the device-to-host copy write far past the end of `result`.
    resmem_zd_op()(gpu_ctx, x_gpu, size);
    resmem_zd_op()(gpu_ctx, result_gpu, size);

    std::generate(x_const.begin(), x_const.end(), []() {
        return T{static_cast<double>(std::rand() / double(RAND_MAX)),
                 static_cast<double>(std::rand() / double(RAND_MAX))};
    });
    std::generate(result.begin(), result.end(), []() {
        return T{static_cast<double>(std::rand() / double(RAND_MAX)),
                 static_cast<double>(std::rand() / double(RAND_MAX))};
    });
    // Host reference, computed before `result` is overwritten by the download.
    for (int i = 0; i < size; i++) {
        answer[i] = x_const[i] * scale + result[i];
    }

    syncmem_z2z_h2d_op()(gpu_ctx, cpu_ctx, result_gpu, result.data(), size);
    syncmem_z2z_h2d_op()(gpu_ctx, cpu_ctx, x_gpu, x_const.data(), size);
    BlasConnector::axpy(size, scale, x_gpu, incx, result_gpu, incy, base_device::AbacusDevice_t::GpuDevice);
    syncmem_z2z_d2h_op()(cpu_ctx, gpu_ctx, result.data(), result_gpu, size);
    delmem_zd_op()(gpu_ctx, result_gpu);
    delmem_zd_op()(gpu_ctx, x_gpu);

    for (int i = 0; i < size; i++) {
        EXPECT_DOUBLE_EQ(answer[i].real(), result[i].real());
        EXPECT_DOUBLE_EQ(answer[i].imag(), result[i].imag());
    }
}
224+
225+
#endif
226+
139227
TEST(blas_connector, dcopy_) {
140228
typedef double T;
141229
long const size = 8;
@@ -532,7 +620,78 @@ TEST(blas_connector, Gemm) {
532620
}
533621
}
534622

623+
#ifdef __CUDA
624+
625+
/// GPU check of BlasConnector::gemm_cm for complex<double> with no transposes:
/// uploads A, B and C, runs the multiply on the device, downloads C and
/// compares the leading size_m x size_n entries with a host reference
/// alpha * (A * B) + beta * C. The reference indexes the matrices as
/// element(i, j) = data[i + j * ld].
TEST(blas_connector, GemmGpu) {
    typedef std::complex<double> T;
    const char transa_m = 'N';
    const char transb_m = 'N';
    const int size_m = 3;
    const int size_n = 4;
    const int size_k = 5;
    const T alpha_const = {2, 3};
    const T beta_const = {3, 4};
    const int lda = 6;
    const int ldb = 5;
    const int ldc = 4;
    std::array<T, size_k * lda> a_const;
    std::array<T, size_n * ldb> b_const;
    std::array<T, size_n * ldc> c_dot{}, answer, result;
    std::complex<double>* a_gpu = nullptr;
    std::complex<double>* b_gpu = nullptr;
    std::complex<double>* result_gpu = nullptr;

    // NOTE(review): the device memory ops are assumed to take an ELEMENT
    // count and multiply by sizeof(T) internally — confirm against
    // memory_op.h. Under that contract, byte counts would make every copy
    // 16x too large and overrun the host arrays on upload and download.
    resmem_zd_op()(gpu_ctx, a_gpu, a_const.size());
    resmem_zd_op()(gpu_ctx, b_gpu, b_const.size());
    resmem_zd_op()(gpu_ctx, result_gpu, result.size());

    std::generate(a_const.begin(), a_const.end(), []() {
        return T{static_cast<double>(std::rand() / double(RAND_MAX)),
                 static_cast<double>(std::rand() / double(RAND_MAX))};
    });
    std::generate(b_const.begin(), b_const.end(), []() {
        return T{static_cast<double>(std::rand() / double(RAND_MAX)),
                 static_cast<double>(std::rand() / double(RAND_MAX))};
    });
    std::generate(result.begin(), result.end(), []() {
        return T{static_cast<double>(std::rand() / double(RAND_MAX)),
                 static_cast<double>(std::rand() / double(RAND_MAX))};
    });

    // Host reference, computed before `result` is overwritten by the download.
    // c_dot starts zero-initialised and accumulates the A*B dot products.
    for (int i = 0; i < size_m; i++) {
        for (int j = 0; j < size_n; j++) {
            for (int k = 0; k < size_k; k++) {
                c_dot[i + j * ldc] +=
                    a_const[i + k * lda] * b_const[k + j * ldb];
            }
            answer[i + j * ldc] = alpha_const * c_dot[i + j * ldc] +
                                  beta_const * result[i + j * ldc];
        }
    }

    syncmem_z2z_h2d_op()(gpu_ctx, cpu_ctx, a_gpu, a_const.data(), a_const.size());
    syncmem_z2z_h2d_op()(gpu_ctx, cpu_ctx, b_gpu, b_const.data(), b_const.size());
    syncmem_z2z_h2d_op()(gpu_ctx, cpu_ctx, result_gpu, result.data(), result.size());
    BlasConnector::gemm_cm(transa_m, transb_m, size_m, size_n, size_k, alpha_const,
                           a_gpu, lda, b_gpu, ldb, beta_const,
                           result_gpu, ldc, base_device::AbacusDevice_t::GpuDevice);
    syncmem_z2z_d2h_op()(cpu_ctx, gpu_ctx, result.data(), result_gpu, result.size());
    delmem_zd_op()(gpu_ctx, result_gpu);
    delmem_zd_op()(gpu_ctx, a_gpu);
    delmem_zd_op()(gpu_ctx, b_gpu);

    // Only the size_m x size_n entries filled in by the reference are compared;
    // the remaining entries of `answer` are never written.
    for (int i = 0; i < size_m; i++) {
        for (int j = 0; j < size_n; j++) {
            EXPECT_DOUBLE_EQ(answer[i + j * ldc].real(),
                             result[i + j * ldc].real());
            EXPECT_DOUBLE_EQ(answer[i + j * ldc].imag(),
                             result[i + j * ldc].imag());
        }
    }
}
686+
687+
#endif
688+
535689
int main(int argc, char **argv) {
690+
#ifdef __CUDA
691+
std::cout << "Initializing CublasHandle..." << std::endl;
692+
BlasUtils::createGpuBlasHandle();
693+
std::cout << "Initializing CublasHandle Done." << std::endl;
694+
#endif
536695
testing::InitGoogleTest(&argc, argv);
537696
return RUN_ALL_TESTS();
538697
}

0 commit comments

Comments
 (0)