@@ -93,6 +93,33 @@ TEST(blas_connector, Scal) {
9393 }
9494}
9595
96+ #ifdef __CUDA
97+
98+ TEST (blas_connector, ScalGpu) {
99+ const int size = 8 ;
100+ const std::complex <double > scale = {2 , 3 };
101+ const int incx = 1 ;
102+ std::complex <double > result[8 ], answer[8 ];
103+ std::complex <double >* result_gpu = nullptr ;
104+ resmem_zd_op ()(gpu_ctx, result_gpu, 8 * sizeof (std::complex <double >));
105+ for (int i=0 ; i< size; i++) {
106+ result[i] = std::complex <double >{static_cast <double >(std::rand () / double (RAND_MAX)),
107+ static_cast <double >(std::rand () / double (RAND_MAX))};
108+ };
109+ for (int i = 0 ; i < size; i++)
110+ answer[i] = result[i] * scale;
111+ syncmem_z2z_h2d_op ()(gpu_ctx, cpu_ctx, result_gpu, result, sizeof (std::complex <double >) * 8 );
112+ BlasConnector::scal (size,scale,result_gpu,incx,base_device::AbacusDevice_t::GpuDevice);
113+ syncmem_z2z_d2h_op ()(cpu_ctx, gpu_ctx, result, result_gpu, sizeof (std::complex <double >) * 8 );
114+ delmem_zd_op ()(gpu_ctx, result_gpu);
115+ // incx is the spacing between elements if result
116+ for (int i = 0 ; i < size; i++) {
117+ EXPECT_DOUBLE_EQ (answer[i].real (), result[i].real ());
118+ EXPECT_DOUBLE_EQ (answer[i].imag (), result[i].imag ());
119+ }
120+ }
121+
122+ #endif
96123
97124TEST (blas_connector, daxpy_) {
98125 typedef double T;
@@ -136,6 +163,67 @@ TEST(blas_connector, zaxpy_) {
136163 }
137164}
138165
166+ TEST (blas_connector, Axpy) {
167+ typedef std::complex <double > T;
168+ const int size = 8 ;
169+ const T scale = {2 , 3 };
170+ const int incx = 1 ;
171+ const int incy = 1 ;
172+ std::array<T, size> x_const, result, answer;
173+ std::generate (x_const.begin (), x_const.end (), []() {
174+ return T{static_cast <double >(std::rand () / double (RAND_MAX)),
175+ static_cast <double >(std::rand () / double (RAND_MAX))};
176+ });
177+ std::generate (result.begin (), result.end (), []() {
178+ return T{static_cast <double >(std::rand () / double (RAND_MAX)),
179+ static_cast <double >(std::rand () / double (RAND_MAX))};
180+ });
181+ for (int i = 0 ; i < size; i++)
182+ answer[i] = x_const[i] * scale + result[i];
183+ BlasConnector::axpy (size, scale, x_const.data (), incx, result.data (), incy);
184+ for (int i = 0 ; i < size; i++) {
185+ EXPECT_DOUBLE_EQ (answer[i].real (), result[i].real ());
186+ EXPECT_DOUBLE_EQ (answer[i].imag (), result[i].imag ());
187+ }
188+ }
189+
190+ #ifdef __CUDA
191+
192+ TEST (blas_connector, AxpyGpu) {
193+ typedef std::complex <double > T;
194+ const int size = 8 ;
195+ const T scale = {2 , 3 };
196+ const int incx = 1 ;
197+ const int incy = 1 ;
198+ std::array<T, size> x_const, result, answer;
199+ T* x_gpu = nullptr ;
200+ T* result_gpu = nullptr ;
201+ resmem_zd_op ()(gpu_ctx, x_gpu, size * sizeof (std::complex <double >));
202+ resmem_zd_op ()(gpu_ctx, result_gpu, size * sizeof (std::complex <double >));
203+ std::generate (x_const.begin (), x_const.end (), []() {
204+ return T{static_cast <double >(std::rand () / double (RAND_MAX)),
205+ static_cast <double >(std::rand () / double (RAND_MAX))};
206+ });
207+ std::generate (result.begin (), result.end (), []() {
208+ return T{static_cast <double >(std::rand () / double (RAND_MAX)),
209+ static_cast <double >(std::rand () / double (RAND_MAX))};
210+ });
211+ for (int i = 0 ; i < size; i++)
212+ answer[i] = x_const[i] * scale + result[i];
213+ syncmem_z2z_h2d_op ()(gpu_ctx, cpu_ctx, result_gpu, result.data (), sizeof (std::complex <double >) * size);
214+ syncmem_z2z_h2d_op ()(gpu_ctx, cpu_ctx, x_gpu, x_const.data (), sizeof (std::complex <double >) * size);
215+ BlasConnector::axpy (size, scale, x_gpu, incx, result_gpu, incy, base_device::AbacusDevice_t::GpuDevice);
216+ syncmem_z2z_d2h_op ()(cpu_ctx, gpu_ctx, result.data (), result_gpu, sizeof (std::complex <double >) * size);
217+ delmem_zd_op ()(gpu_ctx, result_gpu);
218+ delmem_zd_op ()(gpu_ctx, x_gpu);
219+ for (int i = 0 ; i < size; i++) {
220+ EXPECT_DOUBLE_EQ (answer[i].real (), result[i].real ());
221+ EXPECT_DOUBLE_EQ (answer[i].imag (), result[i].imag ());
222+ }
223+ }
224+
225+ #endif
226+
139227TEST (blas_connector, dcopy_) {
140228 typedef double T;
141229 long const size = 8 ;
@@ -532,7 +620,78 @@ TEST(blas_connector, Gemm) {
532620 }
533621}
534622
623+ #ifdef __CUDA
624+
625+ TEST (blas_connector, GemmGpu) {
626+ typedef std::complex <double > T;
627+ const char transa_m = ' N' ;
628+ const char transb_m = ' N' ;
629+ const int size_m = 3 ;
630+ const int size_n = 4 ;
631+ const int size_k = 5 ;
632+ const T alpha_const = {2 , 3 };
633+ const T beta_const = {3 , 4 };
634+ const int lda = 6 ;
635+ const int ldb = 5 ;
636+ const int ldc = 4 ;
637+ std::array<T, size_k * lda> a_const;
638+ std::array<T, size_n * ldb> b_const;
639+ std::array<T, size_n * ldc> c_dot{}, answer, result;
640+ std::complex <double >* a_gpu = nullptr ;
641+ std::complex <double >* b_gpu = nullptr ;
642+ std::complex <double >* result_gpu = nullptr ;
643+ resmem_zd_op ()(gpu_ctx, a_gpu, size_k * lda * sizeof (std::complex <double >));
644+ resmem_zd_op ()(gpu_ctx, b_gpu, size_n * ldb * sizeof (std::complex <double >));
645+ resmem_zd_op ()(gpu_ctx, result_gpu, size_n * ldc * sizeof (std::complex <double >));
646+ std::generate (a_const.begin (), a_const.end (), []() {
647+ return T{static_cast <double >(std::rand () / double (RAND_MAX)),
648+ static_cast <double >(std::rand () / double (RAND_MAX))};
649+ });
650+ std::generate (b_const.begin (), b_const.end (), []() {
651+ return T{static_cast <double >(std::rand () / double (RAND_MAX)),
652+ static_cast <double >(std::rand () / double (RAND_MAX))};
653+ });
654+ std::generate (result.begin (), result.end (), []() {
655+ return T{static_cast <double >(std::rand () / double (RAND_MAX)),
656+ static_cast <double >(std::rand () / double (RAND_MAX))};
657+ });
658+ for (int i = 0 ; i < size_m; i++) {
659+ for (int j = 0 ; j < size_n; j++) {
660+ for (int k = 0 ; k < size_k; k++) {
661+ c_dot[i + j * ldc] +=
662+ a_const[i + k * lda] * b_const[k + j * ldb];
663+ }
664+ answer[i + j * ldc] = alpha_const * c_dot[i + j * ldc] +
665+ beta_const * result[i + j * ldc];
666+ }
667+ }
668+ syncmem_z2z_h2d_op ()(gpu_ctx, cpu_ctx, a_gpu, a_const.data (), sizeof (std::complex <double >) * size_k * lda);
669+ syncmem_z2z_h2d_op ()(gpu_ctx, cpu_ctx, b_gpu, b_const.data (), sizeof (std::complex <double >) * size_n * ldb);
670+ syncmem_z2z_h2d_op ()(gpu_ctx, cpu_ctx, result_gpu, result.data (), sizeof (std::complex <double >) * size_n * ldc);
671+ BlasConnector::gemm_cm (transa_m, transb_m, size_m, size_n, size_k, alpha_const,
672+ a_gpu, lda, b_gpu, ldb, beta_const,
673+ result_gpu, ldc, base_device::AbacusDevice_t::GpuDevice);
674+ syncmem_z2z_d2h_op ()(cpu_ctx, gpu_ctx, result.data (), result_gpu, sizeof (std::complex <double >) * size_n * ldc);
675+ delmem_zd_op ()(gpu_ctx, result_gpu);
676+ delmem_zd_op ()(gpu_ctx, a_gpu);
677+ delmem_zd_op ()(gpu_ctx, b_gpu);
678+ for (int i = 0 ; i < size_m; i++)
679+ for (int j = 0 ; j < size_n; j++) {
680+ EXPECT_DOUBLE_EQ (answer[i + j * ldc].real (),
681+ result[i + j * ldc].real ());
682+ EXPECT_DOUBLE_EQ (answer[i + j * ldc].imag (),
683+ result[i + j * ldc].imag ());
684+ }
685+ }
686+
687+ #endif
688+
535689int main (int argc, char **argv) {
690+ #ifdef __CUDA
691+ std::cout << " Initializing CublasHandle..." << std::endl;
692+ BlasUtils::createGpuBlasHandle ();
693+ std::cout << " Initializing CublasHandle Done." << std::endl;
694+ #endif
536695 testing::InitGoogleTest (&argc, argv);
537696 return RUN_ALL_TESTS ();
538697}
0 commit comments