Skip to content

Commit 6994a29

Browse files
authored
[SYCLomatic #1847] Add test for the migration of cublas[Set|Get]Vector[Async]_64 and cublas[Set|Get]Matrix[Async]_64 (#676)
Signed-off-by: Jiang, Zhiwei <[email protected]>
1 parent 139eaf5 commit 6994a29

File tree

3 files changed

+107
-0
lines changed

3 files changed

+107
-0
lines changed

features/feature_case/cublas/cublas_64.cu

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,18 @@ void foo() {
5454
int64_t incx;
5555
int64_t incy;
5656

57+
int64_t elemSize;
58+
cudaStream_t stream;
59+
status = cublasSetVector_64(n, elemSize, A_s, incx, C_s, incy);
60+
status = cublasGetVector_64(n, elemSize, A_s, incx, C_s, incy);
61+
status = cublasSetVectorAsync_64(n, elemSize, A_s, incx, C_s, incy, stream);
62+
status = cublasGetVectorAsync_64(n, elemSize, A_s, incx, C_s, incy, stream);
63+
64+
status = cublasSetMatrix_64(m, n, elemSize, A_s, lda, C_s, ldb);
65+
status = cublasGetMatrix_64(m, n, elemSize, A_s, lda, C_s, ldb);
66+
status = cublasSetMatrixAsync_64(m, n, elemSize, A_s, lda, C_s, ldb, stream);
67+
status = cublasGetMatrixAsync_64(m, n, elemSize, A_s, lda, C_s, ldb, stream);
68+
5769
status = cublasIsamax_64(handle, n, A_s, lda, &result);
5870
status = cublasIdamax_64(handle, n, A_d, lda, &result);
5971
status = cublasIcamax_64(handle, n, A_c, lda, &result);

help_function/help_function.xml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@
7070
<test testName="blas_utils_get_value_usm" configFile="config/TEMPLATE_help_function_blas_usm.xml" splitGroup="double" />
7171
<test testName="blas_utils_parameter_wrapper_buf" configFile="config/TEMPLATE_help_function_blas_usmnone.xml" splitGroup="double"/>
7272
<test testName="blas_utils_parameter_wrapper_usm" configFile="config/TEMPLATE_help_function_blas_usm.xml" splitGroup="double"/>
73+
<test testName="blas_utils_matrix_mem_copy_test" configFile="config/TEMPLATE_help_function_skip_cuda_backend.xml" />
7374
<test testName="util_cast_value_test" configFile="config/TEMPLATE_help_function_skip_double.xml" splitGroup="double" />
7475
<test testName="util_fast_length_test" configFile="config/TEMPLATE_help_function_skip_cuda_backend.xml" />
7576
<test testName="util_make_index_sequence_test" configFile="config/TEMPLATE_help_function_skip_cuda_backend.xml" />
blas_utils_matrix_mem_copy_test.cpp

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
// ===------- blas_utils_matrix_mem_copy_test.cpp ---------- *- C++ -* ---=== //
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
// ===--------------------------------------------------------------------=== //
8+
9+
#include <sycl/sycl.hpp>
10+
#include <dpct/dpct.hpp>
11+
#include <dpct/blas_utils.hpp>
12+
13+
const std::int64_t M = 3;
14+
const std::int64_t N = 2;
15+
16+
void matrix_mem_copy_test_1() {
17+
dpct::device_ext &dev_ct1 = dpct::get_current_device();
18+
sycl::queue &q_ct1 = dev_ct1.default_queue();
19+
float *devPtrA;
20+
devPtrA = (float *)sycl::malloc_device(M * N * sizeof(float), q_ct1);
21+
float host_a[6] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f};
22+
float host_b[6] = {-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f};
23+
float host_c[6] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f};
24+
25+
dpct::blas::matrix_mem_copy(devPtrA, host_a, M, M, M, N, sizeof(float),
26+
dpct::automatic, q_ct1, true);
27+
dpct::blas::matrix_mem_copy(host_b, devPtrA, M, M, M, N, sizeof(float));
28+
29+
for (int i = 0; i < M * N; i++) {
30+
if (fabs(host_b[i] - host_c[i]) > 1e-5) {
31+
printf("matrix_mem_copy_test_1.1 failed\n");
32+
exit(-1);
33+
}
34+
}
35+
36+
// Because to_ld == from_ld, matrix_mem_copy just do one copy.
37+
// All padding data is also copied except the last padding.
38+
float host_d[6] = {-2.0f, -2.0f, -2.0f, -2.0f, -2.0f, -2.0f};
39+
float host_e[6] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, -2.0f};
40+
dpct::blas::matrix_mem_copy(host_d, devPtrA, M /*to_ld*/, M /*from_ld*/,
41+
M - 1 /*rows*/, N /*cols*/, sizeof(float));
42+
43+
for (int i = 0; i < M * N; i++) {
44+
if (fabs(host_d[i] - host_e[i]) > 1e-5) {
45+
printf("matrix_mem_copy_test_1.2 failed\n");
46+
exit(-1);
47+
}
48+
}
49+
50+
sycl::free(devPtrA, q_ct1);
51+
}
52+
53+
void matrix_mem_copy_test_2() {
54+
dpct::device_ext &dev_ct1 = dpct::get_current_device();
55+
sycl::queue &q_ct1 = dev_ct1.default_queue();
56+
57+
float *devPtrA;
58+
devPtrA = (float *)sycl::malloc_device(M * N * sizeof(float), q_ct1);
59+
float host_a[6] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f};
60+
float host_b[6] = {-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f};
61+
float host_c[6] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f};
62+
63+
dpct::blas::matrix_mem_copy(devPtrA, host_a, M, M, M, N, sizeof(float),
64+
dpct::automatic, q_ct1, true);
65+
dpct::blas::matrix_mem_copy(host_b, devPtrA, M, M, M, N, sizeof(float));
66+
67+
for (int i = 0; i < M * N; i++) {
68+
if (fabs(host_b[i] - host_c[i]) > 1e-5) {
69+
printf("matrix_mem_copy_test_2.1 failed\n");
70+
exit(-1);
71+
}
72+
}
73+
74+
float host_d[4] = {-2.0f, -2.0f, -2.0f, -2.0f};
75+
float host_e[4] = {1.0f, 2.0f, 4.0f, 5.0f};
76+
dpct::blas::matrix_mem_copy(host_d, devPtrA, M - 1 /*to_ld*/, M /*from_ld*/,
77+
M - 1 /*rows*/, N /*cols*/, sizeof(float));
78+
79+
for (int i = 0; i < (M - 1) * N; i++) {
80+
if (fabs(host_d[i] - host_e[i]) > 1e-5) {
81+
printf("matrix_mem_copy_test_2.2 failed\n");
82+
exit(-1);
83+
}
84+
}
85+
86+
sycl::free(devPtrA, q_ct1);
87+
}
88+
89+
// Runs every matrix_mem_copy scenario in order. Each scenario terminates the
// process with exit(-1) on a mismatch, so the final message is printed only
// when all of them succeeded.
int main() {
  void (*const test_cases[])() = {matrix_mem_copy_test_1,
                                  matrix_mem_copy_test_2};
  for (auto run_case : test_cases) {
    run_case();
  }
  printf("matrix_mem_copy_test passed\n");
  return 0;
}

0 commit comments

Comments
 (0)