Skip to content

Commit aba7e10

Browse files
authored
[SYCLomatic #1846] Add test case for the migration of cusparseSpSM (#674)
Signed-off-by: Jiang, Zhiwei <[email protected]>
1 parent 95b1461 commit aba7e10

File tree

6 files changed

+798
-1
lines changed

6 files changed

+798
-1
lines changed
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
3+
<test driverID="test_feature" name="TEMPLATE">
4+
<description>test</description>
5+
<files>
6+
<file path="feature_case/cusparse/${testName}.cu" />
7+
</files>
8+
<rules>
9+
<platformRule OSFamily="Linux" kit="CUDA11.3" kitRange="OLDER" runOnThisPlatform="false"/>
10+
<platformRule OSFamily="Windows" kit="CUDA11.3" kitRange="OLDER" runOnThisPlatform="false"/>
11+
<optlevelRule GPUFeature="NOT double" excludeOptlevelNameString="gpu" />
12+
<optlevelRule excludeOptlevelNameString="cuda" />
13+
</rules>
14+
</test>
Lines changed: 282 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,282 @@
1+
// ===------- cusparse_9.cu -------------------------------- *- CUDA -* ----===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
// ===----------------------------------------------------------------------===//
8+
9+
#include "cusparse.h"
10+
11+
#include <cmath>
12+
#include <complex>
13+
#include <cstdio>
14+
#include <vector>
15+
16+
template <class d_data_t>
17+
struct Data {
18+
float *h_data;
19+
d_data_t *d_data;
20+
int element_num;
21+
Data(int element_num) : element_num(element_num) {
22+
h_data = (float *)malloc(sizeof(float) * element_num);
23+
memset(h_data, 0, sizeof(float) * element_num);
24+
cudaMalloc(&d_data, sizeof(d_data_t) * element_num);
25+
cudaMemset(d_data, 0, sizeof(d_data_t) * element_num);
26+
}
27+
Data(float *input_data, int element_num) : element_num(element_num) {
28+
h_data = (float *)malloc(sizeof(float) * element_num);
29+
cudaMalloc(&d_data, sizeof(d_data_t) * element_num);
30+
cudaMemset(d_data, 0, sizeof(d_data_t) * element_num);
31+
memcpy(h_data, input_data, sizeof(float) * element_num);
32+
}
33+
~Data() {
34+
free(h_data);
35+
cudaFree(d_data);
36+
}
37+
void H2D() {
38+
d_data_t *h_temp = (d_data_t *)malloc(sizeof(d_data_t) * element_num);
39+
memset(h_temp, 0, sizeof(d_data_t) * element_num);
40+
from_float_convert(h_data, h_temp);
41+
cudaMemcpy(d_data, h_temp, sizeof(d_data_t) * element_num,
42+
cudaMemcpyHostToDevice);
43+
free(h_temp);
44+
}
45+
void D2H() {
46+
d_data_t *h_temp = (d_data_t *)malloc(sizeof(d_data_t) * element_num);
47+
memset(h_temp, 0, sizeof(d_data_t) * element_num);
48+
cudaMemcpy(h_temp, d_data, sizeof(d_data_t) * element_num,
49+
cudaMemcpyDeviceToHost);
50+
to_float_convert(h_temp, h_data);
51+
free(h_temp);
52+
}
53+
54+
private:
55+
inline void from_float_convert(float *in, d_data_t *out) {
56+
for (int i = 0; i < element_num; i++)
57+
out[i] = in[i];
58+
}
59+
inline void to_float_convert(d_data_t *in, float *out) {
60+
for (int i = 0; i < element_num; i++)
61+
out[i] = in[i];
62+
}
63+
};
64+
template <>
65+
inline void Data<float2>::from_float_convert(float *in, float2 *out) {
66+
for (int i = 0; i < element_num; i++)
67+
out[i].x = in[i];
68+
}
69+
template <>
70+
inline void Data<double2>::from_float_convert(float *in, double2 *out) {
71+
for (int i = 0; i < element_num; i++)
72+
out[i].x = in[i];
73+
}
74+
75+
template <>
76+
inline void Data<float2>::to_float_convert(float2 *in, float *out) {
77+
for (int i = 0; i < element_num; i++)
78+
out[i] = in[i].x;
79+
}
80+
template <>
81+
inline void Data<double2>::to_float_convert(double2 *in, float *out) {
82+
for (int i = 0; i < element_num; i++)
83+
out[i] = in[i].x;
84+
}
85+
86+
bool compare_result(float *expect, float *result, int element_num) {
87+
for (int i = 0; i < element_num; i++) {
88+
if (std::abs(result[i] - expect[i]) >= 0.05) {
89+
return false;
90+
}
91+
}
92+
return true;
93+
}
94+
95+
bool compare_result(float *expect, float *result, std::vector<int> indices) {
96+
for (int i = 0; i < indices.size(); i++) {
97+
if (std::abs(result[indices[i]] - expect[indices[i]]) >= 0.05) {
98+
return false;
99+
}
100+
}
101+
return true;
102+
}
103+
104+
bool test_passed = true;
105+
106+
// op(A) * C = alpha * op(B)
107+
//
108+
// | 1 1 2 | | 1 4 | | 9 21 |
109+
// | 0 1 3 | * | 2 5 | = | 11 23 |
110+
// | 0 0 1 | | 3 6 | | 3 6 |
111+
void test_cusparseSpSM() {
112+
std::vector<float> a_val_vec = {1, 1, 2, 1, 3, 1};
113+
Data<float> a_s_val(a_val_vec.data(), 6);
114+
Data<double> a_d_val(a_val_vec.data(), 6);
115+
Data<float2> a_c_val(a_val_vec.data(), 6);
116+
Data<double2> a_z_val(a_val_vec.data(), 6);
117+
std::vector<float> a_row_ptr_vec = {0, 3, 5, 6};
118+
Data<int> a_row_ptr_s(a_row_ptr_vec.data(), 4);
119+
Data<int> a_row_ptr_d(a_row_ptr_vec.data(), 4);
120+
Data<int> a_row_ptr_c(a_row_ptr_vec.data(), 4);
121+
Data<int> a_row_ptr_z(a_row_ptr_vec.data(), 4);
122+
std::vector<float> a_col_ind_vec = {0, 1, 2, 1, 2, 2};
123+
Data<int> a_col_ind_s(a_col_ind_vec.data(), 6);
124+
Data<int> a_col_ind_d(a_col_ind_vec.data(), 6);
125+
Data<int> a_col_ind_c(a_col_ind_vec.data(), 6);
126+
Data<int> a_col_ind_z(a_col_ind_vec.data(), 6);
127+
128+
std::vector<float> b_vec = {9, 11, 3, 21, 23, 6};
129+
Data<float> b_s(b_vec.data(), 6);
130+
Data<double> b_d(b_vec.data(), 6);
131+
Data<float2> b_c(b_vec.data(), 6);
132+
Data<double2> b_z(b_vec.data(), 6);
133+
134+
Data<float> c_s(6);
135+
Data<double> c_d(6);
136+
Data<float2> c_c(6);
137+
Data<double2> c_z(6);
138+
139+
a_s_val.H2D();
140+
a_d_val.H2D();
141+
a_c_val.H2D();
142+
a_z_val.H2D();
143+
a_row_ptr_s.H2D();
144+
a_row_ptr_d.H2D();
145+
a_row_ptr_c.H2D();
146+
a_row_ptr_z.H2D();
147+
a_col_ind_s.H2D();
148+
a_col_ind_d.H2D();
149+
a_col_ind_c.H2D();
150+
a_col_ind_z.H2D();
151+
b_s.H2D();
152+
b_d.H2D();
153+
b_c.H2D();
154+
b_z.H2D();
155+
156+
cusparseHandle_t handle;
157+
cusparseSpMatDescr_t matA_s, matA_d, matA_c, matA_z;
158+
cusparseDnMatDescr_t matB_s, matB_d, matB_c, matB_z;
159+
cusparseDnMatDescr_t matC_s, matC_d, matC_c, matC_z;
160+
cusparseCreate(&handle);
161+
cusparseCreateCsr(&matA_s, 3, 3, 6, a_row_ptr_s.d_data, a_col_ind_s.d_data, a_s_val.d_data, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, CUDA_R_32F);
162+
cusparseCreateCsr(&matA_d, 3, 3, 6, a_row_ptr_d.d_data, a_col_ind_d.d_data, a_d_val.d_data, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, CUDA_R_64F);
163+
cusparseCreateCsr(&matA_c, 3, 3, 6, a_row_ptr_c.d_data, a_col_ind_c.d_data, a_c_val.d_data, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, CUDA_C_32F);
164+
cusparseCreateCsr(&matA_z, 3, 3, 6, a_row_ptr_z.d_data, a_col_ind_z.d_data, a_z_val.d_data, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, CUDA_C_64F);
165+
166+
cusparseCreateDnMat(&matB_s, 3, 2, 3, b_s.d_data, CUDA_R_32F, CUSPARSE_ORDER_COL);
167+
cusparseCreateDnMat(&matB_d, 3, 2, 3, b_d.d_data, CUDA_R_64F, CUSPARSE_ORDER_COL);
168+
cusparseCreateDnMat(&matB_c, 3, 2, 3, b_c.d_data, CUDA_C_32F, CUSPARSE_ORDER_COL);
169+
cusparseCreateDnMat(&matB_z, 3, 2, 3, b_z.d_data, CUDA_C_64F, CUSPARSE_ORDER_COL);
170+
171+
cusparseCreateDnMat(&matC_s, 3, 2, 3, c_s.d_data, CUDA_R_32F, CUSPARSE_ORDER_COL);
172+
cusparseCreateDnMat(&matC_d, 3, 2, 3, c_d.d_data, CUDA_R_64F, CUSPARSE_ORDER_COL);
173+
cusparseCreateDnMat(&matC_c, 3, 2, 3, c_c.d_data, CUDA_C_32F, CUSPARSE_ORDER_COL);
174+
cusparseCreateDnMat(&matC_z, 3, 2, 3, c_z.d_data, CUDA_C_64F, CUSPARSE_ORDER_COL);
175+
176+
cusparseSpSMDescr_t spsmDescr_s;
177+
cusparseSpSMDescr_t spsmDescr_d;
178+
cusparseSpSMDescr_t spsmDescr_c;
179+
cusparseSpSMDescr_t spsmDescr_z;
180+
cusparseSpSM_createDescr(&spsmDescr_s);
181+
cusparseSpSM_createDescr(&spsmDescr_d);
182+
cusparseSpSM_createDescr(&spsmDescr_c);
183+
cusparseSpSM_createDescr(&spsmDescr_z);
184+
185+
cusparseFillMode_t fillmode = CUSPARSE_FILL_MODE_UPPER;
186+
cusparseSpMatSetAttribute(matA_s, CUSPARSE_SPMAT_FILL_MODE, &fillmode, sizeof(fillmode));
187+
cusparseSpMatSetAttribute(matA_d, CUSPARSE_SPMAT_FILL_MODE, &fillmode, sizeof(fillmode));
188+
cusparseSpMatSetAttribute(matA_c, CUSPARSE_SPMAT_FILL_MODE, &fillmode, sizeof(fillmode));
189+
cusparseSpMatSetAttribute(matA_z, CUSPARSE_SPMAT_FILL_MODE, &fillmode, sizeof(fillmode));
190+
cusparseDiagType_t diagtype = CUSPARSE_DIAG_TYPE_NON_UNIT;
191+
cusparseSpMatSetAttribute(matA_s, CUSPARSE_SPMAT_DIAG_TYPE, &diagtype, sizeof(diagtype));
192+
cusparseSpMatSetAttribute(matA_d, CUSPARSE_SPMAT_DIAG_TYPE, &diagtype, sizeof(diagtype));
193+
cusparseSpMatSetAttribute(matA_c, CUSPARSE_SPMAT_DIAG_TYPE, &diagtype, sizeof(diagtype));
194+
cusparseSpMatSetAttribute(matA_z, CUSPARSE_SPMAT_DIAG_TYPE, &diagtype, sizeof(diagtype));
195+
196+
float alpha_s = 1.0f;
197+
double alpha_d = 1.0f;
198+
float2 alpha_c = {1.0f, 0.0f};
199+
double2 alpha_z = {1.0f, 0.0f};
200+
size_t bufferSize_s = 0;
201+
size_t bufferSize_d = 0;
202+
size_t bufferSize_c = 0;
203+
size_t bufferSize_z = 0;
204+
cusparseSpSM_bufferSize(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, &alpha_s, matA_s, matB_s, matC_s, CUDA_R_32F, CUSPARSE_SPSM_ALG_DEFAULT, spsmDescr_s, &bufferSize_s);
205+
cusparseSpSM_bufferSize(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, &alpha_d, matA_d, matB_d, matC_d, CUDA_R_64F, CUSPARSE_SPSM_ALG_DEFAULT, spsmDescr_d, &bufferSize_d);
206+
cusparseSpSM_bufferSize(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, &alpha_c, matA_c, matB_c, matC_c, CUDA_C_32F, CUSPARSE_SPSM_ALG_DEFAULT, spsmDescr_c, &bufferSize_c);
207+
cusparseSpSM_bufferSize(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, &alpha_z, matA_z, matB_z, matC_z, CUDA_C_64F, CUSPARSE_SPSM_ALG_DEFAULT, spsmDescr_z, &bufferSize_z);
208+
209+
void* dBuffer_s = NULL;
210+
void* dBuffer_d = NULL;
211+
void* dBuffer_c = NULL;
212+
void* dBuffer_z = NULL;
213+
cudaMalloc(&dBuffer_s, bufferSize_s);
214+
cudaMalloc(&dBuffer_d, bufferSize_d);
215+
cudaMalloc(&dBuffer_c, bufferSize_c);
216+
cudaMalloc(&dBuffer_z, bufferSize_z);
217+
218+
cusparseSpSM_analysis(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, &alpha_s, matA_s, matB_s, matC_s, CUDA_R_32F, CUSPARSE_SPSM_ALG_DEFAULT, spsmDescr_s, &bufferSize_s);
219+
cusparseSpSM_analysis(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, &alpha_d, matA_d, matB_d, matC_d, CUDA_R_64F, CUSPARSE_SPSM_ALG_DEFAULT, spsmDescr_d, &bufferSize_d);
220+
cusparseSpSM_analysis(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, &alpha_c, matA_c, matB_c, matC_c, CUDA_C_32F, CUSPARSE_SPSM_ALG_DEFAULT, spsmDescr_c, &bufferSize_c);
221+
cusparseSpSM_analysis(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, &alpha_z, matA_z, matB_z, matC_z, CUDA_C_64F, CUSPARSE_SPSM_ALG_DEFAULT, spsmDescr_z, &bufferSize_z);
222+
223+
cusparseSpSM_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, &alpha_s, matA_s, matB_s, matC_s, CUDA_R_32F, CUSPARSE_SPSM_ALG_DEFAULT, spsmDescr_s);
224+
cusparseSpSM_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, &alpha_d, matA_d, matB_d, matC_d, CUDA_R_64F, CUSPARSE_SPSM_ALG_DEFAULT, spsmDescr_d);
225+
cusparseSpSM_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, &alpha_c, matA_c, matB_c, matC_c, CUDA_C_32F, CUSPARSE_SPSM_ALG_DEFAULT, spsmDescr_c);
226+
cusparseSpSM_solve(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, &alpha_z, matA_z, matB_z, matC_z, CUDA_C_64F, CUSPARSE_SPSM_ALG_DEFAULT, spsmDescr_z);
227+
228+
cusparseDestroySpMat(matA_s);
229+
cusparseDestroySpMat(matA_d);
230+
cusparseDestroySpMat(matA_c);
231+
cusparseDestroySpMat(matA_z);
232+
cusparseDestroyDnMat(matB_s);
233+
cusparseDestroyDnMat(matB_d);
234+
cusparseDestroyDnMat(matB_c);
235+
cusparseDestroyDnMat(matB_z);
236+
cusparseDestroyDnMat(matC_s);
237+
cusparseDestroyDnMat(matC_d);
238+
cusparseDestroyDnMat(matC_c);
239+
cusparseDestroyDnMat(matC_z);
240+
cusparseSpSM_destroyDescr(spsmDescr_s);
241+
cusparseSpSM_destroyDescr(spsmDescr_d);
242+
cusparseSpSM_destroyDescr(spsmDescr_c);
243+
cusparseSpSM_destroyDescr(spsmDescr_z);
244+
cusparseDestroy(handle);
245+
246+
c_s.D2H();
247+
c_d.D2H();
248+
c_c.D2H();
249+
c_z.D2H();
250+
251+
cudaFree(dBuffer_s);
252+
cudaFree(dBuffer_d);
253+
cudaFree(dBuffer_c);
254+
cudaFree(dBuffer_z);
255+
256+
float expect_c[6] = {1, 2, 3, 4, 5, 6};
257+
if (compare_result(expect_c, c_s.h_data, 6) &&
258+
compare_result(expect_c, c_d.h_data, 6) &&
259+
compare_result(expect_c, c_c.h_data, 6) &&
260+
compare_result(expect_c, c_z.h_data, 6))
261+
printf("SpSM pass\n");
262+
else {
263+
printf("SpSM fail\n");
264+
test_passed = false;
265+
printf("%f,%f,%f,%f,%f,%f\n", c_s.h_data[0], c_s.h_data[1], c_s.h_data[2],
266+
c_s.h_data[3], c_s.h_data[4], c_s.h_data[5]);
267+
printf("%f,%f,%f,%f,%f,%f\n", c_d.h_data[0], c_d.h_data[1], c_d.h_data[2],
268+
c_d.h_data[3], c_d.h_data[4], c_d.h_data[5]);
269+
printf("%f,%f,%f,%f,%f,%f\n", c_c.h_data[0], c_c.h_data[1], c_c.h_data[2],
270+
c_c.h_data[3], c_c.h_data[4], c_c.h_data[5]);
271+
printf("%f,%f,%f,%f,%f,%f\n", c_z.h_data[0], c_z.h_data[1], c_z.h_data[2],
272+
c_z.h_data[3], c_z.h_data[4], c_z.h_data[5]);
273+
}
274+
}
275+
276+
int main() {
277+
test_cusparseSpSM();
278+
279+
if (test_passed)
280+
return 0;
281+
return -1;
282+
}

features/features.xml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -280,6 +280,7 @@
280280
<test testName="cusparse_6" configFile="config/TEMPLATE_cusparse_before_12_skip_double.xml" splitGroup="double"/>
281281
<test testName="cusparse_7" configFile="config/TEMPLATE_cusparse_before_11_skip_double.xml" splitGroup="double"/>
282282
<test testName="cusparse_8" configFile="config/TEMPLATE_cusparse_after_101_skip_double.xml" splitGroup="double"/>
283+
<test testName="cusparse_9" configFile="config/TEMPLATE_cusparse_after_113_skip_double.xml" splitGroup="double"/>
283284
<test testName="cusolver_test1" configFile="config/TEMPLATE_solver.xml" splitGroup="double"/>
284285
<test testName="cusolver_test2" configFile="config/TEMPLATE_solver_skip80.xml" splitGroup="double"/>
285286
<test testName="cusolver_test3" configFile="config/TEMPLATE_solver_11.xml" splitGroup="double"/>

features/test_feature.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@
3939
'math-bf16-conv', 'math-emu-bf16-conv-double', 'math-ext-bf16-conv-double', 'math-half-conv',
4040
'math-bfloat16', 'libcu_atomic', 'test_shared_memory', 'cudnn-reduction', 'cudnn-binary', 'cudnn-bnp1', 'cudnn-bnp2', 'cudnn-bnp3',
4141
'cudnn-normp1', 'cudnn-normp2', 'cudnn-normp3', 'cudnn-convp1', 'cudnn-convp2', 'cudnn-convp3', 'cudnn-convp4', 'cudnn-convp5', 'cudnn-convp6', 'cudnn-convp7',
42-
'cudnn_mutilple_files', "cusparse_1", "cusparse_2", "cusparse_3", "cusparse_4", "cusparse_5", "cusparse_6", "cusparse_7", "cusparse_8",
42+
'cudnn_mutilple_files', "cusparse_1", "cusparse_2", "cusparse_3", "cusparse_4", "cusparse_5", "cusparse_6", "cusparse_7", "cusparse_8", "cusparse_9",
4343
'cudnn-GetErrorString', 'cub_device_histgram', 'peer_access', 'driver_err_handle',
4444
'cudnn-types', 'cudnn-version', 'cudnn-dropout', 'const_opt',
4545
'constant_attr', 'sync_warp_p2', 'occupancy_calculation',

0 commit comments

Comments
 (0)