
Commit 303d0fc

[SYCLomatic] Add behavior test for cmake helper function target_link_sycl_libraries
Signed-off-by: Ahmed, Daiyaan <[email protected]>
1 parent d4e0e9a commit 303d0fc
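
This commit adds a behavior test that configures, builds, and runs a dpct-migrated project through the CMake helper machinery shipped in dpct.cmake. As a rough, hypothetical sketch only (the exact signature of target_link_sycl_libraries is defined in dpct.cmake and is not shown in this diff), a call to the helper named in the commit title might look like the following, assuming it follows the target/keyword/item layout of CMake's built-in target_link_libraries:

    # Hypothetical usage sketch; the real argument list lives in dpct.cmake and
    # may differ. The target foo-bar, the -qmkl flag, and DNN_LIB are taken from
    # the test's CMakeLists.txt shown below.
    target_link_sycl_libraries(foo-bar PUBLIC -qmkl ${DNN_LIB})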

File tree: 4 files changed, 318 additions and 0 deletions

behavior_tests/behavior_tests.xml

Lines changed: 1 addition & 0 deletions
@@ -167,6 +167,7 @@
     <test testName="bt-no-dpct-helper-function" configFile="config/TEMPLATE_behavior_tests.xml" />
     <test testName="cmp-cmds-linker-entry-src-files" configFile="config/TEMPLATE_behavior_tests_lin.xml" />
     <test testName="cmake_dpct_helper_compile_sycl_code" configFile="config/TEMPLATE_behavior_tests.xml" />
+    <test testName="cmake_target_link_libraries" configFile="config/TEMPLATE_behavior_tests.xml" />
   </tests>

 </suite>
CMakeLists.txt

Lines changed: 15 additions & 0 deletions

@@ -0,0 +1,15 @@
cmake_minimum_required(VERSION 3.24)
project(foo LANGUAGES CXX)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl")
# Locate the dpct binary and include the dpct.cmake helper script installed next to it.
find_program(dpct_bin_path NAMES dpct PATHS)
get_filename_component(bin_path_of_dpct ${dpct_bin_path} DIRECTORY)
set(dpct_cmake_file_path "${bin_path_of_dpct}/../cmake/dpct.cmake")
include(${dpct_cmake_file_path})
find_package(CUDAToolkit)
include_directories(${CUDNN_INCLUDE_DIR})

set(SOURCES
    ${CMAKE_SOURCE_DIR}/main.dp.cpp
)
add_executable(foo-bar ${SOURCES})
# -qmkl asks icpx to link oneMKL; DNN_LIB supplies the DNN library to link against.
target_link_libraries(foo-bar PUBLIC -qmkl ${DNN_LIB})
do_test.py

Lines changed: 31 additions & 0 deletions

@@ -0,0 +1,31 @@
# ====------ do_test.py---------- *- Python -* ----===##
#
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
#
# ===----------------------------------------------------------------------===#
import subprocess
import platform
import os
import sys
import shutil  # used by migrate_test to remove a stale build directory
from test_config import CT_TOOL

from test_utils import *

def setup_test():
    change_dir(test_config.current_test)
    return True

def migrate_test():
    # clean previous migration output
    if os.path.exists("build"):
        shutil.rmtree("build")
    call_subprocess("mkdir build && cd build && cmake -DCMAKE_CXX_COMPILER=icpx ../ && make")
    return os.path.exists(os.path.join("build", "foo-bar"))

def build_test():
    return True

def run_test():
    change_dir("build")
    return call_subprocess(os.path.join(os.path.curdir, "foo-bar"))
main.dp.cpp

Lines changed: 271 additions & 0 deletions

@@ -0,0 +1,271 @@
#include <dpct/dnnl_utils.hpp>
#include <sycl/sycl.hpp>
#include <dpct/dpct.hpp>
#include <cstdio>
#include <cstdlib>
#include <dpct/blas_utils.hpp>

#include <iostream>
#include <stdexcept>
#include <vector>
#include <cmath>

using data_type = double;
template <typename T>
bool check(std::vector<T> &expect, std::vector<T> &actual, int num,
           float precision) {
  for (int i = 0; i < num; i++) {
    if (std::abs(expect[i] - actual[i]) > precision) {
      std::cout << "test failed" << std::endl;
      std::cout << "expect:" << expect[i] << std::endl;
      std::cout << "actual:" << actual[i] << std::endl;
      return false;
    }
  }
  return true;
}
bool cublasCheck() {
  dpct::device_ext &dev_ct1 = dpct::get_current_device();
  sycl::queue &q_ct1 = dev_ct1.in_order_queue();
  dpct::queue_ptr handle = NULL;
  dpct::queue_ptr stream = &q_ct1;

  const std::vector<data_type> A = {1.0, 2.0, 3.0, 4.0};
  const int incx = 1;

  int result = 0;

  data_type *d_A = nullptr;

  handle = &q_ct1;

  /*
  DPCT1025:0: The SYCL queue is created ignoring the flag and priority options.
  */
  stream = dev_ct1.create_queue();
  handle = stream;

  d_A = (data_type *)sycl::malloc_device(sizeof(data_type) * A.size(), q_ct1);

  stream->memcpy(d_A, A.data(), sizeof(data_type) * A.size());

  int64_t *res_temp_ptr_ct1 = sycl::malloc_shared<int64_t>(1, q_ct1);
  oneapi::mkl::blas::column_major::iamax(*handle, A.size(), d_A, incx,
                                         res_temp_ptr_ct1,
                                         oneapi::mkl::index_base::one)
      .wait();
  int res_temp_host_ct2 = (int)*res_temp_ptr_ct1;
  dpct::dpct_memcpy(&result, &res_temp_host_ct2, sizeof(int));
  sycl::free(res_temp_ptr_ct1, q_ct1);

  stream->wait();

  sycl::free(d_A, q_ct1);

  handle = nullptr;

  dev_ct1.destroy_queue(stream);

  dev_ct1.reset();
  if (result == 4) {
    return true;
  }
  return false;
}
template <typename T>
void conv2d(int batch, int color, int rows, int cols, int kCols,
            int kRows, T *matrix, float *kernel, T *result,
            const sycl::nd_item<3> &item_ct1) {
  int tid = item_ct1.get_group(2) * item_ct1.get_local_range(2) +
            item_ct1.get_local_id(2);
  int kCenterX = kCols / 2;
  int kCenterY = kRows / 2;

  for (int b = 0; b < batch; b++) {
    for (int c = 0; c < color; c++) {
      for (int i = 0; i < rows; i++) {
        for (int j = 0; j < cols; j++) {
          for (int m = 0; m < kRows; m++) {
            int mm = kRows - 1 - m;
            for (int n = 0; n < kCols; n++) {
              int nn = kCols - 1 - n;

              int ii = i + (kCenterY - mm);
              int jj = j + (kCenterX - nn);

              if (ii >= 0 && ii < rows && jj >= 0 && jj < cols) {
                result[b * color * rows * cols + c * rows * cols + i * cols +
                       j] +=
                    matrix[b * c * ii * jj + c * ii * jj + ii * kRows + jj] *
                    kernel[mm * kRows + nn];
                result[tid] = result[b * color * rows * cols + c * rows * cols +
                                     i * cols + j];
              }
            }
          }
        }
      }
    }
  }
}

bool cudnnCheck() {
  dpct::device_ext &dev_ct1 = dpct::get_current_device();
  sycl::queue &q_ct1 = dev_ct1.in_order_queue();
  dpct::dnnl::engine_ext handle;
  dpct::dnnl::memory_desc_ext dataTensor, outTensor, scalebiasTensor;
  handle.create_engine();

  /*
  DPCT1026:1: The call to cudnnCreateTensorDescriptor was removed because this
  call is redundant in SYCL.
  */
  /*
  DPCT1026:2: The call to cudnnCreateTensorDescriptor was removed because this
  call is redundant in SYCL.
  */
  /*
  DPCT1026:3: The call to cudnnCreateTensorDescriptor was removed because this
  call is redundant in SYCL.
  */

  int in = 2, ic = 4, ih = 5, iw = 5;
  int on = 2, oc = 4, oh = 5, ow = 5;
  int sbn = 1, sbc = 4, sbh = 5, sbw = 5;
  int ele_num = in * ic * ih * iw;
  int oele_num = on * oc * oh * ow;
  int sele_num = sbn * sbc * sbh * sbw;
  dataTensor.set(dpct::dnnl::memory_format_tag::nchw,
                 dpct::library_data_t::real_float, in, ic, ih, iw);
  outTensor.set(dpct::dnnl::memory_format_tag::nchw,
                dpct::library_data_t::real_float, on, oc, oh, ow);
  scalebiasTensor.set(dpct::dnnl::memory_format_tag::nchw,
                      dpct::library_data_t::real_float, sbn, sbc, sbh, sbw);

  int save = 1;
  float *data, *out, *scale, *bias, *rmean, *rvar, *smean, *svar, *z;
  std::vector<float> host_data(ele_num, 1.0f);
  std::vector<float> host_z(oele_num, 1.0f);
  std::vector<float> host_out(oele_num, 0.0f);
  std::vector<float> host_scale(sele_num, 1.0f);
  std::vector<float> host_bias(sele_num, 0.0f);
  std::vector<float> host_rmean(sele_num, 0.0f);
  std::vector<float> host_rvar(sele_num, 0.0f);
  std::vector<float> host_smean(save * sele_num, 0.0f);
  std::vector<float> host_svar(save * sele_num, 0.0f);

  for (int i = 0; i < ele_num; i++) {
    host_data[i] = i + 4.f;
    host_out[i] = 1.f;
    host_z[i] = 10;
  }
  for (int i = 0; i < sele_num; i++) {
    host_scale[i] = i;
    host_bias[i] = i;
    host_rmean[i] = i;
    host_rvar[i] = i;
    host_smean[i] = i;
    host_svar[i] = i;
  }

  data = sycl::malloc_device<float>(ele_num, q_ct1);
  z = sycl::malloc_device<float>(oele_num, q_ct1);
  out = sycl::malloc_device<float>(oele_num, q_ct1);
  scale = sycl::malloc_device<float>(sele_num, q_ct1);
  bias = sycl::malloc_device<float>(sele_num, q_ct1);
  rmean = sycl::malloc_device<float>(sele_num, q_ct1);
  rvar = sycl::malloc_device<float>(sele_num, q_ct1);
  smean = (float *)sycl::malloc_device(sizeof(float) * save * sele_num, q_ct1);
  svar = (float *)sycl::malloc_device(sizeof(float) * save * sele_num, q_ct1);

  q_ct1.memcpy(data, host_data.data(), sizeof(float) * ele_num);
  q_ct1.memcpy(z, host_z.data(), sizeof(float) * oele_num);
  q_ct1.memcpy(out, host_out.data(), sizeof(float) * oele_num);
  q_ct1.memcpy(scale, host_scale.data(), sizeof(float) * sele_num);
  q_ct1.memcpy(bias, host_bias.data(), sizeof(float) * sele_num);
  q_ct1.memcpy(rmean, host_rmean.data(), sizeof(float) * sele_num);
  q_ct1.memcpy(rvar, host_rvar.data(), sizeof(float) * sele_num);
  q_ct1.memcpy(smean, host_smean.data(), sizeof(float) * save * sele_num);
  q_ct1.memcpy(svar, host_svar.data(), sizeof(float) * save * sele_num).wait();

  float alpha = 2.5f, beta = 1.5f, eps = 1.f;
  double factor = 0.5f;
  dpct::dnnl::activation_desc ActivationDesc;
  /*
  DPCT1026:4: The call to cudnnCreateActivationDescriptor was removed because
  this call is redundant in SYCL.
  */
  /*
  DPCT1007:5: Migration of Nan numbers propagation option is not supported.
  */
  ActivationDesc.set(dnnl::algorithm::eltwise_relu_use_dst_for_bwd, 0.0f);

  auto status =
      DPCT_CHECK_ERROR(handle.async_batch_normalization_forward_inference(
          dpct::dnnl::batch_normalization_mode::per_activation,
          dpct::dnnl::batch_normalization_ops::none, ActivationDesc, eps, alpha,
          dataTensor, data, beta, outTensor, out, dataTensor, z,
          scalebiasTensor, scale, bias, scalebiasTensor, smean, svar));

  dev_ct1.queues_wait_and_throw();
  q_ct1.memcpy(host_out.data(), out, sizeof(float) * oele_num).wait();
  std::vector<float> expect = {
      1.5,     11.0711, 18.047,  24,      29.3885, 34.4124, 39.1779,
      43.7487, 48.1667, 52.4605, 56.6511, 60.7543, 64.782,  68.744,
      72.6478, 76.5,    80.3057, 84.0694, 87.7948, 91.4853, 95.1436,
      98.7721, 102.373, 105.949, 109.5,

      113.029, 116.537, 120.025, 123.495, 126.947, 130.382, 133.801,
      137.205, 140.595, 143.97,  147.333, 150.684, 154.022, 157.349,
      160.664, 163.969, 167.264, 170.549, 173.825, 177.091, 180.349,
      183.598, 186.839, 190.071, 193.296,

      196.514, 199.724, 202.927, 206.124, 209.314, 212.497, 215.674,
      218.845, 222.01,  225.169, 228.322, 231.47,  234.613, 237.75,
      240.882, 244.009, 247.132, 250.249, 253.362, 256.471, 259.575,
      262.674, 265.77,  268.861, 271.948,

      275.031, 278.11,  281.185, 284.257, 287.325, 290.389, 293.45,
      296.507, 299.56,  302.611, 305.658, 308.702, 311.742, 314.78,
      317.814, 320.846, 323.874, 326.9,   329.922, 332.942, 335.959,
      338.973, 341.985, 344.994, 348,

      1.5,     187.848, 306.722, 399,     476.602, 544.723, 606.125,
      662.467, 714.833, 763.973, 810.43,  854.611, 896.832, 937.343,
      976.344, 1014,    1050.45, 1085.8,  1120.17, 1153.62, 1186.23,
      1218.08, 1249.2,  1279.66, 1309.5,

      1338.75, 1367.46, 1395.66, 1423.36, 1450.61, 1477.42, 1503.82,
      1529.83, 1555.46, 1580.73, 1605.67, 1630.27, 1654.57, 1678.57,
      1702.27, 1725.71, 1748.87, 1771.78, 1794.45, 1816.87, 1839.07,
      1861.05, 1882.81, 1904.36, 1925.71,

      1946.86, 1967.83, 1988.61, 2009.22, 2029.65, 2049.92, 2070.02,
      2089.96, 2109.75, 2129.39, 2148.88, 2168.22, 2187.43, 2206.5,
      2225.44, 2244.25, 2262.93, 2281.49, 2299.92, 2318.24, 2336.44,
      2354.53, 2372.51, 2390.38, 2408.14,

      2425.8,  2443.36, 2460.82, 2478.18, 2495.44, 2512.61, 2529.69,
      2546.67, 2563.57, 2580.38, 2597.1,  2613.74, 2630.3,  2646.78,
      2663.17, 2679.49, 2695.73, 2711.89, 2727.98, 2743.99, 2759.93,
      2775.8,  2791.6,  2807.34, 2823,
  };
  /*
  DPCT1026:6: The call to cudnnDestroy was removed because this call is
  redundant in SYCL.
  */
  sycl::free(data, q_ct1);
  sycl::free(out, q_ct1);
  return check(expect, host_out, expect.size(), 1e-1);
}

int main(int argc, char *argv[]) {
  if (cublasCheck() && cudnnCheck()) {
    printf("Both cases passed\n");
    return 0;
  } else {
    printf("Tests failed\n");
    exit(-1);
  }
  return 0;
}
