Skip to content

Commit a4907dd

Browse files
committed
Merge branch 'master' into spokhode/dparray
2 parents ef2902f + 3d2dbd6 commit a4907dd

30 files changed

+1118
-26
lines changed

README.md

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -76,11 +76,17 @@ Examples
7676
========
7777
See examples in folder `examples`.
7878

79-
Run examples:
79+
Run Python examples:
8080
```bash
81-
python examples/create_sycl_queues.py
81+
for script in `ls examples/python/`; do echo "executing ${script}"; python examples/python/${script}; done
8282
```
8383

84+
Examples of building Cython extensions with the DPC++ compiler that interoperate with dpCtl can be found in
85+
the folder `examples/cython`.
86+
87+
Each example in the `examples/cython` folder can be built using `CC=clang CXX=dpcpp python setup.py build_ext --inplace`.
88+
Please refer to the `run.py` script in the respective folder to execute the extension.
89+
8490
Tests
8591
=====
8692
See tests in folder `dpctl/tests`.

backends/CMakeLists.txt

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -77,15 +77,15 @@ add_library(
7777
source/dppl_sycl_queue_manager.cpp
7878
source/dppl_sycl_usm_interface.cpp
7979
source/dppl_utils.cpp
80-
details/source/dppl_utils_details.cpp
80+
helper/source/dppl_utils_helper.cpp
8181
)
8282

8383
# Install DPPLSyclInterface
8484
target_include_directories(
8585
DPPLSyclInterface
8686
PRIVATE
8787
${CMAKE_SOURCE_DIR}/include/
88-
${CMAKE_SOURCE_DIR}/details/include/
88+
${CMAKE_SOURCE_DIR}/helper/include/
8989
)
9090

9191
if(WIN32)
@@ -125,10 +125,10 @@ foreach(HEADER ${HEADERS})
125125
install(FILES "${HEADER}" DESTINATION include/Support)
126126
endforeach()
127127

128-
# Install all headers in details/include
129-
file(GLOB HEADERS "${CMAKE_SOURCE_DIR}/details/include/*.h*")
128+
# Install all headers in helper/include
129+
file(GLOB HEADERS "${CMAKE_SOURCE_DIR}/helper/include/*.h*")
130130
foreach(HEADER ${HEADERS})
131-
install(FILES "${HEADER}" DESTINATION details/include)
131+
install(FILES "${HEADER}" DESTINATION helper/include)
132132
endforeach()
133133

134134
option(

backends/details/include/dppl_utils_details.h renamed to backends/helper/include/dppl_utils_helper.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,5 +27,5 @@
2727
#include <CL/sycl.hpp>
2828
using namespace cl::sycl;
2929

30-
std::string DDPL_StrToDeviceType(info::device_type devTy);
31-
info::device_type DPPL_DeviceTypeToStr(std::string devTyStr);
30+
std::string DPPL_DeviceTypeToStr(info::device_type devTy);
31+
info::device_type DPPL_StrToDeviceType(std::string devTyStr);

backends/details/source/dppl_utils_details.cpp renamed to backends/helper/source/dppl_utils_helper.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
//===------ dppl_utils_details.cpp - dpctl-C_API ----*---- C++ -----*-----===//
1+
//===------ dppl_utils_helper.cpp - dpctl-C_API ----*---- C++ -----*-----===//
22
//
33
// Data Parallel Control Library (dpCtl)
44
//
@@ -19,11 +19,11 @@
1919
//===----------------------------------------------------------------------===//
2020
///
2121
/// \file
22-
/// This file implements the helper functions defined in dppl_utils_details.h.
22+
/// This file implements the helper functions defined in dppl_utils_helper.h.
2323
///
2424
//===----------------------------------------------------------------------===//
2525

26-
#include "dppl_utils_details.h"
26+
#include "dppl_utils_helper.h"
2727
#include <string>
2828
#include <sstream>
2929

@@ -32,7 +32,7 @@ using namespace cl::sycl;
3232
/*!
3333
* Transforms enum info::device_type to string.
3434
*/
35-
std::string DDPL_StrToDeviceType(info::device_type devTy)
35+
std::string DPPL_DeviceTypeToStr(info::device_type devTy)
3636
{
3737
std::stringstream ss;
3838
switch (devTy)
@@ -61,7 +61,7 @@ std::string DDPL_StrToDeviceType(info::device_type devTy)
6161
/*!
6262
* Transforms string to enum info::device_type.
6363
*/
64-
info::device_type DPPL_DeviceTypeToStr(std::string devTyStr)
64+
info::device_type DPPL_StrToDeviceType(std::string devTyStr)
6565
{
6666
info::device_type devTy;
6767
if (devTyStr == "cpu") {

backends/source/dppl_sycl_context_interface.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,11 @@ bool DPPLContext_AreEq (__dppl_keep const DPPLSyclContextRef CtxRef1,
4747

4848
bool DPPLContext_IsHost (__dppl_keep const DPPLSyclContextRef CtxRef)
4949
{
50-
return unwrap(CtxRef)->is_host();
50+
auto Ctx = unwrap(CtxRef);
51+
if (Ctx) {
52+
return Ctx->is_host();
53+
}
54+
return false;
5155
}
5256

5357
void DPPLContext_Delete (__dppl_take DPPLSyclContextRef CtxRef)

backends/source/dppl_sycl_device_interface.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@
3030
#include <iostream>
3131
#include <cstring>
3232
#include <CL/sycl.hpp> /* SYCL headers */
33-
#include "../details/include/dppl_utils_details.h"
33+
#include "../helper/include/dppl_utils_helper.h"
3434

3535
using namespace cl::sycl;
3636

@@ -59,7 +59,7 @@ void dump_device_info (const device & Device)
5959
ss << std::setw(4) << " " << std::left << std::setw(16) << "Device type";
6060

6161
auto devTy = Device.get_info<info::device::device_type>();
62-
ss << DDPL_StrToDeviceType(devTy);
62+
ss << DPPL_DeviceTypeToStr(devTy);
6363

6464
std::cout << ss.str();
6565
}

backends/source/dppl_sycl_platform_interface.cpp

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@
2929
#include <iostream>
3030
#include <set>
3131
#include <sstream>
32-
#include "../details/include/dppl_utils_details.h"
32+
#include "../helper/include/dppl_utils_helper.h"
3333

3434
#include <CL/sycl.hpp>
3535

@@ -125,7 +125,7 @@ void DPPLPlatform_DumpInfo ()
125125
<< "Device type";
126126

127127
auto devTy = devices[dn].get_info<info::device::device_type>();
128-
ss << DDPL_StrToDeviceType(devTy);
128+
ss << DPPL_DeviceTypeToStr(devTy);
129129
}
130130
std::cout << ss.str();
131131
++i;
@@ -148,7 +148,12 @@ size_t DPPLPlatform_GetNumNonHostPlatforms ()
148148

149149
size_t DPPLPlatform_GetNumNonHostBackends ()
150150
{
151-
return get_set_of_non_hostbackends().size();
151+
auto be_set = get_set_of_non_hostbackends();
152+
153+
if (be_set.empty())
154+
return 0;
155+
156+
return be_set.size();
152157
}
153158

154159
__dppl_give DPPLSyclBackendType *DPPLPlatform_GetListOfNonHostBackends ()

examples/cython/sycl_buffer/README.md

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
# Example of SYCL extension working with NumPy array input via SYCL buffers
2+
3+
4+
## Description
5+
6+
Cython function expecting a 2D array in C-contiguous layout that
7+
computes column-wise total by using SYCL oneMKL (as GEMV call with
8+
an all-ones vector).
9+
10+
The example illustrates compiling a SYCL extension and linking it to oneMKL.
11+
12+
13+
## Compiling
14+
15+
```
16+
# make sure oneAPI is activated, $ONEAPI_ROOT must be set
17+
CC=clang CXX=dpcpp python setup.py build_ext --inplace
18+
```
19+
20+
21+
## Running
22+
23+
```
24+
# SYCL_BE=PI_OPENCL sets SYCL backend to OpenCL to avoid a
25+
# transient issue with MKL's using the default Level-0 backend
26+
(idp) [08:16:12 ansatnuc04 simple]$ SYCL_BE=PI_OPENCL ipython
27+
Python 3.7.7 (default, Jul 14 2020, 22:02:37)
28+
Type 'copyright', 'credits' or 'license' for more information
29+
IPython 7.17.0 -- An enhanced Interactive Python. Type '?' for help.
30+
31+
In [1]: import syclbuffer as sb, numpy as np, dpctl
32+
33+
In [2]: x = np.random.randn(10**4, 2500)
34+
35+
In [3]: %time m1 = np.sum(x, axis=0)
36+
CPU times: user 22.3 ms, sys: 160 µs, total: 22.5 ms
37+
Wall time: 21.2 ms
38+
39+
In [4]: %time m = sb.columnwise_total(x) # first time is slower, due to JIT overhead
40+
CPU times: user 207 ms, sys: 36.1 ms, total: 243 ms
41+
Wall time: 248 ms
42+
43+
In [5]: %time m = sb.columnwise_total(x)
44+
CPU times: user 8.89 ms, sys: 4.12 ms, total: 13 ms
45+
Wall time: 12.4 ms
46+
47+
In [6]: %time m = sb.columnwise_total(x)
48+
CPU times: user 4.82 ms, sys: 8.06 ms, total: 12.9 ms
49+
Wall time: 12.3 ms
50+
```
51+
52+
Running bench.py:
53+
54+
```
55+
========== Executing warm-up ==========
56+
NumPy result: [1. 1. 1. ... 1. 1. 1.]
57+
SYCL(Intel(R) Core(TM) i7-10710U CPU @ 1.10GHz) result: [1. 1. 1. ... 1. 1. 1.]
58+
SYCL(Intel(R) Gen9 HD Graphics NEO) result: [1. 1. 1. ... 1. 1. 1.]
59+
Times for 'opencl:cpu:0'
60+
[2.864787499012891, 2.690436460019555, 2.5902308400254697, 2.5802528870408423, 2.538990616973024]
61+
Times for 'opencl:gpu:0'
62+
[1.9769684099592268, 2.3491444009705447, 2.293720397981815, 2.391633405990433, 1.9465659779962152]
63+
Times for NumPy
64+
[3.4011058019823395, 3.07286038500024, 3.0390414349967614, 3.0305576199898496, 3.002687797998078]
65+
```
66+
67+
Running run.py:
68+
69+
```
70+
(idp) [09:14:53 ansatnuc04 sycl_buffer]$ SYCL_BE=PI_OPENCL python run.py
71+
Result computed by NumPy
72+
[ 0.27170187 -23.36798583 7.31326489 -1.95121928]
73+
Result computed by SYCL extension
74+
[ 0.27170187 -23.36798583 7.31326489 -1.95121928]
75+
76+
Running on: Intel(R) Gen9 HD Graphics NEO
77+
[ 0.27170187 -23.36798583 7.31326489 -1.95121928]
78+
Running on: Intel(R) Core(TM) i7-10710U CPU @ 1.10GHz
79+
[ 0.27170187 -23.36798583 7.31326489 -1.95121928]
80+
```
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
cimport numpy as cnp
2+
import numpy as np
3+
4+
cimport dpctl as c_dpctl
5+
import dpctl
6+
7+
cdef extern from "use_sycl_buffer.h":
8+
int c_columnwise_total(c_dpctl.DPPLSyclQueueRef q, size_t n, size_t m, double *m, double *ct) nogil
9+
int c_columnwise_total_no_mkl(c_dpctl.DPPLSyclQueueRef q, size_t n, size_t m, double *m, double *ct) nogil
10+
11+
def columnwise_total(double[:, ::1] v, method='mkl'):
12+
cdef cnp.ndarray res_array = np.empty((v.shape[1],), dtype='d')
13+
cdef double[::1] res_memslice = res_array
14+
cdef int ret_status
15+
cdef c_dpctl.SyclQueue q
16+
cdef c_dpctl.DPPLSyclQueueRef q_ref
17+
18+
q = c_dpctl.get_current_queue()
19+
q_ref = q.get_queue_ref()
20+
21+
if method == 'mkl':
22+
with nogil:
23+
ret_status = c_columnwise_total(q_ref, v.shape[0], v.shape[1], &v[0,0], &res_memslice[0])
24+
else:
25+
with nogil:
26+
ret_status = c_columnwise_total_no_mkl(q_ref, v.shape[0], v.shape[1], &v[0,0], &res_memslice[0])
27+
28+
return res_array

examples/cython/sycl_buffer/bench.py

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
import dpctl
2+
import syclbuffer as sb
3+
import numpy as np
4+
5+
X = np.full((10 ** 4, 4098), 1e-4, dtype="d")
6+
7+
# warm-up
8+
print("=" * 10 + " Executing warm-up " + "=" * 10)
9+
print("NumPy result: ", X.sum(axis=0))
10+
11+
dpctl.set_default_queue("opencl", "cpu", 0)
12+
print(
13+
"SYCL({}) result: {}".format(
14+
dpctl.get_current_queue().get_sycl_device().get_device_name(),
15+
sb.columnwise_total(X),
16+
)
17+
)
18+
19+
dpctl.set_default_queue("opencl", "gpu", 0)
20+
print(
21+
"SYCL({}) result: {}".format(
22+
dpctl.get_current_queue().get_sycl_device().get_device_name(),
23+
sb.columnwise_total(X),
24+
)
25+
)
26+
27+
import timeit
28+
29+
print("Times for 'opencl:cpu:0'")
30+
print(
31+
timeit.repeat(
32+
stmt="sb.columnwise_total(X)",
33+
setup='dpctl.set_default_queue("opencl", "cpu", 0); '
34+
"sb.columnwise_total(X)", # ensure JIT compilation is not counted
35+
number=100,
36+
globals=globals(),
37+
)
38+
)
39+
40+
print("Times for 'opencl:gpu:0'")
41+
print(
42+
timeit.repeat(
43+
stmt="sb.columnwise_total(X)",
44+
setup='dpctl.set_default_queue("opencl", "gpu", 0); sb.columnwise_total(X)',
45+
number=100,
46+
globals=globals(),
47+
)
48+
)
49+
50+
print("Times for NumPy")
51+
print(timeit.repeat(stmt="X.sum(axis=0)", number=100, globals=globals()))

0 commit comments

Comments
 (0)