Skip to content

Commit 241353c

Browse files
Merge remote-tracking branch 'origin/master' into gold/2021
2 parents dc863cd + fa8eb17 commit 241353c

File tree

12 files changed

+189
-32
lines changed

12 files changed

+189
-32
lines changed

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
2121
* Introduced `"syclinterface/dpctl_sycl_types_casters.hpp"` header file with declaration of conversion routines between SYCL type pointers and SyclInterface library opaque pointers [#960](https://github.com/IntelPython/dpctl/pull/960).
2222
* Added C-API to `dpctl.program.SyclKernel` and `dpctl.program.SyclProgram`. Added type casters for new types to "dpctl4pybind11" and added an example demonstrating its use [#970](https://github.com/IntelPython/dpctl/pull/970).
2323
* Introduced "dpctl/sycl.pxd" Cython declaration file to streamline use of SYCL functions from Cython, and added an example demonstrating its use [#981](https://github.com/IntelPython/dpctl/pull/981).
24+
* Added experimental support for sharing data allocated on sub-devices via dlpack [#984](https://github.com/IntelPython/dpctl/pull/984).
25+
* Added `dpctl.SyclDevice.sub_group_sizes` property to retrieve supported sizes of sub-group by the device [#985](https://github.com/IntelPython/dpctl/pull/985).
2426

2527
### Changed
2628
* Improved queue compatibility testing in `dpctl.tensor`'s implementation module [#900](https://github.com/IntelPython/dpctl/pull/900).

dpctl/_backend.pxd

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -204,6 +204,8 @@ cdef extern from "syclinterface/dpctl_sycl_device_interface.h":
204204
cdef uint64_t DPCTLDevice_GetGlobalMemCacheSize(const DPCTLSyclDeviceRef DRef)
205205
cdef _global_mem_cache_type DPCTLDevice_GetGlobalMemCacheType(
206206
const DPCTLSyclDeviceRef DRef)
207+
cdef size_t *DPCTLDevice_GetSubGroupSizes(const DPCTLSyclDeviceRef DRef,
208+
size_t *res_len)
207209

208210

209211
cdef extern from "syclinterface/dpctl_sycl_device_manager.h":

dpctl/_sycl_device.pyx

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@ from ._backend cimport ( # noqa: E211
6565
DPCTLDevice_GetPreferredVectorWidthShort,
6666
DPCTLDevice_GetProfilingTimerResolution,
6767
DPCTLDevice_GetSubGroupIndependentForwardProgress,
68+
DPCTLDevice_GetSubGroupSizes,
6869
DPCTLDevice_GetVendor,
6970
DPCTLDevice_HasAspect,
7071
DPCTLDevice_Hash,
@@ -884,6 +885,28 @@ cdef class SyclDevice(_SyclDevice):
884885
self._device_ref
885886
)
886887

888+
@property
889+
def sub_group_sizes(self):
890+
""" Returns list of supported sub-group sizes for this device.
891+
892+
Returns:
893+
List[int]: List of supported sub-group sizes.
894+
"""
895+
cdef size_t *sg_sizes = NULL
896+
cdef size_t sg_sizes_len = 0
897+
cdef size_t i
898+
899+
sg_sizes = DPCTLDevice_GetSubGroupSizes(
900+
self._device_ref, &sg_sizes_len)
901+
if (sg_sizes is not NULL and sg_sizes_len > 0):
902+
res = list()
903+
for i in range(sg_sizes_len):
904+
res.append(sg_sizes[i])
905+
DPCTLSize_t_Array_Delete(sg_sizes)
906+
return res
907+
else:
908+
return []
909+
887910
@property
888911
def sycl_platform(self):
889912
""" Returns the platform associated with this device.

dpctl/_sycl_platform.pyx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -272,7 +272,7 @@ cdef class SyclPlatform(_SyclPlatform):
272272
)
273273

274274
if (CRef == NULL):
275-
raise
275+
raise RuntimeError("Getting default error ran into a problem")
276276
else:
277277
return SyclContext._create(CRef)
278278

dpctl/tensor/_dlpack.pyx

Lines changed: 71 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -33,12 +33,18 @@ from .._backend cimport (
3333
)
3434
from ._usmarray cimport usm_ndarray
3535

36+
from platform import system as sys_platform
37+
3638
import numpy as np
3739

3840
import dpctl
3941
import dpctl.memory as dpmem
4042

4143

44+
cdef bint _IS_LINUX = sys_platform() == "Linux"
45+
46+
del sys_platform
47+
4248
cdef extern from 'dlpack/dlpack.h' nogil:
4349
cdef int DLPACK_VERSION
4450

@@ -140,6 +146,7 @@ cpdef to_dlpack_capsule(usm_ndarray usm_ary) except+:
140146
cdef c_dpctl.SyclQueue ary_sycl_queue
141147
cdef c_dpctl.SyclDevice ary_sycl_device
142148
cdef DPCTLSyclDeviceRef pDRef = NULL
149+
cdef DPCTLSyclDeviceRef tDRef = NULL
143150
cdef DLManagedTensor *dlm_tensor = NULL
144151
cdef DLTensor *dl_tensor = NULL
145152
cdef int nd = usm_ary.get_ndim()
@@ -157,19 +164,45 @@ cpdef to_dlpack_capsule(usm_ndarray usm_ary) except+:
157164
ary_sycl_queue = usm_ary.get_sycl_queue()
158165
ary_sycl_device = ary_sycl_queue.get_sycl_device()
159166

160-
# check that ary_sycl_device is a non-partitioned device
161-
pDRef = DPCTLDevice_GetParentDevice(ary_sycl_device.get_device_ref())
162-
if pDRef is not NULL:
163-
DPCTLDevice_Delete(pDRef)
164-
raise DLPackCreationError(
165-
"to_dlpack_capsule: DLPack can only export arrays allocated on "
166-
"non-partitioned SYCL devices."
167-
)
168-
default_context = dpctl.SyclQueue(ary_sycl_device).sycl_context
169-
if not usm_ary.sycl_context == default_context:
167+
try:
168+
if _IS_LINUX:
169+
default_context = ary_sycl_device.sycl_platform.default_context
170+
else:
171+
default_context = None
172+
except RuntimeError:
173+
# RT does not support default_context, e.g. Windows
174+
default_context = None
175+
if default_context is None:
176+
# check that ary_sycl_device is a non-partitioned device
177+
pDRef = DPCTLDevice_GetParentDevice(ary_sycl_device.get_device_ref())
178+
if pDRef is not NULL:
179+
DPCTLDevice_Delete(pDRef)
180+
raise DLPackCreationError(
181+
"to_dlpack_capsule: DLPack can only export arrays allocated "
182+
"on non-partitioned SYCL devices on platforms where "
183+
"default_context oneAPI extension is not supported."
184+
)
185+
else:
186+
if not usm_ary.sycl_context == default_context:
187+
raise DLPackCreationError(
188+
"to_dlpack_capsule: DLPack can only export arrays based on USM "
189+
"allocations bound to a default platform SYCL context"
190+
)
191+
# Find the unpartitioned parent of the allocation device
192+
pDRef = DPCTLDevice_GetParentDevice(ary_sycl_device.get_device_ref())
193+
if pDRef is not NULL:
194+
tDRef = DPCTLDevice_GetParentDevice(pDRef)
195+
while tDRef is not NULL:
196+
DPCTLDevice_Delete(pDRef)
197+
pDRef = tDRef
198+
tDRef = DPCTLDevice_GetParentDevice(pDRef)
199+
ary_sycl_device = c_dpctl.SyclDevice._create(pDRef)
200+
201+
# Find ordinal number of the parent device
202+
device_id = ary_sycl_device.get_overall_ordinal()
203+
if device_id < 0:
170204
raise DLPackCreationError(
171-
"to_dlpack_capsule: DLPack can only export arrays based on USM "
172-
"allocations bound to a default platform SYCL context"
205+
"to_dlpack_capsule: failed to determine device_id"
173206
)
174207

175208
dlm_tensor = <DLManagedTensor *> stdlib.malloc(
@@ -192,14 +225,6 @@ cpdef to_dlpack_capsule(usm_ndarray usm_ary) except+:
192225
for i in range(nd):
193226
shape_strides_ptr[nd + i] = strides_ptr[i]
194227

195-
device_id = ary_sycl_device.get_overall_ordinal()
196-
if device_id < 0:
197-
stdlib.free(shape_strides_ptr)
198-
stdlib.free(dlm_tensor)
199-
raise DLPackCreationError(
200-
"to_dlpack_capsule: failed to determine device_id"
201-
)
202-
203228
ary_dt = usm_ary.dtype
204229
ary_dtk = ary_dt.kind
205230
element_offset = usm_ary.get_offset()
@@ -278,15 +303,16 @@ cpdef usm_ndarray from_dlpack_capsule(object py_caps) except +:
278303
success.
279304
Raises:
280305
TypeError: if argument is not a "dltensor" capsule.
281-
ValueError: if argument is "used_dltensor" capsule,
282-
if the USM pointer is not bound to the reconstructed
306+
ValueError: if argument is "used_dltensor" capsule
307+
BufferError: if the USM pointer is not bound to the reconstructed
283308
sycl context, or the DLPack's device_type is not supported
284309
by dpctl.
285310
"""
286311
cdef DLManagedTensor *dlm_tensor = NULL
287312
cdef bytes usm_type
288313
cdef size_t sz = 1
289314
cdef int i
315+
cdef int device_id = -1
290316
cdef int element_bytesize = 0
291317
cdef Py_ssize_t offset_min = 0
292318
cdef Py_ssize_t offset_max = 0
@@ -308,26 +334,40 @@ cpdef usm_ndarray from_dlpack_capsule(object py_caps) except +:
308334
py_caps, "dltensor")
309335
# Verify that we can work with this device
310336
if dlm_tensor.dl_tensor.device.device_type == kDLOneAPI:
311-
q = dpctl.SyclQueue(str(<int>dlm_tensor.dl_tensor.device.device_id))
337+
device_id = dlm_tensor.dl_tensor.device.device_id
338+
root_device = dpctl.SyclDevice(str(<int>device_id))
339+
try:
340+
if _IS_LINUX:
341+
default_context = root_device.sycl_platform.default_context
342+
else:
343+
default_context = dpctl.SyclQueue(root_device).sycl_context
344+
except RuntimeError:
345+
default_context = dpctl.SyclQueue(root_device).sycl_context
312346
if dlm_tensor.dl_tensor.data is NULL:
313347
usm_type = b"device"
348+
q = dpctl.SyclQueue(default_context, root_device)
314349
else:
315350
usm_type = c_dpmem._Memory.get_pointer_type(
316351
<DPCTLSyclUSMRef> dlm_tensor.dl_tensor.data,
317-
<c_dpctl.SyclContext>q.sycl_context)
318-
if usm_type == b"unknown":
319-
raise ValueError(
320-
f"Data pointer in DLPack is not bound to default sycl "
321-
"context of device '{device_id}', translated to "
322-
"{q.sycl_device.filter_string}"
352+
<c_dpctl.SyclContext>default_context)
353+
if usm_type == b"unknown":
354+
raise BufferError(
355+
"Data pointer in DLPack is not bound to default sycl "
356+
f"context of device '{device_id}', translated to "
357+
f"{root_device.filter_string}"
358+
)
359+
alloc_device = c_dpmem._Memory.get_pointer_device(
360+
<DPCTLSyclUSMRef> dlm_tensor.dl_tensor.data,
361+
<c_dpctl.SyclContext>default_context
323362
)
363+
q = dpctl.SyclQueue(default_context, alloc_device)
324364
if dlm_tensor.dl_tensor.dtype.bits % 8:
325-
raise ValueError(
365+
raise BufferError(
326366
"Can not import DLPack tensor whose element's "
327367
"bitsize is not a multiple of 8"
328368
)
329369
if dlm_tensor.dl_tensor.dtype.lanes != 1:
330-
raise ValueError(
370+
raise BufferError(
331371
"Can not import DLPack tensor with lanes != 1"
332372
)
333373
if dlm_tensor.dl_tensor.strides is NULL:

dpctl/tests/_device_attributes_checks.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,11 @@ def check_max_num_sub_groups(device):
115115
assert max_num_sub_groups > 0
116116

117117

118+
def check_sub_group_sizes(device):
119+
sg_sizes = device.sub_group_sizes
120+
assert all(el > 0 for el in sg_sizes)
121+
122+
118123
def check_has_aspect_host(device):
119124
try:
120125
device.has_aspect_host
@@ -605,6 +610,7 @@ def check_global_mem_cache_line_size(device):
605610
check_max_work_item_sizes,
606611
check_max_work_group_size,
607612
check_max_num_sub_groups,
613+
check_sub_group_sizes,
608614
check_is_accelerator,
609615
check_is_cpu,
610616
check_is_gpu,

examples/pybind11/use_dpctl_syclqueue/tests/test_queue_device.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,3 +55,11 @@ def test_offload_array_mod():
5555
Ynp = X % modulus_p
5656

5757
assert np.array_equal(Y, Ynp)
58+
59+
60+
def test_get_sub_group_sizes():
61+
d = dpctl.SyclDevice()
62+
szs = uqd.get_sub_group_sizes(d)
63+
assert type(szs) is list
64+
assert all(type(el) is int for el in szs)
65+
szs == d.sub_group_sizes

examples/pybind11/use_dpctl_syclqueue/use_queue_device/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
get_device_global_mem_size,
2121
get_device_local_mem_size,
2222
get_max_compute_units,
23+
get_sub_group_sizes,
2324
offloaded_array_mod,
2425
)
2526

@@ -28,6 +29,7 @@
2829
"get_device_global_mem_size",
2930
"get_device_local_mem_size",
3031
"offloaded_array_mod",
32+
"get_sub_group_sizes",
3133
]
3234

3335
__doc__ = """

examples/pybind11/use_dpctl_syclqueue/use_queue_device/_example.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
#include <cstdint>
3232
#include <pybind11/numpy.h>
3333
#include <pybind11/pybind11.h>
34+
#include <pybind11/stl.h>
3435

3536
namespace py = pybind11;
3637

@@ -84,6 +85,11 @@ offloaded_array_mod(sycl::queue q,
8485
return res;
8586
}
8687

88+
std::vector<std::size_t> get_sub_group_sizes(const sycl::device &d)
89+
{
90+
return d.get_info<sycl::info::device::sub_group_sizes>();
91+
}
92+
8793
PYBIND11_MODULE(_use_queue_device, m)
8894
{
8995
m.def(
@@ -100,4 +106,6 @@ PYBIND11_MODULE(_use_queue_device, m)
100106
"Computes amount of local memory of the given dpctl.SyclDevice");
101107
m.def("offloaded_array_mod", &offloaded_array_mod,
102108
"Compute offloaded modular reduction of integer-valued NumPy array");
109+
m.def("get_sub_group_sizes", &get_sub_group_sizes,
110+
"Gets info::device::sub_group_sizes property of given device");
103111
}

libsyclinterface/include/dpctl_sycl_device_interface.h

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -651,4 +651,17 @@ DPCTL_API
651651
DPCTLGlobalMemCacheType
652652
DPCTLDevice_GetGlobalMemCacheType(__dpctl_keep const DPCTLSyclDeviceRef DRef);
653653

654+
/*!
655+
* @brief Wrapper for get_info<info::device::sub_group_sizes>().
656+
*
657+
* @param DRef Opaque pointer to a ``sycl::device``
658+
* @param res_len Populated with size of the returned array
659+
* @return Returns the valid result if device exists else returns NULL.
660+
* @ingroup DeviceInterface
661+
*/
662+
DPCTL_API
663+
__dpctl_keep size_t *
664+
DPCTLDevice_GetSubGroupSizes(__dpctl_keep const DPCTLSyclDeviceRef DRef,
665+
size_t *res_len);
666+
654667
DPCTL_C_EXTERN_C_END

0 commit comments

Comments
 (0)