Skip to content

Commit 6ad1c5f

Browse files
committed
Support kDLCPU devices via DLPack protocol
Leverages NumPy to create an array with Python interpreter/host-accessible memory, which is required by the 2023.12 array API specification
1 parent 1ecd8a8 commit 6ad1c5f

File tree

3 files changed

+216
-10
lines changed

3 files changed

+216
-10
lines changed

dpctl/tensor/_dlpack.pxd

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@
1818
# cython: language_level=3
1919
# cython: linetrace=True
2020

21+
from numpy cimport ndarray
22+
2123
from .._sycl_device cimport SyclDevice
2224
from ._usmarray cimport usm_ndarray
2325

@@ -40,7 +42,9 @@ cdef extern from 'dlpack/dlpack.h' nogil:
4042

4143
cpdef object to_dlpack_capsule(usm_ndarray array) except +
4244
cpdef object to_dlpack_versioned_capsule(usm_ndarray array, bint copied) except +
43-
cpdef usm_ndarray from_dlpack_capsule(object dltensor) except +
45+
cpdef object numpy_to_dlpack_versioned_capsule(ndarray array, bint copied) except +
46+
cpdef object from_dlpack_capsule(object dltensor) except +
47+
cpdef object from_dlpack_versioned_capsule(object dltensor) except +
4448

4549
cdef int get_parent_device_ordinal_id(SyclDevice dev) except *
4650

dpctl/tensor/_dlpack.pyx

Lines changed: 192 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
cimport cpython
2222
from libc cimport stdlib
2323
from libc.stdint cimport int32_t, int64_t, uint8_t, uint16_t, uint32_t, uint64_t
24+
from numpy cimport ndarray
2425

2526
cimport dpctl as c_dpctl
2627
cimport dpctl.memory as c_dpmem
@@ -34,6 +35,8 @@ from .._backend cimport (
3435
)
3536
from ._usmarray cimport USM_ARRAY_C_CONTIGUOUS, USM_ARRAY_WRITABLE, usm_ndarray
3637

38+
import ctypes
39+
3740
import numpy as np
3841

3942
import dpctl
@@ -475,6 +478,108 @@ cpdef to_dlpack_versioned_capsule(usm_ndarray usm_ary, bint copied):
475478
return cpython.PyCapsule_New(dlmv_tensor, 'dltensor_versioned', _pycapsule_versioned_deleter)
476479

477480

481+
cpdef numpy_to_dlpack_versioned_capsule(ndarray npy_ary, bint copied):
    """
    numpy_to_dlpack_versioned_capsule(npy_ary, copied)

    Constructs named Python capsule object referencing
    instance of ``DLManagedTensorVersioned`` from
    :class:`numpy.ndarray` instance.

    Args:
        npy_ary: An instance of :class:`numpy.ndarray`
        copied: A bint representing whether the data was previously
            copied in order to set the flags with the is-copied
            bitmask.
    Returns:
        A new capsule with name ``"dltensor_versioned"`` that
        contains a pointer to ``DLManagedTensorVersioned`` struct.
    Raises:
        MemoryError: when host allocation needed for
            ``DLManagedTensorVersioned`` did not succeed.
        ValueError: when array elements data type could not be represented
            in ``DLManagedTensorVersioned``.
    """
    cdef DLManagedTensorVersioned *dlmv_tensor = NULL
    cdef DLTensor *dl_tensor = NULL
    cdef uint32_t dlmv_flags = 0
    cdef int nd = npy_ary.ndim
    cdef int64_t *shape_strides_ptr = NULL
    cdef int i = 0
    cdef Py_ssize_t byte_offset = 0

    dlmv_tensor = <DLManagedTensorVersioned *> stdlib.malloc(
        sizeof(DLManagedTensorVersioned))
    if dlmv_tensor is NULL:
        raise MemoryError(
            "numpy_to_dlpack_versioned_capsule: Could not allocate memory "
            "for DLManagedTensorVersioned"
        )
    # malloc(0) may legally return NULL for a zero-dimensional array;
    # request at least one element so nd == 0 does not raise a spurious
    # MemoryError
    shape_strides_ptr = <int64_t *>stdlib.malloc(
        (sizeof(int64_t) * 2) * nd if nd > 0 else sizeof(int64_t))
    if shape_strides_ptr is NULL:
        stdlib.free(dlmv_tensor)
        raise MemoryError(
            "numpy_to_dlpack_versioned_capsule: Could not allocate memory "
            "for shape/strides"
        )
    # first nd slots hold the shape, next nd slots hold the strides
    shape = npy_ary.ctypes.shape_as(ctypes.c_int64)
    strides = npy_ary.ctypes.strides_as(ctypes.c_int64)
    for i in range(nd):
        shape_strides_ptr[i] = shape[i]
        # DLPack strides are in elements, NumPy strides are in bytes
        shape_strides_ptr[nd + i] = strides[i] // npy_ary.itemsize
    writable_flag = npy_ary.flags["W"]

    ary_dt = npy_ary.dtype
    ary_dtk = ary_dt.kind

    dl_tensor = &dlmv_tensor.dl_tensor
    dl_tensor.data = <void *> npy_ary.data
    dl_tensor.ndim = nd
    dl_tensor.byte_offset = <uint64_t>byte_offset
    dl_tensor.shape = &shape_strides_ptr[0]
    dl_tensor.strides = &shape_strides_ptr[nd]
    # host-accessible memory: device is always CPU ordinal 0
    dl_tensor.device.device_type = kDLCPU
    dl_tensor.device.device_id = 0
    dl_tensor.dtype.lanes = <uint16_t>1
    dl_tensor.dtype.bits = <uint8_t>(ary_dt.itemsize * 8)
    if (ary_dtk == "b"):
        dl_tensor.dtype.code = <uint8_t>kDLBool
    elif (ary_dtk == "u"):
        dl_tensor.dtype.code = <uint8_t>kDLUInt
    elif (ary_dtk == "i"):
        dl_tensor.dtype.code = <uint8_t>kDLInt
    elif (ary_dtk == "f" and ary_dt.itemsize <= 8):
        dl_tensor.dtype.code = <uint8_t>kDLFloat
    elif (ary_dtk == "c" and ary_dt.itemsize <= 16):
        dl_tensor.dtype.code = <uint8_t>kDLComplex
    else:
        stdlib.free(shape_strides_ptr)
        stdlib.free(dlmv_tensor)
        raise ValueError("Unrecognized array data type")

    # set version/copy/read-only metadata flags
    if copied:
        dlmv_flags |= DLPACK_FLAG_BITMASK_IS_COPIED
    if not writable_flag:
        dlmv_flags |= DLPACK_FLAG_BITMASK_READ_ONLY
    dlmv_tensor.flags = dlmv_flags

    dlmv_tensor.version.major = DLPACK_MAJOR_VERSION
    dlmv_tensor.version.minor = DLPACK_MINOR_VERSION

    # hold a reference to the NumPy array for the lifetime of the capsule;
    # released by _managed_tensor_versioned_deleter
    dlmv_tensor.manager_ctx = <void*>npy_ary
    cpython.Py_INCREF(npy_ary)
    dlmv_tensor.deleter = _managed_tensor_versioned_deleter

    return cpython.PyCapsule_New(
        dlmv_tensor, 'dltensor_versioned', _pycapsule_versioned_deleter)
581+
582+
478583
cdef class _DLManagedTensorOwner:
479584
"""
480585
Helper class managing the lifetime of the DLManagedTensor struct
@@ -519,9 +624,81 @@ cdef class _DLManagedTensorVersionedOwner:
519624
return res
520625

521626

522-
cpdef usm_ndarray from_dlpack_capsule(object py_caps):
627+
cdef dict _numpy_array_interface_from_dl_tensor(DLTensor dlt, bint ro_flag):
    """Constructs a NumPy ``__array_interface__`` dictionary from a DLTensor.

    Args:
        dlt: a ``DLTensor`` whose data pointer refers to host-accessible
            memory.
        ro_flag: when non-zero, the interface marks the memory read-only.
    Returns:
        A dict following the ``numpy.ndarray.__array_interface__``
        protocol (version 3).
    Raises:
        BufferError: when the tensor has ``lanes != 1`` or a type code
            that can not be mapped to a NumPy typestr.
    """
    cdef int itemsize = 0

    if dlt.dtype.lanes != 1:
        raise BufferError(
            "Can not import DLPack tensor with lanes != 1"
        )
    itemsize = dlt.dtype.bits // 8
    shape = list()
    if (dlt.strides is NULL):
        # NULL strides signals compact row-major layout
        strides = None
        for dim in range(dlt.ndim):
            shape.append(dlt.shape[dim])
    else:
        strides = list()
        for dim in range(dlt.ndim):
            shape.append(dlt.shape[dim])
            # DLPack strides are in elements; __array_interface__
            # expects byte-strides
            strides.append(dlt.strides[dim] * itemsize)
        strides = tuple(strides)
    shape = tuple(shape)
    if (dlt.dtype.code == kDLUInt):
        ary_dt = "u" + str(itemsize)
    elif (dlt.dtype.code == kDLInt):
        ary_dt = "i" + str(itemsize)
    elif (dlt.dtype.code == kDLFloat):
        ary_dt = "f" + str(itemsize)
    elif (dlt.dtype.code == kDLComplex):
        ary_dt = "c" + str(itemsize)
    elif (dlt.dtype.code == kDLBool):
        # use the typestr character code ("b1" for bool), not np.dtype("?"):
        # concatenating a str with an np.dtype below would raise TypeError
        ary_dt = "b" + str(itemsize)
    else:
        raise BufferError(
            "Can not import DLPack tensor with type code {}.".format(
                <object>dlt.dtype.code
            )
        )
    typestr = "|" + ary_dt
    return dict(
        version=3,
        shape=shape,
        strides=strides,
        data=(<size_t> dlt.data, True if ro_flag else False),
        offset=dlt.byte_offset,
        typestr=typestr,
    )
675+
676+
677+
class _numpy_array_interface_wrapper:
678+
"""
679+
Class that wraps a Python capsule and dictionary for consumption by NumPy.
680+
681+
Implementation taken from
682+
https://github.com/dmlc/dlpack/blob/main/apps/numpy_dlpack/dlpack/to_numpy.py
683+
684+
Args:
685+
array_interface:
686+
A dictionary describing the underlying memory. Formatted
687+
to match `numpy.ndarray.__array_interface__`.
688+
689+
pycapsule:
690+
A Python capsule wrapping the dlpack tensor that will be
691+
converted to numpy.
523692
"""
524-
from_dlpack_capsule(caps)
693+
694+
def __init__(self, array_interface, pycapsule) -> None:
695+
self.__array_interface__ = array_interface
696+
self._pycapsule = pycapsule
697+
698+
699+
cpdef object from_dlpack_capsule(object py_caps):
700+
"""
701+
from_dlpack_capsule(py_caps)
525702
526703
Reconstructs instance of :class:`dpctl.tensor.usm_ndarray` from
527704
named Python capsule object referencing instance of ``DLManagedTensor``
@@ -693,15 +870,20 @@ cpdef usm_ndarray from_dlpack_capsule(object py_caps):
693870
offset=element_offset
694871
)
695872
return res_ary
873+
elif dlm_tensor.dl_tensor.device.device_type == kDLCPU:
874+
ary_iface = _numpy_array_interface_from_dl_tensor(dlm_tensor.dl_tensor, False)
875+
dlm_holder = _DLManagedTensorOwner._create(dlm_tensor)
876+
cpython.PyCapsule_SetName(py_caps, 'used_dltensor')
877+
return np.ctypeslib.as_array(_numpy_array_interface_wrapper(ary_iface, py_caps))
696878
else:
697879
raise BufferError(
698880
"The DLPack tensor resides on unsupported device."
699881
)
700882

701883

702-
cpdef usm_ndarray from_dlpack_versioned_capsule(object py_caps):
884+
cpdef object from_dlpack_versioned_capsule(object py_caps):
703885
"""
704-
from_dlpack_versioned_capsule(caps)
886+
from_dlpack_versioned_capsule(py_caps)
705887
706888
Reconstructs instance of :class:`dpctl.tensor.usm_ndarray` from
707889
named Python capsule object referencing instance of
@@ -883,6 +1065,12 @@ cpdef usm_ndarray from_dlpack_versioned_capsule(object py_caps):
8831065
if (dlmv_tensor.flags & DLPACK_FLAG_BITMASK_READ_ONLY):
8841066
res_ary.flags_ = (res_ary.flags_ & ~USM_ARRAY_WRITABLE)
8851067
return res_ary
1068+
elif dlmv_tensor.dl_tensor.device.device_type == kDLCPU:
1069+
ro_flag = dlmv_tensor.flags & DLPACK_FLAG_BITMASK_READ_ONLY
1070+
ary_iface = _numpy_array_interface_from_dl_tensor(dlmv_tensor.dl_tensor, ro_flag)
1071+
dlmv_holder = _DLManagedTensorVersionedOwner._create(dlmv_tensor)
1072+
cpython.PyCapsule_SetName(py_caps, 'used_dltensor_versioned')
1073+
return np.ctypeslib.as_array(_numpy_array_interface_wrapper(ary_iface, py_caps))
8861074
else:
8871075
raise BufferError(
8881076
"The DLPack tensor resides on unsupported device."

dpctl/tensor/_usmarray.pyx

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1142,13 +1142,27 @@ cdef class usm_ndarray:
11421142
dpctl_dlpack_version = get_build_dlpack_version()
11431143
if max_version[0] >= dpctl_dlpack_version[0]:
11441144
# DLManagedTensorVersioned path
1145-
# TODO: add logic for targeting a device
11461145
if dl_device is not None:
11471146
if dl_device != self.__dlpack_device__():
1148-
raise NotImplementedError(
1149-
"targeting a device with `__dlpack__` is not "
1150-
"currently implemented"
1151-
)
1147+
if copy == False:
1148+
raise BufferError(
1149+
"array cannot be placed on the requested device without a copy"
1150+
)
1151+
if dl_device[0] == (DLDeviceType.kDLCPU):
1152+
assert dl_device[1] == 0
1153+
if stream is not None:
1154+
raise ValueError(
1155+
"`stream` must be `None` when `dl_device` is of type `kDLCPU`"
1156+
)
1157+
from ._copy_utils import _copy_to_numpy
1158+
_arr = _copy_to_numpy(self)
1159+
_arr.flags["W"] = self.flags["W"]
1160+
return c_dlpack.numpy_to_dlpack_versioned_capsule(_arr, True)
1161+
else:
1162+
raise NotImplementedError(
1163+
f"targeting `dl_device` {dl_device} with `__dlpack__` is not "
1164+
"yet implemented"
1165+
)
11521166
if copy is None:
11531167
copy = False
11541168
# TODO: strategy for handling stream on different device from dl_device

0 commit comments

Comments
 (0)