Skip to content

Commit 0714fee

Browse files
Memory class exposes __sycl_usm_array_interface__
1. Memory class exposes the interface.
2. Memory variants' constructors can consume objects exposing the said interface, and take over the memory zero-copy.
3. Class implements:
   - memInst.copy_to_host(pyobj=None): If `pyobj` supports Python's buffer protocol, the content of the USM memory in the instance is copied to the host buffer. Otherwise, a bytearray is allocated, populated and returned.
   - memInst.copy_from_host(pyobj): Copies the buffer of `pyobj` into the USM memory of the instance. Raises an exception if pyobj is not a byte array.
   - memInst.copy_from_device(sycl_usm_obj): Copies the USM memory of sycl_usm_obj exposing __sycl_usm_array_interface__ into the USM memory of the instance.
4. Class is pickleable.
5. Class implements a tobytes method that produces a bytes object populated with the content of the USM memory.
Methods are currently not releasing the GIL, but I think they should.
1 parent 7d548cd commit 0714fee

File tree

3 files changed

+321
-27
lines changed

3 files changed

+321
-27
lines changed

dpctl/_memory.pxd

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,10 +29,19 @@ cdef class Memory:
2929
cdef DPPLSyclUSMRef memory_ptr
3030
cdef Py_ssize_t nbytes
3131
cdef SyclQueue queue
32+
cdef object refobj
3233

33-
cdef _cinit(self, Py_ssize_t nbytes, ptr_type, SyclQueue queue)
34+
cdef _cinit_empty(self)
35+
cdef _cinit_alloc(self, Py_ssize_t nbytes, bytes ptr_type, SyclQueue queue)
36+
cdef _cinit_other(self, object other)
3437
cdef _getbuffer(self, Py_buffer *buffer, int flags)
3538

39+
cpdef copy_to_host(self, object obj=*)
40+
cpdef copy_from_host(self, object obj)
41+
cpdef copy_from_device(self, object obj)
42+
43+
cpdef bytes tobytes(self)
44+
3645

3746
cdef class MemoryUSMShared(Memory):
3847
pass

dpctl/_memory.pyx

Lines changed: 264 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -31,32 +31,131 @@
3131
import dpctl
3232
from dpctl._backend cimport *
3333
from ._sycl_core cimport SyclContext, SyclQueue
34+
from ._sycl_core cimport get_current_queue
3435

3536
from cpython cimport Py_buffer
37+
from cpython.bytes cimport PyBytes_AS_STRING, PyBytes_FromStringAndSize
3638

39+
import numpy as np
3740

38-
cdef class Memory:
41+
cdef _throw_sycl_usm_ary_iface():
    """Raise the ValueError that reports a malformed
    `__sycl_usm_array_interface__` dictionary."""
    message = "__sycl_usm_array_interface__ is malformed"
    raise ValueError(message)
3943

40-
cdef _cinit(self, Py_ssize_t nbytes, ptr_type, SyclQueue queue):
41-
cdef DPPLSyclUSMRef p
4244

45+
cdef void copy_via_host(void *dest_ptr, SyclQueue dest_queue,
                        void *src_ptr, SyclQueue src_queue,
                        size_t nbytes) except *:
    """
    Copies `nbytes` bytes from `src_ptr` USM memory to
    `dest_ptr` USM memory using host as the intermediary.

    This is useful when `src_ptr` and `dest_ptr` are bound to incompatible
    SYCL contexts.

    The copy is done in two steps: `src_queue` copies the source into a
    temporary host bytearray, then `dest_queue` copies that bytearray into
    the destination.

    Declared `except *` so that a Python exception raised here (for example
    MemoryError while allocating the staging buffer) propagates to the
    caller instead of being swallowed by a `void` function.
    """
    cdef unsigned char[::1] host_buf

    # Nothing to move. Returning early also avoids taking the address of
    # element 0 of an empty staging buffer.
    if nbytes == 0:
        return

    host_buf = bytearray(nbytes)

    # Step 1: source USM memory -> host staging buffer.
    # NOTE(review): assumes DPPLQueue_Memcpy has completed when it returns,
    # since the staging buffer is reused immediately - confirm.
    DPPLQueue_Memcpy(
        src_queue.get_queue_ref(),
        <void *>&host_buf[0],
        src_ptr,
        nbytes
    )

    # Step 2: host staging buffer -> destination USM memory.
    DPPLQueue_Memcpy(
        dest_queue.get_queue_ref(),
        dest_ptr,
        <void *>&host_buf[0],
        nbytes
    )
69+
70+
71+
cdef class _BufferData:
    """
    Internal record describing the USM memory advertised by a
    `__sycl_usm_array_interface__` dictionary.

    Instances are created by `from_sycl_usm_ary_iface`.
    """
    cdef DPPLSyclUSMRef p        # USM pointer, from iface['data'][0]
    cdef int writeable           # 1 if iface['data'][1] is truthy, else 0
    cdef object dt               # NumPy dtype built from iface['typestr']
    cdef Py_ssize_t itemsize     # dt.itemsize
    cdef Py_ssize_t nbytes       # shape[0] * itemsize
    cdef SyclQueue queue         # queue associated with the memory

    @staticmethod
    cdef _BufferData from_sycl_usm_ary_iface(dict ary_iface):
        """
        Validate `ary_iface` and convert it into a `_BufferData` record.

        Only version 1 interfaces describing a one-dimensional, non-empty,
        contiguous array are accepted.

        Raises:
            ValueError: if the dictionary is malformed (wrong version,
                bad 'data' tuple, bad 'shape', unparsable 'typestr',
                missing or unsupported 'syclobj') or if the described
                array is not contiguous.
        """
        cdef object ary_data_tuple = ary_iface.get('data', None)
        cdef object ary_typestr = ary_iface.get('typestr', None)
        cdef object ary_shape = ary_iface.get('shape', None)
        cdef object ary_strides = ary_iface.get('strides', None)
        cdef object ary_syclobj = ary_iface.get('syclobj', None)
        # NOTE(review): 'offset' is read (so a non-integer value still
        # fails the conversion) but it is not applied to the pointer -
        # confirm the intended semantics before relying on it.
        cdef Py_ssize_t ary_offset = ary_iface.get('offset', 0)
        cdef int ary_version = ary_iface.get('version', 0)
        cdef object dt
        cdef _BufferData buf
        cdef Py_ssize_t arr_data_ptr

        if ary_version != 1:
            _throw_sycl_usm_ary_iface()
        if not ary_data_tuple or len(ary_data_tuple) != 2:
            _throw_sycl_usm_ary_iface()
        if not ary_shape or len(ary_shape) != 1 or ary_shape[0] < 1:
            # Same ValueError type as before, now with the shared message.
            _throw_sycl_usm_ary_iface()
        try:
            dt = np.dtype(ary_typestr)
        except TypeError:
            _throw_sycl_usm_ary_iface()
        # A 1-D array is contiguous when its single stride equals the item
        # size. The stride value is what must be compared, not the length
        # of the strides tuple.
        # NOTE(review): assumes strides are expressed in bytes, as the
        # comparison with dt.itemsize suggests - confirm against the spec.
        if ary_strides and (len(ary_strides) != 1
                            or ary_strides[0] != dt.itemsize):
            raise ValueError("Must be contiguous")

        if not ary_syclobj or not isinstance(ary_syclobj,
                                             (dpctl.SyclQueue, dpctl.SyclContext)):
            _throw_sycl_usm_ary_iface()

        buf = _BufferData.__new__(_BufferData)
        arr_data_ptr = <Py_ssize_t>ary_data_tuple[0]
        buf.p = <DPPLSyclUSMRef>(<void*>arr_data_ptr)
        buf.writeable = 1 if ary_data_tuple[1] else 0
        buf.dt = dt
        buf.itemsize = <Py_ssize_t>(dt.itemsize)
        buf.nbytes = (<Py_ssize_t>ary_shape[0]) * buf.itemsize

        if isinstance(ary_syclobj, dpctl.SyclQueue):
            buf.queue = <SyclQueue>ary_syclobj
        else:
            # FIXME: need a way to construct a queue from a SyclContext;
            # until then the current queue is used.
            buf.queue = get_current_queue()

        return buf
123+
124+
125+
def _to_memory(unsigned char [::1] b):
    """Build a MemoryUSMShared instance holding a copy of `b`.

    The new object has `len(b)` bytes and is filled from the host buffer
    `b` using `copy_from_host`. `__reduce__` returns this function as the
    reconstructor used for pickling.
    """
    cdef Memory usm_copy = MemoryUSMShared(len(b))

    usm_copy.copy_from_host(b)
    return usm_copy
132+
133+
134+
cdef class Memory:
135+
cdef _cinit_empty(self):
    """Put the instance into the empty state: no owner object, no queue,
    zero bytes and a NULL USM pointer."""
    self.refobj = None
    self.queue = None
    self.nbytes = 0
    self.memory_ptr = NULL
140+
141+
cdef _cinit_alloc(self, Py_ssize_t nbytes, bytes ptr_type, SyclQueue queue):
142+
cdef DPPLSyclUSMRef p
143+
144+
self._cinit_empty()
46145

47146
if (nbytes > 0):
48147
if queue is None:
49-
queue = dpctl.get_current_queue()
148+
queue = get_current_queue()
50149

51-
if (ptr_type == "shared"):
150+
if (ptr_type == b"shared"):
52151
p = DPPLmalloc_shared(nbytes, queue.get_queue_ref())
53-
elif (ptr_type == "host"):
152+
elif (ptr_type == b"host"):
54153
p = DPPLmalloc_host(nbytes, queue.get_queue_ref())
55-
elif (ptr_type == "device"):
154+
elif (ptr_type == b"device"):
56155
p = DPPLmalloc_device(nbytes, queue.get_queue_ref())
57156
else:
58157
raise RuntimeError("Pointer type is unknown: {}" \
59-
.format(ptr_type))
158+
.format(ptr_type.decode("UTF-8")))
60159

61160
if (p):
62161
self.memory_ptr = p
@@ -67,13 +166,32 @@ cdef class Memory:
67166
else:
68167
raise ValueError("Non-positive number of bytes found.")
69168

169+
cdef _cinit_other(self, object other):
    """
    Initialize the instance as a zero-copy view of the USM memory that
    `other` exposes through `__sycl_usm_array_interface__`.

    A reference to `other` is stored in `self.refobj`.

    Raises:
        ValueError: if `other` has no `__sycl_usm_array_interface__`
            attribute, if that attribute is not a dict, or if the dict
            fails validation.
    """
    cdef _BufferData other_buf

    if not hasattr(other, '__sycl_usm_array_interface__'):
        raise ValueError(
            "Argument {} does not expose "
            "`__sycl_usm_array_interface__`.".format(other)
        )

    other_iface = other.__sycl_usm_array_interface__
    if not isinstance(other_iface, dict):
        # The two literals are joined at compile time; the trailing space
        # keeps "expose" and the attribute name apart in the message.
        raise ValueError(
            "Argument {} does not correctly expose "
            "`__sycl_usm_array_interface__`.".format(other)
        )

    other_buf = _BufferData.from_sycl_usm_ary_iface(other_iface)
    self.memory_ptr = other_buf.p
    self.nbytes = other_buf.nbytes
    self.queue = other_buf.queue
    # TODO: other_buf.writeable is not recorded on the instance yet.
    self.refobj = other
189+
70190
def __dealloc__(self):
71-
if (self.memory_ptr):
191+
if (self.refobj is None and self.memory_ptr):
72192
DPPLfree_with_queue(self.memory_ptr,
73193
self.queue.get_queue_ref())
74-
self.memory_ptr = NULL
75-
self.nbytes = 0
76-
self.queue = None
194+
self._cinit_empty()
77195

78196
cdef _getbuffer(self, Py_buffer *buffer, int flags):
79197
# memory_ptr is Ref which is pointer to SYCL type. For USM it is void*.
@@ -93,6 +211,10 @@ cdef class Memory:
93211
def __get__(self):
94212
return self.nbytes
95213

214+
property size:
    """Number of bytes in the block, as stored in `self.nbytes`."""
    def __get__(self):
        cdef Py_ssize_t byte_count = self.nbytes
        return byte_count
217+
96218
property _pointer:
97219
def __get__(self):
98220
return <size_t>(self.memory_ptr)
@@ -105,11 +227,40 @@ cdef class Memory:
105227
def __get__(self):
106228
return self.queue
107229

230+
property reference_obj:
    """The object stored in `self.refobj`, or None if nothing is stored."""
    def __get__(self):
        owner = self.refobj
        return owner
233+
108234
def __repr__(self):
109235
return "<Intel(R) USM allocated memory block of {} bytes at {}>" \
110236
.format(self.nbytes, hex(<object>(<Py_ssize_t>self.memory_ptr)))
111237

112-
def _usm_type(self, syclobj=None):
238+
def __len__(self):
    """Length of the block in bytes (the value of `self.nbytes`)."""
    cdef Py_ssize_t byte_count = self.nbytes
    return byte_count
240+
241+
def __sizeof__(self):
    """Report the value of `self.nbytes` as the size of the object."""
    cdef Py_ssize_t byte_count = self.nbytes
    return byte_count
243+
244+
def __bytes__(self):
    """Support `bytes(obj)` by delegating to `tobytes`."""
    cdef bytes content = self.tobytes()
    return content
246+
247+
def __reduce__(self):
    """Pickle support.

    Returns the reconstructor `_to_memory` together with a host copy of
    the memory content, which is passed to it as the single argument.
    """
    host_copy = self.copy_to_host()
    return (_to_memory, (host_copy,))
249+
250+
property __sycl_usm_array_interface__:
    """Dictionary describing the block as a one-dimensional array of
    unsigned bytes ("|u1") of length `self.nbytes`."""
    def __get__ (self):
        cdef Py_ssize_t address = <Py_ssize_t>(<void *>self.memory_ptr)
        cdef dict iface = {}

        # Second tuple element is always True for now; the instance does
        # not track a writeable flag yet (bool(self.writeable)).
        iface["data"] = (address, True)
        iface["shape"] = (self.nbytes,)
        iface["strides"] = None
        iface["typestr"] = "|u1"
        iface["version"] = 1
        iface["syclobj"] = self.queue
        return iface
262+
263+
def get_usm_type(self, syclobj=None):
113264
cdef const char* kind
114265
cdef SyclContext ctx
115266
cdef SyclQueue q
@@ -131,26 +282,120 @@ cdef class Memory:
131282
"or an instance of SyclConext or SyclQueue")
132283
return kind.decode('UTF-8')
133284

285+
cpdef copy_to_host (self, obj=None):
    """Copy the content of the instance's memory into the buffer of `obj`.

    If `obj` is None, a new NumPy array of `self.nbytes` unsigned bytes is
    allocated and used as the destination. Otherwise `obj` must provide a
    writable contiguous byte buffer of at least `self.nbytes` bytes.

    Returns the destination object (`obj`, or the newly created array).

    Raises ValueError if the supplied buffer is shorter than `self.nbytes`.
    """
    # Assigning to a typed memoryview acquires the buffer of obj; None is
    # accepted and leaves the view set to None.
    cdef unsigned char[::1] dest_view = obj

    if (dest_view is not None):
        if (<Py_ssize_t>len(dest_view) < self.nbytes):
            raise ValueError("Destination object is too small to "
                             "accommodate {} bytes".format(self.nbytes))
    else:
        # No destination was given: allocate one of the right size.
        obj = np.empty((self.nbytes,), dtype="|u1")
        dest_view = obj

    # Copy from USM memory to the host buffer.
    DPPLQueue_Memcpy(
        self.queue.get_queue_ref(),
        <void *>&dest_view[0],        # destination
        <void *>self.memory_ptr,      # source
        <size_t>self.nbytes
    )

    return obj
308+
309+
cpdef copy_from_host (self, object obj):
    """Copy content of the Python buffer provided by `obj` to instance memory.

    `obj` must provide a contiguous byte buffer no longer than
    `self.nbytes`. Exactly `len(buffer)` bytes are copied to the start of
    the instance's memory. An empty buffer is a no-op.

    Raises ValueError if the buffer is longer than `self.nbytes`.
    """
    cdef const unsigned char[::1] host_buf = obj
    cdef Py_ssize_t buf_len = len(host_buf)

    if (buf_len > self.nbytes):
        raise ValueError("Source object is too large to be "
                         "accommodated in {} bytes buffer".format(self.nbytes))
    if (buf_len == 0):
        # Nothing to copy; also avoids indexing element 0 of an empty
        # buffer below.
        return
    # Copy from the host buffer to USM memory.
    DPPLQueue_Memcpy(
        self.queue.get_queue_ref(),
        <void *>self.memory_ptr,     # destination
        <void *>&host_buf[0],        # source
        <size_t>buf_len
    )
324+
325+
cpdef copy_from_device (self, object sycl_usm_ary):
    """Copy SYCL memory underlying the argument object into
    the memory of the instance.

    `sycl_usm_ary` must expose `__sycl_usm_array_interface__` as a dict.
    If the source pointer type is reported as unknown for the context of
    the instance's queue, the data is staged through host memory;
    otherwise it is copied directly by the instance's queue.

    Raises:
        ValueError: if the object lacks the interface, the interface is
            malformed, or the source is larger than `self.nbytes`.
        TypeError: if `__sycl_usm_array_interface__` is not a dict.
    """
    cdef _BufferData src_buf
    cdef const char* kind

    if not hasattr(sycl_usm_ary, '__sycl_usm_array_interface__'):
        raise ValueError("Object does not implement "
                         "`__sycl_usm_array_interface__` protocol")

    sycl_usm_ary_iface = sycl_usm_ary.__sycl_usm_array_interface__
    if not isinstance(sycl_usm_ary_iface, dict):
        raise TypeError(
            "`__sycl_usm_array_interface__` must be a dict, got {}"
            .format(type(sycl_usm_ary_iface))
        )

    src_buf = _BufferData.from_sycl_usm_ary_iface(sycl_usm_ary_iface)
    if (src_buf.nbytes > self.nbytes):
        raise ValueError("Source object is too large to "
                         "be accommodated in {} bytes buffer".format(self.nbytes))

    # Ask how the source pointer is seen from this instance's context.
    kind = DPPLUSM_GetPointerType(
        src_buf.p, self.queue.get_sycl_context().get_context_ref())
    if (kind == b'unknown'):
        # Source pointer is not recognized in this context: go via host.
        copy_via_host(
            <void *>self.memory_ptr, self.queue,   # dest
            <void *>src_buf.p, src_buf.queue,      # src
            <size_t>src_buf.nbytes
        )
    else:
        DPPLQueue_Memcpy(
            self.queue.get_queue_ref(),
            <void *>self.memory_ptr,
            <void *>src_buf.p,
            <size_t>src_buf.nbytes
        )
358+
359+
cpdef bytes tobytes (self):
    """Return a new bytes object of `self.nbytes` bytes populated with the
    content of the instance's memory."""
    cdef Py_ssize_t nb = self.nbytes
    # Uninitialized bytes object of the right size; it is filled in place.
    cdef bytes result = PyBytes_FromStringAndSize(NULL, nb)
    cdef unsigned char* raw = <unsigned char*>PyBytes_AS_STRING(result)
    # The string is null terminated, so nb + 1 bytes are addressable; the
    # view is built over all of them and then trimmed to the payload.
    cdef unsigned char[::1] padded_view = <unsigned char[:(nb + 1):1]>raw
    cdef unsigned char[::1] payload_view = padded_view[:nb]

    self.copy_to_host(payload_view)  # return value is not needed
    return result
369+
134370

135371
cdef class MemoryUSMShared(Memory):
136372

137-
def __cinit__(self, Py_ssize_t nbytes, SyclQueue queue=None):
138-
self._cinit(nbytes, "shared", queue)
373+
def __cinit__(self, other, SyclQueue queue=None):
    # An int requests a new "shared" allocation of that many bytes on
    # `queue`; any other object is handed to _cinit_other, which does not
    # receive `queue`.
    if not isinstance(other, int):
        self._cinit_other(other)
    else:
        self._cinit_alloc(<Py_ssize_t>other, b"shared", queue)
139378

140379
def __getbuffer__(self, Py_buffer *buffer, int flags):
141380
self._getbuffer(buffer, flags)
142381

143382

144383
cdef class MemoryUSMHost(Memory):
145384

146-
def __cinit__(self, Py_ssize_t nbytes, SyclQueue queue=None):
147-
self._cinit(nbytes, "host", queue)
385+
def __cinit__(self, other, SyclQueue queue=None):
    # An int requests a new "host" allocation of that many bytes on
    # `queue`; any other object is handed to _cinit_other, which does not
    # receive `queue`.
    if not isinstance(other, int):
        self._cinit_other(other)
    else:
        self._cinit_alloc(<Py_ssize_t>other, b"host", queue)
148390

149391
def __getbuffer__(self, Py_buffer *buffer, int flags):
150392
self._getbuffer(buffer, flags)
151393

152394

153395
cdef class MemoryUSMDevice(Memory):
154396

155-
def __cinit__(self, Py_ssize_t nbytes, SyclQueue queue=None):
156-
self._cinit(nbytes, "device", queue)
397+
def __cinit__(self, other, SyclQueue queue=None):
    # An int requests a new "device" allocation of that many bytes on
    # `queue`; any other object is handed to _cinit_other, which does not
    # receive `queue`.
    if not isinstance(other, int):
        self._cinit_other(other)
    else:
        self._cinit_alloc(<Py_ssize_t>other, b"device", queue)

0 commit comments

Comments
 (0)