31
31
import dpctl
32
32
from dpctl._backend cimport *
33
33
from ._sycl_core cimport SyclContext, SyclQueue
34
+ from ._sycl_core cimport get_current_queue
34
35
35
36
from cpython cimport Py_buffer
37
+ from cpython.bytes cimport PyBytes_AS_STRING, PyBytes_FromStringAndSize
36
38
39
+ import numpy as np
37
40
38
- cdef class Memory:
41
cdef _throw_sycl_usm_ary_iface():
    """Raise the `ValueError` used to reject a malformed
    `__sycl_usm_array_interface__` dictionary."""
    message = "__sycl_usm_array_interface__ is malformed"
    raise ValueError(message)
39
43
40
- cdef _cinit(self , Py_ssize_t nbytes, ptr_type, SyclQueue queue):
41
- cdef DPPLSyclUSMRef p
42
44
45
cdef void copy_via_host(void *dest_ptr, SyclQueue dest_queue,
                        void *src_ptr, SyclQueue src_queue, size_t nbytes):
    """
    Copies `nbytes` bytes from `src_ptr` USM memory to
    `dest_ptr` USM memory using host as the intermediary.

    This is useful when `src_ptr` and `dest_ptr` are bound to incompatible
    SYCL contexts.

    Args:
        dest_ptr: destination USM pointer, written through `dest_queue`.
        dest_queue: queue used for the host-to-destination copy.
        src_ptr: source USM pointer, read through `src_queue`.
        src_queue: queue used for the source-to-host copy.
        nbytes: number of bytes to transfer; zero is a no-op.
    """
    cdef unsigned char[::1] host_buf

    # Nothing to transfer. Returning early also avoids taking the address
    # of element 0 of an empty staging buffer below.
    if nbytes == 0:
        return

    # Temporary host-side staging area of exactly `nbytes` bytes.
    host_buf = bytearray(nbytes)

    # Stage 1: source USM memory -> host staging buffer.
    DPPLQueue_Memcpy(
        src_queue.get_queue_ref(),
        <void *>&host_buf[0],
        src_ptr,
        nbytes
    )

    # Stage 2: host staging buffer -> destination USM memory.
    DPPLQueue_Memcpy(
        dest_queue.get_queue_ref(),
        dest_ptr,
        <void *>&host_buf[0],
        nbytes
    )
69
+
70
+
71
cdef class _BufferData:
    """Record describing the USM allocation advertised by an object's
    `__sycl_usm_array_interface__` dictionary."""
    # USM pointer taken from the first element of the 'data' tuple
    cdef DPPLSyclUSMRef p
    # 1 if the second element of the 'data' tuple is truthy, else 0
    cdef int writeable
    # NumPy dtype built from 'typestr'
    cdef object dt
    # size of one element, in bytes
    cdef Py_ssize_t itemsize
    # total size of the allocation, in bytes (shape[0] * itemsize)
    cdef Py_ssize_t nbytes
    # queue associated with the allocation
    cdef SyclQueue queue

    @staticmethod
    cdef _BufferData from_sycl_usm_ary_iface(dict ary_iface):
        """Validate `ary_iface` and convert it into a `_BufferData`.

        Only version 1 dictionaries describing a non-empty, contiguous,
        one-dimensional array are accepted.

        Args:
            ary_iface: dictionary obtained from an object's
                `__sycl_usm_array_interface__` attribute.

        Returns:
            A populated `_BufferData` instance.

        Raises:
            ValueError: if the dictionary is malformed, or describes an
                array that is empty, multi-dimensional or non-contiguous.
        """
        cdef object ary_data_tuple = ary_iface.get('data', None)
        cdef object ary_typestr = ary_iface.get('typestr', None)
        cdef object ary_shape = ary_iface.get('shape', None)
        cdef object ary_strides = ary_iface.get('strides', None)
        cdef object ary_syclobj = ary_iface.get('syclobj', None)
        # NOTE(review): 'offset' is parsed (and type-checked by the
        # conversion) but is not applied to the data pointer below.
        cdef Py_ssize_t ary_offset = ary_iface.get('offset', 0)
        cdef int ary_version = ary_iface.get('version', 0)
        cdef object dt
        cdef _BufferData buf
        cdef Py_ssize_t arr_data_ptr

        if ary_version != 1:
            _throw_sycl_usm_ary_iface()
        if not ary_data_tuple or len(ary_data_tuple) != 2:
            _throw_sycl_usm_ary_iface()
        if not ary_shape or len(ary_shape) != 1 or ary_shape[0] < 1:
            raise ValueError(
                "Only non-empty one-dimensional arrays are supported"
            )
        # np.dtype(None) silently resolves to float64, so a missing
        # 'typestr' has to be rejected explicitly.
        if ary_typestr is None:
            _throw_sycl_usm_ary_iface()
        try:
            dt = np.dtype(ary_typestr)
        except TypeError:
            _throw_sycl_usm_ary_iface()
        # When strides are given, a contiguous 1-D array has exactly one
        # stride and it equals the element size.
        if ary_strides and (len(ary_strides) != 1
                            or ary_strides[0] != dt.itemsize):
            raise ValueError("Must be contiguous")

        if not ary_syclobj or not isinstance(
                ary_syclobj, (dpctl.SyclQueue, dpctl.SyclContext)):
            _throw_sycl_usm_ary_iface()

        buf = _BufferData.__new__(_BufferData)
        arr_data_ptr = <Py_ssize_t>ary_data_tuple[0]
        buf.p = <DPPLSyclUSMRef>(<void *>arr_data_ptr)
        buf.writeable = 1 if ary_data_tuple[1] else 0
        buf.dt = dt
        buf.itemsize = <Py_ssize_t>(dt.itemsize)
        buf.nbytes = (<Py_ssize_t>ary_shape[0]) * buf.itemsize

        if isinstance(ary_syclobj, dpctl.SyclQueue):
            buf.queue = <SyclQueue>ary_syclobj
        else:
            # FIXME: need a way to construct a queue from a SyclContext;
            # until then the current queue is used as a stand-in.
            buf.queue = get_current_queue()

        return buf
123
+
124
+
125
def _to_memory(unsigned char[::1] b):
    """Build a `MemoryUSMShared` block with as many bytes as `b` holds and
    fill it with the contents of `b`.

    Used by `Memory.__reduce__` as the reconstruction callable.
    """
    cdef Memory restored = MemoryUSMShared(len(b))

    restored.copy_from_host(b)
    return restored
132
+
133
+
134
+ cdef class Memory:
135
cdef _cinit_empty(self):
    """Put the instance into the empty state: null pointer, zero size,
    no queue and no referenced object."""
    self.refobj = None
    self.queue = None
    self.nbytes = 0
    self.memory_ptr = NULL
140
+
141
+ cdef _cinit_alloc(self , Py_ssize_t nbytes, bytes ptr_type, SyclQueue queue):
142
+ cdef DPPLSyclUSMRef p
143
+
144
+ self ._cinit_empty()
46
145
47
146
if (nbytes > 0 ):
48
147
if queue is None :
49
- queue = dpctl. get_current_queue()
148
+ queue = get_current_queue()
50
149
51
- if (ptr_type == " shared" ):
150
+ if (ptr_type == b " shared" ):
52
151
p = DPPLmalloc_shared(nbytes, queue.get_queue_ref())
53
- elif (ptr_type == " host" ):
152
+ elif (ptr_type == b " host" ):
54
153
p = DPPLmalloc_host(nbytes, queue.get_queue_ref())
55
- elif (ptr_type == " device" ):
154
+ elif (ptr_type == b " device" ):
56
155
p = DPPLmalloc_device(nbytes, queue.get_queue_ref())
57
156
else :
58
157
raise RuntimeError (" Pointer type is unknown: {}" \
59
- .format(ptr_type))
158
+ .format(ptr_type.decode( " UTF-8 " ) ))
60
159
61
160
if (p):
62
161
self .memory_ptr = p
@@ -67,13 +166,32 @@ cdef class Memory:
67
166
else :
68
167
raise ValueError (" Non-positive number of bytes found." )
69
168
169
cdef _cinit_other(self, object other):
    """Initialize the instance as a view of the USM memory exposed by `other`.

    `other` must provide a dictionary-valued `__sycl_usm_array_interface__`
    attribute. The pointer, size and queue are taken from that dictionary,
    and `other` is stored in `refobj`; `__dealloc__` does not free memory
    when `refobj` is set.

    Args:
        other: object exposing `__sycl_usm_array_interface__`.

    Raises:
        ValueError: if `other` does not expose the attribute, if the
            attribute is not a dictionary, or if the dictionary fails
            validation in `_BufferData.from_sycl_usm_ary_iface`.
    """
    cdef _BufferData other_buf

    if not hasattr(other, '__sycl_usm_array_interface__'):
        raise ValueError(
            "Argument {} does not expose "
            "`__sycl_usm_array_interface__`.".format(other)
        )

    other_iface = other.__sycl_usm_array_interface__
    if not isinstance(other_iface, dict):
        raise ValueError(
            "Argument {} does not correctly expose "
            "`__sycl_usm_array_interface__`.".format(other)
        )

    other_buf = _BufferData.from_sycl_usm_ary_iface(other_iface)
    self.memory_ptr = other_buf.p
    self.nbytes = other_buf.nbytes
    self.queue = other_buf.queue
    # The writeable flag of the source is currently not recorded.
    self.refobj = other
189
+
70
190
def __dealloc__(self):
    # Memory is released only when there is a pointer to release and no
    # referenced object is set (refobj is set when the memory was taken
    # from another object).
    if (self.memory_ptr and self.refobj is None):
        DPPLfree_with_queue(
            self.memory_ptr, self.queue.get_queue_ref()
        )
    # Reset all fields in both cases.
    self._cinit_empty()
77
195
78
196
cdef _getbuffer(self , Py_buffer * buffer , int flags):
79
197
# memory_ptr is Ref which is pointer to SYCL type. For USM it is void*.
@@ -93,6 +211,10 @@ cdef class Memory:
93
211
def __get__ (self ):
94
212
return self .nbytes
95
213
214
property size:
    """Number of bytes in the memory block (value of the `nbytes` field)."""
    def __get__(self):
        return self.nbytes
217
+
96
218
property _pointer:
    """The underlying memory pointer, exposed as an unsigned integer."""
    def __get__(self):
        return <size_t>(self.memory_ptr)
@@ -105,11 +227,40 @@ cdef class Memory:
105
227
def __get__ (self ):
106
228
return self .queue
107
229
230
property reference_obj:
    """Object stored in `refobj`: the object this instance was built
    from, or None."""
    def __get__(self):
        return self.refobj
233
+
108
234
def __repr__(self):
    """Describe the block by its size in bytes and hexadecimal address."""
    address = hex(<object>(<Py_ssize_t>self.memory_ptr))
    template = "<Intel(R) USM allocated memory block of {} bytes at {}>"
    return template.format(self.nbytes, address)
111
237
112
- def _usm_type (self , syclobj = None ):
238
def __len__(self):
    """Length of the block, measured in bytes."""
    return self.nbytes
240
+
241
def __sizeof__(self):
    """Report the size of the memory block in bytes (not the size of the
    Python wrapper object)."""
    return self.nbytes
243
+
244
def __bytes__(self):
    """Support `bytes(memory)` by delegating to `tobytes()`."""
    return self.tobytes()
246
+
247
def __reduce__(self):
    """Pickle support: the content is copied to the host and is restored
    through `_to_memory`, which builds a `MemoryUSMShared` block."""
    host_copy = self.copy_to_host()
    return _to_memory, (host_copy,)
249
+
250
property __sycl_usm_array_interface__:
    """Dictionary describing the block as a one-dimensional array of
    unsigned bytes ("|u1") bound to this instance's queue."""
    def __get__(self):
        # (pointer as integer, writeable flag); the flag is always True
        # for now -- it is not derived from a stored writeable attribute.
        data_entry = (<Py_ssize_t>(<void *>self.memory_ptr), True)
        cdef dict iface = {
            "data": data_entry,
            "shape": (self.nbytes,),
            "strides": None,
            "typestr": "|u1",
            "version": 1,
            "syclobj": self.queue
        }
        return iface
262
+
263
+ def get_usm_type (self , syclobj = None ):
113
264
cdef const char * kind
114
265
cdef SyclContext ctx
115
266
cdef SyclQueue q
@@ -131,26 +282,120 @@ cdef class Memory:
131
282
" or an instance of SyclConext or SyclQueue" )
132
283
return kind.decode(' UTF-8' )
133
284
285
cpdef copy_to_host(self, obj=None):
    """Copy the content of the instance's memory into the memory of
    `obj`, or into a newly allocated NumPy array if `obj` is None.

    Args:
        obj: object providing a writable, contiguous buffer of unsigned
            bytes with room for at least `nbytes` bytes, or None.

    Returns:
        `obj` if it was given, otherwise the new NumPy array of
        `nbytes` unsigned bytes.

    Raises:
        ValueError: if the destination buffer is shorter than `nbytes`.
    """
    # Assigning None to a typed memoryview yields a None view, which is
    # how the "no destination given" case is detected below.
    cdef unsigned char[::1] host_buf = obj

    if (host_buf is None):
        # No destination was supplied: allocate one on the host.
        obj = np.empty((self.nbytes,), dtype="|u1")
        host_buf = obj
    elif (<Py_ssize_t>len(host_buf) < self.nbytes):
        raise ValueError("Destination object is too small to "
                         "accommodate {} bytes".format(self.nbytes))

    # Copy nbytes bytes from the instance's memory to the host buffer.
    DPPLQueue_Memcpy(
        self.queue.get_queue_ref(),
        <void *>&host_buf[0],      # destination
        <void *>self.memory_ptr,   # source
        <size_t>self.nbytes
    )

    return obj
308
+
309
cpdef copy_from_host(self, object obj):
    """Copy content of Python buffer provided by `obj` to instance memory.

    Args:
        obj: object providing a contiguous buffer of unsigned bytes no
            longer than `nbytes`. An empty buffer is a no-op.

    Raises:
        ValueError: if the source buffer holds more than `nbytes` bytes.
    """
    cdef const unsigned char[::1] host_buf = obj
    cdef Py_ssize_t buf_len = len(host_buf)

    if (buf_len > self.nbytes):
        raise ValueError("Source object is too large to be "
                         "accommodated in {} bytes buffer".format(self.nbytes))
    if buf_len == 0:
        # Nothing to copy; also avoids indexing element 0 of an empty view.
        return

    # Copy buf_len bytes from the host buffer into the instance's memory.
    DPPLQueue_Memcpy(
        self.queue.get_queue_ref(),
        <void *>self.memory_ptr,   # destination
        <void *>&host_buf[0],      # source
        <size_t>buf_len
    )
324
+
325
cpdef copy_from_device(self, object sycl_usm_ary):
    """Copy SYCL memory underlying the argument object into
    the memory of the instance.

    Args:
        sycl_usm_ary: object exposing a dictionary-valued
            `__sycl_usm_array_interface__` attribute describing at most
            `nbytes` bytes of USM memory.

    Raises:
        ValueError: if the object does not implement the protocol, if the
            interface dictionary is rejected by
            `_BufferData.from_sycl_usm_ary_iface`, or if the source is
            larger than this block.
        TypeError: if `__sycl_usm_array_interface__` is not a dictionary.
    """
    cdef _BufferData src_buf
    cdef const char *kind

    if not hasattr(sycl_usm_ary, '__sycl_usm_array_interface__'):
        raise ValueError("Object does not implement "
                         "`__sycl_usm_array_interface__` protocol")

    sycl_usm_ary_iface = sycl_usm_ary.__sycl_usm_array_interface__
    if not isinstance(sycl_usm_ary_iface, dict):
        raise TypeError(
            "`__sycl_usm_array_interface__` must be a dictionary, "
            "got {}".format(type(sycl_usm_ary_iface))
        )

    src_buf = _BufferData.from_sycl_usm_ary_iface(sycl_usm_ary_iface)
    if (src_buf.nbytes > self.nbytes):
        raise ValueError("Source object is too large to "
                         "be accommodated in {} bytes buffer".format(self.nbytes))

    # Ask what kind of USM pointer the source is relative to the context
    # of this instance's queue.
    kind = DPPLUSM_GetPointerType(
        src_buf.p, self.queue.get_sycl_context().get_context_ref())
    if (kind == b'unknown'):
        # The source pointer is not recognized in this context, so the
        # data is staged through host memory.
        copy_via_host(
            <void *>self.memory_ptr, self.queue,   # dest
            <void *>src_buf.p, src_buf.queue,      # src
            <size_t>src_buf.nbytes
        )
    else:
        DPPLQueue_Memcpy(
            self.queue.get_queue_ref(),
            <void *>self.memory_ptr,
            <void *>src_buf.p,
            <size_t>src_buf.nbytes
        )
358
+
359
cpdef bytes tobytes(self):
    """Return a `bytes` object holding a copy of the block's content."""
    cdef Py_ssize_t n_bytes = self.nbytes
    # Passing NULL creates a bytes object of the requested length with
    # uninitialized content, to be filled in place below.
    cdef bytes result = PyBytes_FromStringAndSize(NULL, n_bytes)
    cdef unsigned char *raw = <unsigned char *>PyBytes_AS_STRING(result)
    # The bytes storage is null terminated, hence the (n_bytes + 1)-long
    # view; the terminator is then sliced off.
    cdef unsigned char[::1] view = (<unsigned char[:(n_bytes + 1):1]>raw)[:n_bytes]

    self.copy_to_host(view)  # return value is not needed
    return result
369
+
134
370
135
371
cdef class MemoryUSMShared(Memory):
    """`Memory` subclass that allocates with the b"shared" pointer type.

    When `other` is an `int`, that many bytes are allocated on `queue`.
    Otherwise `other` must expose `__sycl_usm_array_interface__` and its
    memory is used; `queue` is not consulted in that case.
    """

    def __cinit__(self, other, SyclQueue queue=None):
        if not isinstance(other, int):
            self._cinit_other(other)
        else:
            self._cinit_alloc(<Py_ssize_t>other, b"shared", queue)

    def __getbuffer__(self, Py_buffer *buffer, int flags):
        self._getbuffer(buffer, flags)
142
381
143
382
144
383
cdef class MemoryUSMHost(Memory):
    """`Memory` subclass that allocates with the b"host" pointer type.

    When `other` is an `int`, that many bytes are allocated on `queue`.
    Otherwise `other` must expose `__sycl_usm_array_interface__` and its
    memory is used; `queue` is not consulted in that case.
    """

    def __cinit__(self, other, SyclQueue queue=None):
        if not isinstance(other, int):
            self._cinit_other(other)
        else:
            self._cinit_alloc(<Py_ssize_t>other, b"host", queue)

    def __getbuffer__(self, Py_buffer *buffer, int flags):
        self._getbuffer(buffer, flags)
151
393
152
394
153
395
cdef class MemoryUSMDevice(Memory):
    """`Memory` subclass that allocates with the b"device" pointer type.

    When `other` is an `int`, that many bytes are allocated on `queue`.
    Otherwise `other` must expose `__sycl_usm_array_interface__` and its
    memory is used; `queue` is not consulted in that case.
    """

    def __cinit__(self, other, SyclQueue queue=None):
        if not isinstance(other, int):
            self._cinit_other(other)
        else:
            self._cinit_alloc(<Py_ssize_t>other, b"device", queue)
0 commit comments