Skip to content

Commit 2475a90

Browse files
Added dpctl/tensor/_usmarray submodule
Added the Cython extension class dpctl.tensor.usm_ndarray, which represents a strided-layout array over a chunk of SYCL USM memory and supports three USM types: 'device', 'shared' and 'host'. For now the container implements a constructor, certain properties and basic slicing. It allocates memory using the dpctl.memory buffer class specific to the requested USM type.
1 parent 584449d commit 2475a90

File tree

11 files changed

+1162
-14
lines changed

11 files changed

+1162
-14
lines changed

.flake8

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ per-file-ignores =
2222
dpctl/_sycl_queue_manager.pyx: E999, E225
2323
dpctl/memory/_memory.pyx: E999, E225, E226, E227
2424
dpctl/program/_program.pyx: E999, E225, E226, E227
25+
dpctl/tensor/_usmarray.pyx: E999, E225, E226, E227
2526
dpctl/tensor/numpy_usm_shared.py: F821
2627
examples/cython/sycl_buffer/_buffer_example.pyx: E999, E225, E402
2728
examples/cython/sycl_direct_linkage/_buffer_example.pyx: E999, E225, E402

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,3 +96,4 @@ dpctl/_sycl_event.h
9696
dpctl/_sycl_queue.h
9797
dpctl/_sycl_queue_manager.h
9898
dpctl/memory/_memory.h
99+
dpctl/tensor/_usmarray.h

dpctl/tensor/__init__.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,3 +27,9 @@
2727
underlying memory buffer is allocated with a USM shared memory allocator.
2828
2929
"""
30+
31+
from dpctl.tensor._usmarray import usm_ndarray
32+
33+
__all__ = [
34+
"usm_ndarray",
35+
]

dpctl/tensor/_slicing.pxi

Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
import numbers
2+
3+
4+
cdef object _basic_slice_meta(object ind, tuple shape,
                              tuple strides, Py_ssize_t offset):
    """
    Computes the layout of the view selected by a basic index.

    Args:
        ind: Index to apply. Supported values are `Ellipsis`, `None`,
            a slice, an integer, or a tuple whose items are any of these.
        shape: Shape of the array being indexed.
        strides: Strides of the array being indexed, one entry per axis.
        offset: Displacement of the array's first element, expressed in
            the same units as `strides`.

    Returns:
        Tuple `(new_shape, new_strides, new_offset)` describing the view.

    Raises:
        IndexError: If an integer index is out of range, more axes are
            indexed than the array has, or more than one ellipsis is used.
        NotImplementedError: If `ind` is, or contains, a list.
        TypeError: If `ind` is, or contains, an object of any other
            unsupported type.
    """
    if ind is Ellipsis:
        return (shape, strides, offset)
    elif ind is None:
        return ((1,) + shape, (0,) + strides, offset)
    elif isinstance(ind, slice):
        if len(shape) == 0:
            raise IndexError(
                "too many indices for an array, array is "
                "{0}-dimensional, but {1} were indexed".format(0, 1))
        sl_start, sl_stop, sl_step = ind.indices(shape[0])
        # len(range(...)) rounds the extent up for steps that do not divide
        # the span evenly and clamps empty selections to 0
        sh0 = len(range(sl_start, sl_stop, sl_step))
        return (
            (sh0, ) + shape[1:],
            (sl_step * strides[0],) + strides[1:],
            offset + sl_start * strides[0]
        )
    elif isinstance(ind, numbers.Integral):
        if len(shape) == 0:
            raise IndexError(
                "too many indices for an array, array is "
                "{0}-dimensional, but {1} were indexed".format(0, 1))
        if 0 <= ind < shape[0]:
            return (shape[1:], strides[1:], offset + ind * strides[0])
        elif -shape[0] <= ind < 0:
            # negative index counts from the end of the axis
            return (shape[1:], strides[1:],
                    offset + (shape[0] + ind) * strides[0])
        else:
            raise IndexError(
                "Index {0} is out of range for axes 0 with "
                "size {1}".format(ind, shape[0]))
    elif isinstance(ind, list):
        raise NotImplementedError(
            "indexing with a list is not supported")
    elif isinstance(ind, tuple):
        # First pass: validate the items and count what they refer to
        axes_referenced = 0
        ellipses_count = 0
        for i in ind:
            if i is None:
                pass
            elif i is Ellipsis:
                ellipses_count = ellipses_count + 1
            elif isinstance(i, (slice, numbers.Integral)):
                axes_referenced = axes_referenced + 1
            elif isinstance(i, list):
                raise NotImplementedError(
                    "indexing with a list is not supported")
            else:
                raise TypeError(
                    "unsupported index of type {0}".format(type(i)))
        if ellipses_count > 1:
            raise IndexError(
                "an index can only have a single ellipsis ('...')")
        if axes_referenced > len(shape):
            raise IndexError(
                "too many indices for an array, array is "
                "{0}-dimensional, but {1} were indexed".format(
                    len(shape), axes_referenced))
        if ellipses_count:
            # from here on: the number of axes the ellipsis stands for
            ellipses_count = len(shape) - axes_referenced
        # Second pass: k is the axis of the input array consumed next
        new_shape = list()
        new_strides = list()
        k = 0
        new_offset = offset
        for ind_i in ind:
            if ind_i is Ellipsis:
                k_new = k + ellipses_count
                new_shape.extend(shape[k:k_new])
                new_strides.extend(strides[k:k_new])
                k = k_new
            elif ind_i is None:
                new_shape.append(1)
                new_strides.append(0)
            elif isinstance(ind_i, slice):
                sl_start, sl_stop, sl_step = ind_i.indices(shape[k])
                new_shape.append(len(range(sl_start, sl_stop, sl_step)))
                new_strides.append(sl_step * strides[k])
                new_offset = new_offset + sl_start * strides[k]
                k = k + 1
            elif isinstance(ind_i, numbers.Integral):
                if 0 <= ind_i < shape[k]:
                    new_offset = new_offset + ind_i * strides[k]
                elif -shape[k] <= ind_i < 0:
                    new_offset = (new_offset
                                  + (shape[k] + ind_i) * strides[k])
                else:
                    raise IndexError(
                        "Index {0} is out of range for "
                        "axes {1} with size {2}".format(ind_i, k, shape[k]))
                k = k + 1
        # axes not mentioned by the index are carried over unchanged
        new_shape.extend(shape[k:])
        new_strides.extend(strides[k:])
        return (tuple(new_shape), tuple(new_strides), new_offset)
    else:
        raise TypeError(
            "unsupported index of type {0}".format(type(ind)))

dpctl/tensor/_stride_utils.pxi

Lines changed: 196 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,196 @@
1+
# distutils: language = c++
2+
# cython: language_level=3
3+
4+
from cpython.mem cimport PyMem_Free, PyMem_Malloc
from cpython.ref cimport Py_INCREF
from cpython.tuple cimport PyTuple_New, PyTuple_SetItem
7+
8+
9+
# Non-zero status codes returned by _from_input_shape_strides
# (it returns 0 on success).
# ERROR_MALLOC: PyMem_Malloc failed to allocate the shape or strides array.
cdef int ERROR_MALLOC = 1
10+
# ERROR_INTERNAL: not returned by the code in this file; it is only
# referenced by a commented-out return statement.
cdef int ERROR_INTERNAL = -1
11+
# ERROR_INCORRECT_ORDER: strides were not given and order is not one of
# 'C', 'F', 'c', 'f'.
cdef int ERROR_INCORRECT_ORDER = 2
12+
# ERROR_UNEXPECTED_STRIDES: strides is neither None nor a list, tuple or
# object with a `tolist` attribute whose length equals the dimension count.
cdef int ERROR_UNEXPECTED_STRIDES = 3
13+
14+
# Values written to the `contig` output of _from_input_shape_strides.
# NOTE(review): the values are distinct powers of two, so they are presumably
# meant to be combined as bit flags - confirm against the users of these names.
cdef int USM_ARRAY_C_CONTIGUOUS = 1
15+
cdef int USM_ARRAY_F_CONTIGUOUS = 2
16+
# Not used by the code in this file.
cdef int USM_ARRAY_WRITEABLE = 4
17+
18+
19+
cdef Py_ssize_t shape_to_elem_count(int nd, Py_ssize_t *shape_arr):
    """
    Returns the product of the first `nd` entries of `shape_arr`.

    The product over zero entries (`nd == 0`) is 1.
    """
    cdef Py_ssize_t total = 1
    cdef int axis = 0
    while axis < nd:
        total = total * shape_arr[axis]
        axis += 1
    return total
27+
28+
29+
cdef int _contig_flag_from_strides(
        int nd, Py_ssize_t *shape_arr, Py_ssize_t *strides_arr):
    """
    Classifies the layout described by strides expressed in elements.

    Returns USM_ARRAY_C_CONTIGUOUS if the strides match those of a
    C-contiguous array of the given shape, USM_ARRAY_F_CONTIGUOUS if they
    match only those of an F-contiguous array, and 0 otherwise.

    Axes of extent 1 are skipped, since their stride never contributes to
    the displacement of any element. The array must not be empty.
    """
    cdef int i
    cdef Py_ssize_t expected = 1
    cdef bint matches = True

    # C order: the last axis varies fastest
    for i in range(nd - 1, -1, -1):
        if shape_arr[i] != 1 and strides_arr[i] != expected:
            matches = False
            break
        expected *= shape_arr[i]
    if matches:
        return USM_ARRAY_C_CONTIGUOUS

    # F order: the first axis varies fastest
    expected = 1
    matches = True
    for i in range(0, nd):
        if shape_arr[i] != 1 and strides_arr[i] != expected:
            matches = False
            break
        expected *= shape_arr[i]
    if matches:
        return USM_ARRAY_F_CONTIGUOUS
    return 0


cdef int _from_input_shape_strides(
    int nd, object shape, object strides, int itemsize, char order,
    Py_ssize_t **shape_ptr, Py_ssize_t **strides_ptr,
    Py_ssize_t *nelems, Py_ssize_t *min_disp, Py_ssize_t *max_disp,
    int *contig):
    """
    Converts Python shape and strides into C arrays and layout metadata.

    Arguments:
        nd - number of dimensions
        shape - indexable object holding `nd` integers
        strides - None, or a list, tuple or object with a `tolist`
                  attribute holding `nd` integers (strides in elements)
        itemsize - not used by this function
        order - 'C', 'F', 'c' or 'f'; only consulted when strides is None
    Modifies:
        shape_ptr - pointer to C array for shape values
        strides_ptr - pointer to C array for strides values; set to NULL
                      when strides is None or the array is empty
        nelems - number of elements in array
        min_disp = min( dot(strides, index), index for shape)
        max_disp = max( dot(strides, index), index for shape)
        contig - enumeration for array contiguity
    Returns: 0 on success, error code otherwise.
        On success the pointers point to arrays allocated with
        PyMem_Malloc (or are NULL as described above).
        Otherwise any array allocated here is freed and both pointers
        are set to NULL.
    """
    cdef int i
    cdef Py_ssize_t elem_count = 1
    cdef Py_ssize_t min_shift = 0
    cdef Py_ssize_t max_shift = 0
    cdef Py_ssize_t str_i
    cdef Py_ssize_t* shape_arr
    cdef Py_ssize_t* strides_arr

    # Start from the documented failure state; the pointers are only
    # published once the corresponding array is fully populated.
    shape_ptr[0] = NULL
    strides_ptr[0] = NULL

    # 0-d array
    if (nd == 0):
        contig[0] = USM_ARRAY_C_CONTIGUOUS
        nelems[0] = 1
        min_disp[0] = 0
        max_disp[0] = 0
        return 0

    shape_arr = <Py_ssize_t*>PyMem_Malloc(nd * sizeof(Py_ssize_t))
    if (not shape_arr):
        return ERROR_MALLOC
    for i in range(0, nd):
        shape_arr[i] = <Py_ssize_t> shape[i]
        elem_count *= shape_arr[i]

    if elem_count == 0:
        # Empty array: strides are irrelevant and are not stored.
        # NOTE(review): nelems is reported as 1 rather than 0, as in the
        # original code; presumably so that callers never request a
        # zero-sized allocation - confirm against the caller.
        shape_ptr[0] = shape_arr
        contig[0] = USM_ARRAY_C_CONTIGUOUS
        nelems[0] = 1
        min_disp[0] = 0
        max_disp[0] = 0
        return 0

    if (strides is None):
        # no need to allocate and populate strides
        if order == <char> ord('C') or order == <char> ord('c'):
            contig[0] = USM_ARRAY_C_CONTIGUOUS
        elif order == <char> ord('F') or order == <char> ord('f'):
            contig[0] = USM_ARRAY_F_CONTIGUOUS
        else:
            PyMem_Free(shape_arr)
            return ERROR_INCORRECT_ORDER
        shape_ptr[0] = shape_arr
        nelems[0] = elem_count
        min_disp[0] = 0
        max_disp[0] = (elem_count - 1)
        return 0

    if not ((isinstance(strides, (list, tuple)) or hasattr(strides, 'tolist'))
            and len(strides) == nd):
        PyMem_Free(shape_arr)
        return ERROR_UNEXPECTED_STRIDES

    strides_arr = <Py_ssize_t*>PyMem_Malloc(nd * sizeof(Py_ssize_t))
    if (not strides_arr):
        PyMem_Free(shape_arr)
        return ERROR_MALLOC
    for i in range(0, nd):
        str_i = <Py_ssize_t> strides[i]
        strides_arr[i] = str_i
        # positive strides extend the reachable range upwards,
        # negative ones extend it downwards
        if str_i > 0:
            max_shift += str_i * (shape_arr[i] - 1)
        else:
            min_shift += str_i * (shape_arr[i] - 1)

    shape_ptr[0] = shape_arr
    strides_ptr[0] = strides_arr
    nelems[0] = elem_count
    min_disp[0] = min_shift
    max_disp[0] = max_shift
    contig[0] = _contig_flag_from_strides(nd, shape_arr, strides_arr)
    return 0
132+
133+
134+
cdef object _make_int_tuple(int nd, Py_ssize_t *ary):
    """
    Builds a Python tuple from the first `nd` entries of C array `ary`.

    Returns None when `ary` is a NULL pointer.
    """
    cdef tuple out
    cdef object item
    cdef int pos

    if not ary:
        return None
    out = PyTuple_New(nd)
    pos = 0
    while pos < nd:
        item = <object>ary[pos]
        Py_INCREF(item)  # PyTuple_SetItem steals the reference
        PyTuple_SetItem(out, pos, item)
        pos += 1
    return out
149+
150+
151+
cdef object _make_reversed_int_tuple(int nd, Py_ssize_t *ary):
    """
    Builds a Python tuple holding the first `nd` entries of C array
    `ary` in reverse order.

    Returns None when `ary` is a NULL pointer.
    """
    cdef tuple out
    cdef object item
    cdef int src
    cdef int dst

    if not ary:
        return None
    out = PyTuple_New(nd)
    # dst walks the tuple from its last slot towards the first one
    dst = nd - 1
    for src in range(nd):
        item = <object>ary[src]
        Py_INCREF(item)  # PyTuple_SetItem steals the reference
        PyTuple_SetItem(out, dst, item)
        dst -= 1
    return out
169+
170+
171+
cdef object _c_contig_strides(int nd, Py_ssize_t *shape):
    """
    Builds the tuple of strides of a C-contiguous array with the
    given shape.

    The last axis gets stride 1; each preceding axis gets the product of
    the extents of all axes that follow it.
    """
    cdef tuple result = PyTuple_New(nd)
    cdef object running = 1
    cdef int axis = nd - 1

    while axis >= 0:
        Py_INCREF(running)  # PyTuple_SetItem steals the reference
        PyTuple_SetItem(result, axis, running)
        running = running * shape[axis]
        axis -= 1
    return result
184+
185+
186+
cdef object _f_contig_strides(int nd, Py_ssize_t *shape):
    """
    Builds the tuple of strides of an F-contiguous array with the
    given shape.

    The first axis gets stride 1; each following axis gets the product of
    the extents of all axes that precede it.
    """
    cdef tuple result = PyTuple_New(nd)
    cdef object running = 1
    cdef int axis = 0

    while axis < nd:
        Py_INCREF(running)  # PyTuple_SetItem steals the reference
        PyTuple_SetItem(result, axis, running)
        running = running * shape[axis]
        axis += 1
    return result

0 commit comments

Comments
 (0)