Skip to content

Commit 3fe2570

Browse files
Fixed issues in slicing and array construction
These were discovered while preparing for a customer presentation
1 parent e68c16e commit 3fe2570

File tree

4 files changed

+123
-10
lines changed

4 files changed

+123
-10
lines changed

dpctl/tensor/_slicing.pxi

Lines changed: 21 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,23 @@
1717
import numbers
1818

1919

20+
cdef Py_ssize_t _slice_len(
    Py_ssize_t sl_start,
    Py_ssize_t sl_stop,
    Py_ssize_t sl_step
):
    """
    Compute len(range(sl_start, sl_stop, sl_step)).

    The arguments are expected to be a normalized (start, stop, step)
    triple such as the one produced by ``slice.indices``. The result is
    never negative: a range whose start lies at or beyond its stop in
    the direction of travel is empty and has length 0, e.g.
    ``_slice_len(5, 2, 1) == 0`` and ``_slice_len(2, 5, -1) == 0``.

    NOTE: sl_step is assumed to be non-zero; a zero step is not
    meaningful for a range and is not handled specially here.
    """
    if sl_step > 0:
        if sl_start >= sl_stop:
            # ascending range that starts at/after its stop is empty
            return 0
        # 1 + argmax k such that sl_start + sl_step*k < sl_stop
        # numerator and divisor are both non-negative here, so floor
        # and truncating division give the same quotient
        return 1 + ((sl_stop - sl_start - 1) // sl_step)
    else:
        if sl_start <= sl_stop:
            # descending range that starts at/before its stop is empty
            return 0
        # 1 + argmax k such that sl_start + sl_step*k > sl_stop
        # numerator and divisor are both non-positive here, so floor
        # and truncating division give the same quotient
        return 1 + ((sl_stop - sl_start + 1) // sl_step)
35+
36+
2037
cdef object _basic_slice_meta(object ind, tuple shape,
2138
tuple strides, Py_ssize_t offset):
2239
"""
@@ -33,9 +50,9 @@ cdef object _basic_slice_meta(object ind, tuple shape,
3350
return ((1,) + shape, (0,) + strides, offset)
3451
elif isinstance(ind, slice):
3552
sl_start, sl_stop, sl_step = ind.indices(shape[0])
36-
sh0 = (sl_stop - sl_start) // sl_step
53+
sh0 = _slice_len(sl_start, sl_stop, sl_step)
3754
str0 = sl_step * strides[0]
38-
new_strides = strides if (sl_step == 1) else (str0,) + strides[1:]
55+
new_strides = strides if (sl_step == 1 or sh0 == 0) else (str0,) + strides[1:]
3956
return (
4057
(sh0, ) + shape[1:],
4158
new_strides,
@@ -101,8 +118,8 @@ cdef object _basic_slice_meta(object ind, tuple shape,
101118
elif isinstance(ind_i, slice):
102119
k_new = k + 1
103120
sl_start, sl_stop, sl_step = ind_i.indices(shape[k])
104-
sh_i = (sl_stop - sl_start) // sl_step
105-
str_i = sl_step * strides[k]
121+
sh_i = _slice_len(sl_start, sl_stop, sl_step)
122+
str_i = (1 if sh_i == 0 else sl_step) * strides[k]
106123
new_shape.append(sh_i)
107124
new_strides.append(str_i)
108125
new_offset = new_offset + sl_start * strides[k]

dpctl/tensor/_stride_utils.pxi

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ cdef int _from_input_shape_strides(
7272

7373
# 0-d array
7474
if (nd == 0):
75-
contig[0] = USM_ARRAY_C_CONTIGUOUS
75+
contig[0] = (USM_ARRAY_C_CONTIGUOUS | USM_ARRAY_F_CONTIGUOUS)
7676
nelems[0] = 1
7777
min_disp[0] = 0
7878
max_disp[0] = 0
@@ -88,17 +88,28 @@ cdef int _from_input_shape_strides(
8888
shape_arr[i] = <Py_ssize_t> shape[i]
8989
elem_count *= shape_arr[i]
9090
if elem_count == 0:
91-
contig[0] = USM_ARRAY_C_CONTIGUOUS
91+
contig[0] = (USM_ARRAY_C_CONTIGUOUS | USM_ARRAY_F_CONTIGUOUS)
9292
nelems[0] = 1
9393
min_disp[0] = 0
9494
max_disp[0] = 0
95-
strides_ptr[0] = <Py_ssize_t *>(<size_t>0)
95+
if strides is None:
96+
strides_ptr[0] = <Py_ssize_t *>(<size_t>0)
97+
else:
98+
strides_arr = <Py_ssize_t*>PyMem_Malloc(nd * sizeof(Py_ssize_t))
99+
if (not strides_arr):
100+
PyMem_Free(shape_ptr[0]);
101+
shape_ptr[0] = <Py_ssize_t *>(<size_t>0)
102+
return ERROR_MALLOC
103+
strides_ptr[0] = strides_arr
104+
for i in range(0, nd):
105+
strides_arr[i] = <Py_ssize_t> strides[i]
96106
return 0
97107
nelems[0] = elem_count
98-
99108
if (strides is None):
100109
# no need to allocate and populate strides
101110
if (int(order) not in [ord('C'), ord('F'), ord('c'), ord('f')]):
111+
PyMem_Free(shape_ptr[0]);
112+
shape_ptr[0] = <Py_ssize_t *>(<size_t>0)
102113
return ERROR_INCORRECT_ORDER
103114
if order == <char> ord('C') or order == <char> ord('c'):
104115
contig[0] = USM_ARRAY_C_CONTIGUOUS
@@ -112,6 +123,8 @@ cdef int _from_input_shape_strides(
112123
and len(strides) == nd):
113124
strides_arr = <Py_ssize_t*>PyMem_Malloc(nd * sizeof(Py_ssize_t))
114125
if (not strides_arr):
126+
PyMem_Free(shape_ptr[0]);
127+
shape_ptr[0] = <Py_ssize_t *>(<size_t>0)
115128
return ERROR_MALLOC
116129
strides_ptr[0] = strides_arr
117130
for i in range(0, nd):
@@ -143,6 +156,8 @@ cdef int _from_input_shape_strides(
143156
contig[0] = 0 # non-contiguous
144157
return 0
145158
else:
159+
PyMem_Free(shape_ptr[0]);
160+
shape_ptr[0] = <Py_ssize_t *>(<size_t>0)
146161
return ERROR_UNEXPECTED_STRIDES
147162
# return ERROR_INTERNAL
148163

dpctl/tensor/_usmarray.pyx

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -475,7 +475,8 @@ cdef class usm_ndarray:
475475
cdef usm_ndarray res
476476

477477
res = usm_ndarray.__new__(
478-
usm_ndarray, _meta[0],
478+
usm_ndarray,
479+
_meta[0],
479480
dtype=_make_typestr(self.typenum_),
480481
strides=_meta[1],
481482
buffer=self.base_,

dpctl/tests/test_usm_ndarray_ctor.py

Lines changed: 81 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,12 +17,14 @@
1717
import numbers
1818

1919
import numpy as np
20+
import numpy.lib.stride_tricks as np_st
2021
import pytest
2122

2223
import dpctl
2324

2425
# import dpctl.memory as dpmem
2526
import dpctl.tensor as dpt
27+
from dpctl.tensor._usmarray import Device
2628

2729

2830
@pytest.mark.parametrize(
@@ -112,6 +114,8 @@ def test_properties():
112114
(2, 2, None, 3, 4),
113115
(Ellipsis,),
114116
(None, slice(0, None, 2), Ellipsis, slice(0, None, 3)),
117+
(None, slice(1, None, 2), Ellipsis, slice(1, None, 3)),
118+
(None, slice(None, -1, -2), Ellipsis, slice(2, None, 3)),
115119
(
116120
slice(None, None, -1),
117121
slice(None, None, -1),
@@ -121,10 +125,86 @@ def test_properties():
121125
],
122126
)
123127
def test_basic_slice(ind):
124-
X = dpt.usm_ndarray((2 * 3, 2 * 4, 3 * 5, 3 * 7), dtype="u1")
128+
X = dpt.usm_ndarray((2 * 3, 2 * 4, 3 * 5, 2 * 7), dtype="u1")
125129
Xnp = np.empty(X.shape, dtype=X.dtype)
126130
S = X[ind]
127131
Snp = Xnp[ind]
128132
assert S.shape == Snp.shape
129133
assert S.strides == Snp.strides
130134
assert S.dtype == X.dtype
135+
136+
137+
def _from_numpy(np_ary, device=None, usm_type="shared"):
138+
if type(np_ary) is np.ndarray:
139+
if np_ary.flags["FORC"]:
140+
x = np_ary
141+
else:
142+
x = np.ascontiguous(np_ary)
143+
R = dpt.usm_ndarray(
144+
np_ary.shape,
145+
dtype=np_ary.dtype,
146+
buffer=usm_type,
147+
buffer_ctor_kwargs={
148+
"queue": Device.create_device(device).sycl_queue
149+
},
150+
)
151+
R.usm_data.copy_from_host(x.reshape((-1)).view("|u1"))
152+
return R
153+
else:
154+
raise ValueError("Expected numpy.ndarray, got {}".format(type(np_ary)))
155+
156+
157+
def _to_numpy(usm_ary):
    """Build a NumPy view over a host copy of a ``dpt.usm_ndarray``.

    The whole underlying USM buffer is copied to the host, then a strided
    NumPy array with the shape of ``usm_ary`` is laid over that copy.

    Raises
    ------
    ValueError
        If ``usm_ary`` is not exactly a ``dpt.usm_ndarray``.
    """
    if type(usm_ary) is not dpt.usm_ndarray:
        raise ValueError(
            "Expected dpctl.tensor.usm_ndarray, got {}".format(type(usm_ary))
        )

    memory = usm_ary.usm_data
    total_bytes = memory.nbytes
    host_copy = memory.copy_to_host().view(usm_ary.dtype)
    itemsize = usm_ary.itemsize
    # interface offset is scaled by itemsize to index the byte array
    # (presumably the interface reports it in elements - TODO confirm)
    byte_offset = usm_ary.__sycl_usm_array_interface__["offset"] * itemsize

    raw_bytes = np.ndarray((total_bytes,), dtype="u1", buffer=host_copy)
    typed_tail = raw_bytes[byte_offset:].view(usm_ary.dtype)
    # as_strided takes byte strides, hence the scaling by itemsize
    byte_strides = tuple(itemsize * step for step in usm_ary.strides)
    return np_st.as_strided(
        typed_tail, shape=usm_ary.shape, strides=byte_strides
    )
174+
175+
176+
def test_slice_constructor_1d():
    """Slicing a 1-d usm_ndarray must select the same data as NumPy."""
    host_data = np.arange(37, dtype="i4")
    device_data = _from_numpy(host_data, device="gpu", usm_type="device")
    slices_to_check = (
        slice(1, None, 2),
        slice(0, None, 3),
        slice(1, None, 3),
        slice(2, None, 3),
        slice(None, None, -1),
        slice(-2, 2, -2),
        slice(-1, 1, -2),
        slice(None, None, -13),
    )
    for ind in slices_to_check:
        actual = _to_numpy(device_data[ind])
        expected = host_data[ind]
        assert np.array_equal(actual, expected), "Failed for {}".format(ind)
192+
193+
194+
def test_slice_constructor_3d():
    """Slicing a 3-d usm_ndarray must select the same data as NumPy."""
    # contents are arbitrary (np.empty); only agreement between the
    # host slice and the round-tripped device slice is checked
    host_data = np.empty((37, 24, 35), dtype="i4")
    device_data = _from_numpy(host_data, device="gpu", usm_type="device")
    indices_to_check = (
        slice(1, None, 2),
        slice(0, None, 3),
        slice(1, None, 3),
        slice(2, None, 3),
        slice(None, None, -1),
        slice(-2, 2, -2),
        slice(-1, 1, -2),
        slice(None, None, -13),
        (slice(None, None, -2), Ellipsis, None, 15),
    )
    for ind in indices_to_check:
        actual = _to_numpy(device_data[ind])
        expected = host_data[ind]
        assert np.array_equal(actual, expected), "Failed for {}".format(ind)

0 commit comments

Comments
 (0)