Skip to content

Commit 00ce83c

Browse files
Merge remote-tracking branch 'origin' into add_targets_args
2 parents 4e250d6 + 0dbd495 commit 00ce83c

File tree

14 files changed

+175
-324
lines changed

14 files changed

+175
-324
lines changed

.github/workflows/generate-coverage.yaml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -91,8 +91,7 @@ jobs:
9191
- name: Install dpctl dependencies
9292
shell: bash -l {0}
9393
run: |
94-
# TODO: unpin numpy when numpy#29167 resolved
95-
pip install numpy"<2.3.0" cython setuptools"<80" pytest pytest-cov scikit-build cmake coverage[toml] versioneer[toml]==0.29
94+
pip install numpy cython setuptools"<80" pytest pytest-cov scikit-build cmake coverage[toml] versioneer[toml]==0.29
9695
9796
- name: Build dpctl with coverage
9897
shell: bash -l {0}

.github/workflows/openssf-scorecard.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,6 @@ jobs:
6969

7070
# Upload the results to GitHub's code scanning dashboard.
7171
- name: "Upload to code-scanning"
72-
uses: github/codeql-action/upload-sarif@ce28f5bb42b7a9f2c824e633a3f6ee835bab6858 # v3.29.0
72+
uses: github/codeql-action/upload-sarif@d6bbdef45e766d081b84a2def353b0055f728d3e # v3.29.3
7373
with:
7474
sarif_file: results.sarif

.github/workflows/os-llvm-sycl-build.yml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -107,8 +107,7 @@ jobs:
107107
- name: Install dpctl dependencies
108108
shell: bash -l {0}
109109
run: |
110-
# TODO: unpin numpy when numpy#29167 resolved
111-
pip install numpy"<2.3.0" cython setuptools"<80" pytest scikit-build cmake ninja versioneer[toml]==0.29
110+
pip install numpy cython setuptools"<80" pytest scikit-build cmake ninja versioneer[toml]==0.29
112111
113112
- name: Checkout repo
114113
uses: actions/checkout@v4.2.2

CHANGELOG.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
1414

1515
### Maintenance
1616

17+
## [0.20.2] - Jun. 26, 2025
18+
19+
### Maintenance
20+
21+
* Add Python 3.13 to package metadata [gh-2110](https://github.com/IntelPython/dpctl/pull/2110)
22+
* When building dpctl conda package for Python 3.13, restrict Cython to below 3.1.0, as this version and higher may cause crashes [gh-2112](https://github.com/IntelPython/dpctl/pull/2112)
23+
1724
## [0.20.1] - Jun. 06, 2025
1825

1926
### Fixed

conda-recipe/meta.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,12 @@ requirements:
4141
- {{ dep }}
4242
{% elif dep.startswith('build>=') %}
4343
- {{ 'python-' ~ dep }}
44+
{% elif dep.startswith('cython') %}
45+
{% if dep.split(';')[1] == "python_version<'3.13'" %}
46+
- {{ dep.split(';')[0] }} # [py<313]
47+
{% else %}
48+
- {{ dep.split(';')[0] }} # [py>=313]
49+
{% endif %}
4450
{% else %}
4551
- {{ dep|replace('_','-') }}
4652
{% endif %}

dpctl/tensor/_copy_utils.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,9 @@
4141
def _copy_to_numpy(ary):
4242
if not isinstance(ary, dpt.usm_ndarray):
4343
raise TypeError(f"Expected dpctl.tensor.usm_ndarray, got {type(ary)}")
44+
if ary.size == 0:
45+
# no data needs to be copied for zero sized array
46+
return np.ndarray(ary.shape, dtype=ary.dtype)
4447
nb = ary.usm_data.nbytes
4548
q = ary.sycl_queue
4649
hh = dpm.MemoryUSMHost(nb, queue=q)
@@ -739,6 +742,9 @@ def astype(
739742
order=copy_order,
740743
buffer_ctor_kwargs={"queue": usm_ary.sycl_queue},
741744
)
745+
# see #2121
746+
if ary_dtype == dpt.bool:
747+
usm_ary = dpt.not_equal(usm_ary, 0, order=copy_order)
742748
_copy_from_usm_ndarray_to_usm_ndarray(R, usm_ary)
743749
return R
744750

dpctl/tensor/_usmarray.pyx

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -382,8 +382,9 @@ cdef class usm_ndarray:
382382
else:
383383
self._cleanup()
384384
raise ValueError("buffer='{}' was not understood.".format(buffer))
385-
if (_offset + ary_min_displacement < 0 or
386-
(_offset + ary_max_displacement + 1) * itemsize > _buffer.nbytes):
385+
if (shape_to_elem_count(nd, shape_ptr) > 0 and
386+
(_offset + ary_min_displacement < 0 or
387+
(_offset + ary_max_displacement + 1) * itemsize > _buffer.nbytes)):
387388
self._cleanup()
388389
raise ValueError(("buffer='{}' can not accommodate "
389390
"the requested array.").format(buffer))
@@ -1324,8 +1325,8 @@ cdef class usm_ndarray:
13241325
allocated, or the non-partitioned parent device of the allocation
13251326
device.
13261327
1327-
See ``DLDeviceType`` for a list of devices supported by the DLPack
1328-
protocol.
1328+
See :class:`dpctl.tensor.DLDeviceType` for a list of devices supported
1329+
by the DLPack protocol.
13291330
13301331
Raises:
13311332
DLPackCreationError:

dpctl/tensor/libtensor/source/sorting/py_argsort_common.hpp

Lines changed: 31 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -126,22 +126,43 @@ py_argsort(const dpctl::tensor::usm_ndarray &src,
126126
bool is_dst_c_contig = dst.is_c_contiguous();
127127

128128
if (is_src_c_contig && is_dst_c_contig) {
129-
static constexpr py::ssize_t zero_offset = py::ssize_t(0);
129+
if (sort_nelems > 1) {
130+
static constexpr py::ssize_t zero_offset = py::ssize_t(0);
130131

131-
auto fn = sort_contig_fns[src_typeid][dst_typeid];
132+
auto fn = sort_contig_fns[src_typeid][dst_typeid];
132133

133-
if (fn == nullptr) {
134-
throw py::value_error("Not implemented for dtypes of input arrays");
134+
if (fn == nullptr) {
135+
throw py::value_error(
136+
"Not implemented for dtypes of input arrays");
137+
}
138+
139+
sycl::event comp_ev =
140+
fn(exec_q, iter_nelems, sort_nelems, src.get_data(),
141+
dst.get_data(), zero_offset, zero_offset, zero_offset,
142+
zero_offset, depends);
143+
144+
sycl::event keep_args_alive_ev =
145+
dpctl::utils::keep_args_alive(exec_q, {src, dst}, {comp_ev});
146+
147+
return std::make_pair(keep_args_alive_ev, comp_ev);
135148
}
149+
else {
150+
assert(dst.get_size() == iter_nelems);
151+
int dst_elemsize = dst.get_elemsize();
152+
static constexpr int memset_val(0);
136153

137-
sycl::event comp_ev =
138-
fn(exec_q, iter_nelems, sort_nelems, src.get_data(), dst.get_data(),
139-
zero_offset, zero_offset, zero_offset, zero_offset, depends);
154+
sycl::event fill_ev = exec_q.submit([&](sycl::handler &cgh) {
155+
cgh.depends_on(depends);
140156

141-
sycl::event keep_args_alive_ev =
142-
dpctl::utils::keep_args_alive(exec_q, {src, dst}, {comp_ev});
157+
cgh.memset(reinterpret_cast<void *>(dst.get_data()), memset_val,
158+
iter_nelems * dst_elemsize);
159+
});
143160

144-
return std::make_pair(keep_args_alive_ev, comp_ev);
161+
sycl::event keep_args_alive_ev =
162+
dpctl::utils::keep_args_alive(exec_q, {src, dst}, {fill_ev});
163+
164+
return std::make_pair(keep_args_alive_ev, fill_ev);
165+
}
145166
}
146167

147168
throw py::value_error(

dpctl/tensor/libtensor/source/sorting/py_sort_common.hpp

Lines changed: 27 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -127,23 +127,38 @@ py_sort(const dpctl::tensor::usm_ndarray &src,
127127
bool is_dst_c_contig = dst.is_c_contiguous();
128128

129129
if (is_src_c_contig && is_dst_c_contig) {
130-
static constexpr py::ssize_t zero_offset = py::ssize_t(0);
130+
if (sort_nelems > 1) {
131+
static constexpr py::ssize_t zero_offset = py::ssize_t(0);
131132

132-
auto fn = sort_contig_fns[src_typeid];
133+
auto fn = sort_contig_fns[src_typeid];
133134

134-
if (nullptr == fn) {
135-
throw py::value_error(
136-
"Not implemented for the dtype of input arrays");
137-
}
135+
if (nullptr == fn) {
136+
throw py::value_error(
137+
"Not implemented for the dtype of input arrays");
138+
}
139+
140+
sycl::event comp_ev =
141+
fn(exec_q, iter_nelems, sort_nelems, src.get_data(),
142+
dst.get_data(), zero_offset, zero_offset, zero_offset,
143+
zero_offset, depends);
138144

139-
sycl::event comp_ev =
140-
fn(exec_q, iter_nelems, sort_nelems, src.get_data(), dst.get_data(),
141-
zero_offset, zero_offset, zero_offset, zero_offset, depends);
145+
sycl::event keep_args_alive_ev =
146+
dpctl::utils::keep_args_alive(exec_q, {src, dst}, {comp_ev});
142147

143-
sycl::event keep_args_alive_ev =
144-
dpctl::utils::keep_args_alive(exec_q, {src, dst}, {comp_ev});
148+
return std::make_pair(keep_args_alive_ev, comp_ev);
149+
}
150+
else {
151+
assert(dst.get_size() == iter_nelems);
152+
int src_elemsize = src.get_elemsize();
145153

146-
return std::make_pair(keep_args_alive_ev, comp_ev);
154+
sycl::event copy_ev =
155+
exec_q.copy<char>(src.get_data(), dst.get_data(),
156+
src_elemsize * iter_nelems, depends);
157+
158+
return std::make_pair(
159+
dpctl::utils::keep_args_alive(exec_q, {src, dst}, {copy_ev}),
160+
copy_ev);
161+
}
147162
}
148163

149164
throw py::value_error(

dpctl/tests/elementwise/test_hyperbolic.py

Lines changed: 2 additions & 130 deletions
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,6 @@
1414
# See the License for the specific language governing permissions and
1515
# limitations under the License.
1616

17-
import itertools
18-
import os
19-
import re
20-
2117
import numpy as np
2218
import pytest
2319
from numpy.testing import assert_allclose
@@ -34,7 +30,6 @@
3430
(np.arctanh, dpt.atanh),
3531
]
3632
_all_funcs = _hyper_funcs + _inv_hyper_funcs
37-
_dpt_funcs = [t[1] for t in _all_funcs]
3833

3934

4035
@pytest.mark.parametrize("np_call, dpt_call", _all_funcs)
@@ -45,17 +40,10 @@ def test_hyper_out_type(np_call, dpt_call, dtype):
4540

4641
a = 1 if np_call == np.arccosh else 0
4742

48-
X = dpt.asarray(a, dtype=dtype, sycl_queue=q)
49-
expected_dtype = np_call(np.array(a, dtype=dtype)).dtype
50-
expected_dtype = _map_to_device_dtype(expected_dtype, q.sycl_device)
51-
assert dpt_call(X).dtype == expected_dtype
52-
53-
X = dpt.asarray(a, dtype=dtype, sycl_queue=q)
43+
x = dpt.asarray(a, dtype=dtype, sycl_queue=q)
5444
expected_dtype = np_call(np.array(a, dtype=dtype)).dtype
5545
expected_dtype = _map_to_device_dtype(expected_dtype, q.sycl_device)
56-
Y = dpt.empty_like(X, dtype=expected_dtype)
57-
dpt_call(X, out=Y)
58-
assert_allclose(dpt.asnumpy(dpt_call(X)), dpt.asnumpy(Y))
46+
assert dpt_call(x).dtype == expected_dtype
5947

6048

6149
@pytest.mark.parametrize("np_call, dpt_call", _all_funcs)
@@ -119,79 +107,6 @@ def test_hyper_complex_contig(np_call, dpt_call, dtype):
119107
)
120108

121109

122-
@pytest.mark.parametrize("np_call, dpt_call", _all_funcs)
123-
@pytest.mark.parametrize("usm_type", ["device", "shared", "host"])
124-
def test_hyper_usm_type(np_call, dpt_call, usm_type):
125-
q = get_queue_or_skip()
126-
127-
arg_dt = np.dtype("f4")
128-
input_shape = (10, 10, 10, 10)
129-
X = dpt.empty(input_shape, dtype=arg_dt, usm_type=usm_type, sycl_queue=q)
130-
if np_call == np.arctanh:
131-
X[..., 0::2] = -0.4
132-
X[..., 1::2] = 0.3
133-
elif np_call == np.arccosh:
134-
X[..., 0::2] = 2.2
135-
X[..., 1::2] = 5.5
136-
else:
137-
X[..., 0::2] = -4.4
138-
X[..., 1::2] = 5.5
139-
140-
Y = dpt_call(X)
141-
assert Y.usm_type == X.usm_type
142-
assert Y.sycl_queue == X.sycl_queue
143-
assert Y.flags.c_contiguous
144-
145-
expected_Y = np_call(dpt.asnumpy(X))
146-
tol = 8 * dpt.finfo(Y.dtype).resolution
147-
assert_allclose(dpt.asnumpy(Y), expected_Y, atol=tol, rtol=tol)
148-
149-
150-
@pytest.mark.parametrize("np_call, dpt_call", _all_funcs)
151-
@pytest.mark.parametrize("dtype", _all_dtypes)
152-
def test_hyper_order(np_call, dpt_call, dtype):
153-
q = get_queue_or_skip()
154-
skip_if_dtype_not_supported(dtype, q)
155-
156-
arg_dt = np.dtype(dtype)
157-
input_shape = (4, 4, 4, 4)
158-
X = dpt.empty(input_shape, dtype=arg_dt, sycl_queue=q)
159-
if np_call == np.arctanh:
160-
X[..., 0::2] = -0.4
161-
X[..., 1::2] = 0.3
162-
elif np_call == np.arccosh:
163-
X[..., 0::2] = 2.2
164-
X[..., 1::2] = 5.5
165-
else:
166-
X[..., 0::2] = -4.4
167-
X[..., 1::2] = 5.5
168-
169-
for perms in itertools.permutations(range(4)):
170-
U = dpt.permute_dims(X[:, ::-1, ::-1, :], perms)
171-
with np.errstate(all="ignore"):
172-
expected_Y = np_call(dpt.asnumpy(U))
173-
for ord in ["C", "F", "A", "K"]:
174-
Y = dpt_call(U, order=ord)
175-
tol = 8 * max(
176-
dpt.finfo(Y.dtype).resolution,
177-
np.finfo(expected_Y.dtype).resolution,
178-
)
179-
assert_allclose(dpt.asnumpy(Y), expected_Y, atol=tol, rtol=tol)
180-
181-
182-
@pytest.mark.parametrize("callable", _dpt_funcs)
183-
@pytest.mark.parametrize("dtype", _all_dtypes)
184-
def test_hyper_error_dtype(callable, dtype):
185-
q = get_queue_or_skip()
186-
skip_if_dtype_not_supported(dtype, q)
187-
188-
x = dpt.ones(5, dtype=dtype)
189-
y = dpt.empty_like(x, dtype="int16")
190-
with pytest.raises(ValueError) as excinfo:
191-
callable(x, out=y)
192-
assert re.match("Output array of type.*is needed", str(excinfo.value))
193-
194-
195110
@pytest.mark.parametrize("np_call, dpt_call", _all_funcs)
196111
@pytest.mark.parametrize("dtype", ["f2", "f4", "f8"])
197112
def test_hyper_real_strided(np_call, dpt_call, dtype):
@@ -270,46 +185,3 @@ def test_hyper_real_special_cases(np_call, dpt_call, dtype):
270185

271186
tol = 8 * dpt.finfo(dtype).resolution
272187
assert_allclose(dpt.asnumpy(dpt_call(yf)), Y_np, atol=tol, rtol=tol)
273-
274-
275-
@pytest.mark.parametrize("np_call, dpt_call", _all_funcs)
276-
@pytest.mark.parametrize("dtype", ["c8", "c16"])
277-
def test_hyper_complex_special_cases_conj_property(np_call, dpt_call, dtype):
278-
q = get_queue_or_skip()
279-
skip_if_dtype_not_supported(dtype, q)
280-
281-
x = [np.nan, np.inf, -np.inf, +0.0, -0.0, +1.0, -1.0]
282-
xc = [complex(*val) for val in itertools.product(x, repeat=2)]
283-
284-
Xc_np = np.array(xc, dtype=dtype)
285-
Xc = dpt.asarray(Xc_np, dtype=dtype, sycl_queue=q)
286-
287-
tol = 50 * dpt.finfo(dtype).resolution
288-
Y = dpt_call(Xc)
289-
Yc = dpt_call(dpt.conj(Xc))
290-
291-
dpt.allclose(Y, dpt.conj(Yc), atol=tol, rtol=tol)
292-
293-
294-
@pytest.mark.skipif(
295-
os.name != "posix", reason="Known to fail on Windows due to bug in NumPy"
296-
)
297-
@pytest.mark.parametrize("np_call, dpt_call", _all_funcs)
298-
@pytest.mark.parametrize("dtype", ["c8", "c16"])
299-
def test_hyper_complex_special_cases(np_call, dpt_call, dtype):
300-
q = get_queue_or_skip()
301-
skip_if_dtype_not_supported(dtype, q)
302-
303-
x = [np.nan, np.inf, -np.inf, +0.0, -0.0, +1.0, -1.0]
304-
xc = [complex(*val) for val in itertools.product(x, repeat=2)]
305-
306-
Xc_np = np.array(xc, dtype=dtype)
307-
Xc = dpt.asarray(Xc_np, dtype=dtype, sycl_queue=q)
308-
309-
with np.errstate(all="ignore"):
310-
Ynp = np_call(Xc_np)
311-
312-
tol = 50 * dpt.finfo(dtype).resolution
313-
Y = dpt_call(Xc)
314-
assert_allclose(dpt.asnumpy(dpt.real(Y)), np.real(Ynp), atol=tol, rtol=tol)
315-
assert_allclose(dpt.asnumpy(dpt.imag(Y)), np.imag(Ynp), atol=tol, rtol=tol)

0 commit comments

Comments
 (0)