Skip to content

Commit ffb23cd

Browse files
authored
Merge pull request numpy#26147 from ngoldbaum/fix-stringdtype-views
BUG: introduce PyArray_SafeCast to fix issues around stringdtype views
2 parents 9704767 + 6373b85 commit ffb23cd

File tree

8 files changed

+87
-31
lines changed

8 files changed

+87
-31
lines changed

numpy/_core/_internal.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -560,7 +560,7 @@ def _view_is_safe(oldtype, newtype):
560560
return
561561

562562
if newtype.hasobject or oldtype.hasobject:
563-
raise TypeError("Cannot change data-type for object array.")
563+
raise TypeError("Cannot change data-type for array of references.")
564564
return
565565

566566

numpy/_core/src/multiarray/convert_datatype.c

Lines changed: 34 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -465,14 +465,17 @@ _get_cast_safety_from_castingimpl(PyArrayMethodObject *castingimpl,
465465
/*
466466
* Check for less harmful non-standard returns. The following two returns
467467
* should never happen:
468-
* 1. No-casting must imply a view offset of 0.
468+
* 1. No-casting must imply a view offset of 0 unless the DType
469+
defines a finalization function, which implies it stores data
470+
on the descriptor
469471
* 2. Equivalent-casting + 0 view offset is (usually) the definition
470472
* of a "no" cast. However, changing the order of fields can also
471473
* create descriptors that are not equivalent but views.
472474
* Note that unsafe casts can have a view offset. For example, in
473475
* principle, casting `<i8` to `<i4` is a cast with 0 offset.
474476
*/
475-
if (*view_offset != 0) {
477+
if ((*view_offset != 0 &&
478+
NPY_DT_SLOTS(NPY_DTYPE(from))->finalize_descr == NULL)) {
476479
assert(casting != NPY_NO_CASTING);
477480
}
478481
else {
@@ -648,6 +651,35 @@ PyArray_CanCastTo(PyArray_Descr *from, PyArray_Descr *to)
648651
}
649652

650653

654+
/*
655+
* This function returns true if the two types can be safely cast at
656+
* *minimum_safety* casting level. Sets the *view_offset* if that is set
657+
* for the cast. If ignore_error is set, the error indicator is cleared
658+
* if there are any errors in cast setup and returns false, otherwise
659+
* the error indicator is left set and returns -1.
660+
*/
661+
NPY_NO_EXPORT npy_intp
662+
PyArray_SafeCast(PyArray_Descr *type1, PyArray_Descr *type2,
663+
npy_intp* view_offset, NPY_CASTING minimum_safety,
664+
npy_intp ignore_error)
665+
{
666+
if (type1 == type2) {
667+
*view_offset = 0;
668+
return 1;
669+
}
670+
671+
NPY_CASTING safety = PyArray_GetCastInfo(type1, type2, NULL, view_offset);
672+
if (safety < 0) {
673+
if (ignore_error) {
674+
PyErr_Clear();
675+
return 0;
676+
}
677+
return -1;
678+
}
679+
return PyArray_MinCastSafety(safety, minimum_safety) == minimum_safety;
680+
}
681+
682+
651683
/* Provides an ordering for the dtype 'kind' character codes */
652684
NPY_NO_EXPORT int
653685
dtype_kind_to_ordering(char kind)

numpy/_core/src/multiarray/convert_datatype.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,11 @@ PyArray_GetCastInfo(
102102
PyArray_Descr *from, PyArray_Descr *to, PyArray_DTypeMeta *to_dtype,
103103
npy_intp *view_offset);
104104

105+
NPY_NO_EXPORT npy_intp
106+
PyArray_SafeCast(PyArray_Descr *type1, PyArray_Descr *type2,
107+
npy_intp* view_offset, NPY_CASTING minimum_safety,
108+
npy_intp ignore_errors);
109+
105110
NPY_NO_EXPORT int
106111
PyArray_CheckCastSafety(NPY_CASTING casting,
107112
PyArray_Descr *from, PyArray_Descr *to, PyArray_DTypeMeta *to_dtype);

numpy/_core/src/multiarray/ctors.c

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1913,8 +1913,10 @@ PyArray_FromArray(PyArrayObject *arr, PyArray_Descr *newtype, int flags)
19131913
}
19141914

19151915
arrflags = PyArray_FLAGS(arr);
1916-
/* If a guaranteed copy was requested */
1917-
copy = (flags & NPY_ARRAY_ENSURECOPY) ||
1916+
1917+
1918+
copy = /* If a guaranteed copy was requested */
1919+
(flags & NPY_ARRAY_ENSURECOPY) ||
19181920
/* If C contiguous was requested, and arr is not */
19191921
((flags & NPY_ARRAY_C_CONTIGUOUS) &&
19201922
(!(arrflags & NPY_ARRAY_C_CONTIGUOUS))) ||
@@ -1926,8 +1928,13 @@ PyArray_FromArray(PyArrayObject *arr, PyArray_Descr *newtype, int flags)
19261928
(!(arrflags & NPY_ARRAY_F_CONTIGUOUS))) ||
19271929
/* If a writeable array was requested, and arr is not */
19281930
((flags & NPY_ARRAY_WRITEABLE) &&
1929-
(!(arrflags & NPY_ARRAY_WRITEABLE))) ||
1930-
!PyArray_EquivTypes(oldtype, newtype);
1931+
(!(arrflags & NPY_ARRAY_WRITEABLE)));
1932+
1933+
if (!copy) {
1934+
npy_intp view_offset;
1935+
npy_intp is_safe = PyArray_SafeCast(oldtype, newtype, &view_offset, NPY_NO_CASTING, 1);
1936+
copy = !(is_safe && (view_offset != NPY_MIN_INTP));
1937+
}
19311938

19321939
if (copy) {
19331940
if (flags & NPY_ARRAY_ENSURENOCOPY ) {

numpy/_core/src/multiarray/multiarraymodule.c

Lines changed: 4 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1449,23 +1449,6 @@ PyArray_EquivTypes(PyArray_Descr *type1, PyArray_Descr *type2)
14491449
return 1;
14501450
}
14511451

1452-
if (Py_TYPE(Py_TYPE(type1)) == &PyType_Type) {
1453-
/*
1454-
* 2021-12-17: This case is nonsense and should be removed eventually!
1455-
*
1456-
* boost::python has/had a bug effectively using EquivTypes with
1457-
* `type(arbitrary_obj)`. That is clearly wrong as that cannot be a
1458-
* `PyArray_Descr *`. We assume that `type(type(type(arbitrary_obj))`
1459-
* is always in practice `type` (this is the type of the metaclass),
1460-
* but for our descriptors, `type(type(descr))` is DTypeMeta.
1461-
*
1462-
* In that case, we just return False. There is a possibility that
1463-
* this actually _worked_ effectively (returning 1 sometimes).
1464-
* We ignore that possibility for simplicity; it really is not our bug.
1465-
*/
1466-
return 0;
1467-
}
1468-
14691452
/*
14701453
* Do not use PyArray_CanCastTypeTo because it supports legacy flexible
14711454
* dtypes as input.
@@ -1608,7 +1591,10 @@ _array_fromobject_generic(
16081591

16091592
/* One more chance for faster exit if user specified the dtype. */
16101593
oldtype = PyArray_DESCR(oparr);
1611-
if (PyArray_EquivTypes(oldtype, dtype)) {
1594+
npy_intp view_offset;
1595+
npy_intp is_safe = PyArray_SafeCast(oldtype, dtype, &view_offset, NPY_NO_CASTING, 1);
1596+
npy_intp view_safe = (is_safe && (view_offset != NPY_MIN_INTP));
1597+
if (view_safe) {
16121598
if (copy != NPY_COPY_ALWAYS && STRIDING_OK(oparr, order)) {
16131599
if (oldtype == dtype) {
16141600
Py_INCREF(op);

numpy/_core/src/multiarray/stringdtype/casts.c

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,10 @@ string_to_string_resolve_descriptors(PyObject *NPY_UNUSED(self),
7979
return NPY_UNSAFE_CASTING;
8080
}
8181

82-
*view_offset = 0;
82+
// views are only legal between descriptors that share allocators (e.g. the same object)
83+
if (descr0->allocator == descr1->allocator) {
84+
*view_offset = 0;
85+
};
8386

8487
return NPY_NO_CASTING;
8588
}

numpy/_core/src/umath/ufunc_object.c

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -789,9 +789,9 @@ check_for_trivial_loop(PyArrayMethodObject *ufuncimpl,
789789

790790
if (dtypes[i] != PyArray_DESCR(op[i])) {
791791
npy_intp view_offset;
792-
NPY_CASTING safety = PyArray_GetCastInfo(
793-
PyArray_DESCR(op[i]), dtypes[i], NULL, &view_offset);
794-
if (safety < 0 && PyErr_Occurred()) {
792+
npy_intp is_safe = PyArray_SafeCast(
793+
PyArray_DESCR(op[i]), dtypes[i], &view_offset, casting, 0);
794+
if (is_safe < 0 && PyErr_Occurred()) {
795795
/* A proper error during a cast check, should be rare */
796796
return -1;
797797
}
@@ -806,8 +806,8 @@ check_for_trivial_loop(PyArrayMethodObject *ufuncimpl,
806806
* can force cast to bool)
807807
*/
808808
}
809-
else if (PyArray_MinCastSafety(safety, casting) != casting) {
810-
return 0; /* the cast is not safe enough */
809+
else if (is_safe != 1) {
810+
return 0; /* there was a cast error or cast is not safe enough */
811811
}
812812
}
813813
if (must_copy) {

numpy/_core/tests/test_stringdtype.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -458,6 +458,29 @@ def test_creation_functions():
458458
assert np.empty(3, dtype="T")[0] == ""
459459

460460

461+
def test_create_with_copy_none(string_list):
462+
arr = np.array(string_list, dtype=StringDType())
463+
# create another stringdtype array with an arena that has a different
464+
# in-memory layout than the first array
465+
arr_rev = np.array(string_list[::-1], dtype=StringDType())
466+
467+
# this should create a copy and the resulting array
468+
# shouldn't share an allocator or arena with arr_rev, despite
469+
# explicitly passing arr_rev.dtype
470+
arr_copy = np.array(arr, copy=None, dtype=arr_rev.dtype)
471+
np.testing.assert_array_equal(arr, arr_copy)
472+
assert arr_copy.base is None
473+
474+
with pytest.raises(ValueError, match="Unable to avoid copy"):
475+
np.array(arr, copy=False, dtype=arr_rev.dtype)
476+
477+
# because we're using arr's dtype instance, the view is safe
478+
arr_view = np.array(arr, copy=None, dtype=arr.dtype)
479+
np.testing.assert_array_equal(arr, arr)
480+
np.testing.assert_array_equal(arr_view[::-1], arr_rev)
481+
assert arr_view is arr
482+
483+
461484
@pytest.mark.parametrize(
462485
"strings",
463486
[

0 commit comments

Comments
 (0)