Skip to content

Commit b3bbdc9

Browse files
authored
Merge pull request numpy#25944 from ngoldbaum/stringdtype-copy-bug
BUG: avoid incorrect stringdtype allocator sharing from array copies
2 parents dbbf235 + 21ee00c commit b3bbdc9

File tree

8 files changed

+43
-45
lines changed

8 files changed

+43
-45
lines changed

numpy/_core/src/multiarray/arraytypes.c.src

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4233,7 +4233,7 @@ PyArray_DescrFromType(int type)
42334233
ret = NULL;
42344234
}
42354235
else if (type == NPY_VSTRING || type == NPY_VSTRINGLTR) {
4236-
ret = (PyArray_Descr *)new_stringdtype_instance(NULL, 1, NULL);
4236+
ret = (PyArray_Descr *)new_stringdtype_instance(NULL, 1);
42374237
}
42384238
// builtin legacy dtypes
42394239
else if (type < NPY_NTYPES_LEGACY) {

numpy/_core/src/multiarray/item_selection.c

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -337,14 +337,16 @@ PyArray_TakeFrom(PyArrayObject *self0, PyObject *indices0, int axis,
337337
goto fail;
338338
}
339339

340-
Py_XDECREF(indices);
341-
Py_XDECREF(self);
342340
if (out != NULL && out != obj) {
343-
Py_INCREF(out);
344-
PyArray_ResolveWritebackIfCopy(obj);
341+
if (PyArray_ResolveWritebackIfCopy(obj) < 0) {
342+
goto fail;
343+
}
345344
Py_DECREF(obj);
345+
Py_INCREF(out);
346346
obj = out;
347347
}
348+
Py_XDECREF(indices);
349+
Py_XDECREF(self);
348350
return (PyObject *)obj;
349351

350352
fail:

numpy/_core/src/multiarray/stringdtype/casts.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@
3030
if (given_descrs[1] == NULL) { \
3131
PyArray_Descr *new = \
3232
(PyArray_Descr *)new_stringdtype_instance( \
33-
NULL, 1, NULL); \
33+
NULL, 1); \
3434
if (new == NULL) { \
3535
return (NPY_CASTING)-1; \
3636
} \
@@ -121,7 +121,7 @@ string_to_string(PyArrayMethod_Context *context, char *const data[],
121121
}
122122
}
123123
else if (free_and_copy(iallocator, oallocator, s, os,
124-
"string to string cast") == -1) {
124+
"string to string cast") < 0) {
125125
goto fail;
126126
}
127127
}

numpy/_core/src/multiarray/stringdtype/dtype.c

Lines changed: 12 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
* Internal helper to create new instances
2323
*/
2424
PyObject *
25-
new_stringdtype_instance(PyObject *na_object, int coerce, npy_string_allocator *allocator)
25+
new_stringdtype_instance(PyObject *na_object, int coerce)
2626
{
2727
PyObject *new =
2828
PyArrayDescr_Type.tp_new((PyTypeObject *)&PyArray_StringDType, NULL, NULL);
@@ -35,18 +35,12 @@ new_stringdtype_instance(PyObject *na_object, int coerce, npy_string_allocator *
3535
char *na_name_buf = NULL;
3636
char array_owned = 0;
3737

38+
npy_string_allocator *allocator = NpyString_new_allocator(PyMem_RawMalloc, PyMem_RawFree,
39+
PyMem_RawRealloc);
3840
if (allocator == NULL) {
39-
allocator = NpyString_new_allocator(PyMem_RawMalloc, PyMem_RawFree,
40-
PyMem_RawRealloc);
41-
if (allocator == NULL) {
42-
PyErr_SetString(PyExc_MemoryError,
43-
"Failed to create string allocator");
44-
goto fail;
45-
}
46-
}
47-
else {
48-
// indicating that this is a view
49-
array_owned = 2;
41+
PyErr_SetString(PyExc_MemoryError,
42+
"Failed to create string allocator");
43+
goto fail;
5044
}
5145

5246
npy_static_string default_string = {0, NULL};
@@ -119,7 +113,7 @@ new_stringdtype_instance(PyObject *na_object, int coerce, npy_string_allocator *
119113
snew->has_string_na = has_string_na;
120114
snew->coerce = coerce;
121115
snew->allocator = allocator;
122-
snew->array_owned = 2;
116+
snew->array_owned = 0;
123117
snew->na_name = na_name;
124118
snew->default_string = default_string;
125119

@@ -210,7 +204,7 @@ common_instance(PyArray_StringDTypeObject *dtype1, PyArray_StringDTypeObject *dt
210204
}
211205

212206
return (PyArray_StringDTypeObject *)new_stringdtype_instance(
213-
dtype1->na_object, dtype1->coerce, NULL);
207+
dtype1->na_object, dtype1->coerce);
214208
}
215209

216210
/*
@@ -274,7 +268,7 @@ string_discover_descriptor_from_pyobject(PyTypeObject *NPY_UNUSED(cls),
274268

275269
Py_DECREF(val);
276270

277-
PyArray_Descr *ret = (PyArray_Descr *)new_stringdtype_instance(NULL, 1, NULL);
271+
PyArray_Descr *ret = (PyArray_Descr *)new_stringdtype_instance(NULL, 1);
278272

279273
return ret;
280274
}
@@ -657,10 +651,8 @@ stringdtype_finalize_descr(PyArray_Descr *dtype)
657651
return dtype;
658652
}
659653
PyArray_StringDTypeObject *ret = (PyArray_StringDTypeObject *)new_stringdtype_instance(
660-
sdtype->na_object, sdtype->coerce, sdtype->allocator);
661-
if (ret->array_owned == 0) {
662-
ret->array_owned = 1;
663-
}
654+
sdtype->na_object, sdtype->coerce);
655+
ret->array_owned = 1;
664656
return (PyArray_Descr *)ret;
665657
}
666658

@@ -696,7 +688,7 @@ stringdtype_new(PyTypeObject *NPY_UNUSED(cls), PyObject *args, PyObject *kwds)
696688
return NULL;
697689
}
698690

699-
return new_stringdtype_instance(na_object, coerce, NULL);
691+
return new_stringdtype_instance(na_object, coerce);
700692
}
701693

702694
static void

numpy/_core/src/multiarray/stringdtype/dtype.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ extern "C" {
1414
#define ALIGNOF_NPY_PACKED_STATIC_STRING _Alignof(size_t)
1515

1616
NPY_NO_EXPORT PyObject *
17-
new_stringdtype_instance(PyObject *na_object, int coerce, npy_string_allocator *allocator);
17+
new_stringdtype_instance(PyObject *na_object, int coerce);
1818

1919
NPY_NO_EXPORT int
2020
init_string_dtype(void);

numpy/_core/src/multiarray/stringdtype/static_string.c

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -530,7 +530,7 @@ NpyString_pack_empty(npy_packed_static_string *out)
530530
{
531531
_npy_static_string_u *out_u = (_npy_static_string_u *)out;
532532
unsigned char *flags = &out_u->direct_buffer.size_and_flags;
533-
if (*flags == 0 || *flags & NPY_STRING_OUTSIDE_ARENA) {
533+
if (*flags & NPY_STRING_OUTSIDE_ARENA) {
534534
// This also sets short string size to 0.
535535
*flags = NPY_STRING_INITIALIZED | NPY_STRING_OUTSIDE_ARENA;
536536
}
@@ -685,9 +685,6 @@ NpyString_dup(const npy_packed_static_string *in,
685685
_npy_static_string_u *in_u = (_npy_static_string_u *)in;
686686
char *in_buf = NULL;
687687
npy_string_arena *arena = &in_allocator->arena;
688-
if (arena->buffer == NULL) {
689-
return -1;
690-
}
691688
int used_malloc = 0;
692689
if (in_allocator == out_allocator && !is_short_string(in)) {
693690
in_buf = in_allocator->malloc(size);

numpy/_core/src/umath/stringdtype_ufuncs.cpp

Lines changed: 6 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ multiply_resolve_descriptors(
6060

6161
if (given_descrs[2] == NULL) {
6262
out_descr = (PyArray_Descr *)new_stringdtype_instance(
63-
odescr->na_object, odescr->coerce, NULL);
63+
odescr->na_object, odescr->coerce);
6464
if (out_descr == NULL) {
6565
return (NPY_CASTING)-1;
6666
}
@@ -273,8 +273,7 @@ binary_resolve_descriptors(struct PyArrayMethodObject_tag *NPY_UNUSED(method),
273273
if (given_descrs[2] == NULL) {
274274
out_descr = (PyArray_Descr *)new_stringdtype_instance(
275275
((PyArray_StringDTypeObject *)given_descrs[1])->na_object,
276-
((PyArray_StringDTypeObject *)given_descrs[1])->coerce,
277-
NULL);
276+
((PyArray_StringDTypeObject *)given_descrs[1])->coerce);
278277

279278
if (out_descr == NULL) {
280279
return (NPY_CASTING)-1;
@@ -1077,8 +1076,7 @@ strip_chars_resolve_descriptors(
10771076
if (given_descrs[2] == NULL) {
10781077
out_descr = (PyArray_Descr *)new_stringdtype_instance(
10791078
((PyArray_StringDTypeObject *)given_descrs[0])->na_object,
1080-
((PyArray_StringDTypeObject *)given_descrs[0])->coerce,
1081-
NULL);
1079+
((PyArray_StringDTypeObject *)given_descrs[0])->coerce);
10821080

10831081
if (out_descr == NULL) {
10841082
return (NPY_CASTING)-1;
@@ -1186,8 +1184,7 @@ strip_whitespace_resolve_descriptors(
11861184
if (given_descrs[1] == NULL) {
11871185
out_descr = (PyArray_Descr *)new_stringdtype_instance(
11881186
((PyArray_StringDTypeObject *)given_descrs[0])->na_object,
1189-
((PyArray_StringDTypeObject *)given_descrs[0])->coerce,
1190-
NULL);
1187+
((PyArray_StringDTypeObject *)given_descrs[0])->coerce);
11911188

11921189
if (out_descr == NULL) {
11931190
return (NPY_CASTING)-1;
@@ -1335,8 +1332,7 @@ replace_resolve_descriptors(struct PyArrayMethodObject_tag *NPY_UNUSED(method),
13351332
if (given_descrs[4] == NULL) {
13361333
out_descr = (PyArray_Descr *)new_stringdtype_instance(
13371334
((PyArray_StringDTypeObject *)given_descrs[0])->na_object,
1338-
((PyArray_StringDTypeObject *)given_descrs[0])->coerce,
1339-
NULL);
1335+
((PyArray_StringDTypeObject *)given_descrs[0])->coerce);
13401336

13411337
if (out_descr == NULL) {
13421338
return (NPY_CASTING)-1;
@@ -1478,7 +1474,7 @@ static NPY_CASTING expandtabs_resolve_descriptors(
14781474

14791475
if (given_descrs[2] == NULL) {
14801476
out_descr = (PyArray_Descr *)new_stringdtype_instance(
1481-
idescr->na_object, idescr->coerce, NULL);
1477+
idescr->na_object, idescr->coerce);
14821478
if (out_descr == NULL) {
14831479
return (NPY_CASTING)-1;
14841480
}

numpy/_core/tests/test_stringdtype.py

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1299,8 +1299,9 @@ def in_arena(self, a):
12991299

13001300
def test_setup(self):
13011301
is_short = self.is_short(self.a)
1302-
assert_array_equal(is_short, np.strings.str_len(self.a) <= 15)
1303-
assert_array_equal(self.in_arena(self.a), ~is_short)
1302+
length = np.strings.str_len(self.a)
1303+
assert_array_equal(is_short, (length > 0) & (length <= 15))
1304+
assert_array_equal(self.in_arena(self.a), [False, False, True, True])
13041305
assert_array_equal(self.is_on_heap(self.a), False)
13051306
assert_array_equal(self.is_missing(self.a), False)
13061307
view = self.get_view(self.a)
@@ -1329,6 +1330,9 @@ def test_copy(self):
13291330
c = self.a.copy()
13301331
assert_array_equal(self.get_flags(c), self.get_flags(self.a))
13311332
assert_array_equal(c, self.a)
1333+
offsets = self.get_view(c)['offset']
1334+
assert offsets[2] == 1
1335+
assert offsets[3] == 1 + len(self.s_medium) + self.sizeofstr // 2
13321336

13331337
def test_arena_use_with_setting(self):
13341338
c = np.zeros_like(self.a)
@@ -1363,11 +1367,15 @@ def test_arena_reuse_after_empty(self):
13631367

13641368
def test_arena_reuse_for_shorter(self):
13651369
c = self.a.copy()
1366-
in_arena = self.in_arena(self.a)
13671370
# A string slightly shorter than the shortest in the arena
13681371
# should be used for all strings in the arena.
13691372
c[:] = self.s_medium[:-1]
13701373
assert_array_equal(c, self.s_medium[:-1])
1374+
# first empty string in original was never initialized, so
1375+
# filling it in now leaves it initialized inside the arena.
1376+
# second string started as a short string so it can never live
1377+
# in the arena.
1378+
in_arena = np.array([True, False, True, True])
13711379
assert_array_equal(self.in_arena(c), in_arena)
13721380
# But when a short string is replaced, it will go on the heap.
13731381
assert_array_equal(self.is_short(c), False)
@@ -1387,6 +1395,9 @@ def test_arena_reuse_if_possible(self):
13871395
c[:] = self.s_medium + "±"
13881396
assert_array_equal(c, self.s_medium + "±")
13891397
in_arena_exp = np.strings.str_len(self.a) >= len(self.s_medium) + 1
1398+
# first entry started uninitialized and empty, so filling it leaves
1399+
# it in the arena
1400+
in_arena_exp[0] = True
13901401
assert not np.all(in_arena_exp == self.in_arena(self.a))
13911402
assert_array_equal(self.in_arena(c), in_arena_exp)
13921403
assert_array_equal(self.is_short(c), False)

0 commit comments

Comments
 (0)