Skip to content

Commit 9011bf9

Browse files
committed
refactor so npy_packed_static_string is an opaque struct
1 parent f9682a5 commit 9011bf9

File tree

6 files changed

+132
-120
lines changed

6 files changed

+132
-120
lines changed

stringdtype/stringdtype/src/casts.c

Lines changed: 12 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -90,11 +90,10 @@ string_to_string(PyArrayMethod_Context *context, char *const data[],
9090
if (in != out) {
9191
if (in_hasnull && !out_hasnull && NpyString_isnull(s)) {
9292
// lossy but this is an unsafe cast so this is OK
93-
NpyString_free(os, odescr->allocator);
94-
if (NpyString_newsize(in_na_name->buf, in_na_name->size, os,
95-
odescr->allocator) < 0) {
93+
if (NpyString_pack(odescr->allocator, os, in_na_name->buf,
94+
in_na_name->size) < 0) {
9695
gil_error(PyExc_MemoryError,
97-
"Failed to allocate string in string to string "
96+
"Failed to pack string in string to string "
9897
"cast.");
9998
goto fail;
10099
}
@@ -570,11 +569,6 @@ bool_to_string(PyArrayMethod_Context *context, char *const data[],
570569

571570
while (N--) {
572571
npy_packed_static_string *out_pss = (npy_packed_static_string *)out;
573-
if (NpyString_free(out_pss, allocator) < 0) {
574-
gil_error(PyExc_MemoryError,
575-
"Failed to deallocate string in bool to string cast");
576-
goto fail;
577-
}
578572
char *ret_val = NULL;
579573
size_t size = 0;
580574
if ((npy_bool)(*in) == 1) {
@@ -590,11 +584,9 @@ bool_to_string(PyArrayMethod_Context *context, char *const data[],
590584
"invalid value encountered in bool to string cast");
591585
goto fail;
592586
}
593-
if (NpyString_newsize(ret_val, size, out_pss, allocator) < 0) {
594-
// execution should never get here because this will be a small
595-
// string on all platforms
587+
if (NpyString_pack(allocator, out_pss, ret_val, size) < 0) {
596588
gil_error(PyExc_MemoryError,
597-
"Failed to allocate string in bool to string cast");
589+
"Failed to pack string in bool to string cast");
598590
goto fail;
599591
}
600592
in += in_stride;
@@ -706,19 +698,14 @@ pyobj_to_string(PyObject *obj, char *out, npy_string_allocator *allocator)
706698
Py_ssize_t length;
707699
const char *cstr_val = PyUnicode_AsUTF8AndSize(pystr_val, &length);
708700
if (cstr_val == NULL) {
701+
Py_DECREF(pystr_val);
709702
return -1;
710703
}
711704
npy_packed_static_string *out_ss = (npy_packed_static_string *)out;
712-
if (NpyString_free(out_ss, allocator) < 0) {
705+
if (NpyString_pack(allocator, out_ss, cstr_val, length) < 0) {
713706
gil_error(PyExc_MemoryError,
714-
"Failed to deallocate string when converting from python "
707+
"Failed to pack string while converting from python "
715708
"string");
716-
return -1;
717-
}
718-
if (NpyString_newsize(cstr_val, length, out_ss, allocator) < 0) {
719-
PyErr_SetString(PyExc_MemoryError,
720-
"Failed to allocate numpy string when converting from "
721-
"python string.");
722709
Py_DECREF(pystr_val);
723710
return -1;
724711
}
@@ -1260,15 +1247,13 @@ datetime_to_string(PyArrayMethod_Context *context, char *const data[],
12601247

12611248
while (N--) {
12621249
npy_packed_static_string *out_pss = (npy_packed_static_string *)out;
1263-
if (NpyString_free(out_pss, allocator) < 0) {
1250+
if (*in == NPY_DATETIME_NAT &&
1251+
NpyString_pack_null(allocator, out_pss) < 0) {
12641252
gil_error(
12651253
PyExc_MemoryError,
12661254
"Failed to deallocate string in datetime to string cast");
12671255
goto fail;
12681256
}
1269-
if (*in == NPY_DATETIME_NAT) {
1270-
*out_pss = *NPY_NULL_STRING;
1271-
}
12721257
else {
12731258
if (NpyDatetime_ConvertDatetime64ToDatetimeStruct(dt_meta, *in,
12741259
&dts) < 0) {
@@ -1284,8 +1269,8 @@ datetime_to_string(PyArrayMethod_Context *context, char *const data[],
12841269
goto fail;
12851270
}
12861271

1287-
if (NpyString_newsize(datetime_buf, strlen(datetime_buf), out_pss,
1288-
allocator) < 0) {
1272+
if (NpyString_pack(allocator, out_pss, datetime_buf,
1273+
strlen(datetime_buf)) < 0) {
12891274
PyErr_SetString(PyExc_MemoryError,
12901275
"Failed to allocate string when converting "
12911276
"from a datetime.");

stringdtype/stringdtype/src/dtype.c

Lines changed: 57 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -11,21 +11,18 @@ PyTypeObject *StringScalar_Type = NULL;
1111
PyObject *
1212
new_stringdtype_instance(PyObject *na_object, int coerce)
1313
{
14-
npy_string_allocator *allocator = NULL;
15-
PyThread_type_lock *allocator_lock = NULL;
16-
npy_packed_static_string packed_na_name = *NPY_EMPTY_STRING;
17-
npy_packed_static_string packed_default_string = *NPY_EMPTY_STRING;
18-
1914
PyObject *new =
2015
PyArrayDescr_Type.tp_new((PyTypeObject *)&StringDType, NULL, NULL);
2116

2217
if (new == NULL) {
2318
return NULL;
2419
}
2520

21+
npy_string_allocator *allocator = NULL;
22+
PyThread_type_lock *allocator_lock = NULL;
23+
2624
allocator = NpyString_new_allocator(PyMem_RawMalloc, PyMem_RawFree,
2725
PyMem_RawRealloc);
28-
2926
if (allocator == NULL) {
3027
PyErr_SetString(PyExc_MemoryError,
3128
"Failed to create string allocator");
@@ -38,6 +35,13 @@ new_stringdtype_instance(PyObject *na_object, int coerce)
3835
goto fail;
3936
}
4037

38+
char packed_na_name[SIZEOF_NPY_PACKED_STATIC_STRING] = {0};
39+
NpyString_pack(allocator, (npy_packed_static_string *)&packed_na_name, "",
40+
0);
41+
char packed_default_string[SIZEOF_NPY_PACKED_STATIC_STRING] = {0};
42+
NpyString_pack(allocator,
43+
(npy_packed_static_string *)&packed_default_string, "", 0);
44+
4145
Py_XINCREF(na_object);
4246
((StringDTypeObject *)new)->na_object = na_object;
4347
int hasnull = na_object != NULL;
@@ -49,8 +53,10 @@ new_stringdtype_instance(PyObject *na_object, int coerce)
4953
has_string_na = 1;
5054
Py_ssize_t size = 0;
5155
const char *buf = PyUnicode_AsUTF8AndSize(na_object, &size);
52-
if (NpyString_newsize(buf, (size_t)size, &packed_default_string,
53-
allocator) < 0) {
56+
if (NpyString_newsize(
57+
buf, (size_t)size,
58+
(npy_packed_static_string *)&packed_default_string,
59+
allocator) < 0) {
5460
PyErr_SetString(PyExc_MemoryError,
5561
"Failed to allocate string while creating "
5662
"StringDType instance.");
@@ -86,7 +92,8 @@ new_stringdtype_instance(PyObject *na_object, int coerce)
8692
Py_DECREF(na_pystr);
8793
goto fail;
8894
}
89-
if (NpyString_newsize(utf8_ptr, (size_t)size, &packed_na_name,
95+
if (NpyString_newsize(utf8_ptr, (size_t)size,
96+
(npy_packed_static_string *)&packed_na_name,
9097
allocator) < 0) {
9198
PyErr_SetString(PyExc_MemoryError,
9299
"Failed to allocate string while creating "
@@ -101,16 +108,20 @@ new_stringdtype_instance(PyObject *na_object, int coerce)
101108

102109
snew->has_nan_na = has_nan_na;
103110
snew->has_string_na = has_string_na;
104-
snew->packed_default_string = packed_default_string;
105-
snew->packed_na_name = packed_na_name;
111+
memcpy(snew->packed_default_string, packed_default_string,
112+
SIZEOF_NPY_PACKED_STATIC_STRING);
113+
memcpy(snew->packed_na_name, packed_na_name,
114+
SIZEOF_NPY_PACKED_STATIC_STRING);
106115
snew->coerce = coerce;
107116
snew->allocator_lock = allocator_lock;
108117
snew->allocator = allocator;
109118
snew->array_owned = 0;
110119

111120
npy_static_string default_string = {0, NULL};
112-
if (NpyString_load(allocator, &snew->packed_default_string,
113-
&default_string) == -1) {
121+
if (NpyString_load(
122+
allocator,
123+
(npy_packed_static_string *)&snew->packed_default_string,
124+
&default_string) == -1) {
114125
PyErr_SetString(PyExc_MemoryError,
115126
"Failed to load packed string while "
116127
"creating StringDType instance.");
@@ -119,7 +130,9 @@ new_stringdtype_instance(PyObject *na_object, int coerce)
119130
}
120131

121132
npy_static_string na_name = {0, NULL};
122-
if (NpyString_load(allocator, &snew->packed_na_name, &na_name) == -1) {
133+
if (NpyString_load(allocator,
134+
(npy_packed_static_string *)&snew->packed_na_name,
135+
&na_name) == -1) {
123136
PyErr_SetString(PyExc_MemoryError,
124137
"Failed to load packed string while "
125138
"creating StringDType instance.");
@@ -132,8 +145,8 @@ new_stringdtype_instance(PyObject *na_object, int coerce)
132145
snew->default_string = default_string;
133146

134147
PyArray_Descr *base = (PyArray_Descr *)new;
135-
base->elsize = sizeof(npy_static_string);
136-
base->alignment = _Alignof(npy_static_string);
148+
base->elsize = SIZEOF_NPY_PACKED_STATIC_STRING;
149+
base->alignment = ALIGNOF_NPY_PACKED_STATIC_STRING;
137150
base->flags |= NPY_NEEDS_INIT;
138151
base->flags |= NPY_LIST_PICKLE;
139152
base->flags |= NPY_ITEM_REFCOUNT;
@@ -150,8 +163,9 @@ new_stringdtype_instance(PyObject *na_object, int coerce)
150163
// this only makes sense if the allocator isn't attached to new yet
151164
Py_DECREF(new);
152165
if (allocator != NULL) {
153-
NpyString_free(&packed_na_name, allocator);
154-
NpyString_free(&packed_default_string, allocator);
166+
NpyString_free((npy_packed_static_string *)&packed_na_name, allocator);
167+
NpyString_free((npy_packed_static_string *)&packed_default_string,
168+
allocator);
155169
NpyString_free_allocator(allocator);
156170
}
157171
if (allocator_lock != NULL) {
@@ -289,21 +303,18 @@ stringdtype_setitem(StringDTypeObject *descr, PyObject *obj, char **dataptr)
289303

290304
NPY_STRING_ACQUIRE_ALLOCATOR(descr);
291305

292-
// free if dataptr holds preexisting string data,
293-
// npy_string_free does a NULL check and checks for small strings
294-
if (NpyString_free(sdata, descr->allocator) < 0) {
295-
PyErr_SetString(PyExc_MemoryError,
296-
"String deallocation failed in StringDType setitem");
297-
goto fail;
298-
}
299-
300306
// borrow reference
301307
PyObject *na_object = descr->na_object;
302308

303309
// setting NA *must* check pointer equality since NA types might not
304310
// allow equality
305311
if (na_object != NULL && obj == na_object) {
306-
*sdata = *NPY_NULL_STRING;
312+
if (NpyString_pack_null(descr->allocator, sdata) < 0) {
313+
PyErr_SetString(PyExc_MemoryError,
314+
"Failed to pack null string during StringDType "
315+
"setitem");
316+
goto fail;
317+
}
307318
}
308319
else {
309320
PyObject *val_obj = get_value(obj, descr->coerce);
@@ -319,9 +330,9 @@ stringdtype_setitem(StringDTypeObject *descr, PyObject *obj, char **dataptr)
319330
goto fail;
320331
}
321332

322-
if (NpyString_newsize(val, length, sdata, descr->allocator) < 0) {
333+
if (NpyString_pack(descr->allocator, sdata, val, length) < 0) {
323334
PyErr_SetString(PyExc_MemoryError,
324-
"Failed to allocate string during StringDType "
335+
"Failed to pack string during StringDType "
325336
"setitem");
326337
Py_DECREF(val_obj);
327338
goto fail;
@@ -482,12 +493,13 @@ _compare(void *a, void *b, StringDTypeObject *descr_a,
482493
// PyArray_ArgFunc
483494
// The max element is the one with the highest unicode code point.
484495
int
485-
argmax(void *data, npy_intp n, npy_intp *max_ind, void *arr)
496+
argmax(char *data, npy_intp n, npy_intp *max_ind, void *arr)
486497
{
487-
npy_packed_static_string *dptr = (npy_packed_static_string *)data;
498+
PyArray_Descr *descr = PyArray_DESCR(arr);
499+
npy_intp elsize = descr->elsize;
488500
*max_ind = 0;
489501
for (int i = 1; i < n; i++) {
490-
if (compare(&dptr[i], &dptr[*max_ind], arr) > 0) {
502+
if (compare(data + i * elsize, data + (*max_ind) * elsize, arr) > 0) {
491503
*max_ind = i;
492504
}
493505
}
@@ -497,12 +509,13 @@ argmax(void *data, npy_intp n, npy_intp *max_ind, void *arr)
497509
// PyArray_ArgFunc
498510
// The min element is the one with the lowest unicode code point.
499511
int
500-
argmin(void *data, npy_intp n, npy_intp *min_ind, void *arr)
512+
argmin(char *data, npy_intp n, npy_intp *min_ind, void *arr)
501513
{
502-
npy_packed_static_string *dptr = (npy_packed_static_string *)data;
514+
PyArray_Descr *descr = PyArray_DESCR(arr);
515+
npy_intp elsize = descr->elsize;
503516
*min_ind = 0;
504517
for (int i = 1; i < n; i++) {
505-
if (compare(&dptr[i], &dptr[*min_ind], arr) < 0) {
518+
if (compare(data + i * elsize, data + (*min_ind) * elsize, arr) < 0) {
506519
*min_ind = i;
507520
}
508521
}
@@ -525,13 +538,10 @@ stringdtype_clear_loop(void *NPY_UNUSED(traverse_context),
525538
NPY_STRING_ACQUIRE_ALLOCATOR(sdescr);
526539
while (size--) {
527540
npy_packed_static_string *sdata = (npy_packed_static_string *)data;
528-
if (data != NULL) {
529-
if (NpyString_free(sdata, sdescr->allocator) < 0) {
530-
gil_error(PyExc_MemoryError,
531-
"String deallocation failed in clear loop");
532-
goto fail;
533-
}
534-
memset(data, 0, sizeof(npy_packed_static_string));
541+
if (data != NULL && NpyString_free(sdata, sdescr->allocator) < 0) {
542+
gil_error(PyExc_MemoryError,
543+
"String deallocation failed in clear loop");
544+
goto fail;
535545
}
536546
data += stride;
537547
}
@@ -703,8 +713,11 @@ stringdtype_dealloc(StringDTypeObject *self)
703713
if (self->allocator != NULL) {
704714
// can we assume the destructor for an instance will only get called
705715
// inside of one C thread?
706-
NpyString_free(&self->packed_default_string, self->allocator);
707-
NpyString_free(&self->packed_na_name, self->allocator);
716+
NpyString_free(
717+
(npy_packed_static_string *)&self->packed_default_string,
718+
self->allocator);
719+
NpyString_free((npy_packed_static_string *)&self->packed_na_name,
720+
self->allocator);
708721
NpyString_free_allocator(self->allocator);
709722
PyThread_free_lock(self->allocator_lock);
710723
}

stringdtype/stringdtype/src/dtype.h

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,14 @@
5555
NPY_STRING_RELEASE_ALLOCATOR(descr3); \
5656
}
5757

58+
// not publicly exposed by the static string library so we need to define
59+
// this here so we can define `elsize` and `alignment` on the descr
60+
//
61+
// if the layout of npy_packed_static_string ever changes in the future
62+
// this may need to be updated.
63+
#define SIZEOF_NPY_PACKED_STATIC_STRING 2 * sizeof(size_t)
64+
#define ALIGNOF_NPY_PACKED_STATIC_STRING _Alignof(size_t)
65+
5866
typedef struct {
5967
PyArray_Descr base;
6068
PyObject *na_object;
@@ -63,9 +71,9 @@ typedef struct {
6371
int has_string_na;
6472
int array_owned;
6573
npy_static_string default_string;
66-
npy_packed_static_string packed_default_string;
74+
char packed_default_string[SIZEOF_NPY_PACKED_STATIC_STRING];
6775
npy_static_string na_name;
68-
npy_packed_static_string packed_na_name;
76+
char packed_na_name[SIZEOF_NPY_PACKED_STATIC_STRING];
6977
PyThread_type_lock *allocator_lock;
7078
// the allocator should only be directly accessed after
7179
// acquiring the allocator_lock and the lock should

0 commit comments

Comments
 (0)