Skip to content

Commit 1bb3bdf

Browse files
committed
refactor to avoid get_loop use in string to unicode cast
1 parent fb1b5ba commit 1bb3bdf

File tree

5 files changed

+52
-74
lines changed

5 files changed

+52
-74
lines changed

stringdtype/stringdtype/src/casts.c

Lines changed: 4 additions & 69 deletions
Original file line number | Diff line number | Diff line change
@@ -314,52 +314,10 @@ utf8_char_to_ucs4_code(unsigned char *c, size_t len, Py_UCS4 *code)
314314
}
315315
}
316316

317-
typedef struct s2u_auxdata {
318-
NpyAuxData base;
319-
char *na_name;
320-
size_t len;
321-
} s2u_auxdata;
322-
323-
static void
324-
s2u_auxdata_free(s2u_auxdata *s2u_auxdata)
325-
{
326-
PyMem_Free(s2u_auxdata->na_name);
327-
PyMem_Free(s2u_auxdata);
328-
}
329-
330-
static s2u_auxdata *
331-
get_s2u_auxdata(PyArray_Descr *from_dt)
332-
{
333-
s2u_auxdata *res = PyMem_Calloc(1, sizeof(s2u_auxdata));
334-
res->base.free = (void *)s2u_auxdata_free;
335-
336-
PyObject *na_pystr =
337-
PyObject_Str(((StringDTypeObject *)from_dt)->na_object);
338-
339-
if (na_pystr == NULL) {
340-
NPY_AUXDATA_FREE((NpyAuxData *)res);
341-
return NULL;
342-
}
343-
344-
Py_ssize_t size = 0;
345-
346-
const char *utf8_ptr = PyUnicode_AsUTF8AndSize(na_pystr, &size);
347-
348-
res->na_name = PyMem_Malloc((size_t)size);
349-
350-
memcpy(res->na_name, utf8_ptr, (size_t)size);
351-
352-
Py_DECREF(na_pystr);
353-
354-
res->len = (size_t)size;
355-
356-
return res;
357-
}
358-
359317
static int
360318
string_to_unicode(PyArrayMethod_Context *context, char *const data[],
361319
npy_intp const dimensions[], npy_intp const strides[],
362-
NpyAuxData *auxdata)
320+
NpyAuxData *NPY_UNUSED(auxdata))
363321
{
364322
StringDTypeObject *descr = (StringDTypeObject *)context->descriptors[0];
365323
int has_null = descr->na_object != NULL;
@@ -383,9 +341,8 @@ string_to_unicode(PyArrayMethod_Context *context, char *const data[],
383341
if (ss_isnull(s)) {
384342
if (has_null && !has_string_na) {
385343
// lossy but not much else we can do
386-
this_string =
387-
(unsigned char *)((s2u_auxdata *)auxdata)->na_name;
388-
n_bytes = ((s2u_auxdata *)auxdata)->len;
344+
this_string = (unsigned char *)descr->na_name.buf;
345+
n_bytes = descr->na_name.len;
389346
}
390347
else {
391348
this_string = (unsigned char *)(default_string.buf);
@@ -426,31 +383,9 @@ string_to_unicode(PyArrayMethod_Context *context, char *const data[],
426383
return 0;
427384
}
428385

429-
static int
430-
string_to_unicode_get_loop(PyArrayMethod_Context *context, int aligned,
431-
int NPY_UNUSED(move_references),
432-
const npy_intp *strides,
433-
PyArrayMethod_StridedLoop **out_loop,
434-
NpyAuxData **out_transferdata,
435-
NPY_ARRAYMETHOD_FLAGS *flags)
436-
{
437-
s2u_auxdata *s2u_auxdata = get_s2u_auxdata(context->descriptors[0]);
438-
439-
if (s2u_auxdata == NULL) {
440-
return -1;
441-
}
442-
*out_transferdata = (NpyAuxData *)s2u_auxdata;
443-
444-
*out_loop = (PyArrayMethod_StridedLoop *)string_to_unicode;
445-
446-
*flags = NPY_METH_NO_FLOATINGPOINT_ERRORS;
447-
448-
return 0;
449-
}
450-
451386
static PyType_Slot s2u_slots[] = {
452387
{NPY_METH_resolve_descriptors, &string_to_unicode_resolve_descriptors},
453-
{_NPY_METH_get_loop, &string_to_unicode_get_loop},
388+
{NPY_METH_strided_loop, &string_to_unicode},
454389
{0, NULL}};
455390

456391
static char *s2u_name = "cast_StringDType_to_Unicode";

stringdtype/stringdtype/src/dtype.c

Lines changed: 39 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -20,6 +20,7 @@ new_stringdtype_instance(PyObject *na_object, int coerce)
2020

2121
Py_XINCREF(na_object);
2222
((StringDTypeObject *)new)->na_object = na_object;
23+
ss na_name = NULL_STRING;
2324
int hasnull = na_object != NULL;
2425
int has_nan_na = 0;
2526
int has_string_na = 0;
@@ -30,9 +31,19 @@ new_stringdtype_instance(PyObject *na_object, int coerce)
3031
has_string_na = 1;
3132
Py_ssize_t size = 0;
3233
const char *buf = PyUnicode_AsUTF8AndSize(na_object, &size);
33-
default_string.len = size;
34-
// discards const, how to avoid?
35-
default_string.buf = (char *)buf;
34+
default_string = NULL_STRING;
35+
int res = ssnewlen(buf, (size_t)size, &default_string);
36+
if (res == -1) {
37+
PyErr_NoMemory();
38+
Py_DECREF(new);
39+
return NULL;
40+
}
41+
else if (res == -2) {
42+
// this should never happen
43+
assert(0);
44+
Py_DECREF(new);
45+
return NULL;
46+
}
3647
}
3748
else {
3849
// treat as nan-like if != comparison returns a object whose truth
@@ -53,10 +64,32 @@ new_stringdtype_instance(PyObject *na_object, int coerce)
5364
}
5465
Py_DECREF(eq);
5566
}
67+
PyObject *na_pystr = PyObject_Str(na_object);
68+
if (na_pystr == NULL) {
69+
Py_DECREF(new);
70+
return NULL;
71+
}
72+
73+
Py_ssize_t size = 0;
74+
const char *utf8_ptr = PyUnicode_AsUTF8AndSize(na_pystr, &size);
75+
int res = ssnewlen(utf8_ptr, (size_t)size, &na_name);
76+
if (res == -1) {
77+
PyErr_NoMemory();
78+
Py_DECREF(new);
79+
return NULL;
80+
}
81+
else if (res == -2) {
82+
// this should never happen
83+
assert(0);
84+
Py_DECREF(new);
85+
return NULL;
86+
}
87+
Py_DECREF(na_pystr);
5688
}
5789
((StringDTypeObject *)new)->has_nan_na = has_nan_na;
5890
((StringDTypeObject *)new)->has_string_na = has_string_na;
5991
((StringDTypeObject *)new)->default_string = default_string;
92+
((StringDTypeObject *)new)->na_name = na_name;
6093
((StringDTypeObject *)new)->coerce = coerce;
6194

6295
PyArray_Descr *base = (PyArray_Descr *)new;
@@ -538,6 +571,9 @@ stringdtype_new(PyTypeObject *NPY_UNUSED(cls), PyObject *args, PyObject *kwds)
538571
static void
539572
stringdtype_dealloc(StringDTypeObject *self)
540573
{
574+
Py_XDECREF(self->na_object);
575+
ssfree(&self->default_string);
576+
ssfree(&self->na_name);
541577
PyArrayDescr_Type.tp_dealloc((PyObject *)self);
542578
}
543579

stringdtype/stringdtype/src/dtype.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -26,6 +26,7 @@ typedef struct {
2626
int has_nan_na;
2727
int has_string_na;
2828
ss default_string;
29+
ss na_name;
2930
} StringDTypeObject;
3031

3132
typedef struct {

stringdtype/stringdtype/src/static_string.c

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -2,9 +2,11 @@
22

33
#include "static_string.h"
44

5-
// defined this way so it has an in-memory representation that is distinct
6-
// from NULL, allowing us to use NULL to represent a sentinel value
5+
// defined this way so EMPTY_STRING has an in-memory representation that is
6+
// distinct from a zero-filled struct, allowing us to use a NULL_STRING
7+
// to represent a sentinel value
78
const ss EMPTY_STRING = {0, "\0"};
9+
const ss NULL_STRING = {0, NULL};
810

911
int
1012
ssnewlen(const char *init, size_t len, ss *to_init)

stringdtype/stringdtype/src/static_string.h

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,11 @@ typedef struct ss {
99
char *buf;
1010
} ss;
1111

12+
// represents the empty string and can be passed safely to ss API functions
1213
extern const ss EMPTY_STRING;
14+
// represents a sentinel value, *CANNOT* be passed safely to ss API functions,
15+
// use ss_isnull to check if a value is null before working with it.
16+
extern const ss NULL_STRING;
1317

1418
// Allocates a new buffer for *to_init*, filling with the copied contents of
1519
// *init* and sets *to_init->len* to *len*. Returns -1 if malloc fails and -2

0 commit comments

Comments
 (0)