Skip to content

Commit f86e5ce

Browse files
committed
refactor to use a packing/unpacking API
1 parent 97f3da5 commit f86e5ce

File tree

7 files changed

+522
-471
lines changed

7 files changed

+522
-471
lines changed

stringdtype/stringdtype/src/casts.c

Lines changed: 218 additions & 208 deletions
Large diffs are not rendered by default.

stringdtype/stringdtype/src/dtype.c

Lines changed: 46 additions & 34 deletions
Original file line number | Diff line number | Diff line change
@@ -20,8 +20,8 @@ new_stringdtype_instance(PyObject *na_object, int coerce)
2020

2121
Py_XINCREF(na_object);
2222
((StringDTypeObject *)new)->na_object = na_object;
23-
npy_static_string na_name = NPY_EMPTY_STRING;
24-
npy_static_string default_string = NPY_EMPTY_STRING;
23+
npy_packed_static_string packed_na_name = *NPY_EMPTY_STRING;
24+
npy_packed_static_string packed_default_string = *NPY_EMPTY_STRING;
2525
int hasnull = na_object != NULL;
2626
int has_nan_na = 0;
2727
int has_string_na = 0;
@@ -31,8 +31,8 @@ new_stringdtype_instance(PyObject *na_object, int coerce)
3131
has_string_na = 1;
3232
Py_ssize_t size = 0;
3333
const char *buf = PyUnicode_AsUTF8AndSize(na_object, &size);
34-
default_string = NPY_EMPTY_STRING;
35-
int res = npy_string_newsize(buf, (size_t)size, &default_string);
34+
int res = npy_string_newsize(buf, (size_t)size,
35+
&packed_default_string);
3636
if (res == -1) {
3737
PyErr_NoMemory();
3838
Py_DECREF(new);
@@ -72,7 +72,8 @@ new_stringdtype_instance(PyObject *na_object, int coerce)
7272

7373
Py_ssize_t size = 0;
7474
const char *utf8_ptr = PyUnicode_AsUTF8AndSize(na_pystr, &size);
75-
int res = npy_string_newsize(utf8_ptr, (size_t)size, &na_name);
75+
// discard const to initialize buffer
76+
int res = npy_string_newsize(utf8_ptr, (size_t)size, &packed_na_name);
7677
if (res == -1) {
7778
PyErr_NoMemory();
7879
Py_DECREF(new);
@@ -86,11 +87,23 @@ new_stringdtype_instance(PyObject *na_object, int coerce)
8687
}
8788
Py_DECREF(na_pystr);
8889
}
89-
((StringDTypeObject *)new)->has_nan_na = has_nan_na;
90-
((StringDTypeObject *)new)->has_string_na = has_string_na;
91-
((StringDTypeObject *)new)->default_string = default_string;
92-
((StringDTypeObject *)new)->na_name = na_name;
93-
((StringDTypeObject *)new)->coerce = coerce;
90+
91+
StringDTypeObject *snew = (StringDTypeObject *)new;
92+
93+
snew->has_nan_na = has_nan_na;
94+
snew->has_string_na = has_string_na;
95+
snew->packed_default_string = packed_default_string;
96+
snew->packed_na_name = packed_na_name;
97+
snew->coerce = coerce;
98+
99+
npy_static_string default_string = {0, NULL};
100+
npy_load_string(&snew->packed_default_string, &default_string);
101+
102+
npy_static_string na_name = {0, NULL};
103+
npy_load_string(&snew->packed_na_name, &na_name);
104+
105+
snew->na_name = na_name;
106+
snew->default_string = default_string;
94107

95108
PyArray_Descr *base = (PyArray_Descr *)new;
96109
base->elsize = sizeof(npy_static_string);
@@ -194,10 +207,10 @@ string_discover_descriptor_from_pyobject(PyTypeObject *NPY_UNUSED(cls),
194207
int
195208
stringdtype_setitem(StringDTypeObject *descr, PyObject *obj, char **dataptr)
196209
{
197-
npy_static_string *sdata = (npy_static_string *)dataptr;
210+
npy_packed_static_string *sdata = (npy_packed_static_string *)dataptr;
198211

199212
// free if dataptr holds preexisting string data,
200-
// npy_string_free does a NULL check
213+
// npy_string_free does a NULL check and checks for small strings
201214
npy_string_free(sdata);
202215

203216
// borrow reference
@@ -206,7 +219,7 @@ stringdtype_setitem(StringDTypeObject *descr, PyObject *obj, char **dataptr)
206219
// setting NA *must* check pointer equality since NA types might not
207220
// allow equality
208221
if (na_object != NULL && obj == na_object) {
209-
*sdata = NPY_NULL_STRING;
222+
*sdata = *NPY_NULL_STRING;
210223
}
211224
else {
212225
PyObject *val_obj = get_value(obj, descr->coerce);
@@ -244,10 +257,11 @@ static PyObject *
244257
stringdtype_getitem(StringDTypeObject *descr, char **dataptr)
245258
{
246259
PyObject *val_obj = NULL;
247-
npy_static_string *sdata = (npy_static_string *)dataptr;
260+
npy_packed_static_string *psdata = (npy_packed_static_string *)dataptr;
261+
npy_static_string sdata = {0, NULL};
248262
int hasnull = descr->na_object != NULL;
249263

250-
if (npy_string_isnull(sdata)) {
264+
if (npy_load_string(psdata, &sdata)) {
251265
if (hasnull) {
252266
PyObject *na_object = descr->na_object;
253267
Py_INCREF(na_object);
@@ -258,9 +272,7 @@ stringdtype_getitem(StringDTypeObject *descr, char **dataptr)
258272
}
259273
}
260274
else {
261-
const char *data = npy_string_buf(sdata);
262-
size_t size = npy_string_size(sdata);
263-
val_obj = PyUnicode_FromStringAndSize(data, size);
275+
val_obj = PyUnicode_FromStringAndSize(sdata.buf, sdata.size);
264276
if (val_obj == NULL) {
265277
return NULL;
266278
}
@@ -285,7 +297,7 @@ stringdtype_getitem(StringDTypeObject *descr, char **dataptr)
285297
npy_bool
286298
nonzero(void *data, void *NPY_UNUSED(arr))
287299
{
288-
return npy_string_size((npy_static_string *)data) != 0;
300+
return npy_string_size((npy_packed_static_string *)data) != 0;
289301
}
290302

291303
// Implementation of PyArray_CompareFunc.
@@ -309,11 +321,13 @@ _compare(void *a, void *b, StringDTypeObject *descr)
309321
return 0;
310322
}
311323
}
312-
const npy_static_string *default_string = &descr->default_string;
313-
const npy_static_string *ss_a = (npy_static_string *)a;
314-
const npy_static_string *ss_b = (npy_static_string *)b;
315-
int a_is_null = npy_string_isnull(ss_a);
316-
int b_is_null = npy_string_isnull(ss_b);
324+
npy_static_string *default_string = &descr->default_string;
325+
const npy_packed_static_string *ps_a = (npy_packed_static_string *)a;
326+
npy_static_string s_a = {0, NULL};
327+
int a_is_null = npy_load_string(ps_a, &s_a);
328+
const npy_packed_static_string *ps_b = (npy_packed_static_string *)b;
329+
npy_static_string s_b = {0, NULL};
330+
int b_is_null = npy_load_string(ps_b, &s_b);
317331
if (NPY_UNLIKELY(a_is_null || b_is_null)) {
318332
if (hasnull && !has_string_na) {
319333
if (has_nan_na) {
@@ -334,22 +348,22 @@ _compare(void *a, void *b, StringDTypeObject *descr)
334348
}
335349
else {
336350
if (a_is_null) {
337-
ss_a = default_string;
351+
s_a = *default_string;
338352
}
339353
if (b_is_null) {
340-
ss_b = default_string;
354+
s_b = *default_string;
341355
}
342356
}
343357
}
344-
return npy_string_cmp(ss_a, ss_b);
358+
return npy_string_cmp(&s_a, &s_b);
345359
}
346360

347361
// PyArray_ArgFunc
348362
// The max element is the one with the highest unicode code point.
349363
int
350364
argmax(void *data, npy_intp n, npy_intp *max_ind, void *arr)
351365
{
352-
npy_static_string *dptr = (npy_static_string *)data;
366+
npy_packed_static_string *dptr = (npy_packed_static_string *)data;
353367
*max_ind = 0;
354368
for (int i = 1; i < n; i++) {
355369
if (compare(&dptr[i], &dptr[*max_ind], arr) > 0) {
@@ -364,7 +378,7 @@ argmax(void *data, npy_intp n, npy_intp *max_ind, void *arr)
364378
int
365379
argmin(void *data, npy_intp n, npy_intp *min_ind, void *arr)
366380
{
367-
npy_static_string *dptr = (npy_static_string *)data;
381+
npy_packed_static_string *dptr = (npy_packed_static_string *)data;
368382
*min_ind = 0;
369383
for (int i = 1; i < n; i++) {
370384
if (compare(&dptr[i], &dptr[*min_ind], arr) < 0) {
@@ -389,8 +403,8 @@ stringdtype_clear_loop(void *NPY_UNUSED(traverse_context),
389403
{
390404
while (size--) {
391405
if (data != NULL) {
392-
npy_string_free((npy_static_string *)data);
393-
memset(data, 0, sizeof(npy_static_string));
406+
npy_string_free((npy_packed_static_string *)data);
407+
memset(data, 0, sizeof(npy_packed_static_string));
394408
}
395409
data += stride;
396410
}
@@ -419,7 +433,7 @@ stringdtype_fill_zero_loop(void *NPY_UNUSED(traverse_context),
419433
NpyAuxData *NPY_UNUSED(auxdata))
420434
{
421435
while (size--) {
422-
*(npy_static_string *)(data) = NPY_EMPTY_STRING;
436+
*(npy_packed_static_string *)(data) = *NPY_EMPTY_STRING;
423437
data += stride;
424438
}
425439
return 0;
@@ -568,8 +582,6 @@ static void
568582
stringdtype_dealloc(StringDTypeObject *self)
569583
{
570584
Py_XDECREF(self->na_object);
571-
npy_string_free(&self->default_string);
572-
npy_string_free(&self->na_name);
573585
PyArrayDescr_Type.tp_dealloc((PyObject *)self);
574586
}
575587

stringdtype/stringdtype/src/dtype.h

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -26,7 +26,9 @@ typedef struct {
2626
int has_nan_na;
2727
int has_string_na;
2828
npy_static_string default_string;
29+
npy_packed_static_string packed_default_string;
2930
npy_static_string na_name;
31+
npy_packed_static_string packed_na_name;
3032
} StringDTypeObject;
3133

3234
typedef struct {

stringdtype/stringdtype/src/main.c

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -58,7 +58,7 @@ _memory_usage(PyObject *NPY_UNUSED(self), PyObject *obj)
5858
npy_intp count = *innersizeptr;
5959

6060
while (count--) {
61-
size_t size = npy_string_size(((npy_static_string *)in));
61+
size_t size = npy_string_size(((npy_packed_static_string *)in));
6262
if (size > NPY_SHORT_STRING_MAX_SIZE) {
6363
memory_usage += size;
6464
}
@@ -77,7 +77,7 @@ _memory_usage(PyObject *NPY_UNUSED(self), PyObject *obj)
7777
static PyMethodDef string_methods[] = {
7878
{"_memory_usage", _memory_usage, METH_O,
7979
"get memory usage for an array"},
80-
{NULL},
80+
{NULL, NULL, 0, NULL},
8181
};
8282

8383
static struct PyModuleDef moduledef = {

0 commit comments

Comments
 (0)