Skip to content

Commit 73ae9f9

Browse files
committed
replace ss with npy_static_string
1 parent 1bb3bdf commit 73ae9f9

File tree

8 files changed

+271
-264
lines changed

8 files changed

+271
-264
lines changed

stringdtype/stringdtype/src/casts.c

Lines changed: 42 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -69,16 +69,16 @@ string_to_string(PyArrayMethod_Context *NPY_UNUSED(context),
6969
npy_intp in_stride = strides[0];
7070
npy_intp out_stride = strides[1];
7171

72-
const ss *s = NULL;
73-
ss *os = NULL;
72+
const npy_static_string *s = NULL;
73+
npy_static_string *os = NULL;
7474

7575
while (N--) {
76-
s = (ss *)in;
77-
os = (ss *)out;
76+
s = (npy_static_string *)in;
77+
os = (npy_static_string *)out;
7878
if (in != out) {
79-
ssfree(os);
80-
if (ssdup(s, os) < 0) {
81-
gil_error(PyExc_MemoryError, "ssdup failed");
79+
npy_string_free(os);
80+
if (npy_string_dup(s, os) < 0) {
81+
gil_error(PyExc_MemoryError, "npy_string_dup failed");
8282
return -1;
8383
}
8484
}
@@ -207,10 +207,10 @@ unicode_to_string(PyArrayMethod_Context *context, char *const data[],
207207
gil_error(PyExc_TypeError, "Invalid unicode code point found");
208208
return -1;
209209
}
210-
ss *out_ss = (ss *)out;
211-
ssfree(out_ss);
212-
if (ssnewemptylen(out_num_bytes, out_ss) < 0) {
213-
gil_error(PyExc_MemoryError, "ssnewemptylen failed");
210+
npy_static_string *out_ss = (npy_static_string *)out;
211+
npy_string_free(out_ss);
212+
if (npy_string_newemptylen(out_num_bytes, out_ss) < 0) {
213+
gil_error(PyExc_MemoryError, "npy_string_newemptylen failed");
214214
return -1;
215215
}
216216
char *out_buf = out_ss->buf;
@@ -322,7 +322,7 @@ string_to_unicode(PyArrayMethod_Context *context, char *const data[],
322322
StringDTypeObject *descr = (StringDTypeObject *)context->descriptors[0];
323323
int has_null = descr->na_object != NULL;
324324
int has_string_na = descr->has_string_na;
325-
ss default_string = descr->default_string;
325+
npy_static_string default_string = descr->default_string;
326326
npy_intp N = dimensions[0];
327327
char *in = data[0];
328328
Py_UCS4 *out = (Py_UCS4 *)data[1];
@@ -332,13 +332,13 @@ string_to_unicode(PyArrayMethod_Context *context, char *const data[],
332332
// max number of 4 byte UCS4 characters that can fit in the output
333333
long max_out_size = (context->descriptors[1]->elsize) / 4;
334334

335-
const ss *s = NULL;
335+
const npy_static_string *s = NULL;
336336

337337
while (N--) {
338-
s = (ss *)in;
338+
s = (npy_static_string *)in;
339339
unsigned char *this_string = NULL;
340340
size_t n_bytes;
341-
if (ss_isnull(s)) {
341+
if (npy_string_isnull(s)) {
342342
if (has_null && !has_string_na) {
343343
// lossy but not much else we can do
344344
this_string = (unsigned char *)descr->na_name.buf;
@@ -421,7 +421,7 @@ string_to_bool(PyArrayMethod_Context *context, char *const data[],
421421
StringDTypeObject *descr = (StringDTypeObject *)context->descriptors[0];
422422
int has_null = descr->na_object != NULL;
423423
int has_string_na = descr->has_string_na;
424-
ss default_string = descr->default_string;
424+
npy_static_string default_string = descr->default_string;
425425

426426
npy_intp N = dimensions[0];
427427
char *in = data[0];
@@ -430,11 +430,11 @@ string_to_bool(PyArrayMethod_Context *context, char *const data[],
430430
npy_intp in_stride = strides[0];
431431
npy_intp out_stride = strides[1];
432432

433-
const ss *s = NULL;
433+
const npy_static_string *s = NULL;
434434

435435
while (N--) {
436-
s = (ss *)in;
437-
if (ss_isnull(s)) {
436+
s = (npy_static_string *)in;
437+
if (npy_string_isnull(s)) {
438438
if (has_null && !has_string_na) {
439439
// numpy treats NaN as truthy, following python
440440
*out = (npy_bool)1;
@@ -479,17 +479,17 @@ bool_to_string(PyArrayMethod_Context *NPY_UNUSED(context), char *const data[],
479479
npy_intp out_stride = strides[1];
480480

481481
while (N--) {
482-
ss *out_ss = (ss *)out;
483-
ssfree(out_ss);
482+
npy_static_string *out_ss = (npy_static_string *)out;
483+
npy_string_free(out_ss);
484484
if ((npy_bool)(*in) == 1) {
485-
if (ssnewlen("True", 4, out_ss) < 0) {
486-
gil_error(PyExc_MemoryError, "ssnewlen failed");
485+
if (npy_string_newlen("True", 4, out_ss) < 0) {
486+
gil_error(PyExc_MemoryError, "npy_string_newlen failed");
487487
return -1;
488488
}
489489
}
490490
else if ((npy_bool)(*in) == 0) {
491-
if (ssnewlen("False", 5, out_ss) < 0) {
492-
gil_error(PyExc_MemoryError, "ssnewlen failed");
491+
if (npy_string_newlen("False", 5, out_ss) < 0) {
492+
gil_error(PyExc_MemoryError, "npy_string_newlen failed");
493493
return -1;
494494
}
495495
}
@@ -517,8 +517,8 @@ static char *b2s_name = "cast_Bool_to_StringDType";
517517
static PyObject *
518518
string_to_pylong(char *in, int hasnull)
519519
{
520-
const ss *s = (ss *)in;
521-
if (ss_isnull(s)) {
520+
const npy_static_string *s = (npy_static_string *)in;
521+
if (npy_string_isnull(s)) {
522522
if (hasnull) {
523523
PyErr_SetString(PyExc_ValueError,
524524
"Arrays with missing data cannot be converted to "
@@ -585,10 +585,10 @@ pyobj_to_string(PyObject *obj, char *out)
585585
if (cstr_val == NULL) {
586586
return -1;
587587
}
588-
ss *out_ss = (ss *)out;
589-
ssfree(out_ss);
590-
if (ssnewlen(cstr_val, length, out_ss) < 0) {
591-
PyErr_SetString(PyExc_MemoryError, "ssnewlen failed");
588+
npy_static_string *out_ss = (npy_static_string *)out;
589+
npy_string_free(out_ss);
590+
if (npy_string_newlen(cstr_val, length, out_ss) < 0) {
591+
PyErr_SetString(PyExc_MemoryError, "npy_string_newlen failed");
592592
Py_DECREF(pystr_val);
593593
return -1;
594594
}
@@ -769,8 +769,8 @@ STRING_INT_CASTS(ulonglong, uint, ulonglong, NPY_ULONGLONG, llu, npy_ulonglong,
769769
static PyObject *
770770
string_to_pyfloat(char *in, int hasnull)
771771
{
772-
const ss *s = (ss *)in;
773-
if (ss_isnull(s)) {
772+
const npy_static_string *s = (npy_static_string *)in;
773+
if (npy_string_isnull(s)) {
774774
if (hasnull) {
775775
PyErr_SetString(PyExc_ValueError,
776776
"Arrays with missing data cannot be converted to "
@@ -975,7 +975,7 @@ string_to_datetime(PyArrayMethod_Context *context, char *const data[],
975975
StringDTypeObject *descr = (StringDTypeObject *)context->descriptors[0];
976976
int has_null = descr->na_object != NULL;
977977
int has_string_na = descr->has_string_na;
978-
ss default_string = descr->default_string;
978+
npy_static_string default_string = descr->default_string;
979979

980980
npy_intp N = dimensions[0];
981981
char *in = data[0];
@@ -984,7 +984,7 @@ string_to_datetime(PyArrayMethod_Context *context, char *const data[],
984984
npy_intp in_stride = strides[0];
985985
npy_intp out_stride = strides[1] / sizeof(npy_datetime);
986986

987-
const ss *s = NULL;
987+
const npy_static_string *s = NULL;
988988
npy_datetimestruct dts;
989989
NPY_DATETIMEUNIT in_unit = -1;
990990
PyArray_DatetimeMetaData in_meta = {0, 1};
@@ -995,8 +995,8 @@ string_to_datetime(PyArrayMethod_Context *context, char *const data[],
995995
&(((PyArray_DatetimeDTypeMetaData *)dt_descr->c_metadata)->meta);
996996

997997
while (N--) {
998-
s = (ss *)in;
999-
if (ss_isnull(s)) {
998+
s = (npy_static_string *)in;
999+
if (npy_string_isnull(s)) {
10001000
if (has_null && !has_string_na) {
10011001
*out = NPY_DATETIME_NAT;
10021002
goto next_step;
@@ -1051,8 +1051,8 @@ datetime_to_string(PyArrayMethod_Context *context, char *const data[],
10511051
char datetime_buf[NPY_DATETIME_MAX_ISO8601_STRLEN];
10521052

10531053
while (N--) {
1054-
ss *out_ss = (ss *)out;
1055-
ssfree(out_ss);
1054+
npy_static_string *out_ss = (npy_static_string *)out;
1055+
npy_string_free(out_ss);
10561056
if (*in == NPY_DATETIME_NAT) {
10571057
/* convert to NA */
10581058
out_ss = NULL;
@@ -1072,8 +1072,9 @@ datetime_to_string(PyArrayMethod_Context *context, char *const data[],
10721072
return -1;
10731073
}
10741074

1075-
if (ssnewlen(datetime_buf, strlen(datetime_buf), out_ss) < 0) {
1076-
PyErr_SetString(PyExc_MemoryError, "ssnewlen failed");
1075+
if (npy_string_newlen(datetime_buf, strlen(datetime_buf), out_ss) <
1076+
0) {
1077+
PyErr_SetString(PyExc_MemoryError, "npy_string_newlen failed");
10771078
return -1;
10781079
}
10791080
}

stringdtype/stringdtype/src/dtype.c

Lines changed: 28 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -20,19 +20,19 @@ new_stringdtype_instance(PyObject *na_object, int coerce)
2020

2121
Py_XINCREF(na_object);
2222
((StringDTypeObject *)new)->na_object = na_object;
23-
ss na_name = NULL_STRING;
23+
npy_static_string na_name = NULL_STRING;
2424
int hasnull = na_object != NULL;
2525
int has_nan_na = 0;
2626
int has_string_na = 0;
27-
ss default_string = EMPTY_STRING;
27+
npy_static_string default_string = EMPTY_STRING;
2828
if (hasnull) {
2929
// first check for a string
3030
if (PyUnicode_Check(na_object)) {
3131
has_string_na = 1;
3232
Py_ssize_t size = 0;
3333
const char *buf = PyUnicode_AsUTF8AndSize(na_object, &size);
3434
default_string = NULL_STRING;
35-
int res = ssnewlen(buf, (size_t)size, &default_string);
35+
int res = npy_string_newlen(buf, (size_t)size, &default_string);
3636
if (res == -1) {
3737
PyErr_NoMemory();
3838
Py_DECREF(new);
@@ -72,7 +72,7 @@ new_stringdtype_instance(PyObject *na_object, int coerce)
7272

7373
Py_ssize_t size = 0;
7474
const char *utf8_ptr = PyUnicode_AsUTF8AndSize(na_pystr, &size);
75-
int res = ssnewlen(utf8_ptr, (size_t)size, &na_name);
75+
int res = npy_string_newlen(utf8_ptr, (size_t)size, &na_name);
7676
if (res == -1) {
7777
PyErr_NoMemory();
7878
Py_DECREF(new);
@@ -93,8 +93,8 @@ new_stringdtype_instance(PyObject *na_object, int coerce)
9393
((StringDTypeObject *)new)->coerce = coerce;
9494

9595
PyArray_Descr *base = (PyArray_Descr *)new;
96-
base->elsize = sizeof(ss);
97-
base->alignment = _Alignof(ss);
96+
base->elsize = sizeof(npy_static_string);
97+
base->alignment = _Alignof(npy_static_string);
9898
base->flags |= NPY_NEEDS_INIT;
9999
base->flags |= NPY_LIST_PICKLE;
100100
base->flags |= NPY_ITEM_REFCOUNT;
@@ -194,20 +194,20 @@ string_discover_descriptor_from_pyobject(PyTypeObject *NPY_UNUSED(cls),
194194
int
195195
stringdtype_setitem(StringDTypeObject *descr, PyObject *obj, char **dataptr)
196196
{
197-
ss *sdata = (ss *)dataptr;
197+
npy_static_string *sdata = (npy_static_string *)dataptr;
198198

199199
// free if dataptr holds preexisting string data,
200-
// ssfree does a NULL check
201-
ssfree(sdata);
200+
// npy_string_free does a NULL check
201+
npy_string_free(sdata);
202202

203203
// borrow reference
204204
PyObject *na_object = descr->na_object;
205205

206206
// setting NA *must* check pointer equality since NA types might not
207207
// allow equality
208208
if (na_object != NULL && obj == na_object) {
209-
// do nothing, ssfree already NULLed the struct ssdata points to
210-
// so it already contains a NA value
209+
// do nothing, npy_string_free already NULLed the struct ssdata points
210+
// to so it already contains a NA value
211211
}
212212
else {
213213
PyObject *val_obj = get_value(obj, descr->coerce);
@@ -224,7 +224,7 @@ stringdtype_setitem(StringDTypeObject *descr, PyObject *obj, char **dataptr)
224224
}
225225

226226
// copies contents of val into item_val->buf
227-
int res = ssnewlen(val, length, sdata);
227+
int res = npy_string_newlen(val, length, sdata);
228228

229229
if (res == -1) {
230230
PyErr_NoMemory();
@@ -246,10 +246,10 @@ static PyObject *
246246
stringdtype_getitem(StringDTypeObject *descr, char **dataptr)
247247
{
248248
PyObject *val_obj = NULL;
249-
ss *sdata = (ss *)dataptr;
249+
npy_static_string *sdata = (npy_static_string *)dataptr;
250250
int hasnull = descr->na_object != NULL;
251251

252-
if (ss_isnull(sdata)) {
252+
if (npy_string_isnull(sdata)) {
253253
if (hasnull) {
254254
PyObject *na_object = descr->na_object;
255255
Py_INCREF(na_object);
@@ -287,7 +287,7 @@ stringdtype_getitem(StringDTypeObject *descr, char **dataptr)
287287
npy_bool
288288
nonzero(void *data, void *NPY_UNUSED(arr))
289289
{
290-
return ((ss *)data)->len != 0;
290+
return ((npy_static_string *)data)->len != 0;
291291
}
292292

293293
// Implementation of PyArray_CompareFunc.
@@ -311,11 +311,11 @@ _compare(void *a, void *b, StringDTypeObject *descr)
311311
return 0;
312312
}
313313
}
314-
const ss *default_string = &descr->default_string;
315-
const ss *ss_a = (ss *)a;
316-
const ss *ss_b = (ss *)b;
317-
int a_is_null = ss_isnull(ss_a);
318-
int b_is_null = ss_isnull(ss_b);
314+
const npy_static_string *default_string = &descr->default_string;
315+
const npy_static_string *ss_a = (npy_static_string *)a;
316+
const npy_static_string *ss_b = (npy_static_string *)b;
317+
int a_is_null = npy_string_isnull(ss_a);
318+
int b_is_null = npy_string_isnull(ss_b);
319319
if (NPY_UNLIKELY(a_is_null || b_is_null)) {
320320
if (hasnull && !has_string_na) {
321321
if (has_nan_na) {
@@ -343,15 +343,15 @@ _compare(void *a, void *b, StringDTypeObject *descr)
343343
}
344344
}
345345
}
346-
return sscmp(ss_a, ss_b);
346+
return npy_string_cmp(ss_a, ss_b);
347347
}
348348

349349
// PyArray_ArgFunc
350350
// The max element is the one with the highest unicode code point.
351351
int
352352
argmax(void *data, npy_intp n, npy_intp *max_ind, void *arr)
353353
{
354-
ss *dptr = (ss *)data;
354+
npy_static_string *dptr = (npy_static_string *)data;
355355
*max_ind = 0;
356356
for (int i = 1; i < n; i++) {
357357
if (compare(&dptr[i], &dptr[*max_ind], arr) > 0) {
@@ -366,7 +366,7 @@ argmax(void *data, npy_intp n, npy_intp *max_ind, void *arr)
366366
int
367367
argmin(void *data, npy_intp n, npy_intp *min_ind, void *arr)
368368
{
369-
ss *dptr = (ss *)data;
369+
npy_static_string *dptr = (npy_static_string *)data;
370370
*min_ind = 0;
371371
for (int i = 1; i < n; i++) {
372372
if (compare(&dptr[i], &dptr[*min_ind], arr) < 0) {
@@ -391,8 +391,8 @@ stringdtype_clear_loop(void *NPY_UNUSED(traverse_context),
391391
{
392392
while (size--) {
393393
if (data != NULL) {
394-
ssfree((ss *)data);
395-
memset(data, 0, sizeof(ss));
394+
npy_string_free((npy_static_string *)data);
395+
memset(data, 0, sizeof(npy_static_string));
396396
}
397397
data += stride;
398398
}
@@ -421,7 +421,7 @@ stringdtype_fill_zero_loop(void *NPY_UNUSED(traverse_context),
421421
NpyAuxData *NPY_UNUSED(auxdata))
422422
{
423423
while (size--) {
424-
if (ssnewlen("", 0, (ss *)(data)) < 0) {
424+
if (npy_string_newlen("", 0, (npy_static_string *)(data)) < 0) {
425425
return -1;
426426
}
427427
data += stride;
@@ -572,8 +572,8 @@ static void
572572
stringdtype_dealloc(StringDTypeObject *self)
573573
{
574574
Py_XDECREF(self->na_object);
575-
ssfree(&self->default_string);
576-
ssfree(&self->na_name);
575+
npy_string_free(&self->default_string);
576+
npy_string_free(&self->na_name);
577577
PyArrayDescr_Type.tp_dealloc((PyObject *)self);
578578
}
579579

stringdtype/stringdtype/src/dtype.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,8 @@ typedef struct {
2525
int coerce;
2626
int has_nan_na;
2727
int has_string_na;
28-
ss default_string;
29-
ss na_name;
28+
npy_static_string default_string;
29+
npy_static_string na_name;
3030
} StringDTypeObject;
3131

3232
typedef struct {

stringdtype/stringdtype/src/main.c

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,8 +58,7 @@ _memory_usage(PyObject *NPY_UNUSED(self), PyObject *obj)
5858
npy_intp count = *innersizeptr;
5959

6060
while (count--) {
61-
// +1 byte for the null terminator
62-
memory_usage += ((ss *)in)->len + 1;
61+
memory_usage += ((npy_static_string *)in)->len;
6362
in += stride;
6463
}
6564

0 commit comments

Comments
 (0)