Skip to content

Commit 30a0b3a

Browse files
committed
Make StringDType an instance of a C struct that extends PyArray_DTypeMeta, so the NA object is stored in the C struct
1 parent 1f1e5b9 commit 30a0b3a

File tree

4 files changed

+74
-75
lines changed

4 files changed

+74
-75
lines changed

stringdtype/stringdtype/src/dtype.c

Lines changed: 30 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -131,8 +131,8 @@ get_value(PyObject *scalar, PyObject *na_object)
131131
static int
132132
stringdtype_setitem(StringDTypeObject *descr, PyObject *obj, char **dataptr)
133133
{
134-
PyObject *na_object =
135-
PyDict_GetItemString(Py_TYPE(descr)->tp_dict, "na_object");
134+
// borrow reference
135+
PyObject *na_object = ((StringDType_type *)Py_TYPE(descr))->na_object;
136136
PyObject *val_obj = get_value(obj, na_object);
137137

138138
if (val_obj == NULL) {
@@ -187,8 +187,7 @@ stringdtype_getitem(StringDTypeObject *descr, char **dataptr)
187187
ss *sdata = (ss *)dataptr;
188188

189189
if (ss_isnull(sdata)) {
190-
PyObject *na_object =
191-
PyDict_GetItemString(Py_TYPE(descr)->tp_dict, "na_object");
190+
PyObject *na_object = ((StringDType_type *)Py_TYPE(descr))->na_object;
192191
Py_INCREF(na_object);
193192
val_obj = na_object;
194193
}
@@ -381,8 +380,8 @@ static PyObject *
381380
stringdtype_repr(StringDTypeObject *self)
382381
{
383382
PyObject *ret = NULL;
384-
PyObject *na_object =
385-
PyDict_GetItemString(Py_TYPE(self)->tp_dict, "na_object");
383+
// borrow reference
384+
PyObject *na_object = ((StringDType_type *)Py_TYPE(self))->na_object;
386385

387386
if (na_object != NA_OBJ) {
388387
ret = PyUnicode_FromString("PandasStringDType()");
@@ -475,8 +474,8 @@ static PyMethodDef StringDType_methods[] = {
475474
* PyArray_DTypeMeta, which is a larger struct than a typical type.
476475
* (This should get a bit nicer eventually with Python >3.11.)
477476
*/
478-
PyArray_DTypeMeta StringDType = {
479-
{{
477+
StringDType_type StringDType = {
478+
{{{
480479
PyVarObject_HEAD_INIT(NULL, 0).tp_name =
481480
"stringdtype.StringDType",
482481
.tp_basicsize = sizeof(StringDTypeObject),
@@ -485,7 +484,7 @@ PyArray_DTypeMeta StringDType = {
485484
.tp_repr = (reprfunc)stringdtype_repr,
486485
.tp_str = (reprfunc)stringdtype_repr,
487486
.tp_methods = StringDType_methods,
488-
}},
487+
}}},
489488
/* rest, filled in during DTypeMeta initialization */
490489
};
491490

@@ -499,8 +498,8 @@ PyArray_DTypeMeta StringDType = {
499498
* of copy/paste
500499
*/
501500

502-
PyArray_DTypeMeta PandasStringDType = {
503-
{{
501+
StringDType_type PandasStringDType = {
502+
{{{
504503
PyVarObject_HEAD_INIT(NULL, 0).tp_name =
505504
"stringdtype.PandasStringDType",
506505
.tp_basicsize = sizeof(StringDTypeObject),
@@ -509,7 +508,7 @@ PyArray_DTypeMeta PandasStringDType = {
509508
.tp_repr = (reprfunc)stringdtype_repr,
510509
.tp_str = (reprfunc)stringdtype_repr,
511510
.tp_methods = StringDType_methods,
512-
}},
511+
}}},
513512
/* rest, filled in during DTypeMeta initialization */
514513
};
515514

@@ -528,24 +527,29 @@ init_string_dtype(void)
528527
((PyObject *)&StringDType)->ob_type = &PyArrayDTypeMeta_Type;
529528
((PyTypeObject *)&StringDType)->tp_base = &PyArrayDescr_Type;
530529
((PyTypeObject *)&StringDType)->tp_dict = PyDict_New();
530+
// set as C attribute for fast access
531+
Py_INCREF(NA_OBJ);
532+
StringDType.na_object = NA_OBJ;
533+
// also add to tp_dict for easy python-level access
531534
PyDict_SetItemString(((PyTypeObject *)&StringDType)->tp_dict, "na_object",
532535
NA_OBJ);
533536
if (PyType_Ready((PyTypeObject *)&StringDType) < 0) {
534537
return -1;
535538
}
536539

537-
if (PyArrayInitDTypeMeta_FromSpec(&StringDType, &StringDType_DTypeSpec) <
538-
0) {
540+
if (PyArrayInitDTypeMeta_FromSpec((PyArray_DTypeMeta *)&StringDType,
541+
&StringDType_DTypeSpec) < 0) {
539542
return -1;
540543
}
541544

542-
PyArray_Descr *singleton = PyArray_GetDefaultDescr(&StringDType);
545+
PyArray_Descr *singleton =
546+
PyArray_GetDefaultDescr((PyArray_DTypeMeta *)&StringDType);
543547

544548
if (singleton == NULL) {
545549
return -1;
546550
}
547551

548-
StringDType.singleton = singleton;
552+
StringDType.base.singleton = singleton;
549553

550554
/* and once again for PandasStringDType */
551555

@@ -569,24 +573,30 @@ init_string_dtype(void)
569573
((PyObject *)&PandasStringDType)->ob_type = &PyArrayDTypeMeta_Type;
570574
((PyTypeObject *)&PandasStringDType)->tp_base = &PyArrayDescr_Type;
571575
((PyTypeObject *)&PandasStringDType)->tp_dict = PyDict_New();
576+
// C attribute for fast access
577+
Py_INCREF(pandas_na_obj);
578+
PandasStringDType.na_object = pandas_na_obj;
579+
// Add to tp_dict too for easy python-level access
572580
PyDict_SetItemString(((PyTypeObject *)&PandasStringDType)->tp_dict,
573581
"na_object", pandas_na_obj);
574582
if (PyType_Ready((PyTypeObject *)&PandasStringDType) < 0) {
575583
return -1;
576584
}
577585

578-
if (PyArrayInitDTypeMeta_FromSpec(&PandasStringDType,
579-
&PandasStringDType_DTypeSpec) < 0) {
586+
if (PyArrayInitDTypeMeta_FromSpec(
587+
(PyArray_DTypeMeta *)&PandasStringDType,
588+
&PandasStringDType_DTypeSpec) < 0) {
580589
return -1;
581590
}
582591

583-
singleton = PyArray_GetDefaultDescr(&PandasStringDType);
592+
singleton = PyArray_GetDefaultDescr(
593+
(PyArray_DTypeMeta *)&PandasStringDType);
584594

585595
if (singleton == NULL) {
586596
return -1;
587597
}
588598

589-
PandasStringDType.singleton = singleton;
599+
PandasStringDType.base.singleton = singleton;
590600
}
591601
else {
592602
PandasStringDType = StringDType;

stringdtype/stringdtype/src/dtype.h

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,13 @@ typedef struct {
1818
PyArray_Descr base;
1919
} StringDTypeObject;
2020

21-
extern PyArray_DTypeMeta StringDType;
22-
extern PyArray_DTypeMeta PandasStringDType;
21+
/*
 * Struct type of the StringDType and PandasStringDType class objects
 * declared below. It embeds PyArray_DTypeMeta as its first member and
 * appends the NA object after it.
 */
typedef struct {
22+
PyArray_DTypeMeta base;
23+
// NA object for this dtype class; stored here so C code can read it
// directly instead of looking it up in tp_dict.
PyObject *na_object;
24+
} StringDType_type;
25+
26+
extern StringDType_type StringDType;
27+
extern StringDType_type PandasStringDType;
2328
extern PyTypeObject *StringScalar_Type;
2429
extern PyTypeObject *PandasStringScalar_Type;
2530
extern PyObject *NA_OBJ;

stringdtype/stringdtype/src/main.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ _memory_usage(PyObject *NPY_UNUSED(self), PyObject *obj)
2323
PyArray_Descr *descr = PyArray_DESCR(arr);
2424
PyArray_DTypeMeta *dtype = NPY_DTYPE(descr);
2525

26-
if (dtype != &StringDType) {
26+
if (dtype != (PyArray_DTypeMeta *)&StringDType) {
2727
PyErr_SetString(PyExc_TypeError,
2828
"can only be called with a StringDType array");
2929
return NULL;

stringdtype/stringdtype/src/umath.c

Lines changed: 36 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -13,37 +13,6 @@
1313
#include "string.h"
1414
#include "umath.h"
1515

16-
static NPY_CASTING
17-
binary_resolve_descriptors(struct PyArrayMethodObject_tag *NPY_UNUSED(method),
18-
PyArray_DTypeMeta *dtypes[],
19-
PyArray_Descr *given_descrs[],
20-
PyArray_Descr *loop_descrs[],
21-
npy_intp *NPY_UNUSED(view_offset))
22-
{
23-
// technically incorrect to cast to StringDType if we have a
24-
// PandasStringDType but they have the same layout so this should be fine.
25-
PyObject *na_obj1 = PyDict_GetItemString(
26-
((PyTypeObject *)dtypes[0])->tp_dict, "na_object");
27-
PyObject *na_obj2 = PyDict_GetItemString(
28-
((PyTypeObject *)dtypes[1])->tp_dict, "na_object");
29-
30-
if (na_obj1 != na_obj2) {
31-
PyErr_SetString(PyExc_TypeError,
32-
"Can only do binary operations with identical "
33-
"StringDType instances.");
34-
return (NPY_CASTING)-1;
35-
}
36-
37-
Py_INCREF(given_descrs[0]);
38-
loop_descrs[0] = given_descrs[0];
39-
Py_INCREF(given_descrs[1]);
40-
loop_descrs[1] = given_descrs[1];
41-
Py_INCREF(given_descrs[1]);
42-
loop_descrs[2] = given_descrs[1];
43-
44-
return NPY_NO_CASTING;
45-
}
46-
4716
static int
4817
add_strided_loop(PyArrayMethod_Context *NPY_UNUSED(context),
4918
char *const data[], npy_intp const dimensions[],
@@ -343,10 +312,16 @@ init_ufunc(PyObject *numpy, const char *ufunc_name, PyArray_DTypeMeta **dtypes,
343312
.dtypes = dtypes,
344313
};
345314

346-
PyType_Slot slots[] = {{NPY_METH_resolve_descriptors, resolve_func},
347-
{NPY_METH_strided_loop, loop_func},
348-
{0, NULL}};
349-
spec.slots = slots;
315+
if (resolve_func == NULL) {
316+
PyType_Slot slots[] = {{NPY_METH_strided_loop, loop_func}, {0, NULL}};
317+
spec.slots = slots;
318+
}
319+
else {
320+
PyType_Slot slots[] = {{NPY_METH_resolve_descriptors, resolve_func},
321+
{NPY_METH_strided_loop, loop_func},
322+
{0, NULL}};
323+
spec.slots = slots;
324+
}
350325

351326
if (PyUFunc_AddLoopFromSpec(ufunc, &spec) < 0) {
352327
Py_DECREF(ufunc);
@@ -405,7 +380,8 @@ init_ufuncs(void)
405380
return -1;
406381
}
407382

408-
PyArray_DTypeMeta *eq_dtypes[] = {&StringDType, &StringDType,
383+
PyArray_DTypeMeta *eq_dtypes[] = {(PyArray_DTypeMeta *)&StringDType,
384+
(PyArray_DTypeMeta *)&StringDType,
409385
&PyArray_BoolDType};
410386

411387
if (init_ufunc(numpy, "equal", eq_dtypes,
@@ -416,8 +392,10 @@ init_ufuncs(void)
416392
}
417393

418394
PyArray_DTypeMeta *promoter_dtypes[2][3] = {
419-
{&StringDType, &PyArray_UnicodeDType, &PyArray_BoolDType},
420-
{&PyArray_UnicodeDType, &StringDType, &PyArray_BoolDType},
395+
{(PyArray_DTypeMeta *)&StringDType, &PyArray_UnicodeDType,
396+
&PyArray_BoolDType},
397+
{&PyArray_UnicodeDType, (PyArray_DTypeMeta *)&StringDType,
398+
&PyArray_BoolDType},
421399
};
422400

423401
if (add_promoter(numpy, "equal", promoter_dtypes[0]) < 0) {
@@ -428,7 +406,8 @@ init_ufuncs(void)
428406
goto error;
429407
}
430408

431-
PyArray_DTypeMeta *isnan_dtypes[] = {&StringDType, &PyArray_BoolDType};
409+
PyArray_DTypeMeta *isnan_dtypes[] = {(PyArray_DTypeMeta *)&StringDType,
410+
&PyArray_BoolDType};
432411

433412
if (init_ufunc(numpy, "isnan", isnan_dtypes,
434413
&string_isnan_resolve_descriptors,
@@ -437,24 +416,29 @@ init_ufuncs(void)
437416
goto error;
438417
}
439418

440-
PyArray_DTypeMeta *minmax_dtypes[] = {&StringDType, &StringDType,
441-
&StringDType};
442-
if (init_ufunc(numpy, "maximum", minmax_dtypes,
443-
&binary_resolve_descriptors, &maximum_strided_loop,
444-
"string_maximum", 2, 1, NPY_NO_CASTING, 0) < 0) {
419+
PyArray_DTypeMeta *minmax_dtypes[] = {
420+
(PyArray_DTypeMeta *)&StringDType,
421+
(PyArray_DTypeMeta *)&StringDType,
422+
(PyArray_DTypeMeta *)&StringDType,
423+
};
424+
if (init_ufunc(numpy, "maximum", minmax_dtypes, NULL,
425+
&maximum_strided_loop, "string_maximum", 2, 1,
426+
NPY_NO_CASTING, 0) < 0) {
445427
goto error;
446428
}
447-
if (init_ufunc(numpy, "minimum", minmax_dtypes,
448-
&binary_resolve_descriptors, &minimum_strided_loop,
449-
"string_minimum", 2, 1, NPY_NO_CASTING, 0) < 0) {
429+
if (init_ufunc(numpy, "minimum", minmax_dtypes, NULL,
430+
&minimum_strided_loop, "string_minimum", 2, 1,
431+
NPY_NO_CASTING, 0) < 0) {
450432
goto error;
451433
}
452434

453-
PyArray_DTypeMeta *add_types[] = {&StringDType, &StringDType,
454-
&StringDType};
455-
if (init_ufunc(numpy, "add", add_types, &binary_resolve_descriptors,
456-
&add_strided_loop, "string_add", 2, 1, NPY_NO_CASTING,
457-
0) < 0) {
435+
PyArray_DTypeMeta *add_dtypes[] = {
436+
(PyArray_DTypeMeta *)&StringDType,
437+
(PyArray_DTypeMeta *)&StringDType,
438+
(PyArray_DTypeMeta *)&StringDType,
439+
};
440+
if (init_ufunc(numpy, "add", add_dtypes, NULL, &add_strided_loop,
441+
"string_add", 2, 1, NPY_NO_CASTING, 0) < 0) {
458442
goto error;
459443
}
460444

0 commit comments

Comments
 (0)